swr_draw.cpp revision 7ec681f3
/****************************************************************************
 * Copyright (C) 2015 Intel Corporation.   All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 ***************************************************************************/
23
24#include "swr_screen.h"
25#include "swr_context.h"
26#include "swr_resource.h"
27#include "swr_fence.h"
28#include "swr_query.h"
29#include "jit_api.h"
30
31#include "util/u_draw.h"
32#include "util/u_prim.h"
33
34#include <algorithm>
35#include <iostream>
36/*
37 * Draw vertex arrays, with optional indexing, optional instancing.
38 */
39static void
40swr_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info,
41             unsigned drawid_offset,
42             const struct pipe_draw_indirect_info *indirect,
43             const struct pipe_draw_start_count_bias *draws,
44             unsigned num_draws)
45{
46   if (num_draws > 1) {
47      struct pipe_draw_info tmp_info = *info;
48      unsigned drawid = drawid_offset;
49
50      for (unsigned i = 0; i < num_draws; i++) {
51         swr_draw_vbo(pipe, &tmp_info, drawid, indirect, &draws[i], 1);
52         if (tmp_info.increment_draw_id)
53            drawid++;
54      }
55      return;
56   }
57
58   if (!indirect && (!draws[0].count || !info->instance_count))
59      return;
60
61   struct swr_context *ctx = swr_context(pipe);
62
63   if (!indirect &&
64       !info->primitive_restart &&
65       !u_trim_pipe_prim((enum pipe_prim_type)info->mode, (unsigned*)&draws[0].count))
66      return;
67
68   if (!swr_check_render_cond(pipe))
69      return;
70
71   if (indirect && indirect->buffer) {
72      util_draw_indirect(pipe, info, indirect);
73      return;
74   }
75
76   /* If indexed draw, force vertex validation since index buffer comes
77    * from draw info. */
78   if (info->index_size)
79      ctx->dirty |= SWR_NEW_VERTEX;
80
81   /* Update derived state, pass draw info to update function. */
82   swr_update_derived(pipe, info, draws);
83
84   swr_update_draw_context(ctx);
85
86   struct pipe_draw_info resolved_info;
87   struct pipe_draw_start_count_bias resolved_draw;
88   /* DrawTransformFeedback */
89   if (indirect && indirect->count_from_stream_output) {
90      // trick copied from softpipe to modify const struct *info
91      memcpy(&resolved_info, (void*)info, sizeof(struct pipe_draw_info));
92      resolved_draw.start = draws[0].start;
93      resolved_draw.count = ctx->so_primCounter * ctx->patch_vertices;
94      resolved_info.max_index = resolved_draw.count - 1;
95      info = &resolved_info;
96      indirect = NULL;
97      draws = &resolved_draw;
98   }
99
100   if (ctx->vs->pipe.stream_output.num_outputs) {
101      if (!ctx->vs->soFunc[info->mode]) {
102         STREAMOUT_COMPILE_STATE state = {0};
103         struct pipe_stream_output_info *so = &ctx->vs->pipe.stream_output;
104
105         state.numVertsPerPrim = u_vertices_per_prim((enum pipe_prim_type)info->mode);
106
107         uint32_t offsets[MAX_SO_STREAMS] = {0};
108         uint32_t num = 0;
109
110         for (uint32_t i = 0; i < so->num_outputs; i++) {
111            assert(so->output[i].stream == 0); // @todo
112            uint32_t output_buffer = so->output[i].output_buffer;
113            if (so->output[i].dst_offset != offsets[output_buffer]) {
114               // hole - need to fill
115               state.stream.decl[num].bufferIndex = output_buffer;
116               state.stream.decl[num].hole = true;
117               state.stream.decl[num].componentMask =
118                  (1 << (so->output[i].dst_offset - offsets[output_buffer]))
119                  - 1;
120               num++;
121               offsets[output_buffer] = so->output[i].dst_offset;
122            }
123
124            unsigned attrib_slot = so->output[i].register_index;
125            attrib_slot = swr_so_adjust_attrib(attrib_slot, ctx->vs);
126
127            state.stream.decl[num].bufferIndex = output_buffer;
128            state.stream.decl[num].attribSlot = attrib_slot;
129            state.stream.decl[num].componentMask =
130               ((1 << so->output[i].num_components) - 1)
131               << so->output[i].start_component;
132            state.stream.decl[num].hole = false;
133            num++;
134
135            offsets[output_buffer] += so->output[i].num_components;
136         }
137
138         state.stream.numDecls = num;
139
140         HANDLE hJitMgr = swr_screen(pipe->screen)->hJitMgr;
141         ctx->vs->soFunc[info->mode] = JitCompileStreamout(hJitMgr, state);
142         debug_printf("so shader    %p\n", ctx->vs->soFunc[info->mode]);
143         assert(ctx->vs->soFunc[info->mode] && "Error: SoShader = NULL");
144      }
145
146      ctx->api.pfnSwrSetSoFunc(ctx->swrContext, ctx->vs->soFunc[info->mode], 0);
147   }
148
149   struct swr_vertex_element_state *velems = ctx->velems;
150   if (info->primitive_restart)
151      velems->fsState.cutIndex = info->restart_index;
152   else
153      velems->fsState.cutIndex = 0;
154   velems->fsState.bEnableCutIndex = info->primitive_restart;
155   velems->fsState.bPartialVertexBuffer = (info->index_bounds_valid && info->min_index > 0);
156
157   swr_jit_fetch_key key;
158   swr_generate_fetch_key(key, velems);
159   auto search = velems->map.find(key);
160   if (search != velems->map.end()) {
161      velems->fsFunc = search->second;
162   } else {
163      HANDLE hJitMgr = swr_screen(ctx->pipe.screen)->hJitMgr;
164      velems->fsFunc = JitCompileFetch(hJitMgr, velems->fsState);
165
166      debug_printf("fetch shader %p\n", velems->fsFunc);
167      assert(velems->fsFunc && "Error: FetchShader = NULL");
168
169      velems->map.insert(std::make_pair(key, velems->fsFunc));
170   }
171
172   ctx->api.pfnSwrSetFetchFunc(ctx->swrContext, velems->fsFunc);
173
174   /* Set up frontend state
175    * XXX setup provokingVertex & topologyProvokingVertex */
176   SWR_FRONTEND_STATE feState = {0};
177
178   // feState.vsVertexSize seeds the PA size that is used as an interface
179   // between all the shader stages, so it has to be large enough to
180   // incorporate all interfaces between stages
181
182   // max of frontend shaders num_outputs
183   feState.vsVertexSize = ctx->vs->info.base.num_outputs;
184   if (ctx->gs) {
185      feState.vsVertexSize = std::max(feState.vsVertexSize, (uint32_t)ctx->gs->info.base.num_outputs);
186   }
187   if (ctx->tcs) {
188      feState.vsVertexSize = std::max(feState.vsVertexSize, (uint32_t)ctx->tcs->info.base.num_outputs);
189   }
190   if (ctx->tes) {
191      feState.vsVertexSize = std::max(feState.vsVertexSize, (uint32_t)ctx->tes->info.base.num_outputs);
192   }
193
194
195   if (ctx->vs->info.base.num_outputs) {
196      // gs does not adjust for position in SGV slot at input from vs
197      if (!ctx->gs && !ctx->tcs && !ctx->tes)
198         feState.vsVertexSize--;
199   }
200
201   // other (non-SGV) slots start at VERTEX_ATTRIB_START_SLOT
202   feState.vsVertexSize += VERTEX_ATTRIB_START_SLOT;
203
204   // The PA in the clipper does not handle BE vertex sizes
205   // different from FE. Increase vertexsize only for the cases that needed it
206
207   // primid needs a slot
208   if (ctx->fs->info.base.uses_primid)
209      feState.vsVertexSize++;
210   // sprite coord enable
211   if (ctx->rasterizer->sprite_coord_enable)
212      feState.vsVertexSize++;
213
214   if (ctx->rasterizer->flatshade_first) {
215      feState.provokingVertex = {1, 0, 0};
216   } else {
217      feState.provokingVertex = {2, 1, 2};
218   }
219
220   enum pipe_prim_type topology;
221   if (ctx->gs)
222      topology = (pipe_prim_type)ctx->gs->info.base.properties[TGSI_PROPERTY_GS_OUTPUT_PRIM];
223   else
224      topology = (enum pipe_prim_type)info->mode;
225
226   switch (topology) {
227   case PIPE_PRIM_TRIANGLE_FAN:
228      feState.topologyProvokingVertex = feState.provokingVertex.triFan;
229      break;
230   case PIPE_PRIM_TRIANGLE_STRIP:
231   case PIPE_PRIM_TRIANGLES:
232      feState.topologyProvokingVertex = feState.provokingVertex.triStripList;
233      break;
234   case PIPE_PRIM_QUAD_STRIP:
235   case PIPE_PRIM_QUADS:
236      if (ctx->rasterizer->flatshade_first)
237         feState.topologyProvokingVertex = 0;
238      else
239         feState.topologyProvokingVertex = 3;
240      break;
241   case PIPE_PRIM_LINES:
242   case PIPE_PRIM_LINE_LOOP:
243   case PIPE_PRIM_LINE_STRIP:
244      feState.topologyProvokingVertex = feState.provokingVertex.lineStripList;
245      break;
246   default:
247      feState.topologyProvokingVertex = 0;
248   }
249
250   feState.bEnableCutIndex = info->primitive_restart;
251   ctx->api.pfnSwrSetFrontendState(ctx->swrContext, &feState);
252
253   if (info->index_size)
254      ctx->api.pfnSwrDrawIndexedInstanced(ctx->swrContext,
255                                          swr_convert_prim_topology(info->mode, ctx->patch_vertices),
256                                          draws[0].count,
257                                          info->instance_count,
258                                          draws[0].start,
259                                          draws->index_bias,
260                                          info->start_instance);
261   else
262      ctx->api.pfnSwrDrawInstanced(ctx->swrContext,
263                                   swr_convert_prim_topology(info->mode, ctx->patch_vertices),
264                                   draws[0].count,
265                                   info->instance_count,
266                                   draws[0].start,
267                                   info->start_instance);
268
269   /* On client-buffer draw, we used client buffer directly, without
270    * copy.  Block until draw is finished.
271    * VMD is an example application that benefits from this. */
272   if (ctx->dirty & SWR_BLOCK_CLIENT_DRAW) {
273      struct swr_screen *screen = swr_screen(pipe->screen);
274      swr_fence_submit(ctx, screen->flush_fence);
275      swr_fence_finish(pipe->screen, NULL, screen->flush_fence, 0);
276   }
277}
278
279
280static void
281swr_flush(struct pipe_context *pipe,
282          struct pipe_fence_handle **fence,
283          unsigned flags)
284{
285   struct swr_context *ctx = swr_context(pipe);
286   struct swr_screen *screen = swr_screen(pipe->screen);
287
288   for (int i=0; i < ctx->framebuffer.nr_cbufs; i++) {
289      struct pipe_surface *cb = ctx->framebuffer.cbufs[i];
290      if (cb) {
291         swr_store_dirty_resource(pipe, cb->texture, SWR_TILE_RESOLVED);
292      }
293   }
294   if (ctx->framebuffer.zsbuf) {
295      swr_store_dirty_resource(pipe, ctx->framebuffer.zsbuf->texture,
296                               SWR_TILE_RESOLVED);
297   }
298
299   if (fence)
300      swr_fence_reference(pipe->screen, fence, screen->flush_fence);
301}
302
303void
304swr_finish(struct pipe_context *pipe)
305{
306   struct pipe_fence_handle *fence = nullptr;
307
308   swr_flush(pipe, &fence, 0);
309   swr_fence_finish(pipe->screen, NULL, fence, 0);
310   swr_fence_reference(pipe->screen, &fence, NULL);
311}
312
313/*
314 * Invalidate tiles so they can be reloaded back when needed
315 */
316void
317swr_invalidate_render_target(struct pipe_context *pipe,
318                             uint32_t attachment,
319                             uint16_t width, uint16_t height)
320{
321   struct swr_context *ctx = swr_context(pipe);
322
323   /* grab the rect from the passed in arguments */
324   swr_update_draw_context(ctx);
325   SWR_RECT full_rect =
326      {0, 0, (int32_t)width, (int32_t)height};
327   ctx->api.pfnSwrInvalidateTiles(ctx->swrContext,
328                                  1 << attachment,
329                                  full_rect);
330}
331
332
333/*
334 * Store SWR HotTiles back to renderTarget surface.
335 */
336void
337swr_store_render_target(struct pipe_context *pipe,
338                        uint32_t attachment,
339                        enum SWR_TILE_STATE post_tile_state)
340{
341   struct swr_context *ctx = swr_context(pipe);
342   struct swr_draw_context *pDC = &ctx->swrDC;
343   struct SWR_SURFACE_STATE *renderTarget = &pDC->renderTargets[attachment];
344
345   /* Only proceed if there's a valid surface to store to */
346   if (renderTarget->xpBaseAddress) {
347      swr_update_draw_context(ctx);
348      SWR_RECT full_rect =
349         {0, 0,
350          (int32_t)u_minify(renderTarget->width, renderTarget->lod),
351          (int32_t)u_minify(renderTarget->height, renderTarget->lod)};
352      ctx->api.pfnSwrStoreTiles(ctx->swrContext,
353                                1 << attachment,
354                                post_tile_state,
355                                full_rect);
356   }
357}
358
359void
360swr_store_dirty_resource(struct pipe_context *pipe,
361                         struct pipe_resource *resource,
362                         enum SWR_TILE_STATE post_tile_state)
363{
364   /* Only store resource if it has been written to */
365   if (swr_resource(resource)->status & SWR_RESOURCE_WRITE) {
366      struct swr_context *ctx = swr_context(pipe);
367      struct swr_screen *screen = swr_screen(pipe->screen);
368      struct swr_resource *spr = swr_resource(resource);
369
370      swr_draw_context *pDC = &ctx->swrDC;
371      SWR_SURFACE_STATE *renderTargets = pDC->renderTargets;
372      for (uint32_t i = 0; i < SWR_NUM_ATTACHMENTS; i++)
373         if (renderTargets[i].xpBaseAddress == spr->swr.xpBaseAddress ||
374             (spr->secondary.xpBaseAddress &&
375              renderTargets[i].xpBaseAddress == spr->secondary.xpBaseAddress)) {
376            swr_store_render_target(pipe, i, post_tile_state);
377
378            /* Mesa thinks depth/stencil are fused, so we'll never get an
379             * explicit resource for stencil.  So, if checking depth, then
380             * also check for stencil. */
381            if (spr->has_stencil && (i == SWR_ATTACHMENT_DEPTH)) {
382               swr_store_render_target(
383                  pipe, SWR_ATTACHMENT_STENCIL, post_tile_state);
384            }
385
386            /* This fence signals StoreTiles completion */
387            swr_fence_submit(ctx, screen->flush_fence);
388
389            break;
390         }
391   }
392}
393
394void
395swr_draw_init(struct pipe_context *pipe)
396{
397   pipe->draw_vbo = swr_draw_vbo;
398   pipe->flush = swr_flush;
399}
400