swr_draw.cpp revision b8e80941
1/****************************************************************************
2 * Copyright (C) 2015 Intel Corporation.   All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 ***************************************************************************/
23
24#include "swr_screen.h"
25#include "swr_context.h"
26#include "swr_resource.h"
27#include "swr_fence.h"
28#include "swr_query.h"
29#include "jit_api.h"
30
31#include "util/u_draw.h"
32#include "util/u_prim.h"
33
34/*
35 * Draw vertex arrays, with optional indexing, optional instancing.
36 */
37static void
38swr_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
39{
40   struct swr_context *ctx = swr_context(pipe);
41
42   if (!info->count_from_stream_output && !info->indirect &&
43       !info->primitive_restart &&
44       !u_trim_pipe_prim(info->mode, (unsigned*)&info->count))
45      return;
46
47   if (!swr_check_render_cond(pipe))
48      return;
49
50   if (info->indirect) {
51      util_draw_indirect(pipe, info);
52      return;
53   }
54
55   /* If indexed draw, force vertex validation since index buffer comes
56    * from draw info. */
57   if (info->index_size)
58      ctx->dirty |= SWR_NEW_VERTEX;
59
60   /* Update derived state, pass draw info to update function. */
61   swr_update_derived(pipe, info);
62
63   swr_update_draw_context(ctx);
64
65   if (ctx->vs->pipe.stream_output.num_outputs) {
66      if (!ctx->vs->soFunc[info->mode]) {
67         STREAMOUT_COMPILE_STATE state = {0};
68         struct pipe_stream_output_info *so = &ctx->vs->pipe.stream_output;
69
70         state.numVertsPerPrim = u_vertices_per_prim(info->mode);
71
72         uint32_t offsets[MAX_SO_STREAMS] = {0};
73         uint32_t num = 0;
74
75         for (uint32_t i = 0; i < so->num_outputs; i++) {
76            assert(so->output[i].stream == 0); // @todo
77            uint32_t output_buffer = so->output[i].output_buffer;
78            if (so->output[i].dst_offset != offsets[output_buffer]) {
79               // hole - need to fill
80               state.stream.decl[num].bufferIndex = output_buffer;
81               state.stream.decl[num].hole = true;
82               state.stream.decl[num].componentMask =
83                  (1 << (so->output[i].dst_offset - offsets[output_buffer]))
84                  - 1;
85               num++;
86               offsets[output_buffer] = so->output[i].dst_offset;
87            }
88
89            unsigned attrib_slot = so->output[i].register_index;
90            attrib_slot = swr_so_adjust_attrib(attrib_slot, ctx->vs);
91
92            state.stream.decl[num].bufferIndex = output_buffer;
93            state.stream.decl[num].attribSlot = attrib_slot;
94            state.stream.decl[num].componentMask =
95               ((1 << so->output[i].num_components) - 1)
96               << so->output[i].start_component;
97            state.stream.decl[num].hole = false;
98            num++;
99
100            offsets[output_buffer] += so->output[i].num_components;
101         }
102
103         state.stream.numDecls = num;
104
105         HANDLE hJitMgr = swr_screen(pipe->screen)->hJitMgr;
106         ctx->vs->soFunc[info->mode] = JitCompileStreamout(hJitMgr, state);
107         debug_printf("so shader    %p\n", ctx->vs->soFunc[info->mode]);
108         assert(ctx->vs->soFunc[info->mode] && "Error: SoShader = NULL");
109      }
110
111      ctx->api.pfnSwrSetSoFunc(ctx->swrContext, ctx->vs->soFunc[info->mode], 0);
112   }
113
114   struct swr_vertex_element_state *velems = ctx->velems;
115   if (info->primitive_restart)
116      velems->fsState.cutIndex = info->restart_index;
117   else
118      velems->fsState.cutIndex = 0;
119   velems->fsState.bEnableCutIndex = info->primitive_restart;
120   velems->fsState.bPartialVertexBuffer = (info->min_index > 0);
121
122   swr_jit_fetch_key key;
123   swr_generate_fetch_key(key, velems);
124   auto search = velems->map.find(key);
125   if (search != velems->map.end()) {
126      velems->fsFunc = search->second;
127   } else {
128      HANDLE hJitMgr = swr_screen(ctx->pipe.screen)->hJitMgr;
129      velems->fsFunc = JitCompileFetch(hJitMgr, velems->fsState);
130
131      debug_printf("fetch shader %p\n", velems->fsFunc);
132      assert(velems->fsFunc && "Error: FetchShader = NULL");
133
134      velems->map.insert(std::make_pair(key, velems->fsFunc));
135   }
136
137   ctx->api.pfnSwrSetFetchFunc(ctx->swrContext, velems->fsFunc);
138
139   /* Set up frontend state
140    * XXX setup provokingVertex & topologyProvokingVertex */
141   SWR_FRONTEND_STATE feState = {0};
142
143   // feState.vsVertexSize seeds the PA size that is used as an interface
144   // between all the shader stages, so it has to be large enough to
145   // incorporate all interfaces between stages
146
147   // max of gs and vs num_outputs
148   feState.vsVertexSize = ctx->vs->info.base.num_outputs;
149   if (ctx->gs &&
150       ctx->gs->info.base.num_outputs > feState.vsVertexSize) {
151      feState.vsVertexSize = ctx->gs->info.base.num_outputs;
152   }
153
154   if (ctx->vs->info.base.num_outputs) {
155      // gs does not adjust for position in SGV slot at input from vs
156      if (!ctx->gs)
157         feState.vsVertexSize--;
158   }
159
160   // other (non-SGV) slots start at VERTEX_ATTRIB_START_SLOT
161   feState.vsVertexSize += VERTEX_ATTRIB_START_SLOT;
162
163   // The PA in the clipper does not handle BE vertex sizes
164   // different from FE. Increase vertexsize only for the cases that needed it
165
166   // primid needs a slot
167   if (ctx->fs->info.base.uses_primid)
168      feState.vsVertexSize++;
169   // sprite coord enable
170   if (ctx->rasterizer->sprite_coord_enable)
171      feState.vsVertexSize++;
172
173
174   if (ctx->rasterizer->flatshade_first) {
175      feState.provokingVertex = {1, 0, 0};
176   } else {
177      feState.provokingVertex = {2, 1, 2};
178   }
179
180   enum pipe_prim_type topology;
181   if (ctx->gs)
182      topology = (pipe_prim_type)ctx->gs->info.base.properties[TGSI_PROPERTY_GS_OUTPUT_PRIM];
183   else
184      topology = info->mode;
185
186   switch (topology) {
187   case PIPE_PRIM_TRIANGLE_FAN:
188      feState.topologyProvokingVertex = feState.provokingVertex.triFan;
189      break;
190   case PIPE_PRIM_TRIANGLE_STRIP:
191   case PIPE_PRIM_TRIANGLES:
192      feState.topologyProvokingVertex = feState.provokingVertex.triStripList;
193      break;
194   case PIPE_PRIM_QUAD_STRIP:
195   case PIPE_PRIM_QUADS:
196      if (ctx->rasterizer->flatshade_first)
197         feState.topologyProvokingVertex = 0;
198      else
199         feState.topologyProvokingVertex = 3;
200      break;
201   case PIPE_PRIM_LINES:
202   case PIPE_PRIM_LINE_LOOP:
203   case PIPE_PRIM_LINE_STRIP:
204      feState.topologyProvokingVertex = feState.provokingVertex.lineStripList;
205      break;
206   default:
207      feState.topologyProvokingVertex = 0;
208   }
209
210   feState.bEnableCutIndex = info->primitive_restart;
211   ctx->api.pfnSwrSetFrontendState(ctx->swrContext, &feState);
212
213   if (info->index_size)
214      ctx->api.pfnSwrDrawIndexedInstanced(ctx->swrContext,
215                                          swr_convert_prim_topology(info->mode),
216                                          info->count,
217                                          info->instance_count,
218                                          info->start,
219                                          info->index_bias,
220                                          info->start_instance);
221   else
222      ctx->api.pfnSwrDrawInstanced(ctx->swrContext,
223                                   swr_convert_prim_topology(info->mode),
224                                   info->count,
225                                   info->instance_count,
226                                   info->start,
227                                   info->start_instance);
228
229   /* On large client-buffer draw, we used client buffer directly, without
230    * copy.  Block until draw is finished.
231    * VMD is an example application that benefits from this. */
232   if (ctx->dirty & SWR_LARGE_CLIENT_DRAW) {
233      struct swr_screen *screen = swr_screen(pipe->screen);
234      swr_fence_submit(ctx, screen->flush_fence);
235      swr_fence_finish(pipe->screen, NULL, screen->flush_fence, 0);
236   }
237}
238
239
240static void
241swr_flush(struct pipe_context *pipe,
242          struct pipe_fence_handle **fence,
243          unsigned flags)
244{
245   struct swr_context *ctx = swr_context(pipe);
246   struct swr_screen *screen = swr_screen(pipe->screen);
247
248   for (int i=0; i < ctx->framebuffer.nr_cbufs; i++) {
249      struct pipe_surface *cb = ctx->framebuffer.cbufs[i];
250      if (cb) {
251         swr_store_dirty_resource(pipe, cb->texture, SWR_TILE_RESOLVED);
252      }
253   }
254   if (ctx->framebuffer.zsbuf) {
255      swr_store_dirty_resource(pipe, ctx->framebuffer.zsbuf->texture,
256                               SWR_TILE_RESOLVED);
257   }
258
259   if (fence)
260      swr_fence_reference(pipe->screen, fence, screen->flush_fence);
261}
262
263void
264swr_finish(struct pipe_context *pipe)
265{
266   struct pipe_fence_handle *fence = nullptr;
267
268   swr_flush(pipe, &fence, 0);
269   swr_fence_finish(pipe->screen, NULL, fence, 0);
270   swr_fence_reference(pipe->screen, &fence, NULL);
271}
272
273/*
274 * Invalidate tiles so they can be reloaded back when needed
275 */
276void
277swr_invalidate_render_target(struct pipe_context *pipe,
278                             uint32_t attachment,
279                             uint16_t width, uint16_t height)
280{
281   struct swr_context *ctx = swr_context(pipe);
282
283   /* grab the rect from the passed in arguments */
284   swr_update_draw_context(ctx);
285   SWR_RECT full_rect =
286      {0, 0, (int32_t)width, (int32_t)height};
287   ctx->api.pfnSwrInvalidateTiles(ctx->swrContext,
288                                  1 << attachment,
289                                  full_rect);
290}
291
292
293/*
294 * Store SWR HotTiles back to renderTarget surface.
295 */
296void
297swr_store_render_target(struct pipe_context *pipe,
298                        uint32_t attachment,
299                        enum SWR_TILE_STATE post_tile_state)
300{
301   struct swr_context *ctx = swr_context(pipe);
302   struct swr_draw_context *pDC = &ctx->swrDC;
303   struct SWR_SURFACE_STATE *renderTarget = &pDC->renderTargets[attachment];
304
305   /* Only proceed if there's a valid surface to store to */
306   if (renderTarget->xpBaseAddress) {
307      swr_update_draw_context(ctx);
308      SWR_RECT full_rect =
309         {0, 0,
310          (int32_t)u_minify(renderTarget->width, renderTarget->lod),
311          (int32_t)u_minify(renderTarget->height, renderTarget->lod)};
312      ctx->api.pfnSwrStoreTiles(ctx->swrContext,
313                                1 << attachment,
314                                post_tile_state,
315                                full_rect);
316   }
317}
318
319void
320swr_store_dirty_resource(struct pipe_context *pipe,
321                         struct pipe_resource *resource,
322                         enum SWR_TILE_STATE post_tile_state)
323{
324   /* Only store resource if it has been written to */
325   if (swr_resource(resource)->status & SWR_RESOURCE_WRITE) {
326      struct swr_context *ctx = swr_context(pipe);
327      struct swr_screen *screen = swr_screen(pipe->screen);
328      struct swr_resource *spr = swr_resource(resource);
329
330      swr_draw_context *pDC = &ctx->swrDC;
331      SWR_SURFACE_STATE *renderTargets = pDC->renderTargets;
332      for (uint32_t i = 0; i < SWR_NUM_ATTACHMENTS; i++)
333         if (renderTargets[i].xpBaseAddress == spr->swr.xpBaseAddress ||
334             (spr->secondary.xpBaseAddress &&
335              renderTargets[i].xpBaseAddress == spr->secondary.xpBaseAddress)) {
336            swr_store_render_target(pipe, i, post_tile_state);
337
338            /* Mesa thinks depth/stencil are fused, so we'll never get an
339             * explicit resource for stencil.  So, if checking depth, then
340             * also check for stencil. */
341            if (spr->has_stencil && (i == SWR_ATTACHMENT_DEPTH)) {
342               swr_store_render_target(
343                  pipe, SWR_ATTACHMENT_STENCIL, post_tile_state);
344            }
345
346            /* This fence signals StoreTiles completion */
347            swr_fence_submit(ctx, screen->flush_fence);
348
349            break;
350         }
351   }
352}
353
354void
355swr_draw_init(struct pipe_context *pipe)
356{
357   pipe->draw_vbo = swr_draw_vbo;
358   pipe->flush = swr_flush;
359}
360