1b8e80941Smrg/****************************************************************************
2b8e80941Smrg * Copyright (C) 2015 Intel Corporation.   All Rights Reserved.
3b8e80941Smrg *
4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5b8e80941Smrg * copy of this software and associated documentation files (the "Software"),
6b8e80941Smrg * to deal in the Software without restriction, including without limitation
7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the
9b8e80941Smrg * Software is furnished to do so, subject to the following conditions:
10b8e80941Smrg *
11b8e80941Smrg * The above copyright notice and this permission notice (including the next
12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the
13b8e80941Smrg * Software.
14b8e80941Smrg *
15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21b8e80941Smrg * IN THE SOFTWARE.
22b8e80941Smrg ***************************************************************************/
23b8e80941Smrg
24b8e80941Smrg#include "swr_screen.h"
25b8e80941Smrg#include "swr_context.h"
26b8e80941Smrg#include "swr_resource.h"
27b8e80941Smrg#include "swr_fence.h"
28b8e80941Smrg#include "swr_query.h"
29b8e80941Smrg#include "jit_api.h"
30b8e80941Smrg
31b8e80941Smrg#include "util/u_draw.h"
32b8e80941Smrg#include "util/u_prim.h"
33b8e80941Smrg
34b8e80941Smrg/*
35b8e80941Smrg * Draw vertex arrays, with optional indexing, optional instancing.
36b8e80941Smrg */
37b8e80941Smrgstatic void
38b8e80941Smrgswr_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
39b8e80941Smrg{
40b8e80941Smrg   struct swr_context *ctx = swr_context(pipe);
41b8e80941Smrg
42b8e80941Smrg   if (!info->count_from_stream_output && !info->indirect &&
43b8e80941Smrg       !info->primitive_restart &&
44b8e80941Smrg       !u_trim_pipe_prim(info->mode, (unsigned*)&info->count))
45b8e80941Smrg      return;
46b8e80941Smrg
47b8e80941Smrg   if (!swr_check_render_cond(pipe))
48b8e80941Smrg      return;
49b8e80941Smrg
50b8e80941Smrg   if (info->indirect) {
51b8e80941Smrg      util_draw_indirect(pipe, info);
52b8e80941Smrg      return;
53b8e80941Smrg   }
54b8e80941Smrg
55b8e80941Smrg   /* If indexed draw, force vertex validation since index buffer comes
56b8e80941Smrg    * from draw info. */
57b8e80941Smrg   if (info->index_size)
58b8e80941Smrg      ctx->dirty |= SWR_NEW_VERTEX;
59b8e80941Smrg
60b8e80941Smrg   /* Update derived state, pass draw info to update function. */
61b8e80941Smrg   swr_update_derived(pipe, info);
62b8e80941Smrg
63b8e80941Smrg   swr_update_draw_context(ctx);
64b8e80941Smrg
65b8e80941Smrg   if (ctx->vs->pipe.stream_output.num_outputs) {
66b8e80941Smrg      if (!ctx->vs->soFunc[info->mode]) {
67b8e80941Smrg         STREAMOUT_COMPILE_STATE state = {0};
68b8e80941Smrg         struct pipe_stream_output_info *so = &ctx->vs->pipe.stream_output;
69b8e80941Smrg
70b8e80941Smrg         state.numVertsPerPrim = u_vertices_per_prim(info->mode);
71b8e80941Smrg
72b8e80941Smrg         uint32_t offsets[MAX_SO_STREAMS] = {0};
73b8e80941Smrg         uint32_t num = 0;
74b8e80941Smrg
75b8e80941Smrg         for (uint32_t i = 0; i < so->num_outputs; i++) {
76b8e80941Smrg            assert(so->output[i].stream == 0); // @todo
77b8e80941Smrg            uint32_t output_buffer = so->output[i].output_buffer;
78b8e80941Smrg            if (so->output[i].dst_offset != offsets[output_buffer]) {
79b8e80941Smrg               // hole - need to fill
80b8e80941Smrg               state.stream.decl[num].bufferIndex = output_buffer;
81b8e80941Smrg               state.stream.decl[num].hole = true;
82b8e80941Smrg               state.stream.decl[num].componentMask =
83b8e80941Smrg                  (1 << (so->output[i].dst_offset - offsets[output_buffer]))
84b8e80941Smrg                  - 1;
85b8e80941Smrg               num++;
86b8e80941Smrg               offsets[output_buffer] = so->output[i].dst_offset;
87b8e80941Smrg            }
88b8e80941Smrg
89b8e80941Smrg            unsigned attrib_slot = so->output[i].register_index;
90b8e80941Smrg            attrib_slot = swr_so_adjust_attrib(attrib_slot, ctx->vs);
91b8e80941Smrg
92b8e80941Smrg            state.stream.decl[num].bufferIndex = output_buffer;
93b8e80941Smrg            state.stream.decl[num].attribSlot = attrib_slot;
94b8e80941Smrg            state.stream.decl[num].componentMask =
95b8e80941Smrg               ((1 << so->output[i].num_components) - 1)
96b8e80941Smrg               << so->output[i].start_component;
97b8e80941Smrg            state.stream.decl[num].hole = false;
98b8e80941Smrg            num++;
99b8e80941Smrg
100b8e80941Smrg            offsets[output_buffer] += so->output[i].num_components;
101b8e80941Smrg         }
102b8e80941Smrg
103b8e80941Smrg         state.stream.numDecls = num;
104b8e80941Smrg
105b8e80941Smrg         HANDLE hJitMgr = swr_screen(pipe->screen)->hJitMgr;
106b8e80941Smrg         ctx->vs->soFunc[info->mode] = JitCompileStreamout(hJitMgr, state);
107b8e80941Smrg         debug_printf("so shader    %p\n", ctx->vs->soFunc[info->mode]);
108b8e80941Smrg         assert(ctx->vs->soFunc[info->mode] && "Error: SoShader = NULL");
109b8e80941Smrg      }
110b8e80941Smrg
111b8e80941Smrg      ctx->api.pfnSwrSetSoFunc(ctx->swrContext, ctx->vs->soFunc[info->mode], 0);
112b8e80941Smrg   }
113b8e80941Smrg
114b8e80941Smrg   struct swr_vertex_element_state *velems = ctx->velems;
115b8e80941Smrg   if (info->primitive_restart)
116b8e80941Smrg      velems->fsState.cutIndex = info->restart_index;
117b8e80941Smrg   else
118b8e80941Smrg      velems->fsState.cutIndex = 0;
119b8e80941Smrg   velems->fsState.bEnableCutIndex = info->primitive_restart;
120b8e80941Smrg   velems->fsState.bPartialVertexBuffer = (info->min_index > 0);
121b8e80941Smrg
122b8e80941Smrg   swr_jit_fetch_key key;
123b8e80941Smrg   swr_generate_fetch_key(key, velems);
124b8e80941Smrg   auto search = velems->map.find(key);
125b8e80941Smrg   if (search != velems->map.end()) {
126b8e80941Smrg      velems->fsFunc = search->second;
127b8e80941Smrg   } else {
128b8e80941Smrg      HANDLE hJitMgr = swr_screen(ctx->pipe.screen)->hJitMgr;
129b8e80941Smrg      velems->fsFunc = JitCompileFetch(hJitMgr, velems->fsState);
130b8e80941Smrg
131b8e80941Smrg      debug_printf("fetch shader %p\n", velems->fsFunc);
132b8e80941Smrg      assert(velems->fsFunc && "Error: FetchShader = NULL");
133b8e80941Smrg
134b8e80941Smrg      velems->map.insert(std::make_pair(key, velems->fsFunc));
135b8e80941Smrg   }
136b8e80941Smrg
137b8e80941Smrg   ctx->api.pfnSwrSetFetchFunc(ctx->swrContext, velems->fsFunc);
138b8e80941Smrg
139b8e80941Smrg   /* Set up frontend state
140b8e80941Smrg    * XXX setup provokingVertex & topologyProvokingVertex */
141b8e80941Smrg   SWR_FRONTEND_STATE feState = {0};
142b8e80941Smrg
143b8e80941Smrg   // feState.vsVertexSize seeds the PA size that is used as an interface
144b8e80941Smrg   // between all the shader stages, so it has to be large enough to
145b8e80941Smrg   // incorporate all interfaces between stages
146b8e80941Smrg
147b8e80941Smrg   // max of gs and vs num_outputs
148b8e80941Smrg   feState.vsVertexSize = ctx->vs->info.base.num_outputs;
149b8e80941Smrg   if (ctx->gs &&
150b8e80941Smrg       ctx->gs->info.base.num_outputs > feState.vsVertexSize) {
151b8e80941Smrg      feState.vsVertexSize = ctx->gs->info.base.num_outputs;
152b8e80941Smrg   }
153b8e80941Smrg
154b8e80941Smrg   if (ctx->vs->info.base.num_outputs) {
155b8e80941Smrg      // gs does not adjust for position in SGV slot at input from vs
156b8e80941Smrg      if (!ctx->gs)
157b8e80941Smrg         feState.vsVertexSize--;
158b8e80941Smrg   }
159b8e80941Smrg
160b8e80941Smrg   // other (non-SGV) slots start at VERTEX_ATTRIB_START_SLOT
161b8e80941Smrg   feState.vsVertexSize += VERTEX_ATTRIB_START_SLOT;
162b8e80941Smrg
163b8e80941Smrg   // The PA in the clipper does not handle BE vertex sizes
164b8e80941Smrg   // different from FE. Increase vertexsize only for the cases that needed it
165b8e80941Smrg
166b8e80941Smrg   // primid needs a slot
167b8e80941Smrg   if (ctx->fs->info.base.uses_primid)
168b8e80941Smrg      feState.vsVertexSize++;
169b8e80941Smrg   // sprite coord enable
170b8e80941Smrg   if (ctx->rasterizer->sprite_coord_enable)
171b8e80941Smrg      feState.vsVertexSize++;
172b8e80941Smrg
173b8e80941Smrg
174b8e80941Smrg   if (ctx->rasterizer->flatshade_first) {
175b8e80941Smrg      feState.provokingVertex = {1, 0, 0};
176b8e80941Smrg   } else {
177b8e80941Smrg      feState.provokingVertex = {2, 1, 2};
178b8e80941Smrg   }
179b8e80941Smrg
180b8e80941Smrg   enum pipe_prim_type topology;
181b8e80941Smrg   if (ctx->gs)
182b8e80941Smrg      topology = (pipe_prim_type)ctx->gs->info.base.properties[TGSI_PROPERTY_GS_OUTPUT_PRIM];
183b8e80941Smrg   else
184b8e80941Smrg      topology = info->mode;
185b8e80941Smrg
186b8e80941Smrg   switch (topology) {
187b8e80941Smrg   case PIPE_PRIM_TRIANGLE_FAN:
188b8e80941Smrg      feState.topologyProvokingVertex = feState.provokingVertex.triFan;
189b8e80941Smrg      break;
190b8e80941Smrg   case PIPE_PRIM_TRIANGLE_STRIP:
191b8e80941Smrg   case PIPE_PRIM_TRIANGLES:
192b8e80941Smrg      feState.topologyProvokingVertex = feState.provokingVertex.triStripList;
193b8e80941Smrg      break;
194b8e80941Smrg   case PIPE_PRIM_QUAD_STRIP:
195b8e80941Smrg   case PIPE_PRIM_QUADS:
196b8e80941Smrg      if (ctx->rasterizer->flatshade_first)
197b8e80941Smrg         feState.topologyProvokingVertex = 0;
198b8e80941Smrg      else
199b8e80941Smrg         feState.topologyProvokingVertex = 3;
200b8e80941Smrg      break;
201b8e80941Smrg   case PIPE_PRIM_LINES:
202b8e80941Smrg   case PIPE_PRIM_LINE_LOOP:
203b8e80941Smrg   case PIPE_PRIM_LINE_STRIP:
204b8e80941Smrg      feState.topologyProvokingVertex = feState.provokingVertex.lineStripList;
205b8e80941Smrg      break;
206b8e80941Smrg   default:
207b8e80941Smrg      feState.topologyProvokingVertex = 0;
208b8e80941Smrg   }
209b8e80941Smrg
210b8e80941Smrg   feState.bEnableCutIndex = info->primitive_restart;
211b8e80941Smrg   ctx->api.pfnSwrSetFrontendState(ctx->swrContext, &feState);
212b8e80941Smrg
213b8e80941Smrg   if (info->index_size)
214b8e80941Smrg      ctx->api.pfnSwrDrawIndexedInstanced(ctx->swrContext,
215b8e80941Smrg                                          swr_convert_prim_topology(info->mode),
216b8e80941Smrg                                          info->count,
217b8e80941Smrg                                          info->instance_count,
218b8e80941Smrg                                          info->start,
219b8e80941Smrg                                          info->index_bias,
220b8e80941Smrg                                          info->start_instance);
221b8e80941Smrg   else
222b8e80941Smrg      ctx->api.pfnSwrDrawInstanced(ctx->swrContext,
223b8e80941Smrg                                   swr_convert_prim_topology(info->mode),
224b8e80941Smrg                                   info->count,
225b8e80941Smrg                                   info->instance_count,
226b8e80941Smrg                                   info->start,
227b8e80941Smrg                                   info->start_instance);
228b8e80941Smrg
229b8e80941Smrg   /* On large client-buffer draw, we used client buffer directly, without
230b8e80941Smrg    * copy.  Block until draw is finished.
231b8e80941Smrg    * VMD is an example application that benefits from this. */
232b8e80941Smrg   if (ctx->dirty & SWR_LARGE_CLIENT_DRAW) {
233b8e80941Smrg      struct swr_screen *screen = swr_screen(pipe->screen);
234b8e80941Smrg      swr_fence_submit(ctx, screen->flush_fence);
235b8e80941Smrg      swr_fence_finish(pipe->screen, NULL, screen->flush_fence, 0);
236b8e80941Smrg   }
237b8e80941Smrg}
238b8e80941Smrg
239b8e80941Smrg
240b8e80941Smrgstatic void
241b8e80941Smrgswr_flush(struct pipe_context *pipe,
242b8e80941Smrg          struct pipe_fence_handle **fence,
243b8e80941Smrg          unsigned flags)
244b8e80941Smrg{
245b8e80941Smrg   struct swr_context *ctx = swr_context(pipe);
246b8e80941Smrg   struct swr_screen *screen = swr_screen(pipe->screen);
247b8e80941Smrg
248b8e80941Smrg   for (int i=0; i < ctx->framebuffer.nr_cbufs; i++) {
249b8e80941Smrg      struct pipe_surface *cb = ctx->framebuffer.cbufs[i];
250b8e80941Smrg      if (cb) {
251b8e80941Smrg         swr_store_dirty_resource(pipe, cb->texture, SWR_TILE_RESOLVED);
252b8e80941Smrg      }
253b8e80941Smrg   }
254b8e80941Smrg   if (ctx->framebuffer.zsbuf) {
255b8e80941Smrg      swr_store_dirty_resource(pipe, ctx->framebuffer.zsbuf->texture,
256b8e80941Smrg                               SWR_TILE_RESOLVED);
257b8e80941Smrg   }
258b8e80941Smrg
259b8e80941Smrg   if (fence)
260b8e80941Smrg      swr_fence_reference(pipe->screen, fence, screen->flush_fence);
261b8e80941Smrg}
262b8e80941Smrg
263b8e80941Smrgvoid
264b8e80941Smrgswr_finish(struct pipe_context *pipe)
265b8e80941Smrg{
266b8e80941Smrg   struct pipe_fence_handle *fence = nullptr;
267b8e80941Smrg
268b8e80941Smrg   swr_flush(pipe, &fence, 0);
269b8e80941Smrg   swr_fence_finish(pipe->screen, NULL, fence, 0);
270b8e80941Smrg   swr_fence_reference(pipe->screen, &fence, NULL);
271b8e80941Smrg}
272b8e80941Smrg
273b8e80941Smrg/*
274b8e80941Smrg * Invalidate tiles so they can be reloaded back when needed
275b8e80941Smrg */
276b8e80941Smrgvoid
277b8e80941Smrgswr_invalidate_render_target(struct pipe_context *pipe,
278b8e80941Smrg                             uint32_t attachment,
279b8e80941Smrg                             uint16_t width, uint16_t height)
280b8e80941Smrg{
281b8e80941Smrg   struct swr_context *ctx = swr_context(pipe);
282b8e80941Smrg
283b8e80941Smrg   /* grab the rect from the passed in arguments */
284b8e80941Smrg   swr_update_draw_context(ctx);
285b8e80941Smrg   SWR_RECT full_rect =
286b8e80941Smrg      {0, 0, (int32_t)width, (int32_t)height};
287b8e80941Smrg   ctx->api.pfnSwrInvalidateTiles(ctx->swrContext,
288b8e80941Smrg                                  1 << attachment,
289b8e80941Smrg                                  full_rect);
290b8e80941Smrg}
291b8e80941Smrg
292b8e80941Smrg
293b8e80941Smrg/*
294b8e80941Smrg * Store SWR HotTiles back to renderTarget surface.
295b8e80941Smrg */
296b8e80941Smrgvoid
297b8e80941Smrgswr_store_render_target(struct pipe_context *pipe,
298b8e80941Smrg                        uint32_t attachment,
299b8e80941Smrg                        enum SWR_TILE_STATE post_tile_state)
300b8e80941Smrg{
301b8e80941Smrg   struct swr_context *ctx = swr_context(pipe);
302b8e80941Smrg   struct swr_draw_context *pDC = &ctx->swrDC;
303b8e80941Smrg   struct SWR_SURFACE_STATE *renderTarget = &pDC->renderTargets[attachment];
304b8e80941Smrg
305b8e80941Smrg   /* Only proceed if there's a valid surface to store to */
306b8e80941Smrg   if (renderTarget->xpBaseAddress) {
307b8e80941Smrg      swr_update_draw_context(ctx);
308b8e80941Smrg      SWR_RECT full_rect =
309b8e80941Smrg         {0, 0,
310b8e80941Smrg          (int32_t)u_minify(renderTarget->width, renderTarget->lod),
311b8e80941Smrg          (int32_t)u_minify(renderTarget->height, renderTarget->lod)};
312b8e80941Smrg      ctx->api.pfnSwrStoreTiles(ctx->swrContext,
313b8e80941Smrg                                1 << attachment,
314b8e80941Smrg                                post_tile_state,
315b8e80941Smrg                                full_rect);
316b8e80941Smrg   }
317b8e80941Smrg}
318b8e80941Smrg
319b8e80941Smrgvoid
320b8e80941Smrgswr_store_dirty_resource(struct pipe_context *pipe,
321b8e80941Smrg                         struct pipe_resource *resource,
322b8e80941Smrg                         enum SWR_TILE_STATE post_tile_state)
323b8e80941Smrg{
324b8e80941Smrg   /* Only store resource if it has been written to */
325b8e80941Smrg   if (swr_resource(resource)->status & SWR_RESOURCE_WRITE) {
326b8e80941Smrg      struct swr_context *ctx = swr_context(pipe);
327b8e80941Smrg      struct swr_screen *screen = swr_screen(pipe->screen);
328b8e80941Smrg      struct swr_resource *spr = swr_resource(resource);
329b8e80941Smrg
330b8e80941Smrg      swr_draw_context *pDC = &ctx->swrDC;
331b8e80941Smrg      SWR_SURFACE_STATE *renderTargets = pDC->renderTargets;
332b8e80941Smrg      for (uint32_t i = 0; i < SWR_NUM_ATTACHMENTS; i++)
333b8e80941Smrg         if (renderTargets[i].xpBaseAddress == spr->swr.xpBaseAddress ||
334b8e80941Smrg             (spr->secondary.xpBaseAddress &&
335b8e80941Smrg              renderTargets[i].xpBaseAddress == spr->secondary.xpBaseAddress)) {
336b8e80941Smrg            swr_store_render_target(pipe, i, post_tile_state);
337b8e80941Smrg
338b8e80941Smrg            /* Mesa thinks depth/stencil are fused, so we'll never get an
339b8e80941Smrg             * explicit resource for stencil.  So, if checking depth, then
340b8e80941Smrg             * also check for stencil. */
341b8e80941Smrg            if (spr->has_stencil && (i == SWR_ATTACHMENT_DEPTH)) {
342b8e80941Smrg               swr_store_render_target(
343b8e80941Smrg                  pipe, SWR_ATTACHMENT_STENCIL, post_tile_state);
344b8e80941Smrg            }
345b8e80941Smrg
346b8e80941Smrg            /* This fence signals StoreTiles completion */
347b8e80941Smrg            swr_fence_submit(ctx, screen->flush_fence);
348b8e80941Smrg
349b8e80941Smrg            break;
350b8e80941Smrg         }
351b8e80941Smrg   }
352b8e80941Smrg}
353b8e80941Smrg
354b8e80941Smrgvoid
355b8e80941Smrgswr_draw_init(struct pipe_context *pipe)
356b8e80941Smrg{
357b8e80941Smrg   pipe->draw_vbo = swr_draw_vbo;
358b8e80941Smrg   pipe->flush = swr_flush;
359b8e80941Smrg}
360