/****************************************************************************
 * Copyright (C) 2015 Intel Corporation.   All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 ***************************************************************************/

#include "swr_screen.h"
#include "swr_context.h"
#include "swr_resource.h"
#include "swr_fence.h"
#include "swr_query.h"
#include "jit_api.h"

#include "util/u_draw.h"
#include "util/u_prim.h"

/*
 * Draw vertex arrays, with optional indexing, optional instancing.
 *
 * Gallium pipe_context::draw_vbo entry point for the SWR driver.
 * Validates/updates derived state, lazily JIT-compiles the streamout and
 * fetch shaders for the current state, programs SWR frontend state
 * (provoking vertex, vertex size, cut index), then issues the draw to the
 * SWR core via pfnSwrDrawInstanced / pfnSwrDrawIndexedInstanced.
 */
static void
swr_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
{
   struct swr_context *ctx = swr_context(pipe);

   /* Trivially reject draws with too few vertices for the primitive type.
    * Only safe when the count is known up front (no stream-output count,
    * no indirect buffer) and primitive restart cannot split the range.
    * NOTE(review): the cast drops const from info->count so
    * u_trim_pipe_prim can clamp it in place — intentional, but worth
    * confirming against the gallium contract for draw_vbo. */
   if (!info->count_from_stream_output && !info->indirect &&
       !info->primitive_restart &&
       !u_trim_pipe_prim(info->mode, (unsigned*)&info->count))
      return;

   /* Honor conditional rendering: skip the draw if the render condition
    * says so. */
   if (!swr_check_render_cond(pipe))
      return;

   /* Indirect draws are unrolled into direct draws by the util helper,
    * which calls back into this function per draw. */
   if (info->indirect) {
      util_draw_indirect(pipe, info);
      return;
   }

   /* If indexed draw, force vertex validation since index buffer comes
    * from draw info. */
   if (info->index_size)
      ctx->dirty |= SWR_NEW_VERTEX;

   /* Update derived state, pass draw info to update function. */
   swr_update_derived(pipe, info);

   swr_update_draw_context(ctx);

   /* Lazily JIT-compile a streamout shader for this primitive mode if the
    * bound vertex shader has stream-output, caching it per-mode on the VS. */
   if (ctx->vs->pipe.stream_output.num_outputs) {
      if (!ctx->vs->soFunc[info->mode]) {
         STREAMOUT_COMPILE_STATE state = {0};
         struct pipe_stream_output_info *so = &ctx->vs->pipe.stream_output;

         state.numVertsPerPrim = u_vertices_per_prim(info->mode);

         /* Running write offset (in components) per output buffer, used to
          * detect gaps between consecutive outputs. */
         uint32_t offsets[MAX_SO_STREAMS] = {0};
         uint32_t num = 0;

         for (uint32_t i = 0; i < so->num_outputs; i++) {
            assert(so->output[i].stream == 0); // @todo only stream 0 supported
            uint32_t output_buffer = so->output[i].output_buffer;
            if (so->output[i].dst_offset != offsets[output_buffer]) {
               // hole - need to fill: emit a dummy decl whose component
               // mask covers the skipped components
               state.stream.decl[num].bufferIndex = output_buffer;
               state.stream.decl[num].hole = true;
               state.stream.decl[num].componentMask =
                  (1 << (so->output[i].dst_offset - offsets[output_buffer]))
                  - 1;
               num++;
               offsets[output_buffer] = so->output[i].dst_offset;
            }

            /* Map the TGSI register index to the SWR attribute slot. */
            unsigned attrib_slot = so->output[i].register_index;
            attrib_slot = swr_so_adjust_attrib(attrib_slot, ctx->vs);

            state.stream.decl[num].bufferIndex = output_buffer;
            state.stream.decl[num].attribSlot = attrib_slot;
            state.stream.decl[num].componentMask =
               ((1 << so->output[i].num_components) - 1)
               << so->output[i].start_component;
            state.stream.decl[num].hole = false;
            num++;

            offsets[output_buffer] += so->output[i].num_components;
         }

         state.stream.numDecls = num;

         HANDLE hJitMgr = swr_screen(pipe->screen)->hJitMgr;
         ctx->vs->soFunc[info->mode] = JitCompileStreamout(hJitMgr, state);
         debug_printf("so shader %p\n", ctx->vs->soFunc[info->mode]);
         assert(ctx->vs->soFunc[info->mode] && "Error: SoShader = NULL");
      }

      ctx->api.pfnSwrSetSoFunc(ctx->swrContext, ctx->vs->soFunc[info->mode], 0);
   }

   /* Configure the fetch-shader key inputs: primitive-restart cut index
    * and whether the vertex buffer starts partway through (min_index > 0). */
   struct swr_vertex_element_state *velems = ctx->velems;
   if (info->primitive_restart)
      velems->fsState.cutIndex = info->restart_index;
   else
      velems->fsState.cutIndex = 0;
   velems->fsState.bEnableCutIndex = info->primitive_restart;
   velems->fsState.bPartialVertexBuffer = (info->min_index > 0);

   /* Look up (or JIT-compile and cache) the fetch shader for this state. */
   swr_jit_fetch_key key;
   swr_generate_fetch_key(key, velems);
   auto search = velems->map.find(key);
   if (search != velems->map.end()) {
      velems->fsFunc = search->second;
   } else {
      HANDLE hJitMgr = swr_screen(ctx->pipe.screen)->hJitMgr;
      velems->fsFunc = JitCompileFetch(hJitMgr, velems->fsState);

      debug_printf("fetch shader %p\n", velems->fsFunc);
      assert(velems->fsFunc && "Error: FetchShader = NULL");

      velems->map.insert(std::make_pair(key, velems->fsFunc));
   }

   ctx->api.pfnSwrSetFetchFunc(ctx->swrContext, velems->fsFunc);

   /* Set up frontend state
    * XXX setup provokingVertex & topologyProvokingVertex */
   SWR_FRONTEND_STATE feState = {0};

   // feState.vsVertexSize seeds the PA size that is used as an interface
   // between all the shader stages, so it has to be large enough to
   // incorporate all interfaces between stages

   // max of gs and vs num_outputs
   feState.vsVertexSize = ctx->vs->info.base.num_outputs;
   if (ctx->gs &&
       ctx->gs->info.base.num_outputs > feState.vsVertexSize) {
      feState.vsVertexSize = ctx->gs->info.base.num_outputs;
   }

   if (ctx->vs->info.base.num_outputs) {
      // gs does not adjust for position in SGV slot at input from vs
      if (!ctx->gs)
         feState.vsVertexSize--;
   }

   // other (non-SGV) slots start at VERTEX_ATTRIB_START_SLOT
   feState.vsVertexSize += VERTEX_ATTRIB_START_SLOT;

   // The PA in the clipper does not handle BE vertex sizes
   // different from FE. Increase vertexsize only for the cases that needed it

   // primid needs a slot
   if (ctx->fs->info.base.uses_primid)
      feState.vsVertexSize++;
   // sprite coord enable
   if (ctx->rasterizer->sprite_coord_enable)
      feState.vsVertexSize++;

   /* Provoking vertex indices per primitive class: {triFan, triStripList,
    * lineStripList} — first-vertex vs. last-vertex flat shading. */
   if (ctx->rasterizer->flatshade_first) {
      feState.provokingVertex = {1, 0, 0};
   } else {
      feState.provokingVertex = {2, 1, 2};
   }

   /* When a GS is bound, the topology reaching the rasterizer is the GS
    * output primitive type, not the application's draw mode. */
   enum pipe_prim_type topology;
   if (ctx->gs)
      topology = (pipe_prim_type)ctx->gs->info.base.properties[TGSI_PROPERTY_GS_OUTPUT_PRIM];
   else
      topology = info->mode;

   switch (topology) {
   case PIPE_PRIM_TRIANGLE_FAN:
      feState.topologyProvokingVertex = feState.provokingVertex.triFan;
      break;
   case PIPE_PRIM_TRIANGLE_STRIP:
   case PIPE_PRIM_TRIANGLES:
      feState.topologyProvokingVertex = feState.provokingVertex.triStripList;
      break;
   case PIPE_PRIM_QUAD_STRIP:
   case PIPE_PRIM_QUADS:
      if (ctx->rasterizer->flatshade_first)
         feState.topologyProvokingVertex = 0;
      else
         feState.topologyProvokingVertex = 3;
      break;
   case PIPE_PRIM_LINES:
   case PIPE_PRIM_LINE_LOOP:
   case PIPE_PRIM_LINE_STRIP:
      feState.topologyProvokingVertex = feState.provokingVertex.lineStripList;
      break;
   default:
      /* Points and anything else: provoking vertex is irrelevant. */
      feState.topologyProvokingVertex = 0;
   }

   feState.bEnableCutIndex = info->primitive_restart;
   ctx->api.pfnSwrSetFrontendState(ctx->swrContext, &feState);

   /* Kick off the draw in the SWR core. */
   if (info->index_size)
      ctx->api.pfnSwrDrawIndexedInstanced(ctx->swrContext,
                                          swr_convert_prim_topology(info->mode),
                                          info->count,
                                          info->instance_count,
                                          info->start,
                                          info->index_bias,
                                          info->start_instance);
   else
      ctx->api.pfnSwrDrawInstanced(ctx->swrContext,
                                   swr_convert_prim_topology(info->mode),
                                   info->count,
                                   info->instance_count,
                                   info->start,
                                   info->start_instance);

   /* On large client-buffer draw, we used client buffer directly, without
    * copy.  Block until draw is finished.
    * VMD is an example application that benefits from this. */
   if (ctx->dirty & SWR_LARGE_CLIENT_DRAW) {
      struct swr_screen *screen = swr_screen(pipe->screen);
      swr_fence_submit(ctx, screen->flush_fence);
      swr_fence_finish(pipe->screen, NULL, screen->flush_fence, 0);
   }
}


/*
 * Gallium pipe_context::flush entry point.
 *
 * Resolves (stores) all dirty color buffers and the depth/stencil buffer
 * back to their resources, then optionally hands the caller a reference to
 * the screen's flush fence so completion can be awaited.
 */
static void
swr_flush(struct pipe_context *pipe,
          struct pipe_fence_handle **fence,
          unsigned flags)
{
   struct swr_context *ctx = swr_context(pipe);
   struct swr_screen *screen = swr_screen(pipe->screen);

   for (int i=0; i < ctx->framebuffer.nr_cbufs; i++) {
      struct pipe_surface *cb = ctx->framebuffer.cbufs[i];
      if (cb) {
         swr_store_dirty_resource(pipe, cb->texture, SWR_TILE_RESOLVED);
      }
   }
   if (ctx->framebuffer.zsbuf) {
      swr_store_dirty_resource(pipe, ctx->framebuffer.zsbuf->texture,
                               SWR_TILE_RESOLVED);
   }

   if (fence)
      swr_fence_reference(pipe->screen, fence, screen->flush_fence);
}

/*
 * Flush all pending work and block until it completes (a full "finish").
 */
void
swr_finish(struct pipe_context *pipe)
{
   struct pipe_fence_handle *fence = nullptr;

   swr_flush(pipe, &fence, 0);
   swr_fence_finish(pipe->screen, NULL, fence, 0);
   swr_fence_reference(pipe->screen, &fence, NULL);
}

/*
 * Invalidate tiles so they can be reloaded back when needed
 */
void
swr_invalidate_render_target(struct pipe_context *pipe,
                             uint32_t attachment,
                             uint16_t width, uint16_t height)
{
   struct swr_context *ctx = swr_context(pipe);

   /* grab the rect from the passed in arguments */
   swr_update_draw_context(ctx);
   SWR_RECT full_rect =
      {0, 0, (int32_t)width, (int32_t)height};
   ctx->api.pfnSwrInvalidateTiles(ctx->swrContext,
                                  1 << attachment,
                                  full_rect);
}


/*
 * Store SWR HotTiles back to renderTarget surface.
 */
void
swr_store_render_target(struct pipe_context *pipe,
                        uint32_t attachment,
                        enum SWR_TILE_STATE post_tile_state)
{
   struct swr_context *ctx = swr_context(pipe);
   struct swr_draw_context *pDC = &ctx->swrDC;
   struct SWR_SURFACE_STATE *renderTarget = &pDC->renderTargets[attachment];

   /* Only proceed if there's a valid surface to store to */
   if (renderTarget->xpBaseAddress) {
      swr_update_draw_context(ctx);
      /* Store the full surface extent at the surface's current LOD. */
      SWR_RECT full_rect =
         {0, 0,
          (int32_t)u_minify(renderTarget->width, renderTarget->lod),
          (int32_t)u_minify(renderTarget->height, renderTarget->lod)};
      ctx->api.pfnSwrStoreTiles(ctx->swrContext,
                                1 << attachment,
                                post_tile_state,
                                full_rect);
   }
}

/*
 * If the given resource has been written to (SWR_RESOURCE_WRITE) and is
 * currently bound as a render-target attachment, store its HotTiles back
 * to memory and submit the flush fence that signals StoreTiles completion.
 */
void
swr_store_dirty_resource(struct pipe_context *pipe,
                         struct pipe_resource *resource,
                         enum SWR_TILE_STATE post_tile_state)
{
   /* Only store resource if it has been written to */
   if (swr_resource(resource)->status & SWR_RESOURCE_WRITE) {
      struct swr_context *ctx = swr_context(pipe);
      struct swr_screen *screen = swr_screen(pipe->screen);
      struct swr_resource *spr = swr_resource(resource);

      /* Find the attachment slot whose surface (primary or secondary)
       * matches this resource's base address. */
      swr_draw_context *pDC = &ctx->swrDC;
      SWR_SURFACE_STATE *renderTargets = pDC->renderTargets;
      for (uint32_t i = 0; i < SWR_NUM_ATTACHMENTS; i++)
         if (renderTargets[i].xpBaseAddress == spr->swr.xpBaseAddress ||
             (spr->secondary.xpBaseAddress &&
              renderTargets[i].xpBaseAddress == spr->secondary.xpBaseAddress)) {
            swr_store_render_target(pipe, i, post_tile_state);

            /* Mesa thinks depth/stencil are fused, so we'll never get an
             * explicit resource for stencil.  So, if checking depth, then
             * also check for stencil. */
            if (spr->has_stencil && (i == SWR_ATTACHMENT_DEPTH)) {
               swr_store_render_target(
                  pipe, SWR_ATTACHMENT_STENCIL, post_tile_state);
            }

            /* This fence signals StoreTiles completion */
            swr_fence_submit(ctx, screen->flush_fence);

            break;
         }
   }
}

/*
 * Hook the draw/flush entry points into the pipe_context vtable.
 */
void
swr_draw_init(struct pipe_context *pipe)
{
   pipe->draw_vbo = swr_draw_vbo;
   pipe->flush = swr_flush;
}