// swr_draw.cpp revision 7ec681f3
1/**************************************************************************** 2 * Copyright (C) 2015 Intel Corporation. All Rights Reserved. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 ***************************************************************************/ 23 24#include "swr_screen.h" 25#include "swr_context.h" 26#include "swr_resource.h" 27#include "swr_fence.h" 28#include "swr_query.h" 29#include "jit_api.h" 30 31#include "util/u_draw.h" 32#include "util/u_prim.h" 33 34#include <algorithm> 35#include <iostream> 36/* 37 * Draw vertex arrays, with optional indexing, optional instancing. 
38 */ 39static void 40swr_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info, 41 unsigned drawid_offset, 42 const struct pipe_draw_indirect_info *indirect, 43 const struct pipe_draw_start_count_bias *draws, 44 unsigned num_draws) 45{ 46 if (num_draws > 1) { 47 struct pipe_draw_info tmp_info = *info; 48 unsigned drawid = drawid_offset; 49 50 for (unsigned i = 0; i < num_draws; i++) { 51 swr_draw_vbo(pipe, &tmp_info, drawid, indirect, &draws[i], 1); 52 if (tmp_info.increment_draw_id) 53 drawid++; 54 } 55 return; 56 } 57 58 if (!indirect && (!draws[0].count || !info->instance_count)) 59 return; 60 61 struct swr_context *ctx = swr_context(pipe); 62 63 if (!indirect && 64 !info->primitive_restart && 65 !u_trim_pipe_prim((enum pipe_prim_type)info->mode, (unsigned*)&draws[0].count)) 66 return; 67 68 if (!swr_check_render_cond(pipe)) 69 return; 70 71 if (indirect && indirect->buffer) { 72 util_draw_indirect(pipe, info, indirect); 73 return; 74 } 75 76 /* If indexed draw, force vertex validation since index buffer comes 77 * from draw info. */ 78 if (info->index_size) 79 ctx->dirty |= SWR_NEW_VERTEX; 80 81 /* Update derived state, pass draw info to update function. 
*/ 82 swr_update_derived(pipe, info, draws); 83 84 swr_update_draw_context(ctx); 85 86 struct pipe_draw_info resolved_info; 87 struct pipe_draw_start_count_bias resolved_draw; 88 /* DrawTransformFeedback */ 89 if (indirect && indirect->count_from_stream_output) { 90 // trick copied from softpipe to modify const struct *info 91 memcpy(&resolved_info, (void*)info, sizeof(struct pipe_draw_info)); 92 resolved_draw.start = draws[0].start; 93 resolved_draw.count = ctx->so_primCounter * ctx->patch_vertices; 94 resolved_info.max_index = resolved_draw.count - 1; 95 info = &resolved_info; 96 indirect = NULL; 97 draws = &resolved_draw; 98 } 99 100 if (ctx->vs->pipe.stream_output.num_outputs) { 101 if (!ctx->vs->soFunc[info->mode]) { 102 STREAMOUT_COMPILE_STATE state = {0}; 103 struct pipe_stream_output_info *so = &ctx->vs->pipe.stream_output; 104 105 state.numVertsPerPrim = u_vertices_per_prim((enum pipe_prim_type)info->mode); 106 107 uint32_t offsets[MAX_SO_STREAMS] = {0}; 108 uint32_t num = 0; 109 110 for (uint32_t i = 0; i < so->num_outputs; i++) { 111 assert(so->output[i].stream == 0); // @todo 112 uint32_t output_buffer = so->output[i].output_buffer; 113 if (so->output[i].dst_offset != offsets[output_buffer]) { 114 // hole - need to fill 115 state.stream.decl[num].bufferIndex = output_buffer; 116 state.stream.decl[num].hole = true; 117 state.stream.decl[num].componentMask = 118 (1 << (so->output[i].dst_offset - offsets[output_buffer])) 119 - 1; 120 num++; 121 offsets[output_buffer] = so->output[i].dst_offset; 122 } 123 124 unsigned attrib_slot = so->output[i].register_index; 125 attrib_slot = swr_so_adjust_attrib(attrib_slot, ctx->vs); 126 127 state.stream.decl[num].bufferIndex = output_buffer; 128 state.stream.decl[num].attribSlot = attrib_slot; 129 state.stream.decl[num].componentMask = 130 ((1 << so->output[i].num_components) - 1) 131 << so->output[i].start_component; 132 state.stream.decl[num].hole = false; 133 num++; 134 135 offsets[output_buffer] += 
so->output[i].num_components; 136 } 137 138 state.stream.numDecls = num; 139 140 HANDLE hJitMgr = swr_screen(pipe->screen)->hJitMgr; 141 ctx->vs->soFunc[info->mode] = JitCompileStreamout(hJitMgr, state); 142 debug_printf("so shader %p\n", ctx->vs->soFunc[info->mode]); 143 assert(ctx->vs->soFunc[info->mode] && "Error: SoShader = NULL"); 144 } 145 146 ctx->api.pfnSwrSetSoFunc(ctx->swrContext, ctx->vs->soFunc[info->mode], 0); 147 } 148 149 struct swr_vertex_element_state *velems = ctx->velems; 150 if (info->primitive_restart) 151 velems->fsState.cutIndex = info->restart_index; 152 else 153 velems->fsState.cutIndex = 0; 154 velems->fsState.bEnableCutIndex = info->primitive_restart; 155 velems->fsState.bPartialVertexBuffer = (info->index_bounds_valid && info->min_index > 0); 156 157 swr_jit_fetch_key key; 158 swr_generate_fetch_key(key, velems); 159 auto search = velems->map.find(key); 160 if (search != velems->map.end()) { 161 velems->fsFunc = search->second; 162 } else { 163 HANDLE hJitMgr = swr_screen(ctx->pipe.screen)->hJitMgr; 164 velems->fsFunc = JitCompileFetch(hJitMgr, velems->fsState); 165 166 debug_printf("fetch shader %p\n", velems->fsFunc); 167 assert(velems->fsFunc && "Error: FetchShader = NULL"); 168 169 velems->map.insert(std::make_pair(key, velems->fsFunc)); 170 } 171 172 ctx->api.pfnSwrSetFetchFunc(ctx->swrContext, velems->fsFunc); 173 174 /* Set up frontend state 175 * XXX setup provokingVertex & topologyProvokingVertex */ 176 SWR_FRONTEND_STATE feState = {0}; 177 178 // feState.vsVertexSize seeds the PA size that is used as an interface 179 // between all the shader stages, so it has to be large enough to 180 // incorporate all interfaces between stages 181 182 // max of frontend shaders num_outputs 183 feState.vsVertexSize = ctx->vs->info.base.num_outputs; 184 if (ctx->gs) { 185 feState.vsVertexSize = std::max(feState.vsVertexSize, (uint32_t)ctx->gs->info.base.num_outputs); 186 } 187 if (ctx->tcs) { 188 feState.vsVertexSize = 
std::max(feState.vsVertexSize, (uint32_t)ctx->tcs->info.base.num_outputs); 189 } 190 if (ctx->tes) { 191 feState.vsVertexSize = std::max(feState.vsVertexSize, (uint32_t)ctx->tes->info.base.num_outputs); 192 } 193 194 195 if (ctx->vs->info.base.num_outputs) { 196 // gs does not adjust for position in SGV slot at input from vs 197 if (!ctx->gs && !ctx->tcs && !ctx->tes) 198 feState.vsVertexSize--; 199 } 200 201 // other (non-SGV) slots start at VERTEX_ATTRIB_START_SLOT 202 feState.vsVertexSize += VERTEX_ATTRIB_START_SLOT; 203 204 // The PA in the clipper does not handle BE vertex sizes 205 // different from FE. Increase vertexsize only for the cases that needed it 206 207 // primid needs a slot 208 if (ctx->fs->info.base.uses_primid) 209 feState.vsVertexSize++; 210 // sprite coord enable 211 if (ctx->rasterizer->sprite_coord_enable) 212 feState.vsVertexSize++; 213 214 if (ctx->rasterizer->flatshade_first) { 215 feState.provokingVertex = {1, 0, 0}; 216 } else { 217 feState.provokingVertex = {2, 1, 2}; 218 } 219 220 enum pipe_prim_type topology; 221 if (ctx->gs) 222 topology = (pipe_prim_type)ctx->gs->info.base.properties[TGSI_PROPERTY_GS_OUTPUT_PRIM]; 223 else 224 topology = (enum pipe_prim_type)info->mode; 225 226 switch (topology) { 227 case PIPE_PRIM_TRIANGLE_FAN: 228 feState.topologyProvokingVertex = feState.provokingVertex.triFan; 229 break; 230 case PIPE_PRIM_TRIANGLE_STRIP: 231 case PIPE_PRIM_TRIANGLES: 232 feState.topologyProvokingVertex = feState.provokingVertex.triStripList; 233 break; 234 case PIPE_PRIM_QUAD_STRIP: 235 case PIPE_PRIM_QUADS: 236 if (ctx->rasterizer->flatshade_first) 237 feState.topologyProvokingVertex = 0; 238 else 239 feState.topologyProvokingVertex = 3; 240 break; 241 case PIPE_PRIM_LINES: 242 case PIPE_PRIM_LINE_LOOP: 243 case PIPE_PRIM_LINE_STRIP: 244 feState.topologyProvokingVertex = feState.provokingVertex.lineStripList; 245 break; 246 default: 247 feState.topologyProvokingVertex = 0; 248 } 249 250 feState.bEnableCutIndex = 
info->primitive_restart; 251 ctx->api.pfnSwrSetFrontendState(ctx->swrContext, &feState); 252 253 if (info->index_size) 254 ctx->api.pfnSwrDrawIndexedInstanced(ctx->swrContext, 255 swr_convert_prim_topology(info->mode, ctx->patch_vertices), 256 draws[0].count, 257 info->instance_count, 258 draws[0].start, 259 draws->index_bias, 260 info->start_instance); 261 else 262 ctx->api.pfnSwrDrawInstanced(ctx->swrContext, 263 swr_convert_prim_topology(info->mode, ctx->patch_vertices), 264 draws[0].count, 265 info->instance_count, 266 draws[0].start, 267 info->start_instance); 268 269 /* On client-buffer draw, we used client buffer directly, without 270 * copy. Block until draw is finished. 271 * VMD is an example application that benefits from this. */ 272 if (ctx->dirty & SWR_BLOCK_CLIENT_DRAW) { 273 struct swr_screen *screen = swr_screen(pipe->screen); 274 swr_fence_submit(ctx, screen->flush_fence); 275 swr_fence_finish(pipe->screen, NULL, screen->flush_fence, 0); 276 } 277} 278 279 280static void 281swr_flush(struct pipe_context *pipe, 282 struct pipe_fence_handle **fence, 283 unsigned flags) 284{ 285 struct swr_context *ctx = swr_context(pipe); 286 struct swr_screen *screen = swr_screen(pipe->screen); 287 288 for (int i=0; i < ctx->framebuffer.nr_cbufs; i++) { 289 struct pipe_surface *cb = ctx->framebuffer.cbufs[i]; 290 if (cb) { 291 swr_store_dirty_resource(pipe, cb->texture, SWR_TILE_RESOLVED); 292 } 293 } 294 if (ctx->framebuffer.zsbuf) { 295 swr_store_dirty_resource(pipe, ctx->framebuffer.zsbuf->texture, 296 SWR_TILE_RESOLVED); 297 } 298 299 if (fence) 300 swr_fence_reference(pipe->screen, fence, screen->flush_fence); 301} 302 303void 304swr_finish(struct pipe_context *pipe) 305{ 306 struct pipe_fence_handle *fence = nullptr; 307 308 swr_flush(pipe, &fence, 0); 309 swr_fence_finish(pipe->screen, NULL, fence, 0); 310 swr_fence_reference(pipe->screen, &fence, NULL); 311} 312 313/* 314 * Invalidate tiles so they can be reloaded back when needed 315 */ 316void 
317swr_invalidate_render_target(struct pipe_context *pipe, 318 uint32_t attachment, 319 uint16_t width, uint16_t height) 320{ 321 struct swr_context *ctx = swr_context(pipe); 322 323 /* grab the rect from the passed in arguments */ 324 swr_update_draw_context(ctx); 325 SWR_RECT full_rect = 326 {0, 0, (int32_t)width, (int32_t)height}; 327 ctx->api.pfnSwrInvalidateTiles(ctx->swrContext, 328 1 << attachment, 329 full_rect); 330} 331 332 333/* 334 * Store SWR HotTiles back to renderTarget surface. 335 */ 336void 337swr_store_render_target(struct pipe_context *pipe, 338 uint32_t attachment, 339 enum SWR_TILE_STATE post_tile_state) 340{ 341 struct swr_context *ctx = swr_context(pipe); 342 struct swr_draw_context *pDC = &ctx->swrDC; 343 struct SWR_SURFACE_STATE *renderTarget = &pDC->renderTargets[attachment]; 344 345 /* Only proceed if there's a valid surface to store to */ 346 if (renderTarget->xpBaseAddress) { 347 swr_update_draw_context(ctx); 348 SWR_RECT full_rect = 349 {0, 0, 350 (int32_t)u_minify(renderTarget->width, renderTarget->lod), 351 (int32_t)u_minify(renderTarget->height, renderTarget->lod)}; 352 ctx->api.pfnSwrStoreTiles(ctx->swrContext, 353 1 << attachment, 354 post_tile_state, 355 full_rect); 356 } 357} 358 359void 360swr_store_dirty_resource(struct pipe_context *pipe, 361 struct pipe_resource *resource, 362 enum SWR_TILE_STATE post_tile_state) 363{ 364 /* Only store resource if it has been written to */ 365 if (swr_resource(resource)->status & SWR_RESOURCE_WRITE) { 366 struct swr_context *ctx = swr_context(pipe); 367 struct swr_screen *screen = swr_screen(pipe->screen); 368 struct swr_resource *spr = swr_resource(resource); 369 370 swr_draw_context *pDC = &ctx->swrDC; 371 SWR_SURFACE_STATE *renderTargets = pDC->renderTargets; 372 for (uint32_t i = 0; i < SWR_NUM_ATTACHMENTS; i++) 373 if (renderTargets[i].xpBaseAddress == spr->swr.xpBaseAddress || 374 (spr->secondary.xpBaseAddress && 375 renderTargets[i].xpBaseAddress == spr->secondary.xpBaseAddress)) { 
376 swr_store_render_target(pipe, i, post_tile_state); 377 378 /* Mesa thinks depth/stencil are fused, so we'll never get an 379 * explicit resource for stencil. So, if checking depth, then 380 * also check for stencil. */ 381 if (spr->has_stencil && (i == SWR_ATTACHMENT_DEPTH)) { 382 swr_store_render_target( 383 pipe, SWR_ATTACHMENT_STENCIL, post_tile_state); 384 } 385 386 /* This fence signals StoreTiles completion */ 387 swr_fence_submit(ctx, screen->flush_fence); 388 389 break; 390 } 391 } 392} 393 394void 395swr_draw_init(struct pipe_context *pipe) 396{ 397 pipe->draw_vbo = swr_draw_vbo; 398 pipe->flush = swr_flush; 399} 400