1/********************************************************** 2 * Copyright 2014 VMware, Inc. All rights reserved. 3 * 4 * Permission is hereby granted, free of charge, to any person 5 * obtaining a copy of this software and associated documentation 6 * files (the "Software"), to deal in the Software without 7 * restriction, including without limitation the rights to use, copy, 8 * modify, merge, publish, distribute, sublicense, and/or sell copies 9 * of the Software, and to permit persons to whom the Software is 10 * furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be 13 * included in all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 * 24 **********************************************************/ 25 26#include "util/u_memory.h" 27#include "util/u_bitmask.h" 28 29#include "svga_cmd.h" 30#include "svga_context.h" 31#include "svga_resource_buffer.h" 32#include "svga_shader.h" 33#include "svga_debug.h" 34#include "svga_streamout.h" 35 36struct svga_stream_output_target { 37 struct pipe_stream_output_target base; 38}; 39 40/** cast wrapper */ 41static inline struct svga_stream_output_target * 42svga_stream_output_target(struct pipe_stream_output_target *s) 43{ 44 return (struct svga_stream_output_target *)s; 45} 46 47 48/** 49 * A helper function to send different version of the DefineStreamOutput command 50 * depending on if device is SM5 capable or not. 51 */ 52static enum pipe_error 53svga_define_stream_output(struct svga_context *svga, 54 SVGA3dStreamOutputId soid, 55 uint32 numOutputStreamEntries, 56 uint32 numOutputStreamStrides, 57 uint32 streamStrides[SVGA3D_DX_MAX_SOTARGETS], 58 const SVGA3dStreamOutputDeclarationEntry decls[SVGA3D_MAX_STREAMOUT_DECLS], 59 uint32 rasterizedStream, 60 struct svga_stream_output *streamout) 61{ 62 unsigned i; 63 64 SVGA_DBG(DEBUG_STREAMOUT, "%s: id=%d\n", __FUNCTION__, soid); 65 SVGA_DBG(DEBUG_STREAMOUT, 66 "numOutputStreamEntires=%d\n", numOutputStreamEntries); 67 68 for (i = 0; i < numOutputStreamEntries; i++) { 69 SVGA_DBG(DEBUG_STREAMOUT, 70 " %d: slot=%d regIdx=%d regMask=0x%x stream=%d\n", 71 i, decls[i].outputSlot, decls[i].registerIndex, 72 decls[i].registerMask, decls[i].stream); 73 } 74 75 SVGA_DBG(DEBUG_STREAMOUT, 76 "numOutputStreamStrides=%d\n", numOutputStreamStrides); 77 for (i = 0; i < numOutputStreamStrides; i++) { 78 SVGA_DBG(DEBUG_STREAMOUT, " %d ", streamStrides[i]); 79 } 80 SVGA_DBG(DEBUG_STREAMOUT, "\n"); 81 82 if (svga_have_sm5(svga) && 83 (numOutputStreamEntries > SVGA3D_MAX_DX10_STREAMOUT_DECLS || 84 numOutputStreamStrides > 1)) { 85 unsigned bufSize = sizeof(SVGA3dStreamOutputDeclarationEntry) 86 * numOutputStreamEntries; 87 struct svga_winsys_buffer *declBuf; 88 struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws; 89 void *map; 90 91 declBuf = svga_winsys_buffer_create(svga, 1, SVGA_BUFFER_USAGE_PINNED, 92 bufSize); 93 if (!declBuf) 94 return PIPE_ERROR; 95 map = sws->buffer_map(sws, declBuf, PIPE_MAP_WRITE); 96 if (!map) { 97 sws->buffer_destroy(sws, declBuf); 98 return PIPE_ERROR; 99 } 100 101 /* copy decls to buffer */ 102 memcpy(map, decls, bufSize); 103 104 /* unmap buffer */ 105 sws->buffer_unmap(sws, declBuf); 106 streamout->declBuf = declBuf; 107 108 SVGA_RETRY(svga, SVGA3D_sm5_DefineAndBindStreamOutput 109 (svga->swc, soid, 110 numOutputStreamEntries, 111 numOutputStreamStrides, 112 streamStrides, 113 streamout->declBuf, 114 rasterizedStream, 115 bufSize)); 116 } else { 117 SVGA_RETRY(svga, SVGA3D_vgpu10_DefineStreamOutput(svga->swc, soid, 118 numOutputStreamEntries, 119 streamStrides, 120 decls)); 121 } 122 123 return PIPE_OK; 124} 125 126 127/** 128 * Creates stream output from the stream output info. 129 */ 130struct svga_stream_output * 131svga_create_stream_output(struct svga_context *svga, 132 struct svga_shader *shader, 133 const struct pipe_stream_output_info *info) 134{ 135 struct svga_stream_output *streamout; 136 SVGA3dStreamOutputDeclarationEntry decls[SVGA3D_MAX_STREAMOUT_DECLS]; 137 unsigned strides[SVGA3D_DX_MAX_SOTARGETS]; 138 unsigned dstOffset[SVGA3D_DX_MAX_SOTARGETS]; 139 unsigned numStreamStrides = 0; 140 unsigned numDecls; 141 unsigned i; 142 enum pipe_error ret; 143 unsigned id; 144 ASSERTED unsigned maxDecls; 145 146 assert(info->num_outputs <= PIPE_MAX_SO_OUTPUTS); 147 148 /* Gallium utility creates shaders with stream output. 149 * For non-DX10, just return NULL. 150 */ 151 if (!svga_have_vgpu10(svga)) 152 return NULL; 153 154 if (svga_have_sm5(svga)) 155 maxDecls = SVGA3D_MAX_STREAMOUT_DECLS; 156 else if (svga_have_vgpu10(svga)) 157 maxDecls = SVGA3D_MAX_DX10_STREAMOUT_DECLS; 158 159 assert(info->num_outputs <= maxDecls); 160 161 /* Allocate an integer ID for the stream output */ 162 id = util_bitmask_add(svga->stream_output_id_bm); 163 if (id == UTIL_BITMASK_INVALID_INDEX) { 164 return NULL; 165 } 166 167 /* Allocate the streamout data structure */ 168 streamout = CALLOC_STRUCT(svga_stream_output); 169 170 if (!streamout) 171 return NULL; 172 173 streamout->info = *info; 174 streamout->id = id; 175 streamout->pos_out_index = -1; 176 streamout->streammask = 0; 177 178 /* Init whole decls and stride arrays to zero to avoid garbage values */ 179 memset(decls, 0, sizeof(decls)); 180 memset(strides, 0, sizeof(strides)); 181 memset(dstOffset, 0, sizeof(dstOffset)); 182 183 SVGA_DBG(DEBUG_STREAMOUT, "%s: num_outputs=%d\n", 184 __FUNCTION__, info->num_outputs); 185 186 for (i = 0, numDecls = 0; i < info->num_outputs; i++, numDecls++) { 187 unsigned reg_idx = info->output[i].register_index; 188 unsigned buf_idx = info->output[i].output_buffer; 189 const enum tgsi_semantic sem_name = 190 shader->info.output_semantic_name[reg_idx]; 191 192 assert(buf_idx <= PIPE_MAX_SO_BUFFERS); 193 194 numStreamStrides = MAX2(numStreamStrides, buf_idx); 195 196 SVGA_DBG(DEBUG_STREAMOUT, 197 " %d: register_index=%d output_buffer=%d stream=%d\n", 198 i, reg_idx, buf_idx, info->output[i].stream); 199 200 SVGA_DBG(DEBUG_STREAMOUT, 201 " dst_offset=%d start_component=%d num_components=%d\n", 202 info->output[i].dst_offset, 203 info->output[i].start_component, 204 info->output[i].num_components); 205 206 streamout->buffer_stream |= info->output[i].stream << (buf_idx * 4); 207 208 /** 209 * Check if the destination offset of the current output 210 * is at the expected offset. If it is greater, then that means 211 * there is a gap in the stream output. We need to insert 212 * extra declaration entries with an invalid register index 213 * to specify a gap. 214 */ 215 while (info->output[i].dst_offset > dstOffset[buf_idx]) { 216 217 unsigned numComponents = info->output[i].dst_offset - 218 dstOffset[buf_idx];; 219 220 assert(svga_have_sm5(svga)); 221 222 /* We can only specify at most 4 components to skip in each 223 * declaration entry. 224 */ 225 numComponents = numComponents > 4 ? 4 : numComponents; 226 227 decls[numDecls].outputSlot = buf_idx, 228 decls[numDecls].stream = info->output[i].stream; 229 decls[numDecls].registerIndex = SVGA3D_INVALID_ID; 230 decls[numDecls].registerMask = (1 << numComponents) - 1; 231 232 dstOffset[buf_idx] += numComponents; 233 numDecls++; 234 } 235 236 if (sem_name == TGSI_SEMANTIC_POSITION) { 237 /** 238 * Check if streaming out POSITION. If so, replace the 239 * register index with the index for NON_ADJUSTED POSITION. 240 */ 241 decls[numDecls].registerIndex = shader->info.num_outputs; 242 243 /* Save this output index, so we can tell later if this stream output 244 * includes an output of a vertex position 245 */ 246 streamout->pos_out_index = numDecls; 247 } 248 else if (sem_name == TGSI_SEMANTIC_CLIPDIST) { 249 /** 250 * Use the shadow copy for clip distance because 251 * CLIPDIST instruction is only emitted for enabled clip planes. 252 * It's valid to write to ClipDistance variable for non-enabled 253 * clip planes. 254 */ 255 decls[numDecls].registerIndex = 256 shader->info.num_outputs + 1 + 257 shader->info.output_semantic_index[reg_idx]; 258 } 259 else { 260 decls[numDecls].registerIndex = reg_idx; 261 } 262 263 decls[numDecls].outputSlot = buf_idx; 264 decls[numDecls].registerMask = 265 ((1 << info->output[i].num_components) - 1) 266 << info->output[i].start_component; 267 268 decls[numDecls].stream = info->output[i].stream; 269 assert(decls[numDecls].stream == 0 || svga_have_sm5(svga)); 270 271 /* Set the bit in streammask for the enabled stream */ 272 streamout->streammask |= 1 << info->output[i].stream; 273 274 /* Update the expected offset for the next output */ 275 dstOffset[buf_idx] += info->output[i].num_components; 276 277 strides[buf_idx] = info->stride[buf_idx] * sizeof(float); 278 } 279 280 assert(numDecls <= maxDecls); 281 282 /* Send the DefineStreamOutput command. 283 * Note, rasterizedStream is always 0. 284 */ 285 ret = svga_define_stream_output(svga, id, 286 numDecls, numStreamStrides+1, 287 strides, decls, 0, streamout); 288 289 if (ret != PIPE_OK) { 290 util_bitmask_clear(svga->stream_output_id_bm, id); 291 FREE(streamout); 292 streamout = NULL; 293 } 294 return streamout; 295} 296 297 298enum pipe_error 299svga_set_stream_output(struct svga_context *svga, 300 struct svga_stream_output *streamout) 301{ 302 unsigned id = streamout ? streamout->id : SVGA3D_INVALID_ID; 303 304 if (!svga_have_vgpu10(svga)) { 305 return PIPE_OK; 306 } 307 308 SVGA_DBG(DEBUG_STREAMOUT, "%s streamout=0x%x id=%d\n", __FUNCTION__, 309 streamout, id); 310 311 if (svga->current_so != streamout) { 312 313 /* Before unbinding the current stream output, stop the stream output 314 * statistics queries for the active streams. 315 */ 316 if (svga_have_sm5(svga) && svga->current_so) { 317 svga->vcount_buffer_stream = svga->current_so->buffer_stream; 318 svga_end_stream_output_queries(svga, svga->current_so->streammask); 319 } 320 321 enum pipe_error ret = SVGA3D_vgpu10_SetStreamOutput(svga->swc, id); 322 if (ret != PIPE_OK) { 323 return ret; 324 } 325 326 svga->current_so = streamout; 327 328 /* After binding the new stream output, start the stream output 329 * statistics queries for the active streams. 330 */ 331 if (svga_have_sm5(svga) && svga->current_so) { 332 svga_begin_stream_output_queries(svga, svga->current_so->streammask); 333 } 334 } 335 336 return PIPE_OK; 337} 338 339void 340svga_delete_stream_output(struct svga_context *svga, 341 struct svga_stream_output *streamout) 342{ 343 struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws; 344 345 SVGA_DBG(DEBUG_STREAMOUT, "%s streamout=0x%x\n", __FUNCTION__, streamout); 346 347 assert(svga_have_vgpu10(svga)); 348 assert(streamout != NULL); 349 350 SVGA_RETRY(svga, SVGA3D_vgpu10_DestroyStreamOutput(svga->swc, 351 streamout->id)); 352 353 if (svga_have_sm5(svga) && streamout->declBuf) { 354 sws->buffer_destroy(sws, streamout->declBuf); 355 } 356 357 /* Before deleting the current streamout, make sure to stop any pending 358 * SO queries. 359 */ 360 if (svga->current_so == streamout) { 361 if (svga->in_streamout) 362 svga_end_stream_output_queries(svga, svga->current_so->streammask); 363 svga->current_so = NULL; 364 } 365 366 /* Release the ID */ 367 util_bitmask_clear(svga->stream_output_id_bm, streamout->id); 368 369 /* Free streamout structure */ 370 FREE(streamout); 371} 372 373 374static struct pipe_stream_output_target * 375svga_create_stream_output_target(struct pipe_context *pipe, 376 struct pipe_resource *buffer, 377 unsigned buffer_offset, 378 unsigned buffer_size) 379{ 380 struct svga_context *svga = svga_context(pipe); 381 struct svga_stream_output_target *sot; 382 383 SVGA_DBG(DEBUG_STREAMOUT, "%s offset=%d size=%d\n", __FUNCTION__, 384 buffer_offset, buffer_size); 385 386 assert(svga_have_vgpu10(svga)); 387 (void) svga; 388 389 sot = CALLOC_STRUCT(svga_stream_output_target); 390 if (!sot) 391 return NULL; 392 393 pipe_reference_init(&sot->base.reference, 1); 394 pipe_resource_reference(&sot->base.buffer, buffer); 395 sot->base.context = pipe; 396 sot->base.buffer = buffer; 397 sot->base.buffer_offset = buffer_offset; 398 sot->base.buffer_size = buffer_size; 399 400 return &sot->base; 401} 402 403static void 404svga_destroy_stream_output_target(struct pipe_context *pipe, 405 struct pipe_stream_output_target *target) 406{ 407 struct svga_stream_output_target *sot = svga_stream_output_target(target); 408 409 SVGA_DBG(DEBUG_STREAMOUT, "%s\n", __FUNCTION__); 410 411 pipe_resource_reference(&sot->base.buffer, NULL); 412 FREE(sot); 413} 414 415static void 416svga_set_stream_output_targets(struct pipe_context *pipe, 417 unsigned num_targets, 418 struct pipe_stream_output_target **targets, 419 const unsigned *offsets) 420{ 421 struct svga_context *svga = svga_context(pipe); 422 struct SVGA3dSoTarget soBindings[SVGA3D_DX_MAX_SOTARGETS]; 423 unsigned i; 424 unsigned num_so_targets; 425 boolean begin_so_queries = num_targets > 0; 426 427 SVGA_DBG(DEBUG_STREAMOUT, "%s num_targets=%d\n", __FUNCTION__, 428 num_targets); 429 430 assert(svga_have_vgpu10(svga)); 431 432 /* Mark the streamout buffers as dirty so that we'll issue readbacks 433 * before mapping. 434 */ 435 for (i = 0; i < svga->num_so_targets; i++) { 436 struct svga_buffer *sbuf = svga_buffer(svga->so_targets[i]->buffer); 437 sbuf->dirty = TRUE; 438 } 439 440 /* Before the currently bound streamout targets are unbound, 441 * save them in case they need to be referenced to retrieve the 442 * number of vertices being streamed out. 443 */ 444 for (i = 0; i < ARRAY_SIZE(svga->so_targets); i++) { 445 svga->vcount_so_targets[i] = svga->so_targets[i]; 446 } 447 448 assert(num_targets <= SVGA3D_DX_MAX_SOTARGETS); 449 450 for (i = 0; i < num_targets; i++) { 451 struct svga_stream_output_target *sot 452 = svga_stream_output_target(targets[i]); 453 unsigned size; 454 455 svga->so_surfaces[i] = svga_buffer_handle(svga, sot->base.buffer, 456 PIPE_BIND_STREAM_OUTPUT); 457 458 assert(svga_buffer(sot->base.buffer)->key.flags 459 & SVGA3D_SURFACE_BIND_STREAM_OUTPUT); 460 461 svga->so_targets[i] = &sot->base; 462 if (offsets[i] == -1) { 463 soBindings[i].offset = -1; 464 465 /* The streamout is being resumed. There is no need to restart streamout statistics 466 * queries for the draw-auto fallback since those queries are still active. 467 */ 468 begin_so_queries = FALSE; 469 } 470 else 471 soBindings[i].offset = sot->base.buffer_offset + offsets[i]; 472 473 /* The size cannot extend beyond the end of the buffer. Clamp it. */ 474 size = MIN2(sot->base.buffer_size, 475 sot->base.buffer->width0 - sot->base.buffer_offset); 476 477 soBindings[i].sizeInBytes = size; 478 } 479 480 /* unbind any previously bound stream output buffers */ 481 for (; i < svga->num_so_targets; i++) { 482 svga->so_surfaces[i] = NULL; 483 svga->so_targets[i] = NULL; 484 } 485 486 num_so_targets = MAX2(svga->num_so_targets, num_targets); 487 SVGA_RETRY(svga, SVGA3D_vgpu10_SetSOTargets(svga->swc, num_so_targets, 488 soBindings, svga->so_surfaces)); 489 svga->num_so_targets = num_targets; 490 491 if (svga_have_sm5(svga) && svga->current_so && begin_so_queries) { 492 493 /* If there are already active queries and we need to start a new streamout, 494 * we need to stop the current active queries first. 495 */ 496 if (svga->in_streamout) { 497 svga_end_stream_output_queries(svga, svga->current_so->streammask); 498 } 499 500 /* Start stream out statistics queries for the new streamout */ 501 svga_begin_stream_output_queries(svga, svga->current_so->streammask); 502 } 503} 504 505/** 506 * Rebind stream output target surfaces 507 */ 508enum pipe_error 509svga_rebind_stream_output_targets(struct svga_context *svga) 510{ 511 struct svga_winsys_context *swc = svga->swc; 512 enum pipe_error ret; 513 unsigned i; 514 515 for (i = 0; i < svga->num_so_targets; i++) { 516 ret = swc->resource_rebind(swc, svga->so_surfaces[i], NULL, SVGA_RELOC_WRITE); 517 if (ret != PIPE_OK) 518 return ret; 519 } 520 521 return PIPE_OK; 522} 523 524 525void 526svga_init_stream_output_functions(struct svga_context *svga) 527{ 528 svga->pipe.create_stream_output_target = svga_create_stream_output_target; 529 svga->pipe.stream_output_target_destroy = svga_destroy_stream_output_target; 530 svga->pipe.set_stream_output_targets = svga_set_stream_output_targets; 531} 532 533 534/** 535 * A helper function to create stream output statistics queries for each stream. 536 * These queries are created as a workaround for DrawTransformFeedbackInstanced or 537 * DrawTransformFeedbackStreamInstanced when auto draw doesn't support 538 * instancing or non-0 stream. In this case, the vertex count will 539 * be retrieved from the stream output statistics query. 540 */ 541void 542svga_create_stream_output_queries(struct svga_context *svga) 543{ 544 unsigned i; 545 546 if (!svga_have_sm5(svga)) 547 return; 548 549 for (i = 0; i < ARRAY_SIZE(svga->so_queries); i++) { 550 svga->so_queries[i] = svga->pipe.create_query(&svga->pipe, 551 PIPE_QUERY_SO_STATISTICS, i); 552 assert(svga->so_queries[i] != NULL); 553 } 554} 555 556 557/** 558 * Destroy the stream output statistics queries for the draw-auto workaround. 559 */ 560void 561svga_destroy_stream_output_queries(struct svga_context *svga) 562{ 563 unsigned i; 564 565 if (!svga_have_sm5(svga)) 566 return; 567 568 for (i = 0; i < ARRAY_SIZE(svga->so_queries); i++) { 569 svga->pipe.destroy_query(&svga->pipe, svga->so_queries[i]); 570 } 571} 572 573 574/** 575 * Start stream output statistics queries for the active streams. 576 */ 577void 578svga_begin_stream_output_queries(struct svga_context *svga, 579 unsigned streammask) 580{ 581 assert(svga_have_sm5(svga)); 582 assert(!svga->in_streamout); 583 584 for (unsigned i = 0; i < ARRAY_SIZE(svga->so_queries); i++) { 585 bool ret; 586 if (streammask & (1 << i)) { 587 ret = svga->pipe.begin_query(&svga->pipe, svga->so_queries[i]); 588 } 589 (void) ret; 590 } 591 svga->in_streamout = TRUE; 592 593 return; 594} 595 596 597/** 598 * Stop stream output statistics queries for the active streams. 599 */ 600void 601svga_end_stream_output_queries(struct svga_context *svga, 602 unsigned streammask) 603{ 604 assert(svga_have_sm5(svga)); 605 606 if (!svga->in_streamout) 607 return; 608 609 for (unsigned i = 0; i < ARRAY_SIZE(svga->so_queries); i++) { 610 bool ret; 611 if (streammask & (1 << i)) { 612 ret = svga->pipe.end_query(&svga->pipe, svga->so_queries[i]); 613 } 614 (void) ret; 615 } 616 svga->in_streamout = FALSE; 617 618 return; 619} 620 621 622/** 623 * Return the primitive count returned from the stream output statistics query 624 * for the specified stream. 625 */ 626unsigned 627svga_get_primcount_from_stream_output(struct svga_context *svga, 628 unsigned stream) 629{ 630 unsigned primcount = 0; 631 union pipe_query_result result; 632 bool ret; 633 634 if (svga->current_so) { 635 svga_end_stream_output_queries(svga, svga->current_so->streammask); 636 } 637 638 ret = svga->pipe.get_query_result(&svga->pipe, 639 svga->so_queries[stream], 640 TRUE, &result); 641 if (ret) 642 primcount = result.so_statistics.num_primitives_written; 643 644 return primcount; 645} 646