1/* 2 * Copyright © 2011 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24/** \file gfx6_sol.c 25 * 26 * Code to initialize the binding table entries used by transform feedback. 27 */ 28 29#include "main/bufferobj.h" 30#include "main/macros.h" 31#include "brw_context.h" 32#include "brw_batch.h" 33#include "brw_defines.h" 34#include "brw_state.h" 35#include "main/transformfeedback.h" 36#include "util/u_memory.h" 37 38static void 39gfx6_update_sol_surfaces(struct brw_context *brw) 40{ 41 struct gl_context *ctx = &brw->ctx; 42 bool xfb_active = _mesa_is_xfb_active_and_unpaused(ctx); 43 struct gl_transform_feedback_object *xfb_obj; 44 const struct gl_transform_feedback_info *linked_xfb_info = NULL; 45 46 if (xfb_active) { 47 /* BRW_NEW_TRANSFORM_FEEDBACK */ 48 xfb_obj = ctx->TransformFeedback.CurrentObject; 49 linked_xfb_info = xfb_obj->program->sh.LinkedTransformFeedback; 50 } 51 52 for (int i = 0; i < BRW_MAX_SOL_BINDINGS; ++i) { 53 const int surf_index = BRW_GFX6_SOL_BINDING_START + i; 54 if (xfb_active && i < linked_xfb_info->NumOutputs) { 55 unsigned buffer = linked_xfb_info->Outputs[i].OutputBuffer; 56 unsigned buffer_offset = 57 xfb_obj->Offset[buffer] / 4 + 58 linked_xfb_info->Outputs[i].DstOffset; 59 if (brw->programs[MESA_SHADER_GEOMETRY]) { 60 brw_update_sol_surface( 61 brw, xfb_obj->Buffers[buffer], 62 &brw->gs.base.surf_offset[surf_index], 63 linked_xfb_info->Outputs[i].NumComponents, 64 linked_xfb_info->Buffers[buffer].Stride, buffer_offset); 65 } else { 66 brw_update_sol_surface( 67 brw, xfb_obj->Buffers[buffer], 68 &brw->ff_gs.surf_offset[surf_index], 69 linked_xfb_info->Outputs[i].NumComponents, 70 linked_xfb_info->Buffers[buffer].Stride, buffer_offset); 71 } 72 } else { 73 if (!brw->programs[MESA_SHADER_GEOMETRY]) 74 brw->ff_gs.surf_offset[surf_index] = 0; 75 else 76 brw->gs.base.surf_offset[surf_index] = 0; 77 } 78 } 79 80 brw->ctx.NewDriverState |= BRW_NEW_SURFACES; 81} 82 83const struct brw_tracked_state gfx6_sol_surface = { 84 .dirty = { 85 .mesa = 0, 86 .brw = BRW_NEW_BATCH | 87 BRW_NEW_BLORP | 88 BRW_NEW_TRANSFORM_FEEDBACK, 89 }, 90 .emit = gfx6_update_sol_surfaces, 91}; 92 93/** 94 * Constructs the binding table for the WM surface state, which maps unit 95 * numbers to surface state objects. 96 */ 97static void 98brw_gs_upload_binding_table(struct brw_context *brw) 99{ 100 uint32_t *bind; 101 struct gl_context *ctx = &brw->ctx; 102 const struct gl_program *prog; 103 bool need_binding_table = false; 104 105 /* We have two scenarios here: 106 * 1) We are using a geometry shader only to implement transform feedback 107 * for a vertex shader (brw->programs[MESA_SHADER_GEOMETRY] == NULL). 108 * In this case, we only need surfaces for transform feedback in the 109 * GS stage. 110 * 2) We have a user-provided geometry shader. In this case we may need 111 * surfaces for transform feedback and/or other stuff, like textures, 112 * in the GS stage. 113 */ 114 115 if (!brw->programs[MESA_SHADER_GEOMETRY]) { 116 /* BRW_NEW_VERTEX_PROGRAM */ 117 prog = ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX]; 118 if (prog) { 119 /* Skip making a binding table if we don't have anything to put in it */ 120 const struct gl_transform_feedback_info *linked_xfb_info = 121 prog->sh.LinkedTransformFeedback; 122 need_binding_table = linked_xfb_info->NumOutputs > 0; 123 } 124 if (!need_binding_table) { 125 if (brw->ff_gs.bind_bo_offset != 0) { 126 brw->ctx.NewDriverState |= BRW_NEW_BINDING_TABLE_POINTERS; 127 brw->ff_gs.bind_bo_offset = 0; 128 } 129 return; 130 } 131 132 /* Might want to calculate nr_surfaces first, to avoid taking up so much 133 * space for the binding table. Anyway, in this case we know that we only 134 * use BRW_MAX_SOL_BINDINGS surfaces at most. 135 */ 136 bind = brw_state_batch(brw, sizeof(uint32_t) * BRW_MAX_SOL_BINDINGS, 137 32, &brw->ff_gs.bind_bo_offset); 138 139 /* BRW_NEW_SURFACES */ 140 memcpy(bind, brw->ff_gs.surf_offset, 141 BRW_MAX_SOL_BINDINGS * sizeof(uint32_t)); 142 } else { 143 /* BRW_NEW_GEOMETRY_PROGRAM */ 144 prog = ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY]; 145 if (prog) { 146 /* Skip making a binding table if we don't have anything to put in it */ 147 struct brw_stage_prog_data *prog_data = brw->gs.base.prog_data; 148 const struct gl_transform_feedback_info *linked_xfb_info = 149 prog->sh.LinkedTransformFeedback; 150 need_binding_table = linked_xfb_info->NumOutputs > 0 || 151 prog_data->binding_table.size_bytes > 0; 152 } 153 if (!need_binding_table) { 154 if (brw->gs.base.bind_bo_offset != 0) { 155 brw->gs.base.bind_bo_offset = 0; 156 brw->ctx.NewDriverState |= BRW_NEW_BINDING_TABLE_POINTERS; 157 } 158 return; 159 } 160 161 /* Might want to calculate nr_surfaces first, to avoid taking up so much 162 * space for the binding table. 163 */ 164 bind = brw_state_batch(brw, sizeof(uint32_t) * BRW_MAX_SURFACES, 165 32, &brw->gs.base.bind_bo_offset); 166 167 /* BRW_NEW_SURFACES */ 168 memcpy(bind, brw->gs.base.surf_offset, 169 BRW_MAX_SURFACES * sizeof(uint32_t)); 170 } 171 172 brw->ctx.NewDriverState |= BRW_NEW_BINDING_TABLE_POINTERS; 173} 174 175const struct brw_tracked_state gfx6_gs_binding_table = { 176 .dirty = { 177 .mesa = 0, 178 .brw = BRW_NEW_BATCH | 179 BRW_NEW_BLORP | 180 BRW_NEW_GEOMETRY_PROGRAM | 181 BRW_NEW_VERTEX_PROGRAM | 182 BRW_NEW_SURFACES, 183 }, 184 .emit = brw_gs_upload_binding_table, 185}; 186 187struct gl_transform_feedback_object * 188brw_new_transform_feedback(struct gl_context *ctx, GLuint name) 189{ 190 struct brw_context *brw = brw_context(ctx); 191 struct brw_transform_feedback_object *brw_obj = 192 CALLOC_STRUCT(brw_transform_feedback_object); 193 if (!brw_obj) 194 return NULL; 195 196 _mesa_init_transform_feedback_object(&brw_obj->base, name); 197 198 brw_obj->offset_bo = 199 brw_bo_alloc(brw->bufmgr, "transform feedback offsets", 16, 200 BRW_MEMZONE_OTHER); 201 brw_obj->prim_count_bo = 202 brw_bo_alloc(brw->bufmgr, "xfb primitive counts", 16384, 203 BRW_MEMZONE_OTHER); 204 205 return &brw_obj->base; 206} 207 208void 209brw_delete_transform_feedback(struct gl_context *ctx, 210 struct gl_transform_feedback_object *obj) 211{ 212 struct brw_transform_feedback_object *brw_obj = 213 (struct brw_transform_feedback_object *) obj; 214 215 brw_bo_unreference(brw_obj->offset_bo); 216 brw_bo_unreference(brw_obj->prim_count_bo); 217 218 _mesa_delete_transform_feedback_object(ctx, obj); 219} 220 221/** 222 * Tally the number of primitives generated so far. 223 * 224 * The buffer contains a series of pairs: 225 * (<start0, start1, start2, start3>, <end0, end1, end2, end3>) ; 226 * (<start0, start1, start2, start3>, <end0, end1, end2, end3>) ; 227 * 228 * For each stream, we subtract the pair of values (end - start) to get the 229 * number of primitives generated during one section. We accumulate these 230 * values, adding them up to get the total number of primitives generated. 231 * 232 * Note that we expose one stream pre-Gfx7, so the above is just (start, end). 233 */ 234static void 235aggregate_transform_feedback_counter( 236 struct brw_context *brw, 237 struct brw_bo *bo, 238 struct brw_transform_feedback_counter *counter) 239{ 240 const unsigned streams = brw->ctx.Const.MaxVertexStreams; 241 242 /* If the current batch is still contributing to the number of primitives 243 * generated, flush it now so the results will be present when mapped. 244 */ 245 if (brw_batch_references(&brw->batch, bo)) 246 brw_batch_flush(brw); 247 248 if (unlikely(brw->perf_debug && brw_bo_busy(bo))) 249 perf_debug("Stalling for # of transform feedback primitives written.\n"); 250 251 uint64_t *prim_counts = brw_bo_map(brw, bo, MAP_READ); 252 prim_counts += counter->bo_start * streams; 253 254 for (unsigned i = counter->bo_start; i + 1 < counter->bo_end; i += 2) { 255 for (unsigned s = 0; s < streams; s++) 256 counter->accum[s] += prim_counts[streams + s] - prim_counts[s]; 257 258 prim_counts += 2 * streams; 259 } 260 261 brw_bo_unmap(bo); 262 263 /* We've already gathered up the old data; we can safely overwrite it now. */ 264 counter->bo_start = counter->bo_end = 0; 265} 266 267/** 268 * Store the SO_NUM_PRIMS_WRITTEN counters for each stream (4 uint64_t values) 269 * to prim_count_bo. 270 * 271 * If prim_count_bo is out of space, gather up the results so far into 272 * prims_generated[] and allocate a new buffer with enough space. 273 * 274 * The number of primitives written is used to compute the number of vertices 275 * written to a transform feedback stream, which is required to implement 276 * DrawTransformFeedback(). 277 */ 278void 279brw_save_primitives_written_counters(struct brw_context *brw, 280 struct brw_transform_feedback_object *obj) 281{ 282 const struct intel_device_info *devinfo = &brw->screen->devinfo; 283 const struct gl_context *ctx = &brw->ctx; 284 const int streams = ctx->Const.MaxVertexStreams; 285 286 assert(obj->prim_count_bo != NULL); 287 288 /* Check if there's enough space for a new pair of four values. */ 289 if ((obj->counter.bo_end + 2) * streams * sizeof(uint64_t) >= 290 obj->prim_count_bo->size) { 291 aggregate_transform_feedback_counter(brw, obj->prim_count_bo, 292 &obj->previous_counter); 293 aggregate_transform_feedback_counter(brw, obj->prim_count_bo, 294 &obj->counter); 295 } 296 297 /* Flush any drawing so that the counters have the right values. */ 298 brw_emit_mi_flush(brw); 299 300 /* Emit MI_STORE_REGISTER_MEM commands to write the values. */ 301 if (devinfo->ver >= 7) { 302 for (int i = 0; i < streams; i++) { 303 int offset = (streams * obj->counter.bo_end + i) * sizeof(uint64_t); 304 brw_store_register_mem64(brw, obj->prim_count_bo, 305 GFX7_SO_NUM_PRIMS_WRITTEN(i), 306 offset); 307 } 308 } else { 309 brw_store_register_mem64(brw, obj->prim_count_bo, 310 GFX6_SO_NUM_PRIMS_WRITTEN, 311 obj->counter.bo_end * sizeof(uint64_t)); 312 } 313 314 /* Update where to write data to. */ 315 obj->counter.bo_end++; 316} 317 318static void 319compute_vertices_written_so_far(struct brw_context *brw, 320 struct brw_transform_feedback_object *obj, 321 struct brw_transform_feedback_counter *counter, 322 uint64_t *vertices_written) 323{ 324 const struct gl_context *ctx = &brw->ctx; 325 unsigned vertices_per_prim = 0; 326 327 switch (obj->primitive_mode) { 328 case GL_POINTS: 329 vertices_per_prim = 1; 330 break; 331 case GL_LINES: 332 vertices_per_prim = 2; 333 break; 334 case GL_TRIANGLES: 335 vertices_per_prim = 3; 336 break; 337 default: 338 unreachable("Invalid transform feedback primitive mode."); 339 } 340 341 /* Get the number of primitives generated. */ 342 aggregate_transform_feedback_counter(brw, obj->prim_count_bo, counter); 343 344 for (int i = 0; i < ctx->Const.MaxVertexStreams; i++) { 345 vertices_written[i] = vertices_per_prim * counter->accum[i]; 346 } 347} 348 349/** 350 * Compute the number of vertices written by the last transform feedback 351 * begin/end block. 352 */ 353static void 354compute_xfb_vertices_written(struct brw_context *brw, 355 struct brw_transform_feedback_object *obj) 356{ 357 if (obj->vertices_written_valid || !obj->base.EndedAnytime) 358 return; 359 360 compute_vertices_written_so_far(brw, obj, &obj->previous_counter, 361 obj->vertices_written); 362 obj->vertices_written_valid = true; 363} 364 365/** 366 * GetTransformFeedbackVertexCount() driver hook. 367 * 368 * Returns the number of vertices written to a particular stream by the last 369 * Begin/EndTransformFeedback block. Used to implement DrawTransformFeedback(). 370 */ 371GLsizei 372brw_get_transform_feedback_vertex_count(struct gl_context *ctx, 373 struct gl_transform_feedback_object *obj, 374 GLuint stream) 375{ 376 struct brw_context *brw = brw_context(ctx); 377 struct brw_transform_feedback_object *brw_obj = 378 (struct brw_transform_feedback_object *) obj; 379 380 assert(obj->EndedAnytime); 381 assert(stream < ctx->Const.MaxVertexStreams); 382 383 compute_xfb_vertices_written(brw, brw_obj); 384 return brw_obj->vertices_written[stream]; 385} 386 387void 388brw_begin_transform_feedback(struct gl_context *ctx, GLenum mode, 389 struct gl_transform_feedback_object *obj) 390{ 391 struct brw_context *brw = brw_context(ctx); 392 const struct gl_program *prog; 393 const struct gl_transform_feedback_info *linked_xfb_info; 394 struct gl_transform_feedback_object *xfb_obj = 395 ctx->TransformFeedback.CurrentObject; 396 struct brw_transform_feedback_object *brw_obj = 397 (struct brw_transform_feedback_object *) xfb_obj; 398 399 assert(brw->screen->devinfo.ver == 6); 400 401 if (ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY]) { 402 /* BRW_NEW_GEOMETRY_PROGRAM */ 403 prog = ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY]; 404 } else { 405 /* BRW_NEW_VERTEX_PROGRAM */ 406 prog = ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX]; 407 } 408 linked_xfb_info = prog->sh.LinkedTransformFeedback; 409 410 /* Compute the maximum number of vertices that we can write without 411 * overflowing any of the buffers currently being used for feedback. 412 */ 413 brw_obj->max_index 414 = _mesa_compute_max_transform_feedback_vertices(ctx, xfb_obj, 415 linked_xfb_info); 416 417 /* Initialize the SVBI 0 register to zero and set the maximum index. */ 418 BEGIN_BATCH(4); 419 OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2)); 420 OUT_BATCH(0); /* SVBI 0 */ 421 OUT_BATCH(0); /* starting index */ 422 OUT_BATCH(brw_obj->max_index); 423 ADVANCE_BATCH(); 424 425 /* Initialize the rest of the unused streams to sane values. Otherwise, 426 * they may indicate that there is no room to write data and prevent 427 * anything from happening at all. 428 */ 429 for (int i = 1; i < 4; i++) { 430 BEGIN_BATCH(4); 431 OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2)); 432 OUT_BATCH(i << SVB_INDEX_SHIFT); 433 OUT_BATCH(0); /* starting index */ 434 OUT_BATCH(0xffffffff); 435 ADVANCE_BATCH(); 436 } 437 438 /* Store the starting value of the SO_NUM_PRIMS_WRITTEN counters. */ 439 brw_save_primitives_written_counters(brw, brw_obj); 440 441 brw_obj->primitive_mode = mode; 442} 443 444void 445brw_end_transform_feedback(struct gl_context *ctx, 446 struct gl_transform_feedback_object *obj) 447{ 448 struct brw_context *brw = brw_context(ctx); 449 struct brw_transform_feedback_object *brw_obj = 450 (struct brw_transform_feedback_object *) obj; 451 452 /* Store the ending value of the SO_NUM_PRIMS_WRITTEN counters. */ 453 if (!obj->Paused) 454 brw_save_primitives_written_counters(brw, brw_obj); 455 456 /* We've reached the end of a transform feedback begin/end block. This 457 * means that future DrawTransformFeedback() calls will need to pick up the 458 * results of the current counter, and that it's time to roll back the 459 * current primitive counter to zero. 460 */ 461 brw_obj->previous_counter = brw_obj->counter; 462 brw_reset_transform_feedback_counter(&brw_obj->counter); 463 464 /* EndTransformFeedback() means that we need to update the number of 465 * vertices written. Since it's only necessary if DrawTransformFeedback() 466 * is called and it means mapping a buffer object, we delay computing it 467 * until it's absolutely necessary to try and avoid stalls. 468 */ 469 brw_obj->vertices_written_valid = false; 470} 471 472void 473brw_pause_transform_feedback(struct gl_context *ctx, 474 struct gl_transform_feedback_object *obj) 475{ 476 struct brw_context *brw = brw_context(ctx); 477 struct brw_transform_feedback_object *brw_obj = 478 (struct brw_transform_feedback_object *) obj; 479 480 /* Store the temporary ending value of the SO_NUM_PRIMS_WRITTEN counters. 481 * While this operation is paused, other transform feedback actions may 482 * occur, which will contribute to the counters. We need to exclude that 483 * from our counts. 484 */ 485 brw_save_primitives_written_counters(brw, brw_obj); 486} 487 488void 489brw_resume_transform_feedback(struct gl_context *ctx, 490 struct gl_transform_feedback_object *obj) 491{ 492 struct brw_context *brw = brw_context(ctx); 493 struct brw_transform_feedback_object *brw_obj = 494 (struct brw_transform_feedback_object *) obj; 495 496 /* Reload SVBI 0 with the count of vertices written so far. */ 497 uint64_t svbi; 498 compute_vertices_written_so_far(brw, brw_obj, &brw_obj->counter, &svbi); 499 500 BEGIN_BATCH(4); 501 OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2)); 502 OUT_BATCH(0); /* SVBI 0 */ 503 OUT_BATCH((uint32_t) svbi); /* starting index */ 504 OUT_BATCH(brw_obj->max_index); 505 ADVANCE_BATCH(); 506 507 /* Initialize the rest of the unused streams to sane values. Otherwise, 508 * they may indicate that there is no room to write data and prevent 509 * anything from happening at all. 510 */ 511 for (int i = 1; i < 4; i++) { 512 BEGIN_BATCH(4); 513 OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2)); 514 OUT_BATCH(i << SVB_INDEX_SHIFT); 515 OUT_BATCH(0); /* starting index */ 516 OUT_BATCH(0xffffffff); 517 ADVANCE_BATCH(); 518 } 519 520 /* Store the new starting value of the SO_NUM_PRIMS_WRITTEN counters. */ 521 brw_save_primitives_written_counters(brw, brw_obj); 522} 523