1/*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24/** \file gfx6_sol.c
25 *
26 * Code to initialize the binding table entries used by transform feedback.
27 */
28
29#include "main/bufferobj.h"
30#include "main/macros.h"
31#include "brw_context.h"
32#include "brw_batch.h"
33#include "brw_defines.h"
34#include "brw_state.h"
35#include "main/transformfeedback.h"
36#include "util/u_memory.h"
37
38static void
39gfx6_update_sol_surfaces(struct brw_context *brw)
40{
41   struct gl_context *ctx = &brw->ctx;
42   bool xfb_active = _mesa_is_xfb_active_and_unpaused(ctx);
43   struct gl_transform_feedback_object *xfb_obj;
44   const struct gl_transform_feedback_info *linked_xfb_info = NULL;
45
46   if (xfb_active) {
47      /* BRW_NEW_TRANSFORM_FEEDBACK */
48      xfb_obj = ctx->TransformFeedback.CurrentObject;
49      linked_xfb_info = xfb_obj->program->sh.LinkedTransformFeedback;
50   }
51
52   for (int i = 0; i < BRW_MAX_SOL_BINDINGS; ++i) {
53      const int surf_index = BRW_GFX6_SOL_BINDING_START + i;
54      if (xfb_active && i < linked_xfb_info->NumOutputs) {
55         unsigned buffer = linked_xfb_info->Outputs[i].OutputBuffer;
56         unsigned buffer_offset =
57            xfb_obj->Offset[buffer] / 4 +
58            linked_xfb_info->Outputs[i].DstOffset;
59         if (brw->programs[MESA_SHADER_GEOMETRY]) {
60            brw_update_sol_surface(
61               brw, xfb_obj->Buffers[buffer],
62               &brw->gs.base.surf_offset[surf_index],
63               linked_xfb_info->Outputs[i].NumComponents,
64               linked_xfb_info->Buffers[buffer].Stride, buffer_offset);
65         } else {
66            brw_update_sol_surface(
67               brw, xfb_obj->Buffers[buffer],
68               &brw->ff_gs.surf_offset[surf_index],
69               linked_xfb_info->Outputs[i].NumComponents,
70               linked_xfb_info->Buffers[buffer].Stride, buffer_offset);
71         }
72      } else {
73         if (!brw->programs[MESA_SHADER_GEOMETRY])
74            brw->ff_gs.surf_offset[surf_index] = 0;
75         else
76            brw->gs.base.surf_offset[surf_index] = 0;
77      }
78   }
79
80   brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
81}
82
83const struct brw_tracked_state gfx6_sol_surface = {
84   .dirty = {
85      .mesa = 0,
86      .brw = BRW_NEW_BATCH |
87             BRW_NEW_BLORP |
88             BRW_NEW_TRANSFORM_FEEDBACK,
89   },
90   .emit = gfx6_update_sol_surfaces,
91};
92
93/**
94 * Constructs the binding table for the WM surface state, which maps unit
95 * numbers to surface state objects.
96 */
97static void
98brw_gs_upload_binding_table(struct brw_context *brw)
99{
100   uint32_t *bind;
101   struct gl_context *ctx = &brw->ctx;
102   const struct gl_program *prog;
103   bool need_binding_table = false;
104
105   /* We have two scenarios here:
106    * 1) We are using a geometry shader only to implement transform feedback
107    *    for a vertex shader (brw->programs[MESA_SHADER_GEOMETRY] == NULL).
108    *    In this case, we only need surfaces for transform feedback in the
109    *    GS stage.
110    * 2) We have a user-provided geometry shader. In this case we may need
111    *    surfaces for transform feedback and/or other stuff, like textures,
112    *    in the GS stage.
113    */
114
115   if (!brw->programs[MESA_SHADER_GEOMETRY]) {
116      /* BRW_NEW_VERTEX_PROGRAM */
117      prog = ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX];
118      if (prog) {
119         /* Skip making a binding table if we don't have anything to put in it */
120         const struct gl_transform_feedback_info *linked_xfb_info =
121            prog->sh.LinkedTransformFeedback;
122         need_binding_table = linked_xfb_info->NumOutputs > 0;
123      }
124      if (!need_binding_table) {
125         if (brw->ff_gs.bind_bo_offset != 0) {
126            brw->ctx.NewDriverState |= BRW_NEW_BINDING_TABLE_POINTERS;
127            brw->ff_gs.bind_bo_offset = 0;
128         }
129         return;
130      }
131
132      /* Might want to calculate nr_surfaces first, to avoid taking up so much
133       * space for the binding table. Anyway, in this case we know that we only
134       * use BRW_MAX_SOL_BINDINGS surfaces at most.
135       */
136      bind = brw_state_batch(brw, sizeof(uint32_t) * BRW_MAX_SOL_BINDINGS,
137                             32, &brw->ff_gs.bind_bo_offset);
138
139      /* BRW_NEW_SURFACES */
140      memcpy(bind, brw->ff_gs.surf_offset,
141             BRW_MAX_SOL_BINDINGS * sizeof(uint32_t));
142   } else {
143      /* BRW_NEW_GEOMETRY_PROGRAM */
144      prog = ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY];
145      if (prog) {
146         /* Skip making a binding table if we don't have anything to put in it */
147         struct brw_stage_prog_data *prog_data = brw->gs.base.prog_data;
148         const struct gl_transform_feedback_info *linked_xfb_info =
149            prog->sh.LinkedTransformFeedback;
150         need_binding_table = linked_xfb_info->NumOutputs > 0 ||
151                              prog_data->binding_table.size_bytes > 0;
152      }
153      if (!need_binding_table) {
154         if (brw->gs.base.bind_bo_offset != 0) {
155            brw->gs.base.bind_bo_offset = 0;
156            brw->ctx.NewDriverState |= BRW_NEW_BINDING_TABLE_POINTERS;
157         }
158         return;
159      }
160
161      /* Might want to calculate nr_surfaces first, to avoid taking up so much
162       * space for the binding table.
163       */
164      bind = brw_state_batch(brw, sizeof(uint32_t) * BRW_MAX_SURFACES,
165                             32, &brw->gs.base.bind_bo_offset);
166
167      /* BRW_NEW_SURFACES */
168      memcpy(bind, brw->gs.base.surf_offset,
169             BRW_MAX_SURFACES * sizeof(uint32_t));
170   }
171
172   brw->ctx.NewDriverState |= BRW_NEW_BINDING_TABLE_POINTERS;
173}
174
175const struct brw_tracked_state gfx6_gs_binding_table = {
176   .dirty = {
177      .mesa = 0,
178      .brw = BRW_NEW_BATCH |
179             BRW_NEW_BLORP |
180             BRW_NEW_GEOMETRY_PROGRAM |
181             BRW_NEW_VERTEX_PROGRAM |
182             BRW_NEW_SURFACES,
183   },
184   .emit = brw_gs_upload_binding_table,
185};
186
187struct gl_transform_feedback_object *
188brw_new_transform_feedback(struct gl_context *ctx, GLuint name)
189{
190   struct brw_context *brw = brw_context(ctx);
191   struct brw_transform_feedback_object *brw_obj =
192      CALLOC_STRUCT(brw_transform_feedback_object);
193   if (!brw_obj)
194      return NULL;
195
196   _mesa_init_transform_feedback_object(&brw_obj->base, name);
197
198   brw_obj->offset_bo =
199      brw_bo_alloc(brw->bufmgr, "transform feedback offsets", 16,
200                   BRW_MEMZONE_OTHER);
201   brw_obj->prim_count_bo =
202      brw_bo_alloc(brw->bufmgr, "xfb primitive counts", 16384,
203                   BRW_MEMZONE_OTHER);
204
205   return &brw_obj->base;
206}
207
208void
209brw_delete_transform_feedback(struct gl_context *ctx,
210                              struct gl_transform_feedback_object *obj)
211{
212   struct brw_transform_feedback_object *brw_obj =
213      (struct brw_transform_feedback_object *) obj;
214
215   brw_bo_unreference(brw_obj->offset_bo);
216   brw_bo_unreference(brw_obj->prim_count_bo);
217
218   _mesa_delete_transform_feedback_object(ctx, obj);
219}
220
221/**
222 * Tally the number of primitives generated so far.
223 *
224 * The buffer contains a series of pairs:
225 * (<start0, start1, start2, start3>, <end0, end1, end2, end3>) ;
226 * (<start0, start1, start2, start3>, <end0, end1, end2, end3>) ;
227 *
228 * For each stream, we subtract the pair of values (end - start) to get the
229 * number of primitives generated during one section.  We accumulate these
230 * values, adding them up to get the total number of primitives generated.
231 *
232 * Note that we expose one stream pre-Gfx7, so the above is just (start, end).
233 */
234static void
235aggregate_transform_feedback_counter(
236   struct brw_context *brw,
237   struct brw_bo *bo,
238   struct brw_transform_feedback_counter *counter)
239{
240   const unsigned streams = brw->ctx.Const.MaxVertexStreams;
241
242   /* If the current batch is still contributing to the number of primitives
243    * generated, flush it now so the results will be present when mapped.
244    */
245   if (brw_batch_references(&brw->batch, bo))
246      brw_batch_flush(brw);
247
248   if (unlikely(brw->perf_debug && brw_bo_busy(bo)))
249      perf_debug("Stalling for # of transform feedback primitives written.\n");
250
251   uint64_t *prim_counts = brw_bo_map(brw, bo, MAP_READ);
252   prim_counts += counter->bo_start * streams;
253
254   for (unsigned i = counter->bo_start; i + 1 < counter->bo_end; i += 2) {
255      for (unsigned s = 0; s < streams; s++)
256         counter->accum[s] += prim_counts[streams + s] - prim_counts[s];
257
258      prim_counts += 2 * streams;
259   }
260
261   brw_bo_unmap(bo);
262
263   /* We've already gathered up the old data; we can safely overwrite it now. */
264   counter->bo_start = counter->bo_end = 0;
265}
266
267/**
268 * Store the SO_NUM_PRIMS_WRITTEN counters for each stream (4 uint64_t values)
269 * to prim_count_bo.
270 *
271 * If prim_count_bo is out of space, gather up the results so far into
272 * prims_generated[] and allocate a new buffer with enough space.
273 *
274 * The number of primitives written is used to compute the number of vertices
275 * written to a transform feedback stream, which is required to implement
276 * DrawTransformFeedback().
277 */
278void
279brw_save_primitives_written_counters(struct brw_context *brw,
280                                     struct brw_transform_feedback_object *obj)
281{
282   const struct intel_device_info *devinfo = &brw->screen->devinfo;
283   const struct gl_context *ctx = &brw->ctx;
284   const int streams = ctx->Const.MaxVertexStreams;
285
286   assert(obj->prim_count_bo != NULL);
287
288   /* Check if there's enough space for a new pair of four values. */
289   if ((obj->counter.bo_end + 2) * streams * sizeof(uint64_t) >=
290       obj->prim_count_bo->size) {
291      aggregate_transform_feedback_counter(brw, obj->prim_count_bo,
292                                           &obj->previous_counter);
293      aggregate_transform_feedback_counter(brw, obj->prim_count_bo,
294                                           &obj->counter);
295   }
296
297   /* Flush any drawing so that the counters have the right values. */
298   brw_emit_mi_flush(brw);
299
300   /* Emit MI_STORE_REGISTER_MEM commands to write the values. */
301   if (devinfo->ver >= 7) {
302      for (int i = 0; i < streams; i++) {
303         int offset = (streams * obj->counter.bo_end + i) * sizeof(uint64_t);
304         brw_store_register_mem64(brw, obj->prim_count_bo,
305                                  GFX7_SO_NUM_PRIMS_WRITTEN(i),
306                                  offset);
307      }
308   } else {
309      brw_store_register_mem64(brw, obj->prim_count_bo,
310                               GFX6_SO_NUM_PRIMS_WRITTEN,
311                               obj->counter.bo_end * sizeof(uint64_t));
312   }
313
314   /* Update where to write data to. */
315   obj->counter.bo_end++;
316}
317
318static void
319compute_vertices_written_so_far(struct brw_context *brw,
320                                struct brw_transform_feedback_object *obj,
321                                struct brw_transform_feedback_counter *counter,
322                                uint64_t *vertices_written)
323{
324   const struct gl_context *ctx = &brw->ctx;
325   unsigned vertices_per_prim = 0;
326
327   switch (obj->primitive_mode) {
328   case GL_POINTS:
329      vertices_per_prim = 1;
330      break;
331   case GL_LINES:
332      vertices_per_prim = 2;
333      break;
334   case GL_TRIANGLES:
335      vertices_per_prim = 3;
336      break;
337   default:
338      unreachable("Invalid transform feedback primitive mode.");
339   }
340
341   /* Get the number of primitives generated. */
342   aggregate_transform_feedback_counter(brw, obj->prim_count_bo, counter);
343
344   for (int i = 0; i < ctx->Const.MaxVertexStreams; i++) {
345      vertices_written[i] = vertices_per_prim * counter->accum[i];
346   }
347}
348
349/**
350 * Compute the number of vertices written by the last transform feedback
351 * begin/end block.
352 */
353static void
354compute_xfb_vertices_written(struct brw_context *brw,
355                             struct brw_transform_feedback_object *obj)
356{
357   if (obj->vertices_written_valid || !obj->base.EndedAnytime)
358      return;
359
360   compute_vertices_written_so_far(brw, obj, &obj->previous_counter,
361                                   obj->vertices_written);
362   obj->vertices_written_valid = true;
363}
364
365/**
366 * GetTransformFeedbackVertexCount() driver hook.
367 *
368 * Returns the number of vertices written to a particular stream by the last
369 * Begin/EndTransformFeedback block.  Used to implement DrawTransformFeedback().
370 */
371GLsizei
372brw_get_transform_feedback_vertex_count(struct gl_context *ctx,
373                                        struct gl_transform_feedback_object *obj,
374                                        GLuint stream)
375{
376   struct brw_context *brw = brw_context(ctx);
377   struct brw_transform_feedback_object *brw_obj =
378      (struct brw_transform_feedback_object *) obj;
379
380   assert(obj->EndedAnytime);
381   assert(stream < ctx->Const.MaxVertexStreams);
382
383   compute_xfb_vertices_written(brw, brw_obj);
384   return brw_obj->vertices_written[stream];
385}
386
387void
388brw_begin_transform_feedback(struct gl_context *ctx, GLenum mode,
389                             struct gl_transform_feedback_object *obj)
390{
391   struct brw_context *brw = brw_context(ctx);
392   const struct gl_program *prog;
393   const struct gl_transform_feedback_info *linked_xfb_info;
394   struct gl_transform_feedback_object *xfb_obj =
395      ctx->TransformFeedback.CurrentObject;
396   struct brw_transform_feedback_object *brw_obj =
397      (struct brw_transform_feedback_object *) xfb_obj;
398
399   assert(brw->screen->devinfo.ver == 6);
400
401   if (ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY]) {
402      /* BRW_NEW_GEOMETRY_PROGRAM */
403      prog = ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY];
404   } else {
405      /* BRW_NEW_VERTEX_PROGRAM */
406      prog = ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX];
407   }
408   linked_xfb_info = prog->sh.LinkedTransformFeedback;
409
410   /* Compute the maximum number of vertices that we can write without
411    * overflowing any of the buffers currently being used for feedback.
412    */
413   brw_obj->max_index
414      = _mesa_compute_max_transform_feedback_vertices(ctx, xfb_obj,
415                                                      linked_xfb_info);
416
417   /* Initialize the SVBI 0 register to zero and set the maximum index. */
418   BEGIN_BATCH(4);
419   OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2));
420   OUT_BATCH(0); /* SVBI 0 */
421   OUT_BATCH(0); /* starting index */
422   OUT_BATCH(brw_obj->max_index);
423   ADVANCE_BATCH();
424
425   /* Initialize the rest of the unused streams to sane values.  Otherwise,
426    * they may indicate that there is no room to write data and prevent
427    * anything from happening at all.
428    */
429   for (int i = 1; i < 4; i++) {
430      BEGIN_BATCH(4);
431      OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2));
432      OUT_BATCH(i << SVB_INDEX_SHIFT);
433      OUT_BATCH(0); /* starting index */
434      OUT_BATCH(0xffffffff);
435      ADVANCE_BATCH();
436   }
437
438   /* Store the starting value of the SO_NUM_PRIMS_WRITTEN counters. */
439   brw_save_primitives_written_counters(brw, brw_obj);
440
441   brw_obj->primitive_mode = mode;
442}
443
444void
445brw_end_transform_feedback(struct gl_context *ctx,
446                           struct gl_transform_feedback_object *obj)
447{
448   struct brw_context *brw = brw_context(ctx);
449   struct brw_transform_feedback_object *brw_obj =
450      (struct brw_transform_feedback_object *) obj;
451
452   /* Store the ending value of the SO_NUM_PRIMS_WRITTEN counters. */
453   if (!obj->Paused)
454      brw_save_primitives_written_counters(brw, brw_obj);
455
456   /* We've reached the end of a transform feedback begin/end block.  This
457    * means that future DrawTransformFeedback() calls will need to pick up the
458    * results of the current counter, and that it's time to roll back the
459    * current primitive counter to zero.
460    */
461   brw_obj->previous_counter = brw_obj->counter;
462   brw_reset_transform_feedback_counter(&brw_obj->counter);
463
464   /* EndTransformFeedback() means that we need to update the number of
465    * vertices written.  Since it's only necessary if DrawTransformFeedback()
466    * is called and it means mapping a buffer object, we delay computing it
467    * until it's absolutely necessary to try and avoid stalls.
468    */
469   brw_obj->vertices_written_valid = false;
470}
471
/**
 * PauseTransformFeedback() handling: record a temporary ending snapshot of
 * the SO_NUM_PRIMS_WRITTEN counters.
 */
void
brw_pause_transform_feedback(struct gl_context *ctx,
                             struct gl_transform_feedback_object *obj)
{
   /* While this operation is paused, other transform feedback actions may
    * occur, which will contribute to the counters.  Closing the current
    * (start, end) pair here excludes that from our counts.
    */
   brw_save_primitives_written_counters(
      brw_context(ctx), (struct brw_transform_feedback_object *) obj);
}
487
488void
489brw_resume_transform_feedback(struct gl_context *ctx,
490                              struct gl_transform_feedback_object *obj)
491{
492   struct brw_context *brw = brw_context(ctx);
493   struct brw_transform_feedback_object *brw_obj =
494      (struct brw_transform_feedback_object *) obj;
495
496   /* Reload SVBI 0 with the count of vertices written so far. */
497   uint64_t svbi;
498   compute_vertices_written_so_far(brw, brw_obj, &brw_obj->counter, &svbi);
499
500   BEGIN_BATCH(4);
501   OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2));
502   OUT_BATCH(0); /* SVBI 0 */
503   OUT_BATCH((uint32_t) svbi); /* starting index */
504   OUT_BATCH(brw_obj->max_index);
505   ADVANCE_BATCH();
506
507   /* Initialize the rest of the unused streams to sane values.  Otherwise,
508    * they may indicate that there is no room to write data and prevent
509    * anything from happening at all.
510    */
511   for (int i = 1; i < 4; i++) {
512      BEGIN_BATCH(4);
513      OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2));
514      OUT_BATCH(i << SVB_INDEX_SHIFT);
515      OUT_BATCH(0); /* starting index */
516      OUT_BATCH(0xffffffff);
517      ADVANCE_BATCH();
518   }
519
520   /* Store the new starting value of the SO_NUM_PRIMS_WRITTEN counters. */
521   brw_save_primitives_written_counters(brw, brw_obj);
522}
523