gfx7_cmd_buffer.c revision 7ec681f3
1/*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include <assert.h>
25#include <stdbool.h>
26#include <string.h>
27#include <unistd.h>
28#include <fcntl.h>
29
30#include "anv_private.h"
31#include "vk_format.h"
32
33#include "genxml/gen_macros.h"
34#include "genxml/genX_pack.h"
35
36#if GFX_VERx10 == 70
/* Clamp x into [min, max] using 64-bit arithmetic so that callers can feed
 * values that may have overflowed 32 bits and still get a sane result.
 */
static int64_t
clamp_int64(int64_t x, int64_t min, int64_t max)
{
   if (x <= min)
      return min;
   return x < max ? x : max;
}
47
/* Emit the scissor rectangle array and the pointer to it for gfx7.
 *
 * Scissors are packed into freshly allocated dynamic state (8 bytes per
 * GFX7_SCISSOR_RECT, see the i * 8 stride below) and then referenced via
 * 3DSTATE_SCISSOR_STATE_POINTERS.  Rectangles are clamped to the render
 * area for primary command buffers, or to the framebuffer dimensions for
 * secondary ones (when a framebuffer is known).
 */
void
gfx7_cmd_buffer_emit_scissor(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
   uint32_t count = cmd_buffer->state.gfx.dynamic.scissor.count;
   const VkRect2D *scissors = cmd_buffer->state.gfx.dynamic.scissor.scissors;

   /* Wa_1409725701:
    *    "The viewport-specific state used by the SF unit (SCISSOR_RECT) is
    *    stored as an array of up to 16 elements. The location of first
    *    element of the array, as specified by Pointer to SCISSOR_RECT, should
    *    be aligned to a 64-byte boundary.
    */
   uint32_t alignment = 64;
   struct anv_state scissor_state =
      anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 8, alignment);

   for (uint32_t i = 0; i < count; i++) {
      const VkRect2D *s = &scissors[i];

      /* Since xmax and ymax are inclusive, we have to have xmax < xmin or
       * ymax < ymin for empty clips.  In case clip x, y, width height are all
       * 0, the clamps below produce 0 for xmin, ymin, xmax, ymax, which isn't
       * what we want. Just special case empty clips and produce a canonical
       * empty clip. */
      static const struct GFX7_SCISSOR_RECT empty_scissor = {
         .ScissorRectangleYMin = 1,
         .ScissorRectangleXMin = 1,
         .ScissorRectangleYMax = 0,
         .ScissorRectangleXMax = 0
      };

      /* Hardware scissor coordinates are 16-bit, so 0xffff is the largest
       * representable max. */
      const int max = 0xffff;

      uint32_t y_min = s->offset.y;
      uint32_t x_min = s->offset.x;
      /* xmax/ymax are inclusive, hence the -1. */
      uint32_t y_max = s->offset.y + s->extent.height - 1;
      uint32_t x_max = s->offset.x + s->extent.width - 1;

      /* Do this math using int64_t so overflow gets clamped correctly. */
      if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
         /* Primary command buffers know the render area, so clamp the
          * scissor to it. */
         y_min = clamp_int64((uint64_t) y_min,
                             cmd_buffer->state.render_area.offset.y, max);
         x_min = clamp_int64((uint64_t) x_min,
                             cmd_buffer->state.render_area.offset.x, max);
         y_max = clamp_int64((uint64_t) y_max, 0,
                             cmd_buffer->state.render_area.offset.y +
                             cmd_buffer->state.render_area.extent.height - 1);
         x_max = clamp_int64((uint64_t) x_max, 0,
                             cmd_buffer->state.render_area.offset.x +
                             cmd_buffer->state.render_area.extent.width - 1);
      } else if (fb) {
         /* Secondary command buffer with a known framebuffer: clamp to the
          * framebuffer extent instead. */
         y_min = clamp_int64((uint64_t) y_min, 0, max);
         x_min = clamp_int64((uint64_t) x_min, 0, max);
         y_max = clamp_int64((uint64_t) y_max, 0, fb->height - 1);
         x_max = clamp_int64((uint64_t) x_max, 0, fb->width - 1);
      }

      struct GFX7_SCISSOR_RECT scissor = {
         .ScissorRectangleYMin = y_min,
         .ScissorRectangleXMin = x_min,
         .ScissorRectangleYMax = y_max,
         .ScissorRectangleXMax = x_max
      };

      /* An empty extent cannot be expressed through the clamped values above
       * (see comment on empty_scissor), so emit the canonical empty rect. */
      if (s->extent.width <= 0 || s->extent.height <= 0) {
         GFX7_SCISSOR_RECT_pack(NULL, scissor_state.map + i * 8,
                                &empty_scissor);
      } else {
         GFX7_SCISSOR_RECT_pack(NULL, scissor_state.map + i * 8, &scissor);
      }
   }

   anv_batch_emit(&cmd_buffer->batch,
                  GFX7_3DSTATE_SCISSOR_STATE_POINTERS, ssp) {
      ssp.ScissorRectPointer = scissor_state.offset;
   }
}
126#endif
127
128static uint32_t vk_to_intel_index_type(VkIndexType type)
129{
130   switch (type) {
131   case VK_INDEX_TYPE_UINT8_EXT:
132      return INDEX_BYTE;
133   case VK_INDEX_TYPE_UINT16:
134      return INDEX_WORD;
135   case VK_INDEX_TYPE_UINT32:
136      return INDEX_DWORD;
137   default:
138      unreachable("invalid index type");
139   }
140}
141
142static uint32_t restart_index_for_type(VkIndexType type)
143{
144   switch (type) {
145   case VK_INDEX_TYPE_UINT8_EXT:
146      return UINT8_MAX;
147   case VK_INDEX_TYPE_UINT16:
148      return UINT16_MAX;
149   case VK_INDEX_TYPE_UINT32:
150      return UINT32_MAX;
151   default:
152      unreachable("invalid index type");
153   }
154}
155
/* vkCmdBindIndexBuffer for gfx7/gfx75: record the buffer, format, and offset
 * in CPU-side state and mark the index buffer dirty; the actual
 * 3DSTATE_INDEX_BUFFER is emitted later in cmd_buffer_flush_dynamic_state.
 */
void genX(CmdBindIndexBuffer)(
    VkCommandBuffer                             commandBuffer,
    VkBuffer                                    _buffer,
    VkDeviceSize                                offset,
    VkIndexType                                 indexType)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);

   cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_INDEX_BUFFER;
   /* Only Haswell (verx10 == 75) has a programmable cut index (see the
    * GFX75_3DSTATE_VF emission in flush_dynamic_state). */
   if (GFX_VERx10 == 75)
      cmd_buffer->state.restart_index = restart_index_for_type(indexType);
   cmd_buffer->state.gfx.gfx7.index_buffer = buffer;
   cmd_buffer->state.gfx.gfx7.index_type = vk_to_intel_index_type(indexType);
   cmd_buffer->state.gfx.gfx7.index_offset = offset;
}
172
173static uint32_t
174get_depth_format(struct anv_cmd_buffer *cmd_buffer)
175{
176   const struct anv_render_pass *pass = cmd_buffer->state.pass;
177   const struct anv_subpass *subpass = cmd_buffer->state.subpass;
178
179   if (!subpass->depth_stencil_attachment)
180      return D16_UNORM;
181
182   struct anv_render_pass_attachment *att =
183      &pass->attachments[subpass->depth_stencil_attachment->attachment];
184
185   switch (att->format) {
186   case VK_FORMAT_D16_UNORM:
187   case VK_FORMAT_D16_UNORM_S8_UINT:
188      return D16_UNORM;
189
190   case VK_FORMAT_X8_D24_UNORM_PACK32:
191   case VK_FORMAT_D24_UNORM_S8_UINT:
192      return D24_UNORM_X8_UINT;
193
194   case VK_FORMAT_D32_SFLOAT:
195   case VK_FORMAT_D32_SFLOAT_S8_UINT:
196      return D32_FLOAT;
197
198   default:
199      return D16_UNORM;
200   }
201}
202
/* Flush all gfx7/gfx75 dynamic state that has been marked dirty since the
 * last draw: primitive topology, 3DSTATE_SF, color-calc state (blend
 * constants / stencil reference), line stipple, depth/stencil state, the
 * index buffer, 3DSTATE_WM, multisample state, and blend state.  Clears the
 * dirty mask at the end.
 */
void
genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
   struct anv_dynamic_state *d = &cmd_buffer->state.gfx.dynamic;

   /* Resolve the effective primitive topology: tessellation overrides the
    * dynamic topology with the pipeline's own. */
   if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY) {
      uint32_t topology;
      if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL))
         topology = pipeline->topology;
      else
         topology = genX(vk_to_intel_primitive_type)[d->primitive_topology];

      cmd_buffer->state.gfx.primitive_topology = topology;
   }

   /* 3DSTATE_SF: packed here from dynamic state and merged with the
    * pipeline's precomputed SF dwords. */
   if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
                                      ANV_CMD_DIRTY_RENDER_TARGETS |
                                      ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH |
                                      ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS |
                                      ANV_CMD_DIRTY_DYNAMIC_CULL_MODE |
                                      ANV_CMD_DIRTY_DYNAMIC_FRONT_FACE |
                                      ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS_ENABLE |
                                      ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY)) {
      /* Take dynamic primitive topology in to account with
       *    3DSTATE_SF::MultisampleRasterizationMode
       */
      uint32_t ms_rast_mode = 0;

      if (cmd_buffer->state.gfx.pipeline->dynamic_states &
          ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY) {
         VkPrimitiveTopology primitive_topology =
            cmd_buffer->state.gfx.dynamic.primitive_topology;

         VkPolygonMode dynamic_raster_mode =
            genX(raster_polygon_mode)(cmd_buffer->state.gfx.pipeline,
                                      primitive_topology);

         ms_rast_mode =
            genX(ms_rasterization_mode)(pipeline, dynamic_raster_mode);
      }

      uint32_t sf_dw[GENX(3DSTATE_SF_length)];
      struct GENX(3DSTATE_SF) sf = {
         GENX(3DSTATE_SF_header),
         .DepthBufferSurfaceFormat = get_depth_format(cmd_buffer),
         .LineWidth = d->line_width,
         .GlobalDepthOffsetConstant = d->depth_bias.bias,
         .GlobalDepthOffsetScale = d->depth_bias.slope,
         .GlobalDepthOffsetClamp = d->depth_bias.clamp,
         .FrontWinding            = genX(vk_to_intel_front_face)[d->front_face],
         .CullMode                = genX(vk_to_intel_cullmode)[d->cull_mode],
         .GlobalDepthOffsetEnableSolid = d->depth_bias_enable,
         .GlobalDepthOffsetEnableWireframe = d->depth_bias_enable,
         .GlobalDepthOffsetEnablePoint = d->depth_bias_enable,
         .MultisampleRasterizationMode = ms_rast_mode,
      };
      GENX(3DSTATE_SF_pack)(NULL, sf_dw, &sf);

      anv_batch_emit_merge(&cmd_buffer->batch, sf_dw, pipeline->gfx7.sf);
   }

   /* COLOR_CALC_STATE holds blend constants and stencil reference values;
    * allocate fresh dynamic state and point the hardware at it. */
   if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS |
                                      ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) {
      struct anv_state cc_state =
         anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
                                            GENX(COLOR_CALC_STATE_length) * 4,
                                            64);
      struct GENX(COLOR_CALC_STATE) cc = {
         .BlendConstantColorRed = d->blend_constants[0],
         .BlendConstantColorGreen = d->blend_constants[1],
         .BlendConstantColorBlue = d->blend_constants[2],
         .BlendConstantColorAlpha = d->blend_constants[3],
         .StencilReferenceValue = d->stencil_reference.front & 0xff,
         .BackfaceStencilReferenceValue = d->stencil_reference.back & 0xff,
      };
      GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc);

      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CC_STATE_POINTERS), ccp) {
         ccp.ColorCalcStatePointer = cc_state.offset;
      }
   }

   if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE) {
      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_LINE_STIPPLE), ls) {
         ls.LineStipplePattern = d->line_stipple.pattern;
         /* MAX2 guards against a divide-by-zero for factor == 0. */
         ls.LineStippleInverseRepeatCount =
            1.0f / MAX2(1, d->line_stipple.factor);
         ls.LineStippleRepeatCount = d->line_stipple.factor;
      }
   }

   /* DEPTH_STENCIL_STATE: packed from dynamic state, merged with the
    * pipeline's precomputed dwords, then referenced by pointer. */
   if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
                                      ANV_CMD_DIRTY_RENDER_TARGETS |
                                      ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK |
                                      ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK |
                                      ANV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE |
                                      ANV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE |
                                      ANV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP |
                                      ANV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE |
                                      ANV_CMD_DIRTY_DYNAMIC_STENCIL_OP)) {
      uint32_t depth_stencil_dw[GENX(DEPTH_STENCIL_STATE_length)];

      struct GENX(DEPTH_STENCIL_STATE) depth_stencil = {
         .StencilTestMask = d->stencil_compare_mask.front & 0xff,
         .StencilWriteMask = d->stencil_write_mask.front & 0xff,

         .BackfaceStencilTestMask = d->stencil_compare_mask.back & 0xff,
         .BackfaceStencilWriteMask = d->stencil_write_mask.back & 0xff,

         .StencilBufferWriteEnable =
            (d->stencil_write_mask.front || d->stencil_write_mask.back) &&
            d->stencil_test_enable,

         .DepthTestEnable = d->depth_test_enable,
         .DepthBufferWriteEnable = d->depth_test_enable && d->depth_write_enable,
         .DepthTestFunction = genX(vk_to_intel_compare_op)[d->depth_compare_op],
         .StencilTestEnable = d->stencil_test_enable,
         .StencilFailOp = genX(vk_to_intel_stencil_op)[d->stencil_op.front.fail_op],
         .StencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[d->stencil_op.front.pass_op],
         .StencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[d->stencil_op.front.depth_fail_op],
         .StencilTestFunction = genX(vk_to_intel_compare_op)[d->stencil_op.front.compare_op],
         .BackfaceStencilFailOp = genX(vk_to_intel_stencil_op)[d->stencil_op.back.fail_op],
         .BackfaceStencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[d->stencil_op.back.pass_op],
         .BackfaceStencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[d->stencil_op.back.depth_fail_op],
         .BackfaceStencilTestFunction = genX(vk_to_intel_compare_op)[d->stencil_op.back.compare_op],
      };
      GENX(DEPTH_STENCIL_STATE_pack)(NULL, depth_stencil_dw, &depth_stencil);

      struct anv_state ds_state =
         anv_cmd_buffer_merge_dynamic(cmd_buffer, depth_stencil_dw,
                                      pipeline->gfx7.depth_stencil_state,
                                      GENX(DEPTH_STENCIL_STATE_length), 64);

      anv_batch_emit(&cmd_buffer->batch,
                     GENX(3DSTATE_DEPTH_STENCIL_STATE_POINTERS), dsp) {
         dsp.PointertoDEPTH_STENCIL_STATE = ds_state.offset;
      }
   }

   /* Re-emit the index buffer (and, on Haswell, the cut index) when the
    * binding, pipeline, or primitive-restart state changed. */
   if (cmd_buffer->state.gfx.gfx7.index_buffer &&
       cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
                                      ANV_CMD_DIRTY_INDEX_BUFFER |
                                      ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_RESTART_ENABLE)) {
      struct anv_buffer *buffer = cmd_buffer->state.gfx.gfx7.index_buffer;
      uint32_t offset = cmd_buffer->state.gfx.gfx7.index_offset;

#if GFX_VERx10 == 75
      /* Haswell has a programmable cut index in 3DSTATE_VF. */
      anv_batch_emit(&cmd_buffer->batch, GFX75_3DSTATE_VF, vf) {
         vf.IndexedDrawCutIndexEnable  = d->primitive_restart_enable;
         vf.CutIndex                   = cmd_buffer->state.restart_index;
      }
#endif

      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_INDEX_BUFFER), ib) {
#if GFX_VERx10 != 75
         /* Pre-Haswell only has a fixed cut-index enable bit here. */
         ib.CutIndexEnable        = d->primitive_restart_enable;
#endif
         ib.IndexFormat           = cmd_buffer->state.gfx.gfx7.index_type;
         ib.MOCS                  = anv_mocs(cmd_buffer->device,
                                             buffer->address.bo,
                                             ISL_SURF_USAGE_INDEX_BUFFER_BIT);

         ib.BufferStartingAddress = anv_address_add(buffer->address, offset);
         ib.BufferEndingAddress   = anv_address_add(buffer->address,
                                                    buffer->size);
      }
   }

   /* 3DSTATE_WM in the hope we can avoid spawning fragment shaders
    * threads or if we have dirty dynamic primitive topology state and
    * need to toggle 3DSTATE_WM::MultisampleRasterizationMode dynamically.
    */
   if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE ||
       cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY) {
      const uint8_t color_writes = cmd_buffer->state.gfx.dynamic.color_writes;

      bool dirty_color_blend =
         cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE;

      bool dirty_primitive_topology =
         cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY;

      VkPolygonMode dynamic_raster_mode;
      VkPrimitiveTopology primitive_topology =
         cmd_buffer->state.gfx.dynamic.primitive_topology;
      dynamic_raster_mode =
         genX(raster_polygon_mode)(cmd_buffer->state.gfx.pipeline,
                                   primitive_topology);

      if (dirty_color_blend || dirty_primitive_topology) {
         uint32_t dwords[GENX(3DSTATE_WM_length)];
         struct GENX(3DSTATE_WM) wm = {
            GENX(3DSTATE_WM_header),

            /* Only dispatch fragment threads if something can be written. */
            .ThreadDispatchEnable = pipeline->force_fragment_thread_dispatch ||
                                    color_writes,
            .MultisampleRasterizationMode =
               genX(ms_rasterization_mode)(pipeline, dynamic_raster_mode),
         };
         GENX(3DSTATE_WM_pack)(NULL, dwords, &wm);

         anv_batch_emit_merge(&cmd_buffer->batch, dwords, pipeline->gfx7.wm);
      }

   }

   if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS) {
      genX(emit_multisample)(&cmd_buffer->batch,
                             cmd_buffer->state.gfx.dynamic.sample_locations.samples,
                             cmd_buffer->state.gfx.dynamic.sample_locations.locations);
   }

   /* BLEND_STATE: one entry per bound render-target surface, applying the
    * dynamic color-write-enable mask and logic op, merged with the
    * pipeline's blend state. */
   if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE ||
       cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_LOGIC_OP) {
      const uint8_t color_writes = cmd_buffer->state.gfx.dynamic.color_writes;
      bool dirty_color_blend =
         cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE;

      /* Blend states of each RT */
      uint32_t surface_count = 0;
      struct anv_pipeline_bind_map *map;
      if (anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
         map = &pipeline->shaders[MESA_SHADER_FRAGMENT]->bind_map;
         surface_count = map->surface_count;
      }

      uint32_t blend_dws[GENX(BLEND_STATE_length) +
                         MAX_RTS * GENX(BLEND_STATE_ENTRY_length)];
      uint32_t *dws = blend_dws;
      memset(blend_dws, 0, sizeof(blend_dws));

      /* Skip this part */
      dws += GENX(BLEND_STATE_length);

      bool dirty_logic_op =
         cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_LOGIC_OP;

      for (uint32_t i = 0; i < surface_count; i++) {
         struct anv_pipeline_binding *binding = &map->surface_to_descriptor[i];
         /* Disable all channel writes for RTs masked out by the dynamic
          * color-write-enable state. */
         bool write_disabled =
            dirty_color_blend && (color_writes & (1u << binding->index)) == 0;
         struct GENX(BLEND_STATE_ENTRY) entry = {
            .WriteDisableAlpha = write_disabled,
            .WriteDisableRed   = write_disabled,
            .WriteDisableGreen = write_disabled,
            .WriteDisableBlue  = write_disabled,
            .LogicOpFunction =
               dirty_logic_op ? genX(vk_to_intel_logic_op)[d->logic_op] : 0,
         };
         GENX(BLEND_STATE_ENTRY_pack)(NULL, dws, &entry);
         dws += GENX(BLEND_STATE_ENTRY_length);
      }

      uint32_t num_dwords = GENX(BLEND_STATE_length) +
         GENX(BLEND_STATE_ENTRY_length) * surface_count;

      struct anv_state blend_states =
         anv_cmd_buffer_merge_dynamic(cmd_buffer, blend_dws,
                                      pipeline->gfx7.blend_state, num_dwords, 64);
      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_BLEND_STATE_POINTERS), bsp) {
         bsp.BlendStatePointer      = blend_states.offset;
      }
   }

   /* Everything dirty has been flushed. */
   cmd_buffer->state.gfx.dirty = 0;
}
470
/* Intentional no-op: the non-promoted depth PMA stall workaround only exists
 * on later hardware generations; this stub keeps the genX interface uniform.
 */
void
genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer,
                                bool enable)
{
   /* The NP PMA fix doesn't exist on gfx7 */
}
477