1b8e80941Smrg/*
2b8e80941Smrg * Copyright © 2012 Intel Corporation
3b8e80941Smrg *
4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5b8e80941Smrg * copy of this software and associated documentation files (the "Software"),
6b8e80941Smrg * to deal in the Software without restriction, including without limitation
7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the
9b8e80941Smrg * Software is furnished to do so, subject to the following conditions:
10b8e80941Smrg *
11b8e80941Smrg * The above copyright notice and this permission notice (including the next
12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the
13b8e80941Smrg * Software.
14b8e80941Smrg *
15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21b8e80941Smrg * IN THE SOFTWARE.
22b8e80941Smrg */
23b8e80941Smrg
24b8e80941Smrg#include <errno.h>
25b8e80941Smrg
26b8e80941Smrg#include "program/prog_instruction.h"
27b8e80941Smrg
28b8e80941Smrg#include "blorp_priv.h"
29b8e80941Smrg#include "compiler/brw_compiler.h"
30b8e80941Smrg#include "compiler/brw_nir.h"
31b8e80941Smrg
32b8e80941Smrgvoid
33b8e80941Smrgblorp_init(struct blorp_context *blorp, void *driver_ctx,
34b8e80941Smrg           struct isl_device *isl_dev)
35b8e80941Smrg{
36b8e80941Smrg   blorp->driver_ctx = driver_ctx;
37b8e80941Smrg   blorp->isl_dev = isl_dev;
38b8e80941Smrg}
39b8e80941Smrg
40b8e80941Smrgvoid
41b8e80941Smrgblorp_finish(struct blorp_context *blorp)
42b8e80941Smrg{
43b8e80941Smrg   blorp->driver_ctx = NULL;
44b8e80941Smrg}
45b8e80941Smrg
46b8e80941Smrgvoid
47b8e80941Smrgblorp_batch_init(struct blorp_context *blorp,
48b8e80941Smrg                 struct blorp_batch *batch, void *driver_batch,
49b8e80941Smrg                 enum blorp_batch_flags flags)
50b8e80941Smrg{
51b8e80941Smrg   batch->blorp = blorp;
52b8e80941Smrg   batch->driver_batch = driver_batch;
53b8e80941Smrg   batch->flags = flags;
54b8e80941Smrg}
55b8e80941Smrg
56b8e80941Smrgvoid
57b8e80941Smrgblorp_batch_finish(struct blorp_batch *batch)
58b8e80941Smrg{
59b8e80941Smrg   batch->blorp = NULL;
60b8e80941Smrg}
61b8e80941Smrg
62b8e80941Smrgvoid
63b8e80941Smrgbrw_blorp_surface_info_init(struct blorp_context *blorp,
64b8e80941Smrg                            struct brw_blorp_surface_info *info,
65b8e80941Smrg                            const struct blorp_surf *surf,
66b8e80941Smrg                            unsigned int level, unsigned int layer,
67b8e80941Smrg                            enum isl_format format, bool is_render_target)
68b8e80941Smrg{
69b8e80941Smrg   assert(level < surf->surf->levels);
70b8e80941Smrg   assert(layer < MAX2(surf->surf->logical_level0_px.depth >> level,
71b8e80941Smrg                       surf->surf->logical_level0_px.array_len));
72b8e80941Smrg
73b8e80941Smrg   info->enabled = true;
74b8e80941Smrg
75b8e80941Smrg   if (format == ISL_FORMAT_UNSUPPORTED)
76b8e80941Smrg      format = surf->surf->format;
77b8e80941Smrg
78b8e80941Smrg   info->surf = *surf->surf;
79b8e80941Smrg   info->addr = surf->addr;
80b8e80941Smrg
81b8e80941Smrg   info->aux_usage = surf->aux_usage;
82b8e80941Smrg   if (info->aux_usage != ISL_AUX_USAGE_NONE) {
83b8e80941Smrg      info->aux_surf = *surf->aux_surf;
84b8e80941Smrg      info->aux_addr = surf->aux_addr;
85b8e80941Smrg      assert(level < info->aux_surf.levels);
86b8e80941Smrg      assert(layer < MAX2(info->aux_surf.logical_level0_px.depth >> level,
87b8e80941Smrg                          info->aux_surf.logical_level0_px.array_len));
88b8e80941Smrg   }
89b8e80941Smrg
90b8e80941Smrg   info->clear_color = surf->clear_color;
91b8e80941Smrg   info->clear_color_addr = surf->clear_color_addr;
92b8e80941Smrg
93b8e80941Smrg   info->view = (struct isl_view) {
94b8e80941Smrg      .usage = is_render_target ? ISL_SURF_USAGE_RENDER_TARGET_BIT :
95b8e80941Smrg                                  ISL_SURF_USAGE_TEXTURE_BIT,
96b8e80941Smrg      .format = format,
97b8e80941Smrg      .base_level = level,
98b8e80941Smrg      .levels = 1,
99b8e80941Smrg      .swizzle = ISL_SWIZZLE_IDENTITY,
100b8e80941Smrg   };
101b8e80941Smrg
102b8e80941Smrg   info->view.array_len = MAX2(info->surf.logical_level0_px.depth,
103b8e80941Smrg                               info->surf.logical_level0_px.array_len);
104b8e80941Smrg
105b8e80941Smrg   if (!is_render_target &&
106b8e80941Smrg       (info->surf.dim == ISL_SURF_DIM_3D ||
107b8e80941Smrg        info->surf.msaa_layout == ISL_MSAA_LAYOUT_ARRAY)) {
108b8e80941Smrg      /* 3-D textures don't support base_array layer and neither do 2-D
109b8e80941Smrg       * multisampled textures on IVB so we need to pass it through the
110b8e80941Smrg       * sampler in those cases.  These are also two cases where we are
111b8e80941Smrg       * guaranteed that we won't be doing any funny surface hacks.
112b8e80941Smrg       */
113b8e80941Smrg      info->view.base_array_layer = 0;
114b8e80941Smrg      info->z_offset = layer;
115b8e80941Smrg   } else {
116b8e80941Smrg      info->view.base_array_layer = layer;
117b8e80941Smrg
118b8e80941Smrg      assert(info->view.array_len >= info->view.base_array_layer);
119b8e80941Smrg      info->view.array_len -= info->view.base_array_layer;
120b8e80941Smrg      info->z_offset = 0;
121b8e80941Smrg   }
122b8e80941Smrg
123b8e80941Smrg   /* Sandy Bridge and earlier have a limit of a maximum of 512 layers for
124b8e80941Smrg    * layered rendering.
125b8e80941Smrg    */
126b8e80941Smrg   if (is_render_target && blorp->isl_dev->info->gen <= 6)
127b8e80941Smrg      info->view.array_len = MIN2(info->view.array_len, 512);
128b8e80941Smrg
129b8e80941Smrg   if (surf->tile_x_sa || surf->tile_y_sa) {
130b8e80941Smrg      /* This is only allowed on simple 2D surfaces without MSAA */
131b8e80941Smrg      assert(info->surf.dim == ISL_SURF_DIM_2D);
132b8e80941Smrg      assert(info->surf.samples == 1);
133b8e80941Smrg      assert(info->surf.levels == 1);
134b8e80941Smrg      assert(info->surf.logical_level0_px.array_len == 1);
135b8e80941Smrg      assert(info->aux_usage == ISL_AUX_USAGE_NONE);
136b8e80941Smrg
137b8e80941Smrg      info->tile_x_sa = surf->tile_x_sa;
138b8e80941Smrg      info->tile_y_sa = surf->tile_y_sa;
139b8e80941Smrg
140b8e80941Smrg      /* Instead of using the X/Y Offset fields in RENDER_SURFACE_STATE, we
141b8e80941Smrg       * place the image at the tile boundary and offset our sampling or
142b8e80941Smrg       * rendering.  For this reason, we need to grow the image by the offset
143b8e80941Smrg       * to ensure that the hardware doesn't think we've gone past the edge.
144b8e80941Smrg       */
145b8e80941Smrg      info->surf.logical_level0_px.w += surf->tile_x_sa;
146b8e80941Smrg      info->surf.logical_level0_px.h += surf->tile_y_sa;
147b8e80941Smrg      info->surf.phys_level0_sa.w += surf->tile_x_sa;
148b8e80941Smrg      info->surf.phys_level0_sa.h += surf->tile_y_sa;
149b8e80941Smrg   }
150b8e80941Smrg}
151b8e80941Smrg
152b8e80941Smrg
153b8e80941Smrgvoid
154b8e80941Smrgblorp_params_init(struct blorp_params *params)
155b8e80941Smrg{
156b8e80941Smrg   memset(params, 0, sizeof(*params));
157b8e80941Smrg   params->num_samples = 1;
158b8e80941Smrg   params->num_draw_buffers = 1;
159b8e80941Smrg   params->num_layers = 1;
160b8e80941Smrg}
161b8e80941Smrg
162b8e80941Smrgvoid
163b8e80941Smrgbrw_blorp_init_wm_prog_key(struct brw_wm_prog_key *wm_key)
164b8e80941Smrg{
165b8e80941Smrg   memset(wm_key, 0, sizeof(*wm_key));
166b8e80941Smrg   wm_key->nr_color_regions = 1;
167b8e80941Smrg   for (int i = 0; i < MAX_SAMPLERS; i++)
168b8e80941Smrg      wm_key->tex.swizzles[i] = SWIZZLE_XYZW;
169b8e80941Smrg}
170b8e80941Smrg
171b8e80941Smrgconst unsigned *
172b8e80941Smrgblorp_compile_fs(struct blorp_context *blorp, void *mem_ctx,
173b8e80941Smrg                 struct nir_shader *nir,
174b8e80941Smrg                 struct brw_wm_prog_key *wm_key,
175b8e80941Smrg                 bool use_repclear,
176b8e80941Smrg                 struct brw_wm_prog_data *wm_prog_data)
177b8e80941Smrg{
178b8e80941Smrg   const struct brw_compiler *compiler = blorp->compiler;
179b8e80941Smrg
180b8e80941Smrg   nir->options =
181b8e80941Smrg      compiler->glsl_compiler_options[MESA_SHADER_FRAGMENT].NirOptions;
182b8e80941Smrg
183b8e80941Smrg   memset(wm_prog_data, 0, sizeof(*wm_prog_data));
184b8e80941Smrg
185b8e80941Smrg   assert(exec_list_is_empty(&nir->uniforms));
186b8e80941Smrg   wm_prog_data->base.nr_params = 0;
187b8e80941Smrg   wm_prog_data->base.param = NULL;
188b8e80941Smrg
189b8e80941Smrg   /* BLORP always uses the first two binding table entries:
190b8e80941Smrg    * - Surface 0 is the render target (which always start from 0)
191b8e80941Smrg    * - Surface 1 is the source texture
192b8e80941Smrg    */
193b8e80941Smrg   wm_prog_data->base.binding_table.texture_start = BLORP_TEXTURE_BT_INDEX;
194b8e80941Smrg
195b8e80941Smrg   nir = brw_preprocess_nir(compiler, nir, NULL);
196b8e80941Smrg   nir_remove_dead_variables(nir, nir_var_shader_in);
197b8e80941Smrg   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
198b8e80941Smrg
199b8e80941Smrg   if (blorp->compiler->devinfo->gen < 6) {
200b8e80941Smrg      if (nir->info.fs.uses_discard)
201b8e80941Smrg         wm_key->iz_lookup |= BRW_WM_IZ_PS_KILL_ALPHATEST_BIT;
202b8e80941Smrg
203b8e80941Smrg      wm_key->input_slots_valid = nir->info.inputs_read | VARYING_BIT_POS;
204b8e80941Smrg   }
205b8e80941Smrg
206b8e80941Smrg   const unsigned *program =
207b8e80941Smrg      brw_compile_fs(compiler, blorp->driver_ctx, mem_ctx, wm_key,
208b8e80941Smrg                     wm_prog_data, nir, NULL, -1, -1, -1, false, use_repclear,
209b8e80941Smrg                     NULL, NULL);
210b8e80941Smrg
211b8e80941Smrg   return program;
212b8e80941Smrg}
213b8e80941Smrg
214b8e80941Smrgconst unsigned *
215b8e80941Smrgblorp_compile_vs(struct blorp_context *blorp, void *mem_ctx,
216b8e80941Smrg                 struct nir_shader *nir,
217b8e80941Smrg                 struct brw_vs_prog_data *vs_prog_data)
218b8e80941Smrg{
219b8e80941Smrg   const struct brw_compiler *compiler = blorp->compiler;
220b8e80941Smrg
221b8e80941Smrg   nir->options =
222b8e80941Smrg      compiler->glsl_compiler_options[MESA_SHADER_VERTEX].NirOptions;
223b8e80941Smrg
224b8e80941Smrg   nir = brw_preprocess_nir(compiler, nir, NULL);
225b8e80941Smrg   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
226b8e80941Smrg
227b8e80941Smrg   vs_prog_data->inputs_read = nir->info.inputs_read;
228b8e80941Smrg
229b8e80941Smrg   brw_compute_vue_map(compiler->devinfo,
230b8e80941Smrg                       &vs_prog_data->base.vue_map,
231b8e80941Smrg                       nir->info.outputs_written,
232b8e80941Smrg                       nir->info.separate_shader);
233b8e80941Smrg
234b8e80941Smrg   struct brw_vs_prog_key vs_key = { 0, };
235b8e80941Smrg
236b8e80941Smrg   const unsigned *program =
237b8e80941Smrg      brw_compile_vs(compiler, blorp->driver_ctx, mem_ctx,
238b8e80941Smrg                     &vs_key, vs_prog_data, nir, -1, NULL);
239b8e80941Smrg
240b8e80941Smrg   return program;
241b8e80941Smrg}
242b8e80941Smrg
243b8e80941Smrgstruct blorp_sf_key {
244b8e80941Smrg   enum blorp_shader_type shader_type; /* Must be BLORP_SHADER_TYPE_GEN4_SF */
245b8e80941Smrg
246b8e80941Smrg   struct brw_sf_prog_key key;
247b8e80941Smrg};
248b8e80941Smrg
249b8e80941Smrgbool
250b8e80941Smrgblorp_ensure_sf_program(struct blorp_batch *batch,
251b8e80941Smrg                        struct blorp_params *params)
252b8e80941Smrg{
253b8e80941Smrg   struct blorp_context *blorp = batch->blorp;
254b8e80941Smrg   const struct brw_wm_prog_data *wm_prog_data = params->wm_prog_data;
255b8e80941Smrg   assert(params->wm_prog_data);
256b8e80941Smrg
257b8e80941Smrg   /* Gen6+ doesn't need a strips and fans program */
258b8e80941Smrg   if (blorp->compiler->devinfo->gen >= 6)
259b8e80941Smrg      return true;
260b8e80941Smrg
261b8e80941Smrg   struct blorp_sf_key key = {
262b8e80941Smrg      .shader_type = BLORP_SHADER_TYPE_GEN4_SF,
263b8e80941Smrg   };
264b8e80941Smrg
265b8e80941Smrg   /* Everything gets compacted in vertex setup, so we just need a
266b8e80941Smrg    * pass-through for the correct number of input varyings.
267b8e80941Smrg    */
268b8e80941Smrg   const uint64_t slots_valid = VARYING_BIT_POS |
269b8e80941Smrg      ((1ull << wm_prog_data->num_varying_inputs) - 1) << VARYING_SLOT_VAR0;
270b8e80941Smrg
271b8e80941Smrg   key.key.attrs = slots_valid;
272b8e80941Smrg   key.key.primitive = BRW_SF_PRIM_TRIANGLES;
273b8e80941Smrg   key.key.contains_flat_varying = wm_prog_data->contains_flat_varying;
274b8e80941Smrg
275b8e80941Smrg   STATIC_ASSERT(sizeof(key.key.interp_mode) ==
276b8e80941Smrg                 sizeof(wm_prog_data->interp_mode));
277b8e80941Smrg   memcpy(key.key.interp_mode, wm_prog_data->interp_mode,
278b8e80941Smrg          sizeof(key.key.interp_mode));
279b8e80941Smrg
280b8e80941Smrg   if (blorp->lookup_shader(batch, &key, sizeof(key),
281b8e80941Smrg                            &params->sf_prog_kernel, &params->sf_prog_data))
282b8e80941Smrg      return true;
283b8e80941Smrg
284b8e80941Smrg   void *mem_ctx = ralloc_context(NULL);
285b8e80941Smrg
286b8e80941Smrg   const unsigned *program;
287b8e80941Smrg   unsigned program_size;
288b8e80941Smrg
289b8e80941Smrg   struct brw_vue_map vue_map;
290b8e80941Smrg   brw_compute_vue_map(blorp->compiler->devinfo, &vue_map, slots_valid, false);
291b8e80941Smrg
292b8e80941Smrg   struct brw_sf_prog_data prog_data_tmp;
293b8e80941Smrg   program = brw_compile_sf(blorp->compiler, mem_ctx, &key.key,
294b8e80941Smrg                            &prog_data_tmp, &vue_map, &program_size);
295b8e80941Smrg
296b8e80941Smrg   bool result =
297b8e80941Smrg      blorp->upload_shader(batch, &key, sizeof(key), program, program_size,
298b8e80941Smrg                           (void *)&prog_data_tmp, sizeof(prog_data_tmp),
299b8e80941Smrg                           &params->sf_prog_kernel, &params->sf_prog_data);
300b8e80941Smrg
301b8e80941Smrg   ralloc_free(mem_ctx);
302b8e80941Smrg
303b8e80941Smrg   return result;
304b8e80941Smrg}
305b8e80941Smrg
306b8e80941Smrgvoid
307b8e80941Smrgblorp_hiz_op(struct blorp_batch *batch, struct blorp_surf *surf,
308b8e80941Smrg             uint32_t level, uint32_t start_layer, uint32_t num_layers,
309b8e80941Smrg             enum isl_aux_op op)
310b8e80941Smrg{
311b8e80941Smrg   struct blorp_params params;
312b8e80941Smrg   blorp_params_init(&params);
313b8e80941Smrg
314b8e80941Smrg   params.hiz_op = op;
315b8e80941Smrg   params.full_surface_hiz_op = true;
316b8e80941Smrg
317b8e80941Smrg   for (uint32_t a = 0; a < num_layers; a++) {
318b8e80941Smrg      const uint32_t layer = start_layer + a;
319b8e80941Smrg
320b8e80941Smrg      brw_blorp_surface_info_init(batch->blorp, &params.depth, surf, level,
321b8e80941Smrg                                  layer, surf->surf->format, true);
322b8e80941Smrg
323b8e80941Smrg      /* Align the rectangle primitive to 8x4 pixels.
324b8e80941Smrg       *
325b8e80941Smrg       * During fast depth clears, the emitted rectangle primitive  must be
326b8e80941Smrg       * aligned to 8x4 pixels.  From the Ivybridge PRM, Vol 2 Part 1 Section
327b8e80941Smrg       * 11.5.3.1 Depth Buffer Clear (and the matching section in the
328b8e80941Smrg       * Sandybridge PRM):
329b8e80941Smrg       *
330b8e80941Smrg       *     If Number of Multisamples is NUMSAMPLES_1, the rectangle must be
331b8e80941Smrg       *     aligned to an 8x4 pixel block relative to the upper left corner
332b8e80941Smrg       *     of the depth buffer [...]
333b8e80941Smrg       *
334b8e80941Smrg       * For hiz resolves, the rectangle must also be 8x4 aligned. Item
335b8e80941Smrg       * WaHizAmbiguate8x4Aligned from the Haswell workarounds page and the
336b8e80941Smrg       * Ivybridge simulator require the alignment.
337b8e80941Smrg       *
338b8e80941Smrg       * To be safe, let's just align the rect for all hiz operations and all
339b8e80941Smrg       * hardware generations.
340b8e80941Smrg       *
341b8e80941Smrg       * However, for some miptree slices of a Z24 texture, emitting an 8x4
342b8e80941Smrg       * aligned rectangle that covers the slice may clobber adjacent slices
343b8e80941Smrg       * if we strictly adhered to the texture alignments specified in the
344b8e80941Smrg       * PRM.  The Ivybridge PRM, Section "Alignment Unit Size", states that
345b8e80941Smrg       * SURFACE_STATE.Surface_Horizontal_Alignment should be 4 for Z24
346b8e80941Smrg       * surfaces, not 8. But commit 1f112cc increased the alignment from 4 to
347b8e80941Smrg       * 8, which prevents the clobbering.
348b8e80941Smrg       */
349b8e80941Smrg      params.x1 = minify(params.depth.surf.logical_level0_px.width,
350b8e80941Smrg                         params.depth.view.base_level);
351b8e80941Smrg      params.y1 = minify(params.depth.surf.logical_level0_px.height,
352b8e80941Smrg                         params.depth.view.base_level);
353b8e80941Smrg      params.x1 = ALIGN(params.x1, 8);
354b8e80941Smrg      params.y1 = ALIGN(params.y1, 4);
355b8e80941Smrg
356b8e80941Smrg      if (params.depth.view.base_level == 0) {
357b8e80941Smrg         /* TODO: What about MSAA? */
358b8e80941Smrg         params.depth.surf.logical_level0_px.width = params.x1;
359b8e80941Smrg         params.depth.surf.logical_level0_px.height = params.y1;
360b8e80941Smrg      }
361b8e80941Smrg
362b8e80941Smrg      params.dst.surf.samples = params.depth.surf.samples;
363b8e80941Smrg      params.dst.surf.logical_level0_px = params.depth.surf.logical_level0_px;
364b8e80941Smrg      params.depth_format =
365b8e80941Smrg         isl_format_get_depth_format(surf->surf->format, false);
366b8e80941Smrg      params.num_samples = params.depth.surf.samples;
367b8e80941Smrg
368b8e80941Smrg      batch->blorp->exec(batch, &params);
369b8e80941Smrg   }
370b8e80941Smrg}
371