1/*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include "brw_context.h"
25#include "brw_cs.h"
26#include "brw_state.h"
27#include "brw_defines.h"
28#include "brw_program.h"
29#include "brw_batch.h"
30#include "brw_buffer_objects.h"
31#include "program/prog_parameter.h"
32#include "main/shaderapi.h"
33
34static uint32_t
35f_as_u32(float f)
36{
37   union fi fi = { .f = f };
38   return fi.ui;
39}
40
41static uint32_t
42brw_param_value(struct brw_context *brw,
43                const struct gl_program *prog,
44                const struct brw_stage_state *stage_state,
45                uint32_t param)
46{
47   struct gl_context *ctx = &brw->ctx;
48
49   switch (BRW_PARAM_DOMAIN(param)) {
50   case BRW_PARAM_DOMAIN_BUILTIN:
51      if (param == BRW_PARAM_BUILTIN_ZERO) {
52         return 0;
53      } else if (BRW_PARAM_BUILTIN_IS_CLIP_PLANE(param)) {
54         gl_clip_plane *clip_planes = brw_select_clip_planes(ctx);
55         unsigned idx = BRW_PARAM_BUILTIN_CLIP_PLANE_IDX(param);
56         unsigned comp = BRW_PARAM_BUILTIN_CLIP_PLANE_COMP(param);
57         return ((uint32_t *)clip_planes[idx])[comp];
58      } else if (param >= BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X &&
59                 param <= BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_W) {
60         unsigned i = param - BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X;
61         return f_as_u32(ctx->TessCtrlProgram.patch_default_outer_level[i]);
62      } else if (param == BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X) {
63         return f_as_u32(ctx->TessCtrlProgram.patch_default_inner_level[0]);
64      } else if (param == BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_Y) {
65         return f_as_u32(ctx->TessCtrlProgram.patch_default_inner_level[1]);
66      } else if (param >= BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_X &&
67                 param <= BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_Z) {
68         unsigned i = param - BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_X;
69         return brw->compute.group_size[i];
70      } else {
71         unreachable("Invalid param builtin");
72      }
73
74   case BRW_PARAM_DOMAIN_PARAMETER: {
75      unsigned idx = BRW_PARAM_PARAMETER_IDX(param);
76      unsigned offset = prog->Parameters->Parameters[idx].ValueOffset;
77      unsigned comp = BRW_PARAM_PARAMETER_COMP(param);
78      assert(idx < prog->Parameters->NumParameters);
79      return prog->Parameters->ParameterValues[offset + comp].u;
80   }
81
82   case BRW_PARAM_DOMAIN_UNIFORM: {
83      unsigned idx = BRW_PARAM_UNIFORM_IDX(param);
84      assert(idx < prog->sh.data->NumUniformDataSlots);
85      return prog->sh.data->UniformDataSlots[idx].u;
86   }
87
88   case BRW_PARAM_DOMAIN_IMAGE: {
89      unsigned idx = BRW_PARAM_IMAGE_IDX(param);
90      unsigned offset = BRW_PARAM_IMAGE_OFFSET(param);
91      assert(offset < ARRAY_SIZE(stage_state->image_param));
92      return ((uint32_t *)&stage_state->image_param[idx])[offset];
93   }
94
95   default:
96      unreachable("Invalid param domain");
97   }
98}
99
100
/**
 * Resolves a list of encoded parameters and stores the resulting 32-bit
 * values contiguously at void_dst.
 *
 * Exactly nr_params dwords are written, in the same order as the entries
 * of param; each one is produced by brw_param_value().
 */
void
brw_populate_constant_data(struct brw_context *brw,
                           const struct gl_program *prog,
                           const struct brw_stage_state *stage_state,
                           void *void_dst,
                           const uint32_t *param,
                           unsigned nr_params)
{
   const uint32_t *src = param;
   uint32_t *out = void_dst;

   for (unsigned remaining = nr_params; remaining > 0; remaining--)
      *out++ = brw_param_value(brw, prog, stage_state, *src++);
}
113
114
115/**
116 * Creates a streamed BO containing the push constants for the VS or GS on
117 * gfx6+.
118 *
119 * Push constants are constant values (such as GLSL uniforms) that are
120 * pre-loaded into a shader stage's register space at thread spawn time.
121 *
122 * Not all GLSL uniforms will be uploaded as push constants: The hardware has
123 * a limitation of 32 or 64 EU registers (256 or 512 floats) per stage to be
124 * uploaded as push constants, while GL 4.4 requires at least 1024 components
125 * to be usable for the VS.  Plus, currently we always use pull constants
126 * instead of push constants when doing variable-index array access.
127 *
128 * See brw_curbe.c for the equivalent gfx4/5 code.
129 */
130void
131gfx6_upload_push_constants(struct brw_context *brw,
132                           const struct gl_program *prog,
133                           const struct brw_stage_prog_data *prog_data,
134                           struct brw_stage_state *stage_state)
135{
136   const struct intel_device_info *devinfo = &brw->screen->devinfo;
137   struct gl_context *ctx = &brw->ctx;
138
139   bool active = prog_data &&
140      (stage_state->stage != MESA_SHADER_TESS_CTRL ||
141       brw->programs[MESA_SHADER_TESS_EVAL]);
142
143   if (active)
144      _mesa_shader_write_subroutine_indices(ctx, stage_state->stage);
145
146   if (!active || prog_data->nr_params == 0) {
147      stage_state->push_const_size = 0;
148   } else {
149      /* Updates the ParamaterValues[i] pointers for all parameters of the
150       * basic type of PROGRAM_STATE_VAR.
151       */
152      /* XXX: Should this happen somewhere before to get our state flag set? */
153      if (prog)
154         _mesa_load_state_parameters(ctx, prog->Parameters);
155
156      int i;
157      const int size = prog_data->nr_params * sizeof(gl_constant_value);
158      gl_constant_value *param;
159      if (devinfo->verx10 >= 75) {
160         param = brw_upload_space(&brw->upload, size, 32,
161                                  &stage_state->push_const_bo,
162                                  &stage_state->push_const_offset);
163      } else {
164         param = brw_state_batch(brw, size, 32,
165                                 &stage_state->push_const_offset);
166      }
167
168      STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float));
169
170      /* _NEW_PROGRAM_CONSTANTS
171       *
172       * Also _NEW_TRANSFORM -- we may reference clip planes other than as a
173       * side effect of dereferencing uniforms, so _NEW_PROGRAM_CONSTANTS
174       * wouldn't be set for them.
175       */
176      brw_populate_constant_data(brw, prog, stage_state, param,
177                                 prog_data->param,
178                                 prog_data->nr_params);
179
180      if (0) {
181         fprintf(stderr, "%s constants:\n",
182                 _mesa_shader_stage_to_string(stage_state->stage));
183         for (i = 0; i < prog_data->nr_params; i++) {
184            if ((i & 7) == 0)
185               fprintf(stderr, "g%d: ",
186                       prog_data->dispatch_grf_start_reg + i / 8);
187            fprintf(stderr, "%8f ", param[i].f);
188            if ((i & 7) == 7)
189               fprintf(stderr, "\n");
190         }
191         if ((i & 7) != 0)
192            fprintf(stderr, "\n");
193         fprintf(stderr, "\n");
194      }
195
196      stage_state->push_const_size = ALIGN(prog_data->nr_params, 8) / 8;
197      /* We can only push 32 registers of constants at a time. */
198
199      /* From the SNB PRM (vol2, part 1, section 3.2.1.4: 3DSTATE_CONSTANT_VS:
200       *
201       *     "The sum of all four read length fields (each incremented to
202       *      represent the actual read length) must be less than or equal to
203       *      32"
204       *
205       * From the IVB PRM (vol2, part 1, section 3.2.1.3: 3DSTATE_CONSTANT_VS:
206       *
207       *     "The sum of all four read length fields must be less than or
208       *      equal to the size of 64"
209       *
210       * The other shader stages all match the VS's limits.
211       */
212      assert(stage_state->push_const_size <= 32);
213   }
214
215   stage_state->push_constants_dirty = true;
216}
217
218
219/**
220 * Creates a temporary BO containing the pull constant data for the shader
221 * stage, and the SURFACE_STATE struct that points at it.
222 *
223 * Pull constants are GLSL uniforms (and other constant data) beyond what we
224 * could fit as push constants, or that have variable-index array access
225 * (which is easiest to support using pull constants, and avoids filling
226 * register space with mostly-unused data).
227 *
228 * Compare this path to brw_curbe.c for gfx4/5 push constants, and
229 * gfx6_vs_state.c for gfx6+ push constants.
230 */
231void
232brw_upload_pull_constants(struct brw_context *brw,
233                          GLbitfield64 brw_new_constbuf,
234                          const struct gl_program *prog,
235                          struct brw_stage_state *stage_state,
236                          const struct brw_stage_prog_data *prog_data)
237{
238   unsigned i;
239   uint32_t surf_index = prog_data->binding_table.pull_constants_start;
240
241   if (!prog_data->nr_pull_params) {
242      if (stage_state->surf_offset[surf_index]) {
243         stage_state->surf_offset[surf_index] = 0;
244         brw->ctx.NewDriverState |= brw_new_constbuf;
245      }
246      return;
247   }
248
249   /* Updates the ParamaterValues[i] pointers for all parameters of the
250    * basic type of PROGRAM_STATE_VAR.
251    */
252   _mesa_load_state_parameters(&brw->ctx, prog->Parameters);
253
254   /* BRW_NEW_*_PROG_DATA | _NEW_PROGRAM_CONSTANTS */
255   uint32_t size = prog_data->nr_pull_params * 4;
256   struct brw_bo *const_bo = NULL;
257   uint32_t const_offset;
258   gl_constant_value *constants = brw_upload_space(&brw->upload, size, 64,
259                                                   &const_bo, &const_offset);
260
261   STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float));
262
263   brw_populate_constant_data(brw, prog, stage_state, constants,
264                              prog_data->pull_param,
265                              prog_data->nr_pull_params);
266
267   if (0) {
268      for (i = 0; i < ALIGN(prog_data->nr_pull_params, 4) / 4; i++) {
269         const gl_constant_value *row = &constants[i * 4];
270         fprintf(stderr, "const surface %3d: %4.3f %4.3f %4.3f %4.3f\n",
271                 i, row[0].f, row[1].f, row[2].f, row[3].f);
272      }
273   }
274
275   brw_emit_buffer_surface_state(brw, &stage_state->surf_offset[surf_index],
276                                 const_bo, const_offset,
277                                 ISL_FORMAT_R32G32B32A32_FLOAT,
278                                 size, 1, 0);
279
280   brw_bo_unreference(const_bo);
281
282   brw->ctx.NewDriverState |= brw_new_constbuf;
283}
284
285/**
286 * Creates a region containing the push constants for the CS on gfx7+.
287 *
288 * Push constants are constant values (such as GLSL uniforms) that are
289 * pre-loaded into a shader stage's register space at thread spawn time.
290 *
291 * For other stages, see brw_curbe.c:brw_upload_constant_buffer for the
292 * equivalent gfx4/5 code and gfx6_vs_state.c:gfx6_upload_push_constants for
293 * gfx6+.
294 */
295void
296brw_upload_cs_push_constants(struct brw_context *brw,
297                             const struct gl_program *prog,
298                             const struct brw_cs_prog_data *cs_prog_data,
299                             struct brw_stage_state *stage_state)
300{
301   struct gl_context *ctx = &brw->ctx;
302   const struct brw_stage_prog_data *prog_data =
303      (struct brw_stage_prog_data*) cs_prog_data;
304
305   /* Updates the ParamaterValues[i] pointers for all parameters of the
306    * basic type of PROGRAM_STATE_VAR.
307    */
308   /* XXX: Should this happen somewhere before to get our state flag set? */
309   _mesa_load_state_parameters(ctx, prog->Parameters);
310
311   const struct brw_cs_dispatch_info dispatch =
312      brw_cs_get_dispatch_info(&brw->screen->devinfo, cs_prog_data,
313                               brw->compute.group_size);
314   const unsigned push_const_size =
315      brw_cs_push_const_total_size(cs_prog_data, dispatch.threads);
316
317   if (push_const_size == 0) {
318      stage_state->push_const_size = 0;
319      return;
320   }
321
322
323   uint32_t *param =
324      brw_state_batch(brw, ALIGN(push_const_size, 64),
325                      64, &stage_state->push_const_offset);
326   assert(param);
327
328   STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float));
329
330   if (cs_prog_data->push.cross_thread.size > 0) {
331      uint32_t *param_copy = param;
332      for (unsigned i = 0;
333           i < cs_prog_data->push.cross_thread.dwords;
334           i++) {
335         assert(prog_data->param[i] != BRW_PARAM_BUILTIN_SUBGROUP_ID);
336         param_copy[i] = brw_param_value(brw, prog, stage_state,
337                                         prog_data->param[i]);
338      }
339   }
340
341   if (cs_prog_data->push.per_thread.size > 0) {
342      for (unsigned t = 0; t < dispatch.threads; t++) {
343         unsigned dst =
344            8 * (cs_prog_data->push.per_thread.regs * t +
345                 cs_prog_data->push.cross_thread.regs);
346         unsigned src = cs_prog_data->push.cross_thread.dwords;
347         for ( ; src < prog_data->nr_params; src++, dst++) {
348            if (prog_data->param[src] == BRW_PARAM_BUILTIN_SUBGROUP_ID) {
349               param[dst] = t;
350            } else {
351               param[dst] = brw_param_value(brw, prog, stage_state,
352                                            prog_data->param[src]);
353            }
354         }
355      }
356   }
357
358   stage_state->push_const_size =
359      cs_prog_data->push.cross_thread.regs +
360      cs_prog_data->push.per_thread.regs;
361}
362