1/*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include "brw_context.h"
25#include "brw_state.h"
26#include "brw_defines.h"
27#include "brw_program.h"
28#include "intel_batchbuffer.h"
29#include "intel_buffer_objects.h"
30#include "program/prog_parameter.h"
31#include "main/shaderapi.h"
32
33static uint32_t
34f_as_u32(float f)
35{
36   union fi fi = { .f = f };
37   return fi.ui;
38}
39
40static uint32_t
41brw_param_value(struct brw_context *brw,
42                const struct gl_program *prog,
43                const struct brw_stage_state *stage_state,
44                uint32_t param)
45{
46   struct gl_context *ctx = &brw->ctx;
47
48   switch (BRW_PARAM_DOMAIN(param)) {
49   case BRW_PARAM_DOMAIN_BUILTIN:
50      if (param == BRW_PARAM_BUILTIN_ZERO) {
51         return 0;
52      } else if (BRW_PARAM_BUILTIN_IS_CLIP_PLANE(param)) {
53         gl_clip_plane *clip_planes = brw_select_clip_planes(ctx);
54         unsigned idx = BRW_PARAM_BUILTIN_CLIP_PLANE_IDX(param);
55         unsigned comp = BRW_PARAM_BUILTIN_CLIP_PLANE_COMP(param);
56         return ((uint32_t *)clip_planes[idx])[comp];
57      } else if (param >= BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X &&
58                 param <= BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_W) {
59         unsigned i = param - BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X;
60         return f_as_u32(ctx->TessCtrlProgram.patch_default_outer_level[i]);
61      } else if (param == BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X) {
62         return f_as_u32(ctx->TessCtrlProgram.patch_default_inner_level[0]);
63      } else if (param == BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_Y) {
64         return f_as_u32(ctx->TessCtrlProgram.patch_default_inner_level[1]);
65      } else {
66         unreachable("Invalid param builtin");
67      }
68
69   case BRW_PARAM_DOMAIN_PARAMETER: {
70      unsigned idx = BRW_PARAM_PARAMETER_IDX(param);
71      unsigned offset = prog->Parameters->ParameterValueOffset[idx];
72      unsigned comp = BRW_PARAM_PARAMETER_COMP(param);
73      assert(idx < prog->Parameters->NumParameters);
74      return prog->Parameters->ParameterValues[offset + comp].u;
75   }
76
77   case BRW_PARAM_DOMAIN_UNIFORM: {
78      unsigned idx = BRW_PARAM_UNIFORM_IDX(param);
79      assert(idx < prog->sh.data->NumUniformDataSlots);
80      return prog->sh.data->UniformDataSlots[idx].u;
81   }
82
83   case BRW_PARAM_DOMAIN_IMAGE: {
84      unsigned idx = BRW_PARAM_IMAGE_IDX(param);
85      unsigned offset = BRW_PARAM_IMAGE_OFFSET(param);
86      assert(offset < ARRAY_SIZE(stage_state->image_param));
87      return ((uint32_t *)&stage_state->image_param[idx])[offset];
88   }
89
90   default:
91      unreachable("Invalid param domain");
92   }
93}
94
95
/**
 * Fills a constant buffer by resolving each encoded parameter in turn.
 *
 * For every one of the \p nr_params entries of \p param, the value returned
 * by brw_param_value() is written to the corresponding 32-bit slot of
 * \p void_dst, in order.  Nothing is written when \p nr_params is zero.
 */
void
brw_populate_constant_data(struct brw_context *brw,
                           const struct gl_program *prog,
                           const struct brw_stage_state *stage_state,
                           void *void_dst,
                           const uint32_t *param,
                           unsigned nr_params)
{
   uint32_t *out = void_dst;

   for (unsigned left = nr_params; left > 0; left--) {
      const uint32_t encoded = *param++;

      *out++ = brw_param_value(brw, prog, stage_state, encoded);
   }
}
108
109
110/**
111 * Creates a streamed BO containing the push constants for the VS or GS on
112 * gen6+.
113 *
114 * Push constants are constant values (such as GLSL uniforms) that are
115 * pre-loaded into a shader stage's register space at thread spawn time.
116 *
117 * Not all GLSL uniforms will be uploaded as push constants: The hardware has
118 * a limitation of 32 or 64 EU registers (256 or 512 floats) per stage to be
119 * uploaded as push constants, while GL 4.4 requires at least 1024 components
120 * to be usable for the VS.  Plus, currently we always use pull constants
121 * instead of push constants when doing variable-index array access.
122 *
123 * See brw_curbe.c for the equivalent gen4/5 code.
124 */
125void
126gen6_upload_push_constants(struct brw_context *brw,
127                           const struct gl_program *prog,
128                           const struct brw_stage_prog_data *prog_data,
129                           struct brw_stage_state *stage_state)
130{
131   const struct gen_device_info *devinfo = &brw->screen->devinfo;
132   struct gl_context *ctx = &brw->ctx;
133
134   bool active = prog_data &&
135      (stage_state->stage != MESA_SHADER_TESS_CTRL ||
136       brw->programs[MESA_SHADER_TESS_EVAL]);
137
138   if (active)
139      _mesa_shader_write_subroutine_indices(ctx, stage_state->stage);
140
141   if (!active || prog_data->nr_params == 0) {
142      stage_state->push_const_size = 0;
143   } else {
144      /* Updates the ParamaterValues[i] pointers for all parameters of the
145       * basic type of PROGRAM_STATE_VAR.
146       */
147      /* XXX: Should this happen somewhere before to get our state flag set? */
148      if (prog)
149         _mesa_load_state_parameters(ctx, prog->Parameters);
150
151      int i;
152      const int size = prog_data->nr_params * sizeof(gl_constant_value);
153      gl_constant_value *param;
154      if (devinfo->gen >= 8 || devinfo->is_haswell) {
155         param = brw_upload_space(&brw->upload, size, 32,
156                                  &stage_state->push_const_bo,
157                                  &stage_state->push_const_offset);
158      } else {
159         param = brw_state_batch(brw, size, 32,
160                                 &stage_state->push_const_offset);
161      }
162
163      STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float));
164
165      /* _NEW_PROGRAM_CONSTANTS
166       *
167       * Also _NEW_TRANSFORM -- we may reference clip planes other than as a
168       * side effect of dereferencing uniforms, so _NEW_PROGRAM_CONSTANTS
169       * wouldn't be set for them.
170       */
171      brw_populate_constant_data(brw, prog, stage_state, param,
172                                 prog_data->param,
173                                 prog_data->nr_params);
174
175      if (0) {
176         fprintf(stderr, "%s constants:\n",
177                 _mesa_shader_stage_to_string(stage_state->stage));
178         for (i = 0; i < prog_data->nr_params; i++) {
179            if ((i & 7) == 0)
180               fprintf(stderr, "g%d: ",
181                       prog_data->dispatch_grf_start_reg + i / 8);
182            fprintf(stderr, "%8f ", param[i].f);
183            if ((i & 7) == 7)
184               fprintf(stderr, "\n");
185         }
186         if ((i & 7) != 0)
187            fprintf(stderr, "\n");
188         fprintf(stderr, "\n");
189      }
190
191      stage_state->push_const_size = ALIGN(prog_data->nr_params, 8) / 8;
192      /* We can only push 32 registers of constants at a time. */
193
194      /* From the SNB PRM (vol2, part 1, section 3.2.1.4: 3DSTATE_CONSTANT_VS:
195       *
196       *     "The sum of all four read length fields (each incremented to
197       *      represent the actual read length) must be less than or equal to
198       *      32"
199       *
200       * From the IVB PRM (vol2, part 1, section 3.2.1.3: 3DSTATE_CONSTANT_VS:
201       *
202       *     "The sum of all four read length fields must be less than or
203       *      equal to the size of 64"
204       *
205       * The other shader stages all match the VS's limits.
206       */
207      assert(stage_state->push_const_size <= 32);
208   }
209
210   stage_state->push_constants_dirty = true;
211}
212
213
214/**
215 * Creates a temporary BO containing the pull constant data for the shader
216 * stage, and the SURFACE_STATE struct that points at it.
217 *
218 * Pull constants are GLSL uniforms (and other constant data) beyond what we
219 * could fit as push constants, or that have variable-index array access
220 * (which is easiest to support using pull constants, and avoids filling
221 * register space with mostly-unused data).
222 *
223 * Compare this path to brw_curbe.c for gen4/5 push constants, and
224 * gen6_vs_state.c for gen6+ push constants.
225 */
226void
227brw_upload_pull_constants(struct brw_context *brw,
228                          GLbitfield64 brw_new_constbuf,
229                          const struct gl_program *prog,
230                          struct brw_stage_state *stage_state,
231                          const struct brw_stage_prog_data *prog_data)
232{
233   unsigned i;
234   uint32_t surf_index = prog_data->binding_table.pull_constants_start;
235
236   if (!prog_data->nr_pull_params) {
237      if (stage_state->surf_offset[surf_index]) {
238	 stage_state->surf_offset[surf_index] = 0;
239	 brw->ctx.NewDriverState |= brw_new_constbuf;
240      }
241      return;
242   }
243
244   /* Updates the ParamaterValues[i] pointers for all parameters of the
245    * basic type of PROGRAM_STATE_VAR.
246    */
247   _mesa_load_state_parameters(&brw->ctx, prog->Parameters);
248
249   /* BRW_NEW_*_PROG_DATA | _NEW_PROGRAM_CONSTANTS */
250   uint32_t size = prog_data->nr_pull_params * 4;
251   struct brw_bo *const_bo = NULL;
252   uint32_t const_offset;
253   gl_constant_value *constants = brw_upload_space(&brw->upload, size, 64,
254                                                   &const_bo, &const_offset);
255
256   STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float));
257
258   brw_populate_constant_data(brw, prog, stage_state, constants,
259                              prog_data->pull_param,
260                              prog_data->nr_pull_params);
261
262   if (0) {
263      for (i = 0; i < ALIGN(prog_data->nr_pull_params, 4) / 4; i++) {
264	 const gl_constant_value *row = &constants[i * 4];
265	 fprintf(stderr, "const surface %3d: %4.3f %4.3f %4.3f %4.3f\n",
266                 i, row[0].f, row[1].f, row[2].f, row[3].f);
267      }
268   }
269
270   brw_emit_buffer_surface_state(brw, &stage_state->surf_offset[surf_index],
271                                 const_bo, const_offset,
272                                 ISL_FORMAT_R32G32B32A32_FLOAT,
273                                 size, 1, 0);
274
275   brw_bo_unreference(const_bo);
276
277   brw->ctx.NewDriverState |= brw_new_constbuf;
278}
279
280/**
281 * Creates a region containing the push constants for the CS on gen7+.
282 *
283 * Push constants are constant values (such as GLSL uniforms) that are
284 * pre-loaded into a shader stage's register space at thread spawn time.
285 *
286 * For other stages, see brw_curbe.c:brw_upload_constant_buffer for the
287 * equivalent gen4/5 code and gen6_vs_state.c:gen6_upload_push_constants for
288 * gen6+.
289 */
290void
291brw_upload_cs_push_constants(struct brw_context *brw,
292                             const struct gl_program *prog,
293                             const struct brw_cs_prog_data *cs_prog_data,
294                             struct brw_stage_state *stage_state)
295{
296   struct gl_context *ctx = &brw->ctx;
297   const struct brw_stage_prog_data *prog_data =
298      (struct brw_stage_prog_data*) cs_prog_data;
299
300   /* Updates the ParamaterValues[i] pointers for all parameters of the
301    * basic type of PROGRAM_STATE_VAR.
302    */
303   /* XXX: Should this happen somewhere before to get our state flag set? */
304   _mesa_load_state_parameters(ctx, prog->Parameters);
305
306   if (cs_prog_data->push.total.size == 0) {
307      stage_state->push_const_size = 0;
308      return;
309   }
310
311
312   uint32_t *param =
313      brw_state_batch(brw, ALIGN(cs_prog_data->push.total.size, 64),
314                      64, &stage_state->push_const_offset);
315   assert(param);
316
317   STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float));
318
319   if (cs_prog_data->push.cross_thread.size > 0) {
320      uint32_t *param_copy = param;
321      for (unsigned i = 0;
322           i < cs_prog_data->push.cross_thread.dwords;
323           i++) {
324         assert(prog_data->param[i] != BRW_PARAM_BUILTIN_SUBGROUP_ID);
325         param_copy[i] = brw_param_value(brw, prog, stage_state,
326                                         prog_data->param[i]);
327      }
328   }
329
330   if (cs_prog_data->push.per_thread.size > 0) {
331      for (unsigned t = 0; t < cs_prog_data->threads; t++) {
332         unsigned dst =
333            8 * (cs_prog_data->push.per_thread.regs * t +
334                 cs_prog_data->push.cross_thread.regs);
335         unsigned src = cs_prog_data->push.cross_thread.dwords;
336         for ( ; src < prog_data->nr_params; src++, dst++) {
337            if (prog_data->param[src] == BRW_PARAM_BUILTIN_SUBGROUP_ID) {
338               param[dst] = t;
339            } else {
340               param[dst] = brw_param_value(brw, prog, stage_state,
341                                            prog_data->param[src]);
342            }
343         }
344      }
345   }
346
347   stage_state->push_const_size =
348      cs_prog_data->push.cross_thread.regs +
349      cs_prog_data->push.per_thread.regs;
350}
351