1/*
2 * Copyright © 2014 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24#include "glheader.h"
25#include "bufferobj.h"
26#include "compute.h"
27#include "context.h"
28
29static bool
30check_valid_to_compute(struct gl_context *ctx, const char *function)
31{
32   if (!_mesa_has_compute_shaders(ctx)) {
33      _mesa_error(ctx, GL_INVALID_OPERATION,
34                  "unsupported function (%s) called",
35                  function);
36      return false;
37   }
38
39   /* From the OpenGL 4.3 Core Specification, Chapter 19, Compute Shaders:
40    *
41    * "An INVALID_OPERATION error is generated if there is no active program
42    *  for the compute shader stage."
43    */
44   if (ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE] == NULL) {
45      _mesa_error(ctx, GL_INVALID_OPERATION,
46                  "%s(no active compute shader)",
47                  function);
48      return false;
49   }
50
51   return true;
52}
53
54static bool
55validate_DispatchCompute(struct gl_context *ctx, const GLuint *num_groups)
56{
57   if (!check_valid_to_compute(ctx, "glDispatchCompute"))
58      return GL_FALSE;
59
60   for (int i = 0; i < 3; i++) {
61      /* From the OpenGL 4.3 Core Specification, Chapter 19, Compute Shaders:
62       *
63       * "An INVALID_VALUE error is generated if any of num_groups_x,
64       *  num_groups_y and num_groups_z are greater than or equal to the
65       *  maximum work group count for the corresponding dimension."
66       *
67       * However, the "or equal to" portions appears to be a specification
68       * bug. In all other areas, the specification appears to indicate that
69       * the number of workgroups can match the MAX_COMPUTE_WORK_GROUP_COUNT
70       * value. For example, under DispatchComputeIndirect:
71       *
72       * "If any of num_groups_x, num_groups_y or num_groups_z is greater than
73       *  the value of MAX_COMPUTE_WORK_GROUP_COUNT for the corresponding
74       *  dimension then the results are undefined."
75       *
76       * Additionally, the OpenGLES 3.1 specification does not contain "or
77       * equal to" as an error condition.
78       */
79      if (num_groups[i] > ctx->Const.MaxComputeWorkGroupCount[i]) {
80         _mesa_error(ctx, GL_INVALID_VALUE,
81                     "glDispatchCompute(num_groups_%c)", 'x' + i);
82         return GL_FALSE;
83      }
84   }
85
86   /* The ARB_compute_variable_group_size spec says:
87    *
88    * "An INVALID_OPERATION error is generated by DispatchCompute if the active
89    *  program for the compute shader stage has a variable work group size."
90    */
91   struct gl_program *prog = ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
92   if (prog->info.workgroup_size_variable) {
93      _mesa_error(ctx, GL_INVALID_OPERATION,
94                  "glDispatchCompute(variable work group size forbidden)");
95      return GL_FALSE;
96   }
97
98   return GL_TRUE;
99}
100
101static bool
102validate_DispatchComputeGroupSizeARB(struct gl_context *ctx,
103                                     const GLuint *num_groups,
104                                     const GLuint *group_size)
105{
106   if (!check_valid_to_compute(ctx, "glDispatchComputeGroupSizeARB"))
107      return GL_FALSE;
108
109   /* The ARB_compute_variable_group_size spec says:
110    *
111    * "An INVALID_OPERATION error is generated by
112    *  DispatchComputeGroupSizeARB if the active program for the compute
113    *  shader stage has a fixed work group size."
114    */
115   struct gl_program *prog = ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
116   if (!prog->info.workgroup_size_variable) {
117      _mesa_error(ctx, GL_INVALID_OPERATION,
118                  "glDispatchComputeGroupSizeARB(fixed work group size "
119                  "forbidden)");
120      return GL_FALSE;
121   }
122
123   for (int i = 0; i < 3; i++) {
124      /* The ARB_compute_variable_group_size spec says:
125       *
126       * "An INVALID_VALUE error is generated if any of num_groups_x,
127       *  num_groups_y and num_groups_z are greater than or equal to the
128       *  maximum work group count for the corresponding dimension."
129       */
130      if (num_groups[i] > ctx->Const.MaxComputeWorkGroupCount[i]) {
131         _mesa_error(ctx, GL_INVALID_VALUE,
132                     "glDispatchComputeGroupSizeARB(num_groups_%c)", 'x' + i);
133         return GL_FALSE;
134      }
135
136      /* The ARB_compute_variable_group_size spec says:
137       *
138       * "An INVALID_VALUE error is generated by DispatchComputeGroupSizeARB if
139       *  any of <group_size_x>, <group_size_y>, or <group_size_z> is less than
140       *  or equal to zero or greater than the maximum local work group size
141       *  for compute shaders with variable group size
142       *  (MAX_COMPUTE_VARIABLE_GROUP_SIZE_ARB) in the corresponding
143       *  dimension."
144       *
145       * However, the "less than" is a spec bug because they are declared as
146       * unsigned integers.
147       */
148      if (group_size[i] == 0 ||
149          group_size[i] > ctx->Const.MaxComputeVariableGroupSize[i]) {
150         _mesa_error(ctx, GL_INVALID_VALUE,
151                     "glDispatchComputeGroupSizeARB(group_size_%c)", 'x' + i);
152         return GL_FALSE;
153      }
154   }
155
156   /* The ARB_compute_variable_group_size spec says:
157    *
158    * "An INVALID_VALUE error is generated by DispatchComputeGroupSizeARB if
159    *  the product of <group_size_x>, <group_size_y>, and <group_size_z> exceeds
160    *  the implementation-dependent maximum local work group invocation count
161    *  for compute shaders with variable group size
162    *  (MAX_COMPUTE_VARIABLE_GROUP_INVOCATIONS_ARB)."
163    */
164   uint64_t total_invocations = group_size[0] * group_size[1];
165   if (total_invocations <= UINT32_MAX) {
166      /* Only bother multiplying the third value if total still fits in
167       * 32-bit, since MaxComputeVariableGroupInvocations is also 32-bit.
168       */
169      total_invocations *= group_size[2];
170   }
171   if (total_invocations > ctx->Const.MaxComputeVariableGroupInvocations) {
172      _mesa_error(ctx, GL_INVALID_VALUE,
173                  "glDispatchComputeGroupSizeARB(product of local_sizes "
174                  "exceeds MAX_COMPUTE_VARIABLE_GROUP_INVOCATIONS_ARB "
175                  "(%u * %u * %u > %u))",
176                  group_size[0], group_size[1], group_size[2],
177                  ctx->Const.MaxComputeVariableGroupInvocations);
178      return GL_FALSE;
179   }
180
181   /* The NV_compute_shader_derivatives spec says:
182    *
183    * "An INVALID_VALUE error is generated by DispatchComputeGroupSizeARB if
184    *  the active program for the compute shader stage has a compute shader
185    *  using the "derivative_group_quadsNV" layout qualifier and
186    *  <group_size_x> or <group_size_y> is not a multiple of two.
187    *
188    *  An INVALID_VALUE error is generated by DispatchComputeGroupSizeARB if
189    *  the active program for the compute shader stage has a compute shader
190    *  using the "derivative_group_linearNV" layout qualifier and the product
191    *  of <group_size_x>, <group_size_y>, and <group_size_z> is not a multiple
192    *  of four."
193    */
194   if (prog->info.cs.derivative_group == DERIVATIVE_GROUP_QUADS &&
195       ((group_size[0] & 1) || (group_size[1] & 1))) {
196      _mesa_error(ctx, GL_INVALID_VALUE,
197                  "glDispatchComputeGroupSizeARB(derivative_group_quadsNV "
198                  "requires group_size_x (%d) and group_size_y (%d) to be "
199                  "divisble by 2)", group_size[0], group_size[1]);
200      return GL_FALSE;
201   }
202
203   if (prog->info.cs.derivative_group == DERIVATIVE_GROUP_LINEAR &&
204       total_invocations & 3) {
205      _mesa_error(ctx, GL_INVALID_VALUE,
206                  "glDispatchComputeGroupSizeARB(derivative_group_linearNV "
207                  "requires product of group sizes (%"PRIu64") to be divisible "
208                  "by 4)", total_invocations);
209      return GL_FALSE;
210   }
211
212   return GL_TRUE;
213}
214
215static bool
216valid_dispatch_indirect(struct gl_context *ctx,  GLintptr indirect)
217{
218   GLsizei size = 3 * sizeof(GLuint);
219   const uint64_t end = (uint64_t) indirect + size;
220   const char *name = "glDispatchComputeIndirect";
221
222   if (!check_valid_to_compute(ctx, name))
223      return GL_FALSE;
224
225   /* From the OpenGL 4.3 Core Specification, Chapter 19, Compute Shaders:
226    *
227    * "An INVALID_VALUE error is generated if indirect is negative or is not a
228    *  multiple of four."
229    */
230   if (indirect & (sizeof(GLuint) - 1)) {
231      _mesa_error(ctx, GL_INVALID_VALUE,
232                  "%s(indirect is not aligned)", name);
233      return GL_FALSE;
234   }
235
236   if (indirect < 0) {
237      _mesa_error(ctx, GL_INVALID_VALUE,
238                  "%s(indirect is less than zero)", name);
239      return GL_FALSE;
240   }
241
242   /* From the OpenGL 4.3 Core Specification, Chapter 19, Compute Shaders:
243    *
244    * "An INVALID_OPERATION error is generated if no buffer is bound to the
245    *  DRAW_INDIRECT_BUFFER binding, or if the command would source data
246    *  beyond the end of the buffer object."
247    */
248   if (!ctx->DispatchIndirectBuffer) {
249      _mesa_error(ctx, GL_INVALID_OPERATION,
250                  "%s: no buffer bound to DISPATCH_INDIRECT_BUFFER", name);
251      return GL_FALSE;
252   }
253
254   if (_mesa_check_disallowed_mapping(ctx->DispatchIndirectBuffer)) {
255      _mesa_error(ctx, GL_INVALID_OPERATION,
256                  "%s(DISPATCH_INDIRECT_BUFFER is mapped)", name);
257      return GL_FALSE;
258   }
259
260   if (ctx->DispatchIndirectBuffer->Size < end) {
261      _mesa_error(ctx, GL_INVALID_OPERATION,
262                  "%s(DISPATCH_INDIRECT_BUFFER too small)", name);
263      return GL_FALSE;
264   }
265
266   /* The ARB_compute_variable_group_size spec says:
267    *
268    * "An INVALID_OPERATION error is generated if the active program for the
269    *  compute shader stage has a variable work group size."
270    */
271   struct gl_program *prog = ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
272   if (prog->info.workgroup_size_variable) {
273      _mesa_error(ctx, GL_INVALID_OPERATION,
274                  "%s(variable work group size forbidden)", name);
275      return GL_FALSE;
276   }
277
278   return GL_TRUE;
279}
280
281static ALWAYS_INLINE void
282dispatch_compute(GLuint num_groups_x, GLuint num_groups_y,
283                 GLuint num_groups_z, bool no_error)
284{
285   GET_CURRENT_CONTEXT(ctx);
286   const GLuint num_groups[3] = { num_groups_x, num_groups_y, num_groups_z };
287
288   FLUSH_VERTICES(ctx, 0, 0);
289
290   if (MESA_VERBOSE & VERBOSE_API)
291      _mesa_debug(ctx, "glDispatchCompute(%d, %d, %d)\n",
292                  num_groups_x, num_groups_y, num_groups_z);
293
294   if (!no_error && !validate_DispatchCompute(ctx, num_groups))
295      return;
296
297   if (num_groups_x == 0u || num_groups_y == 0u || num_groups_z == 0u)
298       return;
299
300   ctx->Driver.DispatchCompute(ctx, num_groups);
301
302   if (MESA_DEBUG_FLAGS & DEBUG_ALWAYS_FLUSH)
303      _mesa_flush(ctx);
304}
305
306void GLAPIENTRY
307_mesa_DispatchCompute_no_error(GLuint num_groups_x, GLuint num_groups_y,
308                               GLuint num_groups_z)
309{
310   dispatch_compute(num_groups_x, num_groups_y, num_groups_z, true);
311}
312
313void GLAPIENTRY
314_mesa_DispatchCompute(GLuint num_groups_x,
315                      GLuint num_groups_y,
316                      GLuint num_groups_z)
317{
318   dispatch_compute(num_groups_x, num_groups_y, num_groups_z, false);
319}
320
321static ALWAYS_INLINE void
322dispatch_compute_indirect(GLintptr indirect, bool no_error)
323{
324   GET_CURRENT_CONTEXT(ctx);
325
326   FLUSH_VERTICES(ctx, 0, 0);
327
328   if (MESA_VERBOSE & VERBOSE_API)
329      _mesa_debug(ctx, "glDispatchComputeIndirect(%ld)\n", (long) indirect);
330
331   if (!no_error && !valid_dispatch_indirect(ctx, indirect))
332      return;
333
334   ctx->Driver.DispatchComputeIndirect(ctx, indirect);
335
336   if (MESA_DEBUG_FLAGS & DEBUG_ALWAYS_FLUSH)
337      _mesa_flush(ctx);
338}
339
340extern void GLAPIENTRY
341_mesa_DispatchComputeIndirect_no_error(GLintptr indirect)
342{
343   dispatch_compute_indirect(indirect, true);
344}
345
346extern void GLAPIENTRY
347_mesa_DispatchComputeIndirect(GLintptr indirect)
348{
349   dispatch_compute_indirect(indirect, false);
350}
351
352static ALWAYS_INLINE void
353dispatch_compute_group_size(GLuint num_groups_x, GLuint num_groups_y,
354                            GLuint num_groups_z, GLuint group_size_x,
355                            GLuint group_size_y, GLuint group_size_z,
356                            bool no_error)
357{
358   GET_CURRENT_CONTEXT(ctx);
359   const GLuint num_groups[3] = { num_groups_x, num_groups_y, num_groups_z };
360   const GLuint group_size[3] = { group_size_x, group_size_y, group_size_z };
361
362   FLUSH_VERTICES(ctx, 0, 0);
363
364   if (MESA_VERBOSE & VERBOSE_API)
365      _mesa_debug(ctx,
366                  "glDispatchComputeGroupSizeARB(%d, %d, %d, %d, %d, %d)\n",
367                  num_groups_x, num_groups_y, num_groups_z,
368                  group_size_x, group_size_y, group_size_z);
369
370   if (!no_error &&
371       !validate_DispatchComputeGroupSizeARB(ctx, num_groups, group_size))
372      return;
373
374   if (num_groups_x == 0u || num_groups_y == 0u || num_groups_z == 0u)
375       return;
376
377   ctx->Driver.DispatchComputeGroupSize(ctx, num_groups, group_size);
378
379   if (MESA_DEBUG_FLAGS & DEBUG_ALWAYS_FLUSH)
380      _mesa_flush(ctx);
381}
382
383void GLAPIENTRY
384_mesa_DispatchComputeGroupSizeARB_no_error(GLuint num_groups_x,
385                                           GLuint num_groups_y,
386                                           GLuint num_groups_z,
387                                           GLuint group_size_x,
388                                           GLuint group_size_y,
389                                           GLuint group_size_z)
390{
391   dispatch_compute_group_size(num_groups_x, num_groups_y, num_groups_z,
392                               group_size_x, group_size_y, group_size_z,
393                               true);
394}
395
396void GLAPIENTRY
397_mesa_DispatchComputeGroupSizeARB(GLuint num_groups_x, GLuint num_groups_y,
398                                  GLuint num_groups_z, GLuint group_size_x,
399                                  GLuint group_size_y, GLuint group_size_z)
400{
401   dispatch_compute_group_size(num_groups_x, num_groups_y, num_groups_z,
402                               group_size_x, group_size_y, group_size_z,
403                               false);
404}
405