1848b8605Smrg/*
2848b8605Smrg * Copyright © 2014 Intel Corporation
3848b8605Smrg *
4848b8605Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5848b8605Smrg * copy of this software and associated documentation files (the "Software"),
6848b8605Smrg * to deal in the Software without restriction, including without limitation
7848b8605Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8848b8605Smrg * and/or sell copies of the Software, and to permit persons to whom the
9848b8605Smrg * Software is furnished to do so, subject to the following conditions:
10848b8605Smrg *
11848b8605Smrg * The above copyright notice and this permission notice (including the next
12848b8605Smrg * paragraph) shall be included in all copies or substantial portions of the
13848b8605Smrg * Software.
14848b8605Smrg *
15848b8605Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16848b8605Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17848b8605Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18848b8605Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19848b8605Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20848b8605Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21848b8605Smrg * DEALINGS IN THE SOFTWARE.
22848b8605Smrg */
23848b8605Smrg
24848b8605Smrg#include "glheader.h"
25b8e80941Smrg#include "bufferobj.h"
26848b8605Smrg#include "compute.h"
27848b8605Smrg#include "context.h"
28848b8605Smrg
29b8e80941Smrgstatic bool
30b8e80941Smrgcheck_valid_to_compute(struct gl_context *ctx, const char *function)
31b8e80941Smrg{
32b8e80941Smrg   if (!_mesa_has_compute_shaders(ctx)) {
33b8e80941Smrg      _mesa_error(ctx, GL_INVALID_OPERATION,
34b8e80941Smrg                  "unsupported function (%s) called",
35b8e80941Smrg                  function);
36b8e80941Smrg      return false;
37b8e80941Smrg   }
38b8e80941Smrg
39b8e80941Smrg   /* From the OpenGL 4.3 Core Specification, Chapter 19, Compute Shaders:
40b8e80941Smrg    *
41b8e80941Smrg    * "An INVALID_OPERATION error is generated if there is no active program
42b8e80941Smrg    *  for the compute shader stage."
43b8e80941Smrg    */
44b8e80941Smrg   if (ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE] == NULL) {
45b8e80941Smrg      _mesa_error(ctx, GL_INVALID_OPERATION,
46b8e80941Smrg                  "%s(no active compute shader)",
47b8e80941Smrg                  function);
48b8e80941Smrg      return false;
49b8e80941Smrg   }
50b8e80941Smrg
51b8e80941Smrg   return true;
52b8e80941Smrg}
53b8e80941Smrg
54b8e80941Smrgstatic bool
55b8e80941Smrgvalidate_DispatchCompute(struct gl_context *ctx, const GLuint *num_groups)
56b8e80941Smrg{
57b8e80941Smrg   if (!check_valid_to_compute(ctx, "glDispatchCompute"))
58b8e80941Smrg      return GL_FALSE;
59b8e80941Smrg
60b8e80941Smrg   for (int i = 0; i < 3; i++) {
61b8e80941Smrg      /* From the OpenGL 4.3 Core Specification, Chapter 19, Compute Shaders:
62b8e80941Smrg       *
63b8e80941Smrg       * "An INVALID_VALUE error is generated if any of num_groups_x,
64b8e80941Smrg       *  num_groups_y and num_groups_z are greater than or equal to the
65b8e80941Smrg       *  maximum work group count for the corresponding dimension."
66b8e80941Smrg       *
67b8e80941Smrg       * However, the "or equal to" portions appears to be a specification
68b8e80941Smrg       * bug. In all other areas, the specification appears to indicate that
69b8e80941Smrg       * the number of workgroups can match the MAX_COMPUTE_WORK_GROUP_COUNT
70b8e80941Smrg       * value. For example, under DispatchComputeIndirect:
71b8e80941Smrg       *
72b8e80941Smrg       * "If any of num_groups_x, num_groups_y or num_groups_z is greater than
73b8e80941Smrg       *  the value of MAX_COMPUTE_WORK_GROUP_COUNT for the corresponding
74b8e80941Smrg       *  dimension then the results are undefined."
75b8e80941Smrg       *
76b8e80941Smrg       * Additionally, the OpenGLES 3.1 specification does not contain "or
77b8e80941Smrg       * equal to" as an error condition.
78b8e80941Smrg       */
79b8e80941Smrg      if (num_groups[i] > ctx->Const.MaxComputeWorkGroupCount[i]) {
80b8e80941Smrg         _mesa_error(ctx, GL_INVALID_VALUE,
81b8e80941Smrg                     "glDispatchCompute(num_groups_%c)", 'x' + i);
82b8e80941Smrg         return GL_FALSE;
83b8e80941Smrg      }
84b8e80941Smrg   }
85b8e80941Smrg
86b8e80941Smrg   /* The ARB_compute_variable_group_size spec says:
87b8e80941Smrg    *
88b8e80941Smrg    * "An INVALID_OPERATION error is generated by DispatchCompute if the active
89b8e80941Smrg    *  program for the compute shader stage has a variable work group size."
90b8e80941Smrg    */
91b8e80941Smrg   struct gl_program *prog = ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
92b8e80941Smrg   if (prog->info.cs.local_size_variable) {
93b8e80941Smrg      _mesa_error(ctx, GL_INVALID_OPERATION,
94b8e80941Smrg                  "glDispatchCompute(variable work group size forbidden)");
95b8e80941Smrg      return GL_FALSE;
96b8e80941Smrg   }
97b8e80941Smrg
98b8e80941Smrg   return GL_TRUE;
99b8e80941Smrg}
100b8e80941Smrg
101b8e80941Smrgstatic bool
102b8e80941Smrgvalidate_DispatchComputeGroupSizeARB(struct gl_context *ctx,
103b8e80941Smrg                                     const GLuint *num_groups,
104b8e80941Smrg                                     const GLuint *group_size)
105b8e80941Smrg{
106b8e80941Smrg   GLuint total_invocations = 1;
107b8e80941Smrg
108b8e80941Smrg   if (!check_valid_to_compute(ctx, "glDispatchComputeGroupSizeARB"))
109b8e80941Smrg      return GL_FALSE;
110b8e80941Smrg
111b8e80941Smrg   /* The ARB_compute_variable_group_size spec says:
112b8e80941Smrg    *
113b8e80941Smrg    * "An INVALID_OPERATION error is generated by
114b8e80941Smrg    *  DispatchComputeGroupSizeARB if the active program for the compute
115b8e80941Smrg    *  shader stage has a fixed work group size."
116b8e80941Smrg    */
117b8e80941Smrg   struct gl_program *prog = ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
118b8e80941Smrg   if (!prog->info.cs.local_size_variable) {
119b8e80941Smrg      _mesa_error(ctx, GL_INVALID_OPERATION,
120b8e80941Smrg                  "glDispatchComputeGroupSizeARB(fixed work group size "
121b8e80941Smrg                  "forbidden)");
122b8e80941Smrg      return GL_FALSE;
123b8e80941Smrg   }
124b8e80941Smrg
125b8e80941Smrg   for (int i = 0; i < 3; i++) {
126b8e80941Smrg      /* The ARB_compute_variable_group_size spec says:
127b8e80941Smrg       *
128b8e80941Smrg       * "An INVALID_VALUE error is generated if any of num_groups_x,
129b8e80941Smrg       *  num_groups_y and num_groups_z are greater than or equal to the
130b8e80941Smrg       *  maximum work group count for the corresponding dimension."
131b8e80941Smrg       */
132b8e80941Smrg      if (num_groups[i] > ctx->Const.MaxComputeWorkGroupCount[i]) {
133b8e80941Smrg         _mesa_error(ctx, GL_INVALID_VALUE,
134b8e80941Smrg                     "glDispatchComputeGroupSizeARB(num_groups_%c)", 'x' + i);
135b8e80941Smrg         return GL_FALSE;
136b8e80941Smrg      }
137b8e80941Smrg
138b8e80941Smrg      /* The ARB_compute_variable_group_size spec says:
139b8e80941Smrg       *
140b8e80941Smrg       * "An INVALID_VALUE error is generated by DispatchComputeGroupSizeARB if
141b8e80941Smrg       *  any of <group_size_x>, <group_size_y>, or <group_size_z> is less than
142b8e80941Smrg       *  or equal to zero or greater than the maximum local work group size
143b8e80941Smrg       *  for compute shaders with variable group size
144b8e80941Smrg       *  (MAX_COMPUTE_VARIABLE_GROUP_SIZE_ARB) in the corresponding
145b8e80941Smrg       *  dimension."
146b8e80941Smrg       *
147b8e80941Smrg       * However, the "less than" is a spec bug because they are declared as
148b8e80941Smrg       * unsigned integers.
149b8e80941Smrg       */
150b8e80941Smrg      if (group_size[i] == 0 ||
151b8e80941Smrg          group_size[i] > ctx->Const.MaxComputeVariableGroupSize[i]) {
152b8e80941Smrg         _mesa_error(ctx, GL_INVALID_VALUE,
153b8e80941Smrg                     "glDispatchComputeGroupSizeARB(group_size_%c)", 'x' + i);
154b8e80941Smrg         return GL_FALSE;
155b8e80941Smrg      }
156b8e80941Smrg
157b8e80941Smrg      total_invocations *= group_size[i];
158b8e80941Smrg   }
159b8e80941Smrg
160b8e80941Smrg   /* The ARB_compute_variable_group_size spec says:
161b8e80941Smrg    *
162b8e80941Smrg    * "An INVALID_VALUE error is generated by DispatchComputeGroupSizeARB if
163b8e80941Smrg    *  the product of <group_size_x>, <group_size_y>, and <group_size_z> exceeds
164b8e80941Smrg    *  the implementation-dependent maximum local work group invocation count
165b8e80941Smrg    *  for compute shaders with variable group size
166b8e80941Smrg    *  (MAX_COMPUTE_VARIABLE_GROUP_INVOCATIONS_ARB)."
167b8e80941Smrg    */
168b8e80941Smrg   if (total_invocations > ctx->Const.MaxComputeVariableGroupInvocations) {
169b8e80941Smrg      _mesa_error(ctx, GL_INVALID_VALUE,
170b8e80941Smrg                  "glDispatchComputeGroupSizeARB(product of local_sizes "
171b8e80941Smrg                  "exceeds MAX_COMPUTE_VARIABLE_GROUP_INVOCATIONS_ARB "
172b8e80941Smrg                  "(%d > %d))", total_invocations,
173b8e80941Smrg                  ctx->Const.MaxComputeVariableGroupInvocations);
174b8e80941Smrg      return GL_FALSE;
175b8e80941Smrg   }
176b8e80941Smrg
177b8e80941Smrg   return GL_TRUE;
178b8e80941Smrg}
179b8e80941Smrg
180b8e80941Smrgstatic bool
181b8e80941Smrgvalid_dispatch_indirect(struct gl_context *ctx,  GLintptr indirect)
182b8e80941Smrg{
183b8e80941Smrg   GLsizei size = 3 * sizeof(GLuint);
184b8e80941Smrg   const uint64_t end = (uint64_t) indirect + size;
185b8e80941Smrg   const char *name = "glDispatchComputeIndirect";
186b8e80941Smrg
187b8e80941Smrg   if (!check_valid_to_compute(ctx, name))
188b8e80941Smrg      return GL_FALSE;
189b8e80941Smrg
190b8e80941Smrg   /* From the OpenGL 4.3 Core Specification, Chapter 19, Compute Shaders:
191b8e80941Smrg    *
192b8e80941Smrg    * "An INVALID_VALUE error is generated if indirect is negative or is not a
193b8e80941Smrg    *  multiple of four."
194b8e80941Smrg    */
195b8e80941Smrg   if (indirect & (sizeof(GLuint) - 1)) {
196b8e80941Smrg      _mesa_error(ctx, GL_INVALID_VALUE,
197b8e80941Smrg                  "%s(indirect is not aligned)", name);
198b8e80941Smrg      return GL_FALSE;
199b8e80941Smrg   }
200b8e80941Smrg
201b8e80941Smrg   if (indirect < 0) {
202b8e80941Smrg      _mesa_error(ctx, GL_INVALID_VALUE,
203b8e80941Smrg                  "%s(indirect is less than zero)", name);
204b8e80941Smrg      return GL_FALSE;
205b8e80941Smrg   }
206b8e80941Smrg
207b8e80941Smrg   /* From the OpenGL 4.3 Core Specification, Chapter 19, Compute Shaders:
208b8e80941Smrg    *
209b8e80941Smrg    * "An INVALID_OPERATION error is generated if no buffer is bound to the
210b8e80941Smrg    *  DRAW_INDIRECT_BUFFER binding, or if the command would source data
211b8e80941Smrg    *  beyond the end of the buffer object."
212b8e80941Smrg    */
213b8e80941Smrg   if (!_mesa_is_bufferobj(ctx->DispatchIndirectBuffer)) {
214b8e80941Smrg      _mesa_error(ctx, GL_INVALID_OPERATION,
215b8e80941Smrg                  "%s: no buffer bound to DISPATCH_INDIRECT_BUFFER", name);
216b8e80941Smrg      return GL_FALSE;
217b8e80941Smrg   }
218b8e80941Smrg
219b8e80941Smrg   if (_mesa_check_disallowed_mapping(ctx->DispatchIndirectBuffer)) {
220b8e80941Smrg      _mesa_error(ctx, GL_INVALID_OPERATION,
221b8e80941Smrg                  "%s(DISPATCH_INDIRECT_BUFFER is mapped)", name);
222b8e80941Smrg      return GL_FALSE;
223b8e80941Smrg   }
224b8e80941Smrg
225b8e80941Smrg   if (ctx->DispatchIndirectBuffer->Size < end) {
226b8e80941Smrg      _mesa_error(ctx, GL_INVALID_OPERATION,
227b8e80941Smrg                  "%s(DISPATCH_INDIRECT_BUFFER too small)", name);
228b8e80941Smrg      return GL_FALSE;
229b8e80941Smrg   }
230b8e80941Smrg
231b8e80941Smrg   /* The ARB_compute_variable_group_size spec says:
232b8e80941Smrg    *
233b8e80941Smrg    * "An INVALID_OPERATION error is generated if the active program for the
234b8e80941Smrg    *  compute shader stage has a variable work group size."
235b8e80941Smrg    */
236b8e80941Smrg   struct gl_program *prog = ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
237b8e80941Smrg   if (prog->info.cs.local_size_variable) {
238b8e80941Smrg      _mesa_error(ctx, GL_INVALID_OPERATION,
239b8e80941Smrg                  "%s(variable work group size forbidden)", name);
240b8e80941Smrg      return GL_FALSE;
241b8e80941Smrg   }
242b8e80941Smrg
243b8e80941Smrg   return GL_TRUE;
244b8e80941Smrg}
245b8e80941Smrg
246b8e80941Smrgstatic ALWAYS_INLINE void
247b8e80941Smrgdispatch_compute(GLuint num_groups_x, GLuint num_groups_y,
248b8e80941Smrg                 GLuint num_groups_z, bool no_error)
249b8e80941Smrg{
250b8e80941Smrg   GET_CURRENT_CONTEXT(ctx);
251b8e80941Smrg   const GLuint num_groups[3] = { num_groups_x, num_groups_y, num_groups_z };
252b8e80941Smrg
253b8e80941Smrg   FLUSH_CURRENT(ctx, 0);
254b8e80941Smrg
255b8e80941Smrg   if (MESA_VERBOSE & VERBOSE_API)
256b8e80941Smrg      _mesa_debug(ctx, "glDispatchCompute(%d, %d, %d)\n",
257b8e80941Smrg                  num_groups_x, num_groups_y, num_groups_z);
258b8e80941Smrg
259b8e80941Smrg   if (!no_error && !validate_DispatchCompute(ctx, num_groups))
260b8e80941Smrg      return;
261b8e80941Smrg
262b8e80941Smrg   if (num_groups_x == 0u || num_groups_y == 0u || num_groups_z == 0u)
263b8e80941Smrg       return;
264b8e80941Smrg
265b8e80941Smrg   ctx->Driver.DispatchCompute(ctx, num_groups);
266b8e80941Smrg}
267b8e80941Smrg
268b8e80941Smrgvoid GLAPIENTRY
269b8e80941Smrg_mesa_DispatchCompute_no_error(GLuint num_groups_x, GLuint num_groups_y,
270b8e80941Smrg                               GLuint num_groups_z)
271b8e80941Smrg{
272b8e80941Smrg   dispatch_compute(num_groups_x, num_groups_y, num_groups_z, true);
273b8e80941Smrg}
274b8e80941Smrg
275848b8605Smrgvoid GLAPIENTRY
276848b8605Smrg_mesa_DispatchCompute(GLuint num_groups_x,
277848b8605Smrg                      GLuint num_groups_y,
278848b8605Smrg                      GLuint num_groups_z)
279b8e80941Smrg{
280b8e80941Smrg   dispatch_compute(num_groups_x, num_groups_y, num_groups_z, false);
281b8e80941Smrg}
282b8e80941Smrg
283b8e80941Smrgstatic ALWAYS_INLINE void
284b8e80941Smrgdispatch_compute_indirect(GLintptr indirect, bool no_error)
285848b8605Smrg{
286848b8605Smrg   GET_CURRENT_CONTEXT(ctx);
287848b8605Smrg
288b8e80941Smrg   FLUSH_CURRENT(ctx, 0);
289b8e80941Smrg
290b8e80941Smrg   if (MESA_VERBOSE & VERBOSE_API)
291b8e80941Smrg      _mesa_debug(ctx, "glDispatchComputeIndirect(%ld)\n", (long) indirect);
292b8e80941Smrg
293b8e80941Smrg   if (!no_error && !valid_dispatch_indirect(ctx, indirect))
294b8e80941Smrg      return;
295b8e80941Smrg
296b8e80941Smrg   ctx->Driver.DispatchComputeIndirect(ctx, indirect);
297b8e80941Smrg}
298b8e80941Smrg
299b8e80941Smrgextern void GLAPIENTRY
300b8e80941Smrg_mesa_DispatchComputeIndirect_no_error(GLintptr indirect)
301b8e80941Smrg{
302b8e80941Smrg   dispatch_compute_indirect(indirect, true);
303848b8605Smrg}
304848b8605Smrg
305848b8605Smrgextern void GLAPIENTRY
306848b8605Smrg_mesa_DispatchComputeIndirect(GLintptr indirect)
307b8e80941Smrg{
308b8e80941Smrg   dispatch_compute_indirect(indirect, false);
309b8e80941Smrg}
310b8e80941Smrg
311b8e80941Smrgstatic ALWAYS_INLINE void
312b8e80941Smrgdispatch_compute_group_size(GLuint num_groups_x, GLuint num_groups_y,
313b8e80941Smrg                            GLuint num_groups_z, GLuint group_size_x,
314b8e80941Smrg                            GLuint group_size_y, GLuint group_size_z,
315b8e80941Smrg                            bool no_error)
316848b8605Smrg{
317848b8605Smrg   GET_CURRENT_CONTEXT(ctx);
318b8e80941Smrg   const GLuint num_groups[3] = { num_groups_x, num_groups_y, num_groups_z };
319b8e80941Smrg   const GLuint group_size[3] = { group_size_x, group_size_y, group_size_z };
320848b8605Smrg
321b8e80941Smrg   FLUSH_CURRENT(ctx, 0);
322b8e80941Smrg
323b8e80941Smrg   if (MESA_VERBOSE & VERBOSE_API)
324b8e80941Smrg      _mesa_debug(ctx,
325b8e80941Smrg                  "glDispatchComputeGroupSizeARB(%d, %d, %d, %d, %d, %d)\n",
326b8e80941Smrg                  num_groups_x, num_groups_y, num_groups_z,
327b8e80941Smrg                  group_size_x, group_size_y, group_size_z);
328b8e80941Smrg
329b8e80941Smrg   if (!no_error &&
330b8e80941Smrg       !validate_DispatchComputeGroupSizeARB(ctx, num_groups, group_size))
331b8e80941Smrg      return;
332b8e80941Smrg
333b8e80941Smrg   if (num_groups_x == 0u || num_groups_y == 0u || num_groups_z == 0u)
334b8e80941Smrg       return;
335b8e80941Smrg
336b8e80941Smrg   ctx->Driver.DispatchComputeGroupSize(ctx, num_groups, group_size);
337b8e80941Smrg}
338b8e80941Smrg
339b8e80941Smrgvoid GLAPIENTRY
340b8e80941Smrg_mesa_DispatchComputeGroupSizeARB_no_error(GLuint num_groups_x,
341b8e80941Smrg                                           GLuint num_groups_y,
342b8e80941Smrg                                           GLuint num_groups_z,
343b8e80941Smrg                                           GLuint group_size_x,
344b8e80941Smrg                                           GLuint group_size_y,
345b8e80941Smrg                                           GLuint group_size_z)
346b8e80941Smrg{
347b8e80941Smrg   dispatch_compute_group_size(num_groups_x, num_groups_y, num_groups_z,
348b8e80941Smrg                               group_size_x, group_size_y, group_size_z,
349b8e80941Smrg                               true);
350b8e80941Smrg}
351b8e80941Smrg
352b8e80941Smrgvoid GLAPIENTRY
353b8e80941Smrg_mesa_DispatchComputeGroupSizeARB(GLuint num_groups_x, GLuint num_groups_y,
354b8e80941Smrg                                  GLuint num_groups_z, GLuint group_size_x,
355b8e80941Smrg                                  GLuint group_size_y, GLuint group_size_z)
356b8e80941Smrg{
357b8e80941Smrg   dispatch_compute_group_size(num_groups_x, num_groups_y, num_groups_z,
358b8e80941Smrg                               group_size_x, group_size_y, group_size_z,
359b8e80941Smrg                               false);
360848b8605Smrg}
361