1848b8605Smrg/* 2848b8605Smrg * Copyright © 2014 Intel Corporation 3848b8605Smrg * 4848b8605Smrg * Permission is hereby granted, free of charge, to any person obtaining a 5848b8605Smrg * copy of this software and associated documentation files (the "Software"), 6848b8605Smrg * to deal in the Software without restriction, including without limitation 7848b8605Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8848b8605Smrg * and/or sell copies of the Software, and to permit persons to whom the 9848b8605Smrg * Software is furnished to do so, subject to the following conditions: 10848b8605Smrg * 11848b8605Smrg * The above copyright notice and this permission notice (including the next 12848b8605Smrg * paragraph) shall be included in all copies or substantial portions of the 13848b8605Smrg * Software. 14848b8605Smrg * 15848b8605Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16848b8605Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17848b8605Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18848b8605Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19848b8605Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20848b8605Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21848b8605Smrg * DEALINGS IN THE SOFTWARE. 22848b8605Smrg */ 23848b8605Smrg 24848b8605Smrg#include "glheader.h" 25b8e80941Smrg#include "bufferobj.h" 26848b8605Smrg#include "compute.h" 27848b8605Smrg#include "context.h" 28848b8605Smrg 29b8e80941Smrgstatic bool 30b8e80941Smrgcheck_valid_to_compute(struct gl_context *ctx, const char *function) 31b8e80941Smrg{ 32b8e80941Smrg if (!_mesa_has_compute_shaders(ctx)) { 33b8e80941Smrg _mesa_error(ctx, GL_INVALID_OPERATION, 34b8e80941Smrg "unsupported function (%s) called", 35b8e80941Smrg function); 36b8e80941Smrg return false; 37b8e80941Smrg } 38b8e80941Smrg 39b8e80941Smrg /* From the OpenGL 4.3 Core Specification, Chapter 19, Compute Shaders: 40b8e80941Smrg * 41b8e80941Smrg * "An INVALID_OPERATION error is generated if there is no active program 42b8e80941Smrg * for the compute shader stage." 43b8e80941Smrg */ 44b8e80941Smrg if (ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE] == NULL) { 45b8e80941Smrg _mesa_error(ctx, GL_INVALID_OPERATION, 46b8e80941Smrg "%s(no active compute shader)", 47b8e80941Smrg function); 48b8e80941Smrg return false; 49b8e80941Smrg } 50b8e80941Smrg 51b8e80941Smrg return true; 52b8e80941Smrg} 53b8e80941Smrg 54b8e80941Smrgstatic bool 55b8e80941Smrgvalidate_DispatchCompute(struct gl_context *ctx, const GLuint *num_groups) 56b8e80941Smrg{ 57b8e80941Smrg if (!check_valid_to_compute(ctx, "glDispatchCompute")) 58b8e80941Smrg return GL_FALSE; 59b8e80941Smrg 60b8e80941Smrg for (int i = 0; i < 3; i++) { 61b8e80941Smrg /* From the OpenGL 4.3 Core Specification, Chapter 19, Compute Shaders: 62b8e80941Smrg * 63b8e80941Smrg * "An INVALID_VALUE error is generated if any of num_groups_x, 64b8e80941Smrg * num_groups_y and num_groups_z are greater than or equal to the 65b8e80941Smrg * maximum work group count for the corresponding dimension." 66b8e80941Smrg * 67b8e80941Smrg * However, the "or equal to" portions appears to be a specification 68b8e80941Smrg * bug. In all other areas, the specification appears to indicate that 69b8e80941Smrg * the number of workgroups can match the MAX_COMPUTE_WORK_GROUP_COUNT 70b8e80941Smrg * value. For example, under DispatchComputeIndirect: 71b8e80941Smrg * 72b8e80941Smrg * "If any of num_groups_x, num_groups_y or num_groups_z is greater than 73b8e80941Smrg * the value of MAX_COMPUTE_WORK_GROUP_COUNT for the corresponding 74b8e80941Smrg * dimension then the results are undefined." 75b8e80941Smrg * 76b8e80941Smrg * Additionally, the OpenGLES 3.1 specification does not contain "or 77b8e80941Smrg * equal to" as an error condition. 78b8e80941Smrg */ 79b8e80941Smrg if (num_groups[i] > ctx->Const.MaxComputeWorkGroupCount[i]) { 80b8e80941Smrg _mesa_error(ctx, GL_INVALID_VALUE, 81b8e80941Smrg "glDispatchCompute(num_groups_%c)", 'x' + i); 82b8e80941Smrg return GL_FALSE; 83b8e80941Smrg } 84b8e80941Smrg } 85b8e80941Smrg 86b8e80941Smrg /* The ARB_compute_variable_group_size spec says: 87b8e80941Smrg * 88b8e80941Smrg * "An INVALID_OPERATION error is generated by DispatchCompute if the active 89b8e80941Smrg * program for the compute shader stage has a variable work group size." 90b8e80941Smrg */ 91b8e80941Smrg struct gl_program *prog = ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE]; 92b8e80941Smrg if (prog->info.cs.local_size_variable) { 93b8e80941Smrg _mesa_error(ctx, GL_INVALID_OPERATION, 94b8e80941Smrg "glDispatchCompute(variable work group size forbidden)"); 95b8e80941Smrg return GL_FALSE; 96b8e80941Smrg } 97b8e80941Smrg 98b8e80941Smrg return GL_TRUE; 99b8e80941Smrg} 100b8e80941Smrg 101b8e80941Smrgstatic bool 102b8e80941Smrgvalidate_DispatchComputeGroupSizeARB(struct gl_context *ctx, 103b8e80941Smrg const GLuint *num_groups, 104b8e80941Smrg const GLuint *group_size) 105b8e80941Smrg{ 106b8e80941Smrg GLuint total_invocations = 1; 107b8e80941Smrg 108b8e80941Smrg if (!check_valid_to_compute(ctx, "glDispatchComputeGroupSizeARB")) 109b8e80941Smrg return GL_FALSE; 110b8e80941Smrg 111b8e80941Smrg /* The ARB_compute_variable_group_size spec says: 112b8e80941Smrg * 113b8e80941Smrg * "An INVALID_OPERATION error is generated by 114b8e80941Smrg * DispatchComputeGroupSizeARB if the active program for the compute 115b8e80941Smrg * shader stage has a fixed work group size." 116b8e80941Smrg */ 117b8e80941Smrg struct gl_program *prog = ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE]; 118b8e80941Smrg if (!prog->info.cs.local_size_variable) { 119b8e80941Smrg _mesa_error(ctx, GL_INVALID_OPERATION, 120b8e80941Smrg "glDispatchComputeGroupSizeARB(fixed work group size " 121b8e80941Smrg "forbidden)"); 122b8e80941Smrg return GL_FALSE; 123b8e80941Smrg } 124b8e80941Smrg 125b8e80941Smrg for (int i = 0; i < 3; i++) { 126b8e80941Smrg /* The ARB_compute_variable_group_size spec says: 127b8e80941Smrg * 128b8e80941Smrg * "An INVALID_VALUE error is generated if any of num_groups_x, 129b8e80941Smrg * num_groups_y and num_groups_z are greater than or equal to the 130b8e80941Smrg * maximum work group count for the corresponding dimension." 131b8e80941Smrg */ 132b8e80941Smrg if (num_groups[i] > ctx->Const.MaxComputeWorkGroupCount[i]) { 133b8e80941Smrg _mesa_error(ctx, GL_INVALID_VALUE, 134b8e80941Smrg "glDispatchComputeGroupSizeARB(num_groups_%c)", 'x' + i); 135b8e80941Smrg return GL_FALSE; 136b8e80941Smrg } 137b8e80941Smrg 138b8e80941Smrg /* The ARB_compute_variable_group_size spec says: 139b8e80941Smrg * 140b8e80941Smrg * "An INVALID_VALUE error is generated by DispatchComputeGroupSizeARB if 141b8e80941Smrg * any of <group_size_x>, <group_size_y>, or <group_size_z> is less than 142b8e80941Smrg * or equal to zero or greater than the maximum local work group size 143b8e80941Smrg * for compute shaders with variable group size 144b8e80941Smrg * (MAX_COMPUTE_VARIABLE_GROUP_SIZE_ARB) in the corresponding 145b8e80941Smrg * dimension." 146b8e80941Smrg * 147b8e80941Smrg * However, the "less than" is a spec bug because they are declared as 148b8e80941Smrg * unsigned integers. 149b8e80941Smrg */ 150b8e80941Smrg if (group_size[i] == 0 || 151b8e80941Smrg group_size[i] > ctx->Const.MaxComputeVariableGroupSize[i]) { 152b8e80941Smrg _mesa_error(ctx, GL_INVALID_VALUE, 153b8e80941Smrg "glDispatchComputeGroupSizeARB(group_size_%c)", 'x' + i); 154b8e80941Smrg return GL_FALSE; 155b8e80941Smrg } 156b8e80941Smrg 157b8e80941Smrg total_invocations *= group_size[i]; 158b8e80941Smrg } 159b8e80941Smrg 160b8e80941Smrg /* The ARB_compute_variable_group_size spec says: 161b8e80941Smrg * 162b8e80941Smrg * "An INVALID_VALUE error is generated by DispatchComputeGroupSizeARB if 163b8e80941Smrg * the product of <group_size_x>, <group_size_y>, and <group_size_z> exceeds 164b8e80941Smrg * the implementation-dependent maximum local work group invocation count 165b8e80941Smrg * for compute shaders with variable group size 166b8e80941Smrg * (MAX_COMPUTE_VARIABLE_GROUP_INVOCATIONS_ARB)." 167b8e80941Smrg */ 168b8e80941Smrg if (total_invocations > ctx->Const.MaxComputeVariableGroupInvocations) { 169b8e80941Smrg _mesa_error(ctx, GL_INVALID_VALUE, 170b8e80941Smrg "glDispatchComputeGroupSizeARB(product of local_sizes " 171b8e80941Smrg "exceeds MAX_COMPUTE_VARIABLE_GROUP_INVOCATIONS_ARB " 172b8e80941Smrg "(%d > %d))", total_invocations, 173b8e80941Smrg ctx->Const.MaxComputeVariableGroupInvocations); 174b8e80941Smrg return GL_FALSE; 175b8e80941Smrg } 176b8e80941Smrg 177b8e80941Smrg return GL_TRUE; 178b8e80941Smrg} 179b8e80941Smrg 180b8e80941Smrgstatic bool 181b8e80941Smrgvalid_dispatch_indirect(struct gl_context *ctx, GLintptr indirect) 182b8e80941Smrg{ 183b8e80941Smrg GLsizei size = 3 * sizeof(GLuint); 184b8e80941Smrg const uint64_t end = (uint64_t) indirect + size; 185b8e80941Smrg const char *name = "glDispatchComputeIndirect"; 186b8e80941Smrg 187b8e80941Smrg if (!check_valid_to_compute(ctx, name)) 188b8e80941Smrg return GL_FALSE; 189b8e80941Smrg 190b8e80941Smrg /* From the OpenGL 4.3 Core Specification, Chapter 19, Compute Shaders: 191b8e80941Smrg * 192b8e80941Smrg * "An INVALID_VALUE error is generated if indirect is negative or is not a 193b8e80941Smrg * multiple of four." 194b8e80941Smrg */ 195b8e80941Smrg if (indirect & (sizeof(GLuint) - 1)) { 196b8e80941Smrg _mesa_error(ctx, GL_INVALID_VALUE, 197b8e80941Smrg "%s(indirect is not aligned)", name); 198b8e80941Smrg return GL_FALSE; 199b8e80941Smrg } 200b8e80941Smrg 201b8e80941Smrg if (indirect < 0) { 202b8e80941Smrg _mesa_error(ctx, GL_INVALID_VALUE, 203b8e80941Smrg "%s(indirect is less than zero)", name); 204b8e80941Smrg return GL_FALSE; 205b8e80941Smrg } 206b8e80941Smrg 207b8e80941Smrg /* From the OpenGL 4.3 Core Specification, Chapter 19, Compute Shaders: 208b8e80941Smrg * 209b8e80941Smrg * "An INVALID_OPERATION error is generated if no buffer is bound to the 210b8e80941Smrg * DRAW_INDIRECT_BUFFER binding, or if the command would source data 211b8e80941Smrg * beyond the end of the buffer object." 212b8e80941Smrg */ 213b8e80941Smrg if (!_mesa_is_bufferobj(ctx->DispatchIndirectBuffer)) { 214b8e80941Smrg _mesa_error(ctx, GL_INVALID_OPERATION, 215b8e80941Smrg "%s: no buffer bound to DISPATCH_INDIRECT_BUFFER", name); 216b8e80941Smrg return GL_FALSE; 217b8e80941Smrg } 218b8e80941Smrg 219b8e80941Smrg if (_mesa_check_disallowed_mapping(ctx->DispatchIndirectBuffer)) { 220b8e80941Smrg _mesa_error(ctx, GL_INVALID_OPERATION, 221b8e80941Smrg "%s(DISPATCH_INDIRECT_BUFFER is mapped)", name); 222b8e80941Smrg return GL_FALSE; 223b8e80941Smrg } 224b8e80941Smrg 225b8e80941Smrg if (ctx->DispatchIndirectBuffer->Size < end) { 226b8e80941Smrg _mesa_error(ctx, GL_INVALID_OPERATION, 227b8e80941Smrg "%s(DISPATCH_INDIRECT_BUFFER too small)", name); 228b8e80941Smrg return GL_FALSE; 229b8e80941Smrg } 230b8e80941Smrg 231b8e80941Smrg /* The ARB_compute_variable_group_size spec says: 232b8e80941Smrg * 233b8e80941Smrg * "An INVALID_OPERATION error is generated if the active program for the 234b8e80941Smrg * compute shader stage has a variable work group size." 235b8e80941Smrg */ 236b8e80941Smrg struct gl_program *prog = ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE]; 237b8e80941Smrg if (prog->info.cs.local_size_variable) { 238b8e80941Smrg _mesa_error(ctx, GL_INVALID_OPERATION, 239b8e80941Smrg "%s(variable work group size forbidden)", name); 240b8e80941Smrg return GL_FALSE; 241b8e80941Smrg } 242b8e80941Smrg 243b8e80941Smrg return GL_TRUE; 244b8e80941Smrg} 245b8e80941Smrg 246b8e80941Smrgstatic ALWAYS_INLINE void 247b8e80941Smrgdispatch_compute(GLuint num_groups_x, GLuint num_groups_y, 248b8e80941Smrg GLuint num_groups_z, bool no_error) 249b8e80941Smrg{ 250b8e80941Smrg GET_CURRENT_CONTEXT(ctx); 251b8e80941Smrg const GLuint num_groups[3] = { num_groups_x, num_groups_y, num_groups_z }; 252b8e80941Smrg 253b8e80941Smrg FLUSH_CURRENT(ctx, 0); 254b8e80941Smrg 255b8e80941Smrg if (MESA_VERBOSE & VERBOSE_API) 256b8e80941Smrg _mesa_debug(ctx, "glDispatchCompute(%d, %d, %d)\n", 257b8e80941Smrg num_groups_x, num_groups_y, num_groups_z); 258b8e80941Smrg 259b8e80941Smrg if (!no_error && !validate_DispatchCompute(ctx, num_groups)) 260b8e80941Smrg return; 261b8e80941Smrg 262b8e80941Smrg if (num_groups_x == 0u || num_groups_y == 0u || num_groups_z == 0u) 263b8e80941Smrg return; 264b8e80941Smrg 265b8e80941Smrg ctx->Driver.DispatchCompute(ctx, num_groups); 266b8e80941Smrg} 267b8e80941Smrg 268b8e80941Smrgvoid GLAPIENTRY 269b8e80941Smrg_mesa_DispatchCompute_no_error(GLuint num_groups_x, GLuint num_groups_y, 270b8e80941Smrg GLuint num_groups_z) 271b8e80941Smrg{ 272b8e80941Smrg dispatch_compute(num_groups_x, num_groups_y, num_groups_z, true); 273b8e80941Smrg} 274b8e80941Smrg 275848b8605Smrgvoid GLAPIENTRY 276848b8605Smrg_mesa_DispatchCompute(GLuint num_groups_x, 277848b8605Smrg GLuint num_groups_y, 278848b8605Smrg GLuint num_groups_z) 279b8e80941Smrg{ 280b8e80941Smrg dispatch_compute(num_groups_x, num_groups_y, num_groups_z, false); 281b8e80941Smrg} 282b8e80941Smrg 283b8e80941Smrgstatic ALWAYS_INLINE void 284b8e80941Smrgdispatch_compute_indirect(GLintptr indirect, bool no_error) 285848b8605Smrg{ 286848b8605Smrg GET_CURRENT_CONTEXT(ctx); 287848b8605Smrg 288b8e80941Smrg FLUSH_CURRENT(ctx, 0); 289b8e80941Smrg 290b8e80941Smrg if (MESA_VERBOSE & VERBOSE_API) 291b8e80941Smrg _mesa_debug(ctx, "glDispatchComputeIndirect(%ld)\n", (long) indirect); 292b8e80941Smrg 293b8e80941Smrg if (!no_error && !valid_dispatch_indirect(ctx, indirect)) 294b8e80941Smrg return; 295b8e80941Smrg 296b8e80941Smrg ctx->Driver.DispatchComputeIndirect(ctx, indirect); 297b8e80941Smrg} 298b8e80941Smrg 299b8e80941Smrgextern void GLAPIENTRY 300b8e80941Smrg_mesa_DispatchComputeIndirect_no_error(GLintptr indirect) 301b8e80941Smrg{ 302b8e80941Smrg dispatch_compute_indirect(indirect, true); 303848b8605Smrg} 304848b8605Smrg 305848b8605Smrgextern void GLAPIENTRY 306848b8605Smrg_mesa_DispatchComputeIndirect(GLintptr indirect) 307b8e80941Smrg{ 308b8e80941Smrg dispatch_compute_indirect(indirect, false); 309b8e80941Smrg} 310b8e80941Smrg 311b8e80941Smrgstatic ALWAYS_INLINE void 312b8e80941Smrgdispatch_compute_group_size(GLuint num_groups_x, GLuint num_groups_y, 313b8e80941Smrg GLuint num_groups_z, GLuint group_size_x, 314b8e80941Smrg GLuint group_size_y, GLuint group_size_z, 315b8e80941Smrg bool no_error) 316848b8605Smrg{ 317848b8605Smrg GET_CURRENT_CONTEXT(ctx); 318b8e80941Smrg const GLuint num_groups[3] = { num_groups_x, num_groups_y, num_groups_z }; 319b8e80941Smrg const GLuint group_size[3] = { group_size_x, group_size_y, group_size_z }; 320848b8605Smrg 321b8e80941Smrg FLUSH_CURRENT(ctx, 0); 322b8e80941Smrg 323b8e80941Smrg if (MESA_VERBOSE & VERBOSE_API) 324b8e80941Smrg _mesa_debug(ctx, 325b8e80941Smrg "glDispatchComputeGroupSizeARB(%d, %d, %d, %d, %d, %d)\n", 326b8e80941Smrg num_groups_x, num_groups_y, num_groups_z, 327b8e80941Smrg group_size_x, group_size_y, group_size_z); 328b8e80941Smrg 329b8e80941Smrg if (!no_error && 330b8e80941Smrg !validate_DispatchComputeGroupSizeARB(ctx, num_groups, group_size)) 331b8e80941Smrg return; 332b8e80941Smrg 333b8e80941Smrg if (num_groups_x == 0u || num_groups_y == 0u || num_groups_z == 0u) 334b8e80941Smrg return; 335b8e80941Smrg 336b8e80941Smrg ctx->Driver.DispatchComputeGroupSize(ctx, num_groups, group_size); 337b8e80941Smrg} 338b8e80941Smrg 339b8e80941Smrgvoid GLAPIENTRY 340b8e80941Smrg_mesa_DispatchComputeGroupSizeARB_no_error(GLuint num_groups_x, 341b8e80941Smrg GLuint num_groups_y, 342b8e80941Smrg GLuint num_groups_z, 343b8e80941Smrg GLuint group_size_x, 344b8e80941Smrg GLuint group_size_y, 345b8e80941Smrg GLuint group_size_z) 346b8e80941Smrg{ 347b8e80941Smrg dispatch_compute_group_size(num_groups_x, num_groups_y, num_groups_z, 348b8e80941Smrg group_size_x, group_size_y, group_size_z, 349b8e80941Smrg true); 350b8e80941Smrg} 351b8e80941Smrg 352b8e80941Smrgvoid GLAPIENTRY 353b8e80941Smrg_mesa_DispatchComputeGroupSizeARB(GLuint num_groups_x, GLuint num_groups_y, 354b8e80941Smrg GLuint num_groups_z, GLuint group_size_x, 355b8e80941Smrg GLuint group_size_y, GLuint group_size_z) 356b8e80941Smrg{ 357b8e80941Smrg dispatch_compute_group_size(num_groups_x, num_groups_y, num_groups_z, 358b8e80941Smrg group_size_x, group_size_y, group_size_z, 359b8e80941Smrg false); 360848b8605Smrg} 361