1/* 2 * Copyright © 2012 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include "main/glthread_marshal.h" 25#include "main/dispatch.h" 26#include "main/bufferobj.h" 27 28/** 29 * Create an upload buffer. This is called from the app thread, so everything 30 * has to be thread-safe in the driver. 31 */ 32static struct gl_buffer_object * 33new_upload_buffer(struct gl_context *ctx, GLsizeiptr size, uint8_t **ptr) 34{ 35 assert(ctx->GLThread.SupportsBufferUploads); 36 37 struct gl_buffer_object *obj = ctx->Driver.NewBufferObject(ctx, -1); 38 if (!obj) 39 return NULL; 40 41 obj->Immutable = true; 42 43 if (!ctx->Driver.BufferData(ctx, GL_ARRAY_BUFFER, size, NULL, 44 GL_WRITE_ONLY, 45 GL_CLIENT_STORAGE_BIT | GL_MAP_WRITE_BIT, 46 obj)) { 47 ctx->Driver.DeleteBuffer(ctx, obj); 48 return NULL; 49 } 50 51 *ptr = ctx->Driver.MapBufferRange(ctx, 0, size, 52 GL_MAP_WRITE_BIT | 53 GL_MAP_UNSYNCHRONIZED_BIT | 54 MESA_MAP_THREAD_SAFE_BIT, 55 obj, MAP_GLTHREAD); 56 if (!*ptr) { 57 ctx->Driver.DeleteBuffer(ctx, obj); 58 return NULL; 59 } 60 61 return obj; 62} 63 64void 65_mesa_glthread_upload(struct gl_context *ctx, const void *data, 66 GLsizeiptr size, unsigned *out_offset, 67 struct gl_buffer_object **out_buffer, 68 uint8_t **out_ptr) 69{ 70 struct glthread_state *glthread = &ctx->GLThread; 71 const unsigned default_size = 1024 * 1024; 72 73 if (unlikely(size > INT_MAX)) 74 return; 75 76 /* The alignment was chosen arbitrarily. */ 77 unsigned offset = align(glthread->upload_offset, 8); 78 79 /* Allocate a new buffer if needed. */ 80 if (unlikely(!glthread->upload_buffer || offset + size > default_size)) { 81 /* If the size is greater than the buffer size, allocate a separate buffer 82 * just for this upload. 83 */ 84 if (unlikely(size > default_size)) { 85 uint8_t *ptr; 86 87 assert(*out_buffer == NULL); 88 *out_buffer = new_upload_buffer(ctx, size, &ptr); 89 if (!*out_buffer) 90 return; 91 92 *out_offset = 0; 93 if (data) 94 memcpy(ptr, data, size); 95 else 96 *out_ptr = ptr; 97 return; 98 } 99 100 if (glthread->upload_buffer_private_refcount > 0) { 101 p_atomic_add(&glthread->upload_buffer->RefCount, 102 -glthread->upload_buffer_private_refcount); 103 glthread->upload_buffer_private_refcount = 0; 104 } 105 _mesa_reference_buffer_object(ctx, &glthread->upload_buffer, NULL); 106 glthread->upload_buffer = 107 new_upload_buffer(ctx, default_size, &glthread->upload_ptr); 108 glthread->upload_offset = 0; 109 offset = 0; 110 111 /* Since atomic operations are very very slow when 2 threads are not 112 * sharing one L3 cache (which can happen on AMD Zen), prevent using 113 * atomics as follows: 114 * 115 * This function has to return a buffer reference to the caller. 116 * Instead of atomic_inc for every call, it does all possible future 117 * increments in advance when the upload buffer is allocated. 118 * The maximum number of times the function can be called per upload 119 * buffer is default_size, because the minimum allocation size is 1. 120 * Therefore the function can only return default_size number of 121 * references at most, so we will never need more. This is the number 122 * that is added to RefCount at allocation. 123 * 124 * upload_buffer_private_refcount tracks how many buffer references 125 * are left to return to callers. If the buffer is full and there are 126 * still references left, they are atomically subtracted from RefCount 127 * before the buffer is unreferenced. 128 * 129 * This can increase performance by 20%. 130 */ 131 glthread->upload_buffer->RefCount += default_size; 132 glthread->upload_buffer_private_refcount = default_size; 133 } 134 135 /* Upload data. */ 136 if (data) 137 memcpy(glthread->upload_ptr + offset, data, size); 138 else 139 *out_ptr = glthread->upload_ptr + offset; 140 141 glthread->upload_offset = offset + size; 142 *out_offset = offset; 143 144 assert(*out_buffer == NULL); 145 assert(glthread->upload_buffer_private_refcount > 0); 146 *out_buffer = glthread->upload_buffer; 147 glthread->upload_buffer_private_refcount--; 148} 149 150/** Tracks the current bindings for the vertex array and index array buffers. 151 * 152 * This is part of what we need to enable glthread on compat-GL contexts that 153 * happen to use VBOs, without also supporting the full tracking of VBO vs 154 * user vertex array bindings per attribute on each vertex array for 155 * determining what to upload at draw call time. 156 * 157 * Note that GL core makes it so that a buffer binding with an invalid handle 158 * in the "buffer" parameter will throw an error, and then a 159 * glVertexAttribPointer() that followsmight not end up pointing at a VBO. 160 * However, in GL core the draw call would throw an error as well, so we don't 161 * really care if our tracking is wrong for this case -- we never need to 162 * marshal user data for draw calls, and the unmarshal will just generate an 163 * error or not as appropriate. 164 * 165 * For compatibility GL, we do need to accurately know whether the draw call 166 * on the unmarshal side will dereference a user pointer or load data from a 167 * VBO per vertex. That would make it seem like we need to track whether a 168 * "buffer" is valid, so that we can know when an error will be generated 169 * instead of updating the binding. However, compat GL has the ridiculous 170 * feature that if you pass a bad name, it just gens a buffer object for you, 171 * so we escape without having to know if things are valid or not. 172 */ 173void 174_mesa_glthread_BindBuffer(struct gl_context *ctx, GLenum target, GLuint buffer) 175{ 176 struct glthread_state *glthread = &ctx->GLThread; 177 178 switch (target) { 179 case GL_ARRAY_BUFFER: 180 glthread->CurrentArrayBufferName = buffer; 181 break; 182 case GL_ELEMENT_ARRAY_BUFFER: 183 /* The current element array buffer binding is actually tracked in the 184 * vertex array object instead of the context, so this would need to 185 * change on vertex array object updates. 186 */ 187 glthread->CurrentVAO->CurrentElementBufferName = buffer; 188 break; 189 case GL_DRAW_INDIRECT_BUFFER: 190 glthread->CurrentDrawIndirectBufferName = buffer; 191 break; 192 case GL_PIXEL_PACK_BUFFER: 193 glthread->CurrentPixelPackBufferName = buffer; 194 break; 195 case GL_PIXEL_UNPACK_BUFFER: 196 glthread->CurrentPixelUnpackBufferName = buffer; 197 break; 198 } 199} 200 201void 202_mesa_glthread_DeleteBuffers(struct gl_context *ctx, GLsizei n, 203 const GLuint *buffers) 204{ 205 struct glthread_state *glthread = &ctx->GLThread; 206 207 if (!buffers) 208 return; 209 210 for (unsigned i = 0; i < n; i++) { 211 GLuint id = buffers[i]; 212 213 if (id == glthread->CurrentArrayBufferName) 214 _mesa_glthread_BindBuffer(ctx, GL_ARRAY_BUFFER, 0); 215 if (id == glthread->CurrentVAO->CurrentElementBufferName) 216 _mesa_glthread_BindBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, 0); 217 if (id == glthread->CurrentDrawIndirectBufferName) 218 _mesa_glthread_BindBuffer(ctx, GL_DRAW_INDIRECT_BUFFER, 0); 219 if (id == glthread->CurrentPixelPackBufferName) 220 _mesa_glthread_BindBuffer(ctx, GL_PIXEL_PACK_BUFFER, 0); 221 if (id == glthread->CurrentPixelUnpackBufferName) 222 _mesa_glthread_BindBuffer(ctx, GL_PIXEL_UNPACK_BUFFER, 0); 223 } 224} 225 226/* BufferData: marshalled asynchronously */ 227struct marshal_cmd_BufferData 228{ 229 struct marshal_cmd_base cmd_base; 230 GLuint target_or_name; 231 GLsizeiptr size; 232 GLenum usage; 233 const GLvoid *data_external_mem; 234 bool data_null; /* If set, no data follows for "data" */ 235 bool named; 236 bool ext_dsa; 237 /* Next size bytes are GLubyte data[size] */ 238}; 239 240uint32_t 241_mesa_unmarshal_BufferData(struct gl_context *ctx, 242 const struct marshal_cmd_BufferData *cmd, 243 const uint64_t *last) 244{ 245 const GLuint target_or_name = cmd->target_or_name; 246 const GLsizei size = cmd->size; 247 const GLenum usage = cmd->usage; 248 const void *data; 249 250 if (cmd->data_null) 251 data = NULL; 252 else if (!cmd->named && target_or_name == GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD) 253 data = cmd->data_external_mem; 254 else 255 data = (const void *) (cmd + 1); 256 257 if (cmd->ext_dsa) { 258 CALL_NamedBufferDataEXT(ctx->CurrentServerDispatch, 259 (target_or_name, size, data, usage)); 260 } else if (cmd->named) { 261 CALL_NamedBufferData(ctx->CurrentServerDispatch, 262 (target_or_name, size, data, usage)); 263 } else { 264 CALL_BufferData(ctx->CurrentServerDispatch, 265 (target_or_name, size, data, usage)); 266 } 267 return cmd->cmd_base.cmd_size; 268} 269 270uint32_t 271_mesa_unmarshal_NamedBufferData(struct gl_context *ctx, 272 const struct marshal_cmd_NamedBufferData *cmd, 273 const uint64_t *last) 274{ 275 unreachable("never used - all BufferData variants use DISPATCH_CMD_BufferData"); 276 return 0; 277} 278 279uint32_t 280_mesa_unmarshal_NamedBufferDataEXT(struct gl_context *ctx, 281 const struct marshal_cmd_NamedBufferDataEXT *cmd, 282 const uint64_t *last) 283{ 284 unreachable("never used - all BufferData variants use DISPATCH_CMD_BufferData"); 285 return 0; 286} 287 288static void 289_mesa_marshal_BufferData_merged(GLuint target_or_name, GLsizeiptr size, 290 const GLvoid *data, GLenum usage, bool named, 291 bool ext_dsa, const char *func) 292{ 293 GET_CURRENT_CONTEXT(ctx); 294 bool external_mem = !named && 295 target_or_name == GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD; 296 bool copy_data = data && !external_mem; 297 size_t cmd_size = sizeof(struct marshal_cmd_BufferData) + (copy_data ? size : 0); 298 299 if (unlikely(size < 0 || size > INT_MAX || cmd_size > MARSHAL_MAX_CMD_SIZE || 300 (named && target_or_name == 0))) { 301 _mesa_glthread_finish_before(ctx, func); 302 if (named) { 303 CALL_NamedBufferData(ctx->CurrentServerDispatch, 304 (target_or_name, size, data, usage)); 305 } else { 306 CALL_BufferData(ctx->CurrentServerDispatch, 307 (target_or_name, size, data, usage)); 308 } 309 return; 310 } 311 312 struct marshal_cmd_BufferData *cmd = 313 _mesa_glthread_allocate_command(ctx, DISPATCH_CMD_BufferData, 314 cmd_size); 315 316 cmd->target_or_name = target_or_name; 317 cmd->size = size; 318 cmd->usage = usage; 319 cmd->data_null = !data; 320 cmd->named = named; 321 cmd->ext_dsa = ext_dsa; 322 cmd->data_external_mem = data; 323 324 if (copy_data) { 325 char *variable_data = (char *) (cmd + 1); 326 memcpy(variable_data, data, size); 327 } 328} 329 330void GLAPIENTRY 331_mesa_marshal_BufferData(GLenum target, GLsizeiptr size, const GLvoid * data, 332 GLenum usage) 333{ 334 _mesa_marshal_BufferData_merged(target, size, data, usage, false, false, 335 "BufferData"); 336} 337 338void GLAPIENTRY 339_mesa_marshal_NamedBufferData(GLuint buffer, GLsizeiptr size, 340 const GLvoid * data, GLenum usage) 341{ 342 _mesa_marshal_BufferData_merged(buffer, size, data, usage, true, false, 343 "NamedBufferData"); 344} 345 346void GLAPIENTRY 347_mesa_marshal_NamedBufferDataEXT(GLuint buffer, GLsizeiptr size, 348 const GLvoid *data, GLenum usage) 349{ 350 _mesa_marshal_BufferData_merged(buffer, size, data, usage, true, true, 351 "NamedBufferDataEXT"); 352} 353 354 355/* BufferSubData: marshalled asynchronously */ 356struct marshal_cmd_BufferSubData 357{ 358 struct marshal_cmd_base cmd_base; 359 GLenum target_or_name; 360 GLintptr offset; 361 GLsizeiptr size; 362 bool named; 363 bool ext_dsa; 364 /* Next size bytes are GLubyte data[size] */ 365}; 366 367uint32_t 368_mesa_unmarshal_BufferSubData(struct gl_context *ctx, 369 const struct marshal_cmd_BufferSubData *cmd, 370 const uint64_t *last) 371{ 372 const GLenum target_or_name = cmd->target_or_name; 373 const GLintptr offset = cmd->offset; 374 const GLsizeiptr size = cmd->size; 375 const void *data = (const void *) (cmd + 1); 376 377 if (cmd->ext_dsa) { 378 CALL_NamedBufferSubDataEXT(ctx->CurrentServerDispatch, 379 (target_or_name, offset, size, data)); 380 } else if (cmd->named) { 381 CALL_NamedBufferSubData(ctx->CurrentServerDispatch, 382 (target_or_name, offset, size, data)); 383 } else { 384 CALL_BufferSubData(ctx->CurrentServerDispatch, 385 (target_or_name, offset, size, data)); 386 } 387 return cmd->cmd_base.cmd_size; 388} 389 390uint32_t 391_mesa_unmarshal_NamedBufferSubData(struct gl_context *ctx, 392 const struct marshal_cmd_NamedBufferSubData *cmd, 393 const uint64_t *last) 394{ 395 unreachable("never used - all BufferSubData variants use DISPATCH_CMD_BufferSubData"); 396 return 0; 397} 398 399uint32_t 400_mesa_unmarshal_NamedBufferSubDataEXT(struct gl_context *ctx, 401 const struct marshal_cmd_NamedBufferSubDataEXT *cmd, 402 const uint64_t *last) 403{ 404 unreachable("never used - all BufferSubData variants use DISPATCH_CMD_BufferSubData"); 405 return 0; 406} 407 408static void 409_mesa_marshal_BufferSubData_merged(GLuint target_or_name, GLintptr offset, 410 GLsizeiptr size, const GLvoid *data, 411 bool named, bool ext_dsa, const char *func) 412{ 413 GET_CURRENT_CONTEXT(ctx); 414 size_t cmd_size = sizeof(struct marshal_cmd_BufferSubData) + size; 415 416 /* Fast path: Copy the data to an upload buffer, and use the GPU 417 * to copy the uploaded data to the destination buffer. 418 */ 419 /* TODO: Handle offset == 0 && size < buffer_size. 420 * If offset == 0 and size == buffer_size, it's better to discard 421 * the buffer storage, but we don't know the buffer size in glthread. 422 */ 423 if (ctx->GLThread.SupportsBufferUploads && 424 data && offset > 0 && size > 0) { 425 struct gl_buffer_object *upload_buffer = NULL; 426 unsigned upload_offset = 0; 427 428 _mesa_glthread_upload(ctx, data, size, &upload_offset, &upload_buffer, 429 NULL); 430 431 if (upload_buffer) { 432 _mesa_marshal_InternalBufferSubDataCopyMESA((GLintptr)upload_buffer, 433 upload_offset, 434 target_or_name, 435 offset, size, named, 436 ext_dsa); 437 return; 438 } 439 } 440 441 if (unlikely(size < 0 || size > INT_MAX || cmd_size < 0 || 442 cmd_size > MARSHAL_MAX_CMD_SIZE || !data || 443 (named && target_or_name == 0))) { 444 _mesa_glthread_finish_before(ctx, func); 445 if (named) { 446 CALL_NamedBufferSubData(ctx->CurrentServerDispatch, 447 (target_or_name, offset, size, data)); 448 } else { 449 CALL_BufferSubData(ctx->CurrentServerDispatch, 450 (target_or_name, offset, size, data)); 451 } 452 return; 453 } 454 455 struct marshal_cmd_BufferSubData *cmd = 456 _mesa_glthread_allocate_command(ctx, DISPATCH_CMD_BufferSubData, 457 cmd_size); 458 cmd->target_or_name = target_or_name; 459 cmd->offset = offset; 460 cmd->size = size; 461 cmd->named = named; 462 cmd->ext_dsa = ext_dsa; 463 464 char *variable_data = (char *) (cmd + 1); 465 memcpy(variable_data, data, size); 466} 467 468void GLAPIENTRY 469_mesa_marshal_BufferSubData(GLenum target, GLintptr offset, GLsizeiptr size, 470 const GLvoid * data) 471{ 472 _mesa_marshal_BufferSubData_merged(target, offset, size, data, false, 473 false, "BufferSubData"); 474} 475 476void GLAPIENTRY 477_mesa_marshal_NamedBufferSubData(GLuint buffer, GLintptr offset, 478 GLsizeiptr size, const GLvoid * data) 479{ 480 _mesa_marshal_BufferSubData_merged(buffer, offset, size, data, true, 481 false, "NamedBufferSubData"); 482} 483 484void GLAPIENTRY 485_mesa_marshal_NamedBufferSubDataEXT(GLuint buffer, GLintptr offset, 486 GLsizeiptr size, const GLvoid * data) 487{ 488 _mesa_marshal_BufferSubData_merged(buffer, offset, size, data, true, 489 true, "NamedBufferSubDataEXT"); 490} 491