1/*
2 * Copyright © 2012 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include "main/glthread_marshal.h"
25#include "main/dispatch.h"
26#include "main/bufferobj.h"
27
28/**
29 * Create an upload buffer. This is called from the app thread, so everything
30 * has to be thread-safe in the driver.
31 */
32static struct gl_buffer_object *
33new_upload_buffer(struct gl_context *ctx, GLsizeiptr size, uint8_t **ptr)
34{
35   assert(ctx->GLThread.SupportsBufferUploads);
36
37   struct gl_buffer_object *obj = ctx->Driver.NewBufferObject(ctx, -1);
38   if (!obj)
39      return NULL;
40
41   obj->Immutable = true;
42
43   if (!ctx->Driver.BufferData(ctx, GL_ARRAY_BUFFER, size, NULL,
44                               GL_WRITE_ONLY,
45                               GL_CLIENT_STORAGE_BIT | GL_MAP_WRITE_BIT,
46                               obj)) {
47      ctx->Driver.DeleteBuffer(ctx, obj);
48      return NULL;
49   }
50
51   *ptr = ctx->Driver.MapBufferRange(ctx, 0, size,
52                                     GL_MAP_WRITE_BIT |
53                                     GL_MAP_UNSYNCHRONIZED_BIT |
54                                     MESA_MAP_THREAD_SAFE_BIT,
55                                     obj, MAP_GLTHREAD);
56   if (!*ptr) {
57      ctx->Driver.DeleteBuffer(ctx, obj);
58      return NULL;
59   }
60
61   return obj;
62}
63
64void
65_mesa_glthread_upload(struct gl_context *ctx, const void *data,
66                      GLsizeiptr size, unsigned *out_offset,
67                      struct gl_buffer_object **out_buffer,
68                      uint8_t **out_ptr)
69{
70   struct glthread_state *glthread = &ctx->GLThread;
71   const unsigned default_size = 1024 * 1024;
72
73   if (unlikely(size > INT_MAX))
74      return;
75
76   /* The alignment was chosen arbitrarily. */
77   unsigned offset = align(glthread->upload_offset, 8);
78
79   /* Allocate a new buffer if needed. */
80   if (unlikely(!glthread->upload_buffer || offset + size > default_size)) {
81      /* If the size is greater than the buffer size, allocate a separate buffer
82       * just for this upload.
83       */
84      if (unlikely(size > default_size)) {
85         uint8_t *ptr;
86
87         assert(*out_buffer == NULL);
88         *out_buffer = new_upload_buffer(ctx, size, &ptr);
89         if (!*out_buffer)
90            return;
91
92         *out_offset = 0;
93         if (data)
94            memcpy(ptr, data, size);
95         else
96            *out_ptr = ptr;
97         return;
98      }
99
100      if (glthread->upload_buffer_private_refcount > 0) {
101         p_atomic_add(&glthread->upload_buffer->RefCount,
102                      -glthread->upload_buffer_private_refcount);
103         glthread->upload_buffer_private_refcount = 0;
104      }
105      _mesa_reference_buffer_object(ctx, &glthread->upload_buffer, NULL);
106      glthread->upload_buffer =
107         new_upload_buffer(ctx, default_size, &glthread->upload_ptr);
108      glthread->upload_offset = 0;
109      offset = 0;
110
111      /* Since atomic operations are very very slow when 2 threads are not
112       * sharing one L3 cache (which can happen on AMD Zen), prevent using
113       * atomics as follows:
114       *
115       * This function has to return a buffer reference to the caller.
116       * Instead of atomic_inc for every call, it does all possible future
117       * increments in advance when the upload buffer is allocated.
118       * The maximum number of times the function can be called per upload
119       * buffer is default_size, because the minimum allocation size is 1.
120       * Therefore the function can only return default_size number of
121       * references at most, so we will never need more. This is the number
122       * that is added to RefCount at allocation.
123       *
124       * upload_buffer_private_refcount tracks how many buffer references
125       * are left to return to callers. If the buffer is full and there are
126       * still references left, they are atomically subtracted from RefCount
127       * before the buffer is unreferenced.
128       *
129       * This can increase performance by 20%.
130       */
131      glthread->upload_buffer->RefCount += default_size;
132      glthread->upload_buffer_private_refcount = default_size;
133   }
134
135   /* Upload data. */
136   if (data)
137      memcpy(glthread->upload_ptr + offset, data, size);
138   else
139      *out_ptr = glthread->upload_ptr + offset;
140
141   glthread->upload_offset = offset + size;
142   *out_offset = offset;
143
144   assert(*out_buffer == NULL);
145   assert(glthread->upload_buffer_private_refcount > 0);
146   *out_buffer = glthread->upload_buffer;
147   glthread->upload_buffer_private_refcount--;
148}
149
150/** Tracks the current bindings for the vertex array and index array buffers.
151 *
152 * This is part of what we need to enable glthread on compat-GL contexts that
153 * happen to use VBOs, without also supporting the full tracking of VBO vs
154 * user vertex array bindings per attribute on each vertex array for
155 * determining what to upload at draw call time.
156 *
157 * Note that GL core makes it so that a buffer binding with an invalid handle
158 * in the "buffer" parameter will throw an error, and then a
 * glVertexAttribPointer() that follows might not end up pointing at a VBO.
160 * However, in GL core the draw call would throw an error as well, so we don't
161 * really care if our tracking is wrong for this case -- we never need to
162 * marshal user data for draw calls, and the unmarshal will just generate an
163 * error or not as appropriate.
164 *
165 * For compatibility GL, we do need to accurately know whether the draw call
166 * on the unmarshal side will dereference a user pointer or load data from a
167 * VBO per vertex.  That would make it seem like we need to track whether a
168 * "buffer" is valid, so that we can know when an error will be generated
169 * instead of updating the binding.  However, compat GL has the ridiculous
170 * feature that if you pass a bad name, it just gens a buffer object for you,
171 * so we escape without having to know if things are valid or not.
172 */
173void
174_mesa_glthread_BindBuffer(struct gl_context *ctx, GLenum target, GLuint buffer)
175{
176   struct glthread_state *glthread = &ctx->GLThread;
177
178   switch (target) {
179   case GL_ARRAY_BUFFER:
180      glthread->CurrentArrayBufferName = buffer;
181      break;
182   case GL_ELEMENT_ARRAY_BUFFER:
183      /* The current element array buffer binding is actually tracked in the
184       * vertex array object instead of the context, so this would need to
185       * change on vertex array object updates.
186       */
187      glthread->CurrentVAO->CurrentElementBufferName = buffer;
188      break;
189   case GL_DRAW_INDIRECT_BUFFER:
190      glthread->CurrentDrawIndirectBufferName = buffer;
191      break;
192   case GL_PIXEL_PACK_BUFFER:
193      glthread->CurrentPixelPackBufferName = buffer;
194      break;
195   case GL_PIXEL_UNPACK_BUFFER:
196      glthread->CurrentPixelUnpackBufferName = buffer;
197      break;
198   }
199}
200
201void
202_mesa_glthread_DeleteBuffers(struct gl_context *ctx, GLsizei n,
203                             const GLuint *buffers)
204{
205   struct glthread_state *glthread = &ctx->GLThread;
206
207   if (!buffers)
208      return;
209
210   for (unsigned i = 0; i < n; i++) {
211      GLuint id = buffers[i];
212
213      if (id == glthread->CurrentArrayBufferName)
214         _mesa_glthread_BindBuffer(ctx, GL_ARRAY_BUFFER, 0);
215      if (id == glthread->CurrentVAO->CurrentElementBufferName)
216         _mesa_glthread_BindBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, 0);
217      if (id == glthread->CurrentDrawIndirectBufferName)
218         _mesa_glthread_BindBuffer(ctx, GL_DRAW_INDIRECT_BUFFER, 0);
219      if (id == glthread->CurrentPixelPackBufferName)
220         _mesa_glthread_BindBuffer(ctx, GL_PIXEL_PACK_BUFFER, 0);
221      if (id == glthread->CurrentPixelUnpackBufferName)
222         _mesa_glthread_BindBuffer(ctx, GL_PIXEL_UNPACK_BUFFER, 0);
223   }
224}
225
/* BufferData: marshalled asynchronously */
struct marshal_cmd_BufferData
{
   struct marshal_cmd_base cmd_base;
   GLuint target_or_name;  /* GLenum target, or buffer name if "named" is set */
   GLsizeiptr size;
   GLenum usage;
   /* Client pointer passed through unchanged; only used for the
    * GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD target (external memory is not
    * copied into the command stream).
    */
   const GLvoid *data_external_mem;
   bool data_null; /* If set, no data follows for "data" */
   bool named;     /* glNamedBufferData */
   bool ext_dsa;   /* glNamedBufferDataEXT */
   /* Next size bytes are GLubyte data[size] */
};
239
240uint32_t
241_mesa_unmarshal_BufferData(struct gl_context *ctx,
242                           const struct marshal_cmd_BufferData *cmd,
243                           const uint64_t *last)
244{
245   const GLuint target_or_name = cmd->target_or_name;
246   const GLsizei size = cmd->size;
247   const GLenum usage = cmd->usage;
248   const void *data;
249
250   if (cmd->data_null)
251      data = NULL;
252   else if (!cmd->named && target_or_name == GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD)
253      data = cmd->data_external_mem;
254   else
255      data = (const void *) (cmd + 1);
256
257   if (cmd->ext_dsa) {
258      CALL_NamedBufferDataEXT(ctx->CurrentServerDispatch,
259                              (target_or_name, size, data, usage));
260   } else if (cmd->named) {
261      CALL_NamedBufferData(ctx->CurrentServerDispatch,
262                           (target_or_name, size, data, usage));
263   } else {
264      CALL_BufferData(ctx->CurrentServerDispatch,
265                      (target_or_name, size, data, usage));
266   }
267   return cmd->cmd_base.cmd_size;
268}
269
/* Stub: glNamedBufferData is marshalled through DISPATCH_CMD_BufferData
 * (see _mesa_marshal_BufferData_merged), so this is never dispatched.
 */
uint32_t
_mesa_unmarshal_NamedBufferData(struct gl_context *ctx,
                                const struct marshal_cmd_NamedBufferData *cmd,
                                const uint64_t *last)
{
   unreachable("never used - all BufferData variants use DISPATCH_CMD_BufferData");
   return 0;
}
278
/* Stub: glNamedBufferDataEXT is marshalled through DISPATCH_CMD_BufferData
 * (see _mesa_marshal_BufferData_merged), so this is never dispatched.
 */
uint32_t
_mesa_unmarshal_NamedBufferDataEXT(struct gl_context *ctx,
                                   const struct marshal_cmd_NamedBufferDataEXT *cmd,
                                   const uint64_t *last)
{
   unreachable("never used - all BufferData variants use DISPATCH_CMD_BufferData");
   return 0;
}
287
288static void
289_mesa_marshal_BufferData_merged(GLuint target_or_name, GLsizeiptr size,
290                                const GLvoid *data, GLenum usage, bool named,
291                                bool ext_dsa, const char *func)
292{
293   GET_CURRENT_CONTEXT(ctx);
294   bool external_mem = !named &&
295                       target_or_name == GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD;
296   bool copy_data = data && !external_mem;
297   size_t cmd_size = sizeof(struct marshal_cmd_BufferData) + (copy_data ? size : 0);
298
299   if (unlikely(size < 0 || size > INT_MAX || cmd_size > MARSHAL_MAX_CMD_SIZE ||
300                (named && target_or_name == 0))) {
301      _mesa_glthread_finish_before(ctx, func);
302      if (named) {
303         CALL_NamedBufferData(ctx->CurrentServerDispatch,
304                              (target_or_name, size, data, usage));
305      } else {
306         CALL_BufferData(ctx->CurrentServerDispatch,
307                         (target_or_name, size, data, usage));
308      }
309      return;
310   }
311
312   struct marshal_cmd_BufferData *cmd =
313      _mesa_glthread_allocate_command(ctx, DISPATCH_CMD_BufferData,
314                                      cmd_size);
315
316   cmd->target_or_name = target_or_name;
317   cmd->size = size;
318   cmd->usage = usage;
319   cmd->data_null = !data;
320   cmd->named = named;
321   cmd->ext_dsa = ext_dsa;
322   cmd->data_external_mem = data;
323
324   if (copy_data) {
325      char *variable_data = (char *) (cmd + 1);
326      memcpy(variable_data, data, size);
327   }
328}
329
/** glBufferData: asynchronous marshalling entry point (app thread). */
void GLAPIENTRY
_mesa_marshal_BufferData(GLenum target, GLsizeiptr size, const GLvoid * data,
                         GLenum usage)
{
   _mesa_marshal_BufferData_merged(target, size, data, usage, false, false,
                                   "BufferData");
}
337
/** glNamedBufferData: asynchronous marshalling entry point (app thread). */
void GLAPIENTRY
_mesa_marshal_NamedBufferData(GLuint buffer, GLsizeiptr size,
                              const GLvoid * data, GLenum usage)
{
   _mesa_marshal_BufferData_merged(buffer, size, data, usage, true, false,
                                   "NamedBufferData");
}
345
/** glNamedBufferDataEXT: asynchronous marshalling entry point (app thread). */
void GLAPIENTRY
_mesa_marshal_NamedBufferDataEXT(GLuint buffer, GLsizeiptr size,
                                 const GLvoid *data, GLenum usage)
{
   _mesa_marshal_BufferData_merged(buffer, size, data, usage, true, true,
                                   "NamedBufferDataEXT");
}
353
354
/* BufferSubData: marshalled asynchronously */
struct marshal_cmd_BufferSubData
{
   struct marshal_cmd_base cmd_base;
   GLenum target_or_name;  /* target, or buffer name if "named" is set */
   GLintptr offset;
   GLsizeiptr size;
   bool named;     /* glNamedBufferSubData */
   bool ext_dsa;   /* glNamedBufferSubDataEXT */
   /* Next size bytes are GLubyte data[size] */
};
366
367uint32_t
368_mesa_unmarshal_BufferSubData(struct gl_context *ctx,
369                              const struct marshal_cmd_BufferSubData *cmd,
370                              const uint64_t *last)
371{
372   const GLenum target_or_name = cmd->target_or_name;
373   const GLintptr offset = cmd->offset;
374   const GLsizeiptr size = cmd->size;
375   const void *data = (const void *) (cmd + 1);
376
377   if (cmd->ext_dsa) {
378      CALL_NamedBufferSubDataEXT(ctx->CurrentServerDispatch,
379                                 (target_or_name, offset, size, data));
380   } else if (cmd->named) {
381      CALL_NamedBufferSubData(ctx->CurrentServerDispatch,
382                              (target_or_name, offset, size, data));
383   } else {
384      CALL_BufferSubData(ctx->CurrentServerDispatch,
385                         (target_or_name, offset, size, data));
386   }
387   return cmd->cmd_base.cmd_size;
388}
389
/* Stub: glNamedBufferSubData is marshalled through
 * DISPATCH_CMD_BufferSubData (see _mesa_marshal_BufferSubData_merged),
 * so this is never dispatched.
 */
uint32_t
_mesa_unmarshal_NamedBufferSubData(struct gl_context *ctx,
                                   const struct marshal_cmd_NamedBufferSubData *cmd,
                                   const uint64_t *last)
{
   unreachable("never used - all BufferSubData variants use DISPATCH_CMD_BufferSubData");
   return 0;
}
398
/* Stub: glNamedBufferSubDataEXT is marshalled through
 * DISPATCH_CMD_BufferSubData (see _mesa_marshal_BufferSubData_merged),
 * so this is never dispatched.
 */
uint32_t
_mesa_unmarshal_NamedBufferSubDataEXT(struct gl_context *ctx,
                                      const struct marshal_cmd_NamedBufferSubDataEXT *cmd,
                                      const uint64_t *last)
{
   unreachable("never used - all BufferSubData variants use DISPATCH_CMD_BufferSubData");
   return 0;
}
407
408static void
409_mesa_marshal_BufferSubData_merged(GLuint target_or_name, GLintptr offset,
410                                   GLsizeiptr size, const GLvoid *data,
411                                   bool named, bool ext_dsa, const char *func)
412{
413   GET_CURRENT_CONTEXT(ctx);
414   size_t cmd_size = sizeof(struct marshal_cmd_BufferSubData) + size;
415
416   /* Fast path: Copy the data to an upload buffer, and use the GPU
417    * to copy the uploaded data to the destination buffer.
418    */
419   /* TODO: Handle offset == 0 && size < buffer_size.
420    *       If offset == 0 and size == buffer_size, it's better to discard
421    *       the buffer storage, but we don't know the buffer size in glthread.
422    */
423   if (ctx->GLThread.SupportsBufferUploads &&
424       data && offset > 0 && size > 0) {
425      struct gl_buffer_object *upload_buffer = NULL;
426      unsigned upload_offset = 0;
427
428      _mesa_glthread_upload(ctx, data, size, &upload_offset, &upload_buffer,
429                            NULL);
430
431      if (upload_buffer) {
432         _mesa_marshal_InternalBufferSubDataCopyMESA((GLintptr)upload_buffer,
433                                                     upload_offset,
434                                                     target_or_name,
435                                                     offset, size, named,
436                                                     ext_dsa);
437         return;
438      }
439   }
440
441   if (unlikely(size < 0 || size > INT_MAX || cmd_size < 0 ||
442                cmd_size > MARSHAL_MAX_CMD_SIZE || !data ||
443                (named && target_or_name == 0))) {
444      _mesa_glthread_finish_before(ctx, func);
445      if (named) {
446         CALL_NamedBufferSubData(ctx->CurrentServerDispatch,
447                                 (target_or_name, offset, size, data));
448      } else {
449         CALL_BufferSubData(ctx->CurrentServerDispatch,
450                            (target_or_name, offset, size, data));
451      }
452      return;
453   }
454
455   struct marshal_cmd_BufferSubData *cmd =
456      _mesa_glthread_allocate_command(ctx, DISPATCH_CMD_BufferSubData,
457                                      cmd_size);
458   cmd->target_or_name = target_or_name;
459   cmd->offset = offset;
460   cmd->size = size;
461   cmd->named = named;
462   cmd->ext_dsa = ext_dsa;
463
464   char *variable_data = (char *) (cmd + 1);
465   memcpy(variable_data, data, size);
466}
467
/** glBufferSubData: asynchronous marshalling entry point (app thread). */
void GLAPIENTRY
_mesa_marshal_BufferSubData(GLenum target, GLintptr offset, GLsizeiptr size,
                            const GLvoid * data)
{
   _mesa_marshal_BufferSubData_merged(target, offset, size, data, false,
                                      false, "BufferSubData");
}
475
/** glNamedBufferSubData: asynchronous marshalling entry point (app thread). */
void GLAPIENTRY
_mesa_marshal_NamedBufferSubData(GLuint buffer, GLintptr offset,
                                 GLsizeiptr size, const GLvoid * data)
{
   _mesa_marshal_BufferSubData_merged(buffer, offset, size, data, true,
                                      false, "NamedBufferSubData");
}
483
/** glNamedBufferSubDataEXT: asynchronous marshalling entry point (app thread). */
void GLAPIENTRY
_mesa_marshal_NamedBufferSubDataEXT(GLuint buffer, GLintptr offset,
                                    GLsizeiptr size, const GLvoid * data)
{
   _mesa_marshal_BufferSubData_merged(buffer, offset, size, data, true,
                                      true, "NamedBufferSubDataEXT");
}
491