1/**************************************************************************
2 *
3 * Copyright 2017 Advanced Micro Devices, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * on the rights to use, copy, modify, merge, publish, distribute, sub
10 * license, and/or sell copies of the Software, and to permit persons to whom
11 * the Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
21 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
22 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
23 * USE OR OTHER DEALINGS IN THE SOFTWARE.
24 *
25 **************************************************************************/
26
27#include "util/u_threaded_context.h"
28#include "util/u_cpu_detect.h"
29#include "util/format/u_format.h"
30#include "util/u_inlines.h"
31#include "util/u_memory.h"
32#include "util/u_upload_mgr.h"
33#include "driver_trace/tr_context.h"
34#include "util/log.h"
35#include "compiler/shader_info.h"
36
/* Debug level 1: enable internal consistency asserts. */
#if TC_DEBUG >= 1
#define tc_assert assert
#else
#define tc_assert(x)
#endif

/* Debug level 2: enable logging helpers; they compile to no-ops otherwise. */
#if TC_DEBUG >= 2
#define tc_printf mesa_logi
#define tc_asprintf asprintf
#define tc_strcmp strcmp
#else
#define tc_printf(...)
#define tc_asprintf(...) 0
#define tc_strcmp(...) 0
#endif

/* Magic value stored in batches and call headers to detect corruption. */
#define TC_SENTINEL 0x5ca1ab1e
54
/* One enumerator per threaded-context call, generated from the CALL()
 * X-macro list in u_threaded_context_calls.h.
 */
enum tc_call_id {
#define CALL(name) TC_CALL_##name,
#include "u_threaded_context_calls.h"
#undef CALL
   TC_NUM_CALLS,
};

/* Human-readable call names for debug tracing (TC_DEBUG >= 3 only). */
#if TC_DEBUG >= 3
static const char *tc_call_names[] = {
#define CALL(name) #name,
#include "u_threaded_context_calls.h"
#undef CALL
};
#endif

/* A call handler executes one queued call on the driver thread and returns
 * the number of 64-bit slots the call occupied in the batch.
 */
typedef uint16_t (*tc_execute)(struct pipe_context *pipe, void *call, uint64_t *last);

/* Dispatch table indexed by tc_call_id. */
static const tc_execute execute_func[TC_NUM_CALLS];
73
74static void
75tc_batch_check(UNUSED struct tc_batch *batch)
76{
77   tc_assert(batch->sentinel == TC_SENTINEL);
78   tc_assert(batch->num_total_slots <= TC_SLOTS_PER_BATCH);
79}
80
81static void
82tc_debug_check(struct threaded_context *tc)
83{
84   for (unsigned i = 0; i < TC_MAX_BATCHES; i++) {
85      tc_batch_check(&tc->batch_slots[i]);
86      tc_assert(tc->batch_slots[i].tc == tc);
87   }
88}
89
/* Record the current thread as the driver thread (debug builds only);
 * used by validation code to check which thread calls into the driver.
 */
static void
tc_set_driver_thread(struct threaded_context *tc)
{
#ifndef NDEBUG
   tc->driver_thread = util_get_thread_id();
#endif
}

/* Clear the recorded driver thread id (debug builds only). */
static void
tc_clear_driver_thread(struct threaded_context *tc)
{
#ifndef NDEBUG
   memset(&tc->driver_thread, 0, sizeof(tc->driver_thread));
#endif
}
105
/* Debug helper: verify that a queued call's recorded slot count matches
 * the size expected for the type it is about to be cast to.
 */
static void *
to_call_check(void *ptr, unsigned num_slots)
{
#if TC_DEBUG >= 1
   struct tc_call_base *call = ptr;
   tc_assert(call->num_slots == num_slots);
#endif
   return ptr;
}
/* Cast a queued call to its concrete type, size-checked in debug builds. */
#define to_call(ptr, type) ((struct type *)to_call_check((void *)(ptr), call_size(type)))

/* Batches store calls in 8-byte slots; these convert sizes/types to slot
 * counts. call_size_with_slots accounts for a trailing variable-length
 * "slot[]" payload of num_slots elements.
 */
#define size_to_slots(size)      DIV_ROUND_UP(size, 8)
#define call_size(type)          size_to_slots(sizeof(struct type))
#define call_size_with_slots(type, num_slots) size_to_slots( \
   sizeof(struct type) + sizeof(((struct type*)NULL)->slot[0]) * (num_slots))
#define get_next_call(ptr, type) ((struct type*)((uint64_t*)ptr + call_size(type)))
122
/* Assign src to dst while dst is uninitialized: takes a new reference on
 * src without dropping anything, since dst holds no valid pointer yet.
 */
static inline void
tc_set_resource_reference(struct pipe_resource **dst, struct pipe_resource *src)
{
   *dst = src;
   pipe_reference(NULL, &src->reference); /* only increment refcount */
}

/* Same as tc_set_resource_reference, but for vertex-state objects. */
static inline void
tc_set_vertex_state_reference(struct pipe_vertex_state **dst,
                              struct pipe_vertex_state *src)
{
   *dst = src;
   pipe_reference(NULL, &src->reference); /* only increment refcount */
}
139
/* Unreference dst but don't touch the dst pointer; destroys the resource
 * when the refcount reaches zero.
 */
static inline void
tc_drop_resource_reference(struct pipe_resource *dst)
{
   if (pipe_reference(&dst->reference, NULL)) /* only decrement refcount */
      pipe_resource_destroy(dst);
}

/* Unreference dst but don't touch the dst pointer; the surface is destroyed
 * by the context that created it.
 */
static inline void
tc_drop_surface_reference(struct pipe_surface *dst)
{
   if (pipe_reference(&dst->reference, NULL)) /* only decrement refcount */
      dst->context->surface_destroy(dst->context, dst);
}

/* Unreference dst but don't touch the dst pointer; the view is destroyed
 * by the context that created it.
 */
static inline void
tc_drop_sampler_view_reference(struct pipe_sampler_view *dst)
{
   if (pipe_reference(&dst->reference, NULL)) /* only decrement refcount */
      dst->context->sampler_view_destroy(dst->context, dst);
}

/* Unreference dst but don't touch the dst pointer; the target is destroyed
 * by the context that created it.
 */
static inline void
tc_drop_so_target_reference(struct pipe_stream_output_target *dst)
{
   if (pipe_reference(&dst->reference, NULL)) /* only decrement refcount */
      dst->context->stream_output_target_destroy(dst->context, dst);
}
171
/**
 * Subtract the given number of references atomically; destroys the vertex
 * state when the count drops to (or below) zero.
 */
static inline void
tc_drop_vertex_state_references(struct pipe_vertex_state *dst, int num_refs)
{
   int count = p_atomic_add_return(&dst->reference.count, -num_refs);

   assert(count >= 0);
   /* Underflows shouldn't happen, but let's be safe. */
   if (count <= 0)
      dst->screen->vertex_state_destroy(dst->screen, dst);
}

/* We don't want to read or write min_index and max_index, because
 * it shouldn't be needed by drivers at this point.
 */
#define DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX \
   offsetof(struct pipe_draw_info, min_index)
191
/* util_queue job entry point: execute every call recorded in a batch on
 * the driver thread. "last" points one past the final used slot and is
 * handed to each call handler.
 */
static void
tc_batch_execute(void *job, UNUSED void *gdata, int thread_index)
{
   struct tc_batch *batch = job;
   struct pipe_context *pipe = batch->tc->pipe;
   uint64_t *last = &batch->slots[batch->num_total_slots];

   tc_batch_check(batch);
   tc_set_driver_thread(batch->tc);

   /* Tokens are detached before submission (see tc_batch_flush/_tc_sync). */
   assert(!batch->token);

   /* Walk the slot array; each handler returns how many slots it consumed. */
   for (uint64_t *iter = batch->slots; iter != last;) {
      struct tc_call_base *call = (struct tc_call_base *)iter;

      tc_assert(call->sentinel == TC_SENTINEL);

#if TC_DEBUG >= 3
      tc_printf("CALL: %s", tc_call_names[call->call_id]);
#endif

      iter += execute_func[call->call_id](pipe, call, last);
   }

   /* Add the fence to the list of fences for the driver to signal at the next
    * flush, which we use for tracking which buffers are referenced by
    * an unflushed command buffer.
    */
   struct threaded_context *tc = batch->tc;
   struct util_queue_fence *fence =
      &tc->buffer_lists[batch->buffer_list_index].driver_flushed_fence;

   if (tc->options.driver_calls_flush_notify) {
      tc->signal_fences_next_flush[tc->num_signal_fences_next_flush++] = fence;

      /* Since our buffer lists are chained as a ring, we need to flush
       * the context twice as we go around the ring to make the driver signal
       * the buffer list fences, so that the producer thread can reuse the buffer
       * list structures for the next batches without waiting.
       */
      unsigned half_ring = TC_MAX_BUFFER_LISTS / 2;
      if (batch->buffer_list_index % half_ring == half_ring - 1)
         pipe->flush(pipe, NULL, PIPE_FLUSH_ASYNC);
   } else {
      /* No flush_notify callback: signal the buffer-list fence right away. */
      util_queue_fence_signal(fence);
   }

   tc_clear_driver_thread(batch->tc);
   tc_batch_check(batch);
   /* Mark the batch empty so it can be reused. */
   batch->num_total_slots = 0;
}
243
/* Advance to the next buffer list in the ring and attach it to the batch
 * that will be recorded next.
 */
static void
tc_begin_next_buffer_list(struct threaded_context *tc)
{
   tc->next_buf_list = (tc->next_buf_list + 1) % TC_MAX_BUFFER_LISTS;

   tc->batch_slots[tc->next].buffer_list_index = tc->next_buf_list;

   /* Clear the buffer list in the new empty batch. */
   struct tc_buffer_list *buf_list = &tc->buffer_lists[tc->next_buf_list];
   assert(util_queue_fence_is_signalled(&buf_list->driver_flushed_fence));
   util_queue_fence_reset(&buf_list->driver_flushed_fence); /* set to unsignalled */
   BITSET_ZERO(buf_list->buffer_list);

   /* The new list starts empty, so the first draw/dispatch must re-add all
    * currently bound buffers (see tc_add_all_*_bindings_to_buffer_list).
    */
   tc->add_all_gfx_bindings_to_buffer_list = true;
   tc->add_all_compute_bindings_to_buffer_list = true;
}
260
/* Submit the current batch to the driver thread and rotate to the next
 * batch slot and buffer list.
 */
static void
tc_batch_flush(struct threaded_context *tc)
{
   struct tc_batch *next = &tc->batch_slots[tc->next];

   tc_assert(next->num_total_slots != 0);
   tc_batch_check(next);
   tc_debug_check(tc);
   tc->bytes_mapped_estimate = 0;
   p_atomic_add(&tc->num_offloaded_slots, next->num_total_slots);

   /* Detach the unflushed-batch token: the batch is being flushed now. */
   if (next->token) {
      next->token->tc = NULL;
      tc_unflushed_batch_token_reference(&next->token, NULL);
   }

   util_queue_add_job(&tc->queue, next, &next->fence, tc_batch_execute,
                      NULL, 0);
   tc->last = tc->next;
   tc->next = (tc->next + 1) % TC_MAX_BATCHES;
   tc_begin_next_buffer_list(tc);
}
283
/* This is the function that adds variable-sized calls into the current
 * batch. It also flushes the batch if there is not enough space there.
 * All other higher-level "add" functions use it.
 *
 * Returns a pointer to the call header; the caller fills in the payload.
 */
static void *
tc_add_sized_call(struct threaded_context *tc, enum tc_call_id id,
                  unsigned num_slots)
{
   struct tc_batch *next = &tc->batch_slots[tc->next];
   assert(num_slots <= TC_SLOTS_PER_BATCH);
   tc_debug_check(tc);

   /* Flush if the call doesn't fit, then record into the fresh batch. */
   if (unlikely(next->num_total_slots + num_slots > TC_SLOTS_PER_BATCH)) {
      tc_batch_flush(tc);
      next = &tc->batch_slots[tc->next];
      tc_assert(next->num_total_slots == 0);
   }

   tc_assert(util_queue_fence_is_signalled(&next->fence));

   struct tc_call_base *call = (struct tc_call_base*)&next->slots[next->num_total_slots];
   next->num_total_slots += num_slots;

#if !defined(NDEBUG) && TC_DEBUG >= 1
   call->sentinel = TC_SENTINEL;
#endif
   call->call_id = id;
   call->num_slots = num_slots;

#if TC_DEBUG >= 3
   tc_printf("ENQUEUE: %s", tc_call_names[id]);
#endif

   tc_debug_check(tc);
   return call;
}

/* Enqueue a fixed-size call of the given struct type. */
#define tc_add_call(tc, execute, type) \
   ((struct type*)tc_add_sized_call(tc, execute, call_size(type)))

/* Enqueue a call with a trailing variable-length "slot[]" payload. */
#define tc_add_slot_based_call(tc, execute, type, num_slots) \
   ((struct type*)tc_add_sized_call(tc, execute, \
                                    call_size_with_slots(type, num_slots)))
327
328static bool
329tc_is_sync(struct threaded_context *tc)
330{
331   struct tc_batch *last = &tc->batch_slots[tc->last];
332   struct tc_batch *next = &tc->batch_slots[tc->next];
333
334   return util_queue_fence_is_signalled(&last->fence) &&
335          !next->num_total_slots;
336}
337
338static void
339_tc_sync(struct threaded_context *tc, UNUSED const char *info, UNUSED const char *func)
340{
341   struct tc_batch *last = &tc->batch_slots[tc->last];
342   struct tc_batch *next = &tc->batch_slots[tc->next];
343   bool synced = false;
344
345   tc_debug_check(tc);
346
347   /* Only wait for queued calls... */
348   if (!util_queue_fence_is_signalled(&last->fence)) {
349      util_queue_fence_wait(&last->fence);
350      synced = true;
351   }
352
353   tc_debug_check(tc);
354
355   if (next->token) {
356      next->token->tc = NULL;
357      tc_unflushed_batch_token_reference(&next->token, NULL);
358   }
359
360   /* .. and execute unflushed calls directly. */
361   if (next->num_total_slots) {
362      p_atomic_add(&tc->num_direct_slots, next->num_total_slots);
363      tc->bytes_mapped_estimate = 0;
364      tc_batch_execute(next, NULL, 0);
365      tc_begin_next_buffer_list(tc);
366      synced = true;
367   }
368
369   if (synced) {
370      p_atomic_inc(&tc->num_syncs);
371
372      if (tc_strcmp(func, "tc_destroy") != 0) {
373         tc_printf("sync %s %s", func, info);
374	  }
375   }
376
377   tc_debug_check(tc);
378}
379
380#define tc_sync(tc) _tc_sync(tc, "", __func__)
381#define tc_sync_msg(tc, info) _tc_sync(tc, info, __func__)
382
/**
 * Call this from fence_finish for same-context fence waits of deferred fences
 * that haven't been flushed yet.
 *
 * The passed pipe_context must be the one passed to pipe_screen::fence_finish,
 * i.e., the wrapped one.
 */
void
threaded_context_flush(struct pipe_context *_pipe,
                       struct tc_unflushed_batch_token *token,
                       bool prefer_async)
{
   struct threaded_context *tc = threaded_context(_pipe);

   /* This is called from the gallium frontend / application thread. */
   if (token->tc && token->tc == tc) {
      struct tc_batch *last = &tc->batch_slots[tc->last];

      /* Prefer to do the flush in the driver thread if it is already
       * running. That should be better for cache locality.
       */
      if (prefer_async || !util_queue_fence_is_signalled(&last->fence))
         tc_batch_flush(tc);
      else
         tc_sync(token->tc); /* driver thread idle: execute here instead */
   }
}
410
411static void
412tc_add_to_buffer_list(struct tc_buffer_list *next, struct pipe_resource *buf)
413{
414   uint32_t id = threaded_resource(buf)->buffer_id_unique;
415   BITSET_SET(next->buffer_list, id & TC_BUFFER_ID_MASK);
416}
417
418/* Set a buffer binding and add it to the buffer list. */
419static void
420tc_bind_buffer(uint32_t *binding, struct tc_buffer_list *next, struct pipe_resource *buf)
421{
422   uint32_t id = threaded_resource(buf)->buffer_id_unique;
423   *binding = id;
424   BITSET_SET(next->buffer_list, id & TC_BUFFER_ID_MASK);
425}
426
/* Reset a buffer binding slot to 0, i.e. unbound. */
static void
tc_unbind_buffer(uint32_t *binding)
{
   *binding = 0;
}
433
/* Reset a range of buffer binding slots to 0 (unbound). */
static void
tc_unbind_buffers(uint32_t *binding, unsigned count)
{
   for (unsigned i = 0; i < count; i++)
      binding[i] = 0;
}
441
442static void
443tc_add_bindings_to_buffer_list(BITSET_WORD *buffer_list, const uint32_t *bindings,
444                               unsigned count)
445{
446   for (unsigned i = 0; i < count; i++) {
447      if (bindings[i])
448         BITSET_SET(buffer_list, bindings[i] & TC_BUFFER_ID_MASK);
449   }
450}
451
/* Replace every occurrence of old_id in "bindings" with new_id.
 *
 * \param old_id    buffer id being replaced
 * \param new_id    buffer id to install in its place
 * \param bindings  array of binding slots to scan
 * \param count     number of slots in "bindings"
 * \return the number of slots that were rebound
 *
 * The return type must be unsigned, not bool: callers
 * (tc_rebind_shader_bindings, tc_rebind_buffer) accumulate the return
 * values of several of these calls into a total rebind count, and a bool
 * return would truncate each contribution to 0 or 1.
 */
static unsigned
tc_rebind_bindings(uint32_t old_id, uint32_t new_id, uint32_t *bindings,
                   unsigned count)
{
   unsigned rebind_count = 0;

   for (unsigned i = 0; i < count; i++) {
      if (bindings[i] == old_id) {
         bindings[i] = new_id;
         rebind_count++;
      }
   }
   return rebind_count;
}
466
/* Add all buffers bound to one shader stage (UBOs, and — if the stage has
 * ever used them — SSBOs, images, and sampler buffers) to the buffer list.
 */
static void
tc_add_shader_bindings_to_buffer_list(struct threaded_context *tc,
                                      BITSET_WORD *buffer_list,
                                      enum pipe_shader_type shader)
{
   tc_add_bindings_to_buffer_list(buffer_list, tc->const_buffers[shader],
                                  tc->max_const_buffers);
   if (tc->seen_shader_buffers[shader]) {
      tc_add_bindings_to_buffer_list(buffer_list, tc->shader_buffers[shader],
                                     tc->max_shader_buffers);
   }
   if (tc->seen_image_buffers[shader]) {
      tc_add_bindings_to_buffer_list(buffer_list, tc->image_buffers[shader],
                                     tc->max_images);
   }
   if (tc->seen_sampler_buffers[shader]) {
      tc_add_bindings_to_buffer_list(buffer_list, tc->sampler_buffers[shader],
                                     tc->max_samplers);
   }
}
487
/* Rebind old_id -> new_id in all binding tables of one shader stage.
 * Sets the per-stage bits in *rebind_mask for each table that changed and
 * returns the total number of rebound slots.
 */
static unsigned
tc_rebind_shader_bindings(struct threaded_context *tc, uint32_t old_id,
                          uint32_t new_id, enum pipe_shader_type shader, uint32_t *rebind_mask)
{
   unsigned ubo = 0, ssbo = 0, img = 0, sampler = 0;

   ubo = tc_rebind_bindings(old_id, new_id, tc->const_buffers[shader],
                            tc->max_const_buffers);
   if (ubo)
      *rebind_mask |= BITFIELD_BIT(TC_BINDING_UBO_VS) << shader;
   if (tc->seen_shader_buffers[shader]) {
      ssbo = tc_rebind_bindings(old_id, new_id, tc->shader_buffers[shader],
                                tc->max_shader_buffers);
      if (ssbo)
         *rebind_mask |= BITFIELD_BIT(TC_BINDING_SSBO_VS) << shader;
   }
   if (tc->seen_image_buffers[shader]) {
      img = tc_rebind_bindings(old_id, new_id, tc->image_buffers[shader],
                               tc->max_images);
      if (img)
         *rebind_mask |= BITFIELD_BIT(TC_BINDING_IMAGE_VS) << shader;
   }
   if (tc->seen_sampler_buffers[shader]) {
      sampler = tc_rebind_bindings(old_id, new_id, tc->sampler_buffers[shader],
                                   tc->max_samplers);
      if (sampler)
         *rebind_mask |= BITFIELD_BIT(TC_BINDING_SAMPLERVIEW_VS) << shader;
   }
   return ubo + ssbo + img + sampler;
}
518
/* Add all bound buffers used by VS/TCS/TES/GS/FS to the buffer list.
 * This is called by the first draw call in a batch when we want to inherit
 * all bindings set by the previous batch.
 */
static void
tc_add_all_gfx_bindings_to_buffer_list(struct threaded_context *tc)
{
   BITSET_WORD *buffer_list = tc->buffer_lists[tc->next_buf_list].buffer_list;

   tc_add_bindings_to_buffer_list(buffer_list, tc->vertex_buffers, tc->max_vertex_buffers);
   if (tc->seen_streamout_buffers)
      tc_add_bindings_to_buffer_list(buffer_list, tc->streamout_buffers, PIPE_MAX_SO_BUFFERS);

   tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_VERTEX);
   tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_FRAGMENT);

   /* Optional stages are only scanned if they have ever been bound. */
   if (tc->seen_tcs)
      tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_TESS_CTRL);
   if (tc->seen_tes)
      tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_TESS_EVAL);
   if (tc->seen_gs)
      tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_GEOMETRY);

   tc->add_all_gfx_bindings_to_buffer_list = false;
}
544
/* Add all bound buffers used by compute to the buffer list.
 * This is called by the first compute call in a batch when we want to inherit
 * all bindings set by the previous batch.
 */
static void
tc_add_all_compute_bindings_to_buffer_list(struct threaded_context *tc)
{
   BITSET_WORD *buffer_list = tc->buffer_lists[tc->next_buf_list].buffer_list;

   tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_COMPUTE);
   tc->add_all_compute_bindings_to_buffer_list = false;
}
557
/* Replace old_id with new_id in every binding table (vertex buffers,
 * streamout, and all shader stages including compute). Returns the total
 * number of rebound slots and, when nonzero, records new_id in the current
 * buffer list.
 */
static unsigned
tc_rebind_buffer(struct threaded_context *tc, uint32_t old_id, uint32_t new_id, uint32_t *rebind_mask)
{
   unsigned vbo = 0, so = 0;

   vbo = tc_rebind_bindings(old_id, new_id, tc->vertex_buffers,
                            tc->max_vertex_buffers);
   if (vbo)
      *rebind_mask |= BITFIELD_BIT(TC_BINDING_VERTEX_BUFFER);

   if (tc->seen_streamout_buffers) {
      so = tc_rebind_bindings(old_id, new_id, tc->streamout_buffers,
                              PIPE_MAX_SO_BUFFERS);
      if (so)
         *rebind_mask |= BITFIELD_BIT(TC_BINDING_STREAMOUT_BUFFER);
   }
   unsigned rebound = vbo + so;

   rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_VERTEX, rebind_mask);
   rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_FRAGMENT, rebind_mask);

   /* Optional stages are only scanned if they have ever been bound. */
   if (tc->seen_tcs)
      rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_TESS_CTRL, rebind_mask);
   if (tc->seen_tes)
      rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_TESS_EVAL, rebind_mask);
   if (tc->seen_gs)
      rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_GEOMETRY, rebind_mask);

   rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_COMPUTE, rebind_mask);

   if (rebound)
      BITSET_SET(tc->buffer_lists[tc->next_buf_list].buffer_list, new_id & TC_BUFFER_ID_MASK);
   return rebound;
}
592
/* Return true if any binding slot selected by binding_mask contains id.
 * u_bit_scan consumes one set bit of binding_mask per iteration.
 */
static bool
tc_is_buffer_bound_with_mask(uint32_t id, uint32_t *bindings, unsigned binding_mask)
{
   while (binding_mask) {
      if (bindings[u_bit_scan(&binding_mask)] == id)
         return true;
   }
   return false;
}
602
603static bool
604tc_is_buffer_shader_bound_for_write(struct threaded_context *tc, uint32_t id,
605                                    enum pipe_shader_type shader)
606{
607   if (tc->seen_shader_buffers[shader] &&
608       tc_is_buffer_bound_with_mask(id, tc->shader_buffers[shader],
609                                    tc->shader_buffers_writeable_mask[shader]))
610      return true;
611
612   if (tc->seen_image_buffers[shader] &&
613       tc_is_buffer_bound_with_mask(id, tc->image_buffers[shader],
614                                    tc->image_buffers_writeable_mask[shader]))
615      return true;
616
617   return false;
618}
619
/* Return true if the buffer with the given id is bound for writing anywhere:
 * as a streamout target or as a writable SSBO/image in any shader stage.
 */
static bool
tc_is_buffer_bound_for_write(struct threaded_context *tc, uint32_t id)
{
   if (tc->seen_streamout_buffers &&
       tc_is_buffer_bound_with_mask(id, tc->streamout_buffers,
                                    BITFIELD_MASK(PIPE_MAX_SO_BUFFERS)))
      return true;

   if (tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_VERTEX) ||
       tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_FRAGMENT) ||
       tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_COMPUTE))
      return true;

   /* Optional stages are only checked if they have ever been bound. */
   if (tc->seen_tcs &&
       tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_TESS_CTRL))
      return true;

   if (tc->seen_tes &&
       tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_TESS_EVAL))
      return true;

   if (tc->seen_gs &&
       tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_GEOMETRY))
      return true;

   return false;
}
647
/* Conservatively report whether a buffer may be busy. Without a driver
 * is_resource_busy callback we cannot know, so assume busy.
 */
static bool
tc_is_buffer_busy(struct threaded_context *tc, struct threaded_resource *tbuf,
                  unsigned map_usage)
{
   if (!tc->options.is_resource_busy)
      return true;

   uint32_t id_hash = tbuf->buffer_id_unique & TC_BUFFER_ID_MASK;

   for (unsigned i = 0; i < TC_MAX_BUFFER_LISTS; i++) {
      struct tc_buffer_list *buf_list = &tc->buffer_lists[i];

      /* If the buffer is referenced by a batch that hasn't been flushed (by tc or the driver),
       * then the buffer is considered busy. */
      if (!util_queue_fence_is_signalled(&buf_list->driver_flushed_fence) &&
          BITSET_TEST(buf_list->buffer_list, id_hash))
         return true;
   }

   /* The buffer isn't referenced by any unflushed batch: we can safely ask to the driver whether
    * this buffer is busy or not. */
   return tc->options.is_resource_busy(tc->pipe->screen, tbuf->latest, map_usage);
}
671
/* Initialize the threaded_context fields of a newly created resource.
 * Drivers call this from their resource_create hooks.
 */
void
threaded_resource_init(struct pipe_resource *res)
{
   struct threaded_resource *tres = threaded_resource(res);

   /* "latest" starts as the resource itself; invalidation may displace it. */
   tres->latest = &tres->b;
   util_range_init(&tres->valid_buffer_range);
   tres->is_shared = false;
   tres->is_user_ptr = false;
   tres->buffer_id_unique = 0;
   tres->pending_staging_uploads = 0;
   util_range_init(&tres->pending_staging_uploads_range);
}
685
686void
687threaded_resource_deinit(struct pipe_resource *res)
688{
689   struct threaded_resource *tres = threaded_resource(res);
690
691   if (tres->latest != &tres->b)
692           pipe_resource_reference(&tres->latest, NULL);
693   util_range_destroy(&tres->valid_buffer_range);
694   util_range_destroy(&tres->pending_staging_uploads_range);
695}
696
/* Return the wrapped (driver) context after synchronizing with the driver
 * thread. Passes through contexts that are not threaded (no priv).
 */
struct pipe_context *
threaded_context_unwrap_sync(struct pipe_context *pipe)
{
   if (!pipe || !pipe->priv)
      return pipe;

   tc_sync(threaded_context(pipe));
   return (struct pipe_context*)pipe->priv;
}
706
707
/********************************************************************
 * simple functions
 */

/* TC_FUNC1(func, qualifier, type, deref, addr, ...) generates three things:
 *  - struct tc_call_<func>: the queued payload holding one "type" state,
 *  - tc_call_<func>: the driver-thread handler that forwards the state,
 *  - tc_<func>: the app-thread entry point that enqueues the call.
 * "deref"/"addr" adapt between by-value and by-pointer parameter forms;
 * trailing arguments are extra statements run at enqueue time.
 */
#define TC_FUNC1(func, qualifier, type, deref, addr, ...) \
   struct tc_call_##func { \
      struct tc_call_base base; \
      type state; \
   }; \
   \
   static uint16_t \
   tc_call_##func(struct pipe_context *pipe, void *call, uint64_t *last) \
   { \
      pipe->func(pipe, addr(to_call(call, tc_call_##func)->state)); \
      return call_size(tc_call_##func); \
   } \
   \
   static void \
   tc_##func(struct pipe_context *_pipe, qualifier type deref param) \
   { \
      struct threaded_context *tc = threaded_context(_pipe); \
      struct tc_call_##func *p = (struct tc_call_##func*) \
                     tc_add_call(tc, TC_CALL_##func, tc_call_##func); \
      p->state = deref(param); \
      __VA_ARGS__; \
   }
734
/* Single-argument state setters generated from the template above. */
TC_FUNC1(set_active_query_state, , bool, , )

TC_FUNC1(set_blend_color, const, struct pipe_blend_color, *, &)
TC_FUNC1(set_stencil_ref, const, struct pipe_stencil_ref, , )
TC_FUNC1(set_clip_state, const, struct pipe_clip_state, *, &)
TC_FUNC1(set_sample_mask, , unsigned, , )
TC_FUNC1(set_min_samples, , unsigned, , )
TC_FUNC1(set_polygon_stipple, const, struct pipe_poly_stipple, *, &)

TC_FUNC1(texture_barrier, , unsigned, , )
TC_FUNC1(memory_barrier, , unsigned, , )
TC_FUNC1(delete_texture_handle, , uint64_t, , )
TC_FUNC1(delete_image_handle, , uint64_t, , )
TC_FUNC1(set_frontend_noop, , bool, , )
749
750
751/********************************************************************
752 * queries
753 */
754
755static struct pipe_query *
756tc_create_query(struct pipe_context *_pipe, unsigned query_type,
757                unsigned index)
758{
759   struct threaded_context *tc = threaded_context(_pipe);
760   struct pipe_context *pipe = tc->pipe;
761
762   return pipe->create_query(pipe, query_type, index);
763}
764
765static struct pipe_query *
766tc_create_batch_query(struct pipe_context *_pipe, unsigned num_queries,
767                      unsigned *query_types)
768{
769   struct threaded_context *tc = threaded_context(_pipe);
770   struct pipe_context *pipe = tc->pipe;
771
772   return pipe->create_batch_query(pipe, num_queries, query_types);
773}
774
/* Payload for queued calls that take just a query pointer. */
struct tc_query_call {
   struct tc_call_base base;
   struct pipe_query *query;
};

/* Driver-thread handler: destroy a query, removing it from the
 * unflushed-queries list first if it is still on it.
 */
static uint16_t
tc_call_destroy_query(struct pipe_context *pipe, void *call, uint64_t *last)
{
   struct pipe_query *query = to_call(call, tc_query_call)->query;
   struct threaded_query *tq = threaded_query(query);

   if (list_is_linked(&tq->head_unflushed))
      list_del(&tq->head_unflushed);

   pipe->destroy_query(pipe, query);
   return call_size(tc_query_call);
}
792
793static void
794tc_destroy_query(struct pipe_context *_pipe, struct pipe_query *query)
795{
796   struct threaded_context *tc = threaded_context(_pipe);
797
798   tc_add_call(tc, TC_CALL_destroy_query, tc_query_call)->query = query;
799}
800
/* Driver-thread handler: forward begin_query to the driver. */
static uint16_t
tc_call_begin_query(struct pipe_context *pipe, void *call, uint64_t *last)
{
   pipe->begin_query(pipe, to_call(call, tc_query_call)->query);
   return call_size(tc_query_call);
}
807
808static bool
809tc_begin_query(struct pipe_context *_pipe, struct pipe_query *query)
810{
811   struct threaded_context *tc = threaded_context(_pipe);
812
813   tc_add_call(tc, TC_CALL_begin_query, tc_query_call)->query = query;
814   return true; /* we don't care about the return value for this call */
815}
816
/* Payload for end_query: also carries the context so the handler can link
 * the query into the context's unflushed-queries list.
 */
struct tc_end_query_call {
   struct tc_call_base base;
   struct threaded_context *tc;
   struct pipe_query *query;
};

/* Driver-thread handler: track the query as unflushed, then end it. */
static uint16_t
tc_call_end_query(struct pipe_context *pipe, void *call, uint64_t *last)
{
   struct tc_end_query_call *p = to_call(call, tc_end_query_call);
   struct threaded_query *tq = threaded_query(p->query);

   if (!list_is_linked(&tq->head_unflushed))
      list_add(&tq->head_unflushed, &p->tc->unflushed_queries);

   pipe->end_query(pipe, p->query);
   return call_size(tc_end_query_call);
}
835
836static bool
837tc_end_query(struct pipe_context *_pipe, struct pipe_query *query)
838{
839   struct threaded_context *tc = threaded_context(_pipe);
840   struct threaded_query *tq = threaded_query(query);
841   struct tc_end_query_call *call =
842      tc_add_call(tc, TC_CALL_end_query, tc_end_query_call);
843
844   call->tc = tc;
845   call->query = query;
846
847   tq->flushed = false;
848
849   return true; /* we don't care about the return value for this call */
850}
851
/* Fetch a query result. If the query hasn't been flushed yet, this first
 * synchronizes with the driver thread, then calls the driver directly.
 */
static bool
tc_get_query_result(struct pipe_context *_pipe,
                    struct pipe_query *query, bool wait,
                    union pipe_query_result *result)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct threaded_query *tq = threaded_query(query);
   struct pipe_context *pipe = tc->pipe;
   bool flushed = tq->flushed;

   if (!flushed) {
      tc_sync_msg(tc, wait ? "wait" : "nowait");
      tc_set_driver_thread(tc);
   }

   bool success = pipe->get_query_result(pipe, query, wait, result);

   if (!flushed)
      tc_clear_driver_thread(tc);

   if (success) {
      tq->flushed = true;
      if (list_is_linked(&tq->head_unflushed)) {
         /* This is safe because it can only happen after we sync'd. */
         list_del(&tq->head_unflushed);
      }
   }
   return success;
}
881
/* Payload for get_query_result_resource; holds a reference on the
 * destination resource until the call executes.
 */
struct tc_query_result_resource {
   struct tc_call_base base;
   bool wait;
   enum pipe_query_value_type result_type:8;
   int8_t index; /* it can be -1 */
   unsigned offset;
   struct pipe_query *query;
   struct pipe_resource *resource;
};

/* Driver-thread handler: write the query result into the resource, then
 * drop the reference taken at enqueue time.
 */
static uint16_t
tc_call_get_query_result_resource(struct pipe_context *pipe, void *call, uint64_t *last)
{
   struct tc_query_result_resource *p = to_call(call, tc_query_result_resource);

   pipe->get_query_result_resource(pipe, p->query, p->wait, p->result_type,
                                   p->index, p->resource, p->offset);
   tc_drop_resource_reference(p->resource);
   return call_size(tc_query_result_resource);
}
902
/* Enqueue writing a query result into a resource. Takes a reference on the
 * resource (released by the handler) and records it in the buffer list.
 */
static void
tc_get_query_result_resource(struct pipe_context *_pipe,
                             struct pipe_query *query, bool wait,
                             enum pipe_query_value_type result_type, int index,
                             struct pipe_resource *resource, unsigned offset)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct tc_query_result_resource *p =
      tc_add_call(tc, TC_CALL_get_query_result_resource,
                  tc_query_result_resource);

   p->query = query;
   p->wait = wait;
   p->result_type = result_type;
   p->index = index;
   tc_set_resource_reference(&p->resource, resource);
   tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], resource);
   p->offset = offset;
}
922
/* Payload for render_condition. */
struct tc_render_condition {
   struct tc_call_base base;
   bool condition;
   unsigned mode;
   struct pipe_query *query;
};

/* Driver-thread handler: forward render_condition to the driver. */
static uint16_t
tc_call_render_condition(struct pipe_context *pipe, void *call, uint64_t *last)
{
   struct tc_render_condition *p = to_call(call, tc_render_condition);
   pipe->render_condition(pipe, p->query, p->condition, p->mode);
   return call_size(tc_render_condition);
}
937
938static void
939tc_render_condition(struct pipe_context *_pipe,
940                    struct pipe_query *query, bool condition,
941                    enum pipe_render_cond_flag mode)
942{
943   struct threaded_context *tc = threaded_context(_pipe);
944   struct tc_render_condition *p =
945      tc_add_call(tc, TC_CALL_render_condition, tc_render_condition);
946
947   p->query = query;
948   p->condition = condition;
949   p->mode = mode;
950}
951
952
953/********************************************************************
954 * constant (immutable) states
955 */
956
/* Generates tc_create_<name>_state, which forwards directly to the wrapped
 * context with no queuing and no sync.
 * NOTE(review): presumably CSO creation is safe to call from the app thread
 * in all drivers — confirm against the threaded-context contract.
 */
#define TC_CSO_CREATE(name, sname) \
   static void * \
   tc_create_##name##_state(struct pipe_context *_pipe, \
                            const struct pipe_##sname##_state *state) \
   { \
      struct pipe_context *pipe = threaded_context(_pipe)->pipe; \
      return pipe->create_##name##_state(pipe, state); \
   }

/* Generates queued bind/delete wrappers via TC_FUNC1 (defined earlier in
 * this file, outside this view).
 */
#define TC_CSO_BIND(name, ...) TC_FUNC1(bind_##name##_state, , void *, , , ##__VA_ARGS__)
#define TC_CSO_DELETE(name) TC_FUNC1(delete_##name##_state, , void *, , )

/* Generates the full create/bind/delete trio for one CSO type. */
#define TC_CSO(name, sname, ...) \
   TC_CSO_CREATE(name, sname) \
   TC_CSO_BIND(name, ##__VA_ARGS__) \
   TC_CSO_DELETE(name)

#define TC_CSO_WHOLE(name) TC_CSO(name, name)
#define TC_CSO_SHADER(name) TC_CSO(name, shader)
/* Like TC_CSO_SHADER, but the bind wrapper also sets tc->seen_<name>. */
#define TC_CSO_SHADER_TRACK(name) TC_CSO(name, shader, tc->seen_##name = true;)
977
/* Instantiate the create/bind/delete CSO wrappers for each state type. */
TC_CSO_WHOLE(blend)
TC_CSO_WHOLE(rasterizer)
TC_CSO_WHOLE(depth_stencil_alpha)
TC_CSO_WHOLE(compute)
TC_CSO_SHADER(fs)
TC_CSO_SHADER(vs)
/* gs/tcs/tes binds also record that the stage was ever bound (tc->seen_*). */
TC_CSO_SHADER_TRACK(gs)
TC_CSO_SHADER_TRACK(tcs)
TC_CSO_SHADER_TRACK(tes)
/* Sampler bind and vertex-elements create have hand-written wrappers. */
TC_CSO_CREATE(sampler, sampler)
TC_CSO_DELETE(sampler)
TC_CSO_BIND(vertex_elements)
TC_CSO_DELETE(vertex_elements)
991
992static void *
993tc_create_vertex_elements_state(struct pipe_context *_pipe, unsigned count,
994                                const struct pipe_vertex_element *elems)
995{
996   struct pipe_context *pipe = threaded_context(_pipe)->pipe;
997
998   return pipe->create_vertex_elements_state(pipe, count, elems);
999}
1000
/* Payload for TC_CALL_bind_sampler_states; slot[] holds the CSO pointers. */
struct tc_sampler_states {
   struct tc_call_base base;
   ubyte shader, start, count;
   void *slot[0]; /* more will be allocated if needed */
};
1006
1007static uint16_t
1008tc_call_bind_sampler_states(struct pipe_context *pipe, void *call, uint64_t *last)
1009{
1010   struct tc_sampler_states *p = (struct tc_sampler_states *)call;
1011
1012   pipe->bind_sampler_states(pipe, p->shader, p->start, p->count, p->slot);
1013   return p->base.num_slots;
1014}
1015
1016static void
1017tc_bind_sampler_states(struct pipe_context *_pipe,
1018                       enum pipe_shader_type shader,
1019                       unsigned start, unsigned count, void **states)
1020{
1021   if (!count)
1022      return;
1023
1024   struct threaded_context *tc = threaded_context(_pipe);
1025   struct tc_sampler_states *p =
1026      tc_add_slot_based_call(tc, TC_CALL_bind_sampler_states, tc_sampler_states, count);
1027
1028   p->shader = shader;
1029   p->start = start;
1030   p->count = count;
1031   memcpy(p->slot, states, count * sizeof(states[0]));
1032}
1033
1034
1035/********************************************************************
1036 * immediate states
1037 */
1038
/* Payload for TC_CALL_set_framebuffer_state; holds surface references. */
struct tc_framebuffer {
   struct tc_call_base base;
   struct pipe_framebuffer_state state;
};
1043
1044static uint16_t
1045tc_call_set_framebuffer_state(struct pipe_context *pipe, void *call, uint64_t *last)
1046{
1047   struct pipe_framebuffer_state *p = &to_call(call, tc_framebuffer)->state;
1048
1049   pipe->set_framebuffer_state(pipe, p);
1050
1051   unsigned nr_cbufs = p->nr_cbufs;
1052   for (unsigned i = 0; i < nr_cbufs; i++)
1053      tc_drop_surface_reference(p->cbufs[i]);
1054   tc_drop_surface_reference(p->zsbuf);
1055   return call_size(tc_framebuffer);
1056}
1057
1058static void
1059tc_set_framebuffer_state(struct pipe_context *_pipe,
1060                         const struct pipe_framebuffer_state *fb)
1061{
1062   struct threaded_context *tc = threaded_context(_pipe);
1063   struct tc_framebuffer *p =
1064      tc_add_call(tc, TC_CALL_set_framebuffer_state, tc_framebuffer);
1065   unsigned nr_cbufs = fb->nr_cbufs;
1066
1067   p->state.width = fb->width;
1068   p->state.height = fb->height;
1069   p->state.samples = fb->samples;
1070   p->state.layers = fb->layers;
1071   p->state.nr_cbufs = nr_cbufs;
1072
1073   for (unsigned i = 0; i < nr_cbufs; i++) {
1074      p->state.cbufs[i] = NULL;
1075      pipe_surface_reference(&p->state.cbufs[i], fb->cbufs[i]);
1076   }
1077   p->state.zsbuf = NULL;
1078   pipe_surface_reference(&p->state.zsbuf, fb->zsbuf);
1079}
1080
/* Payload for TC_CALL_set_tess_state: outer levels in [0..3], inner in [4..5]. */
struct tc_tess_state {
   struct tc_call_base base;
   float state[6];
};
1085
1086static uint16_t
1087tc_call_set_tess_state(struct pipe_context *pipe, void *call, uint64_t *last)
1088{
1089   float *p = to_call(call, tc_tess_state)->state;
1090
1091   pipe->set_tess_state(pipe, p, p + 4);
1092   return call_size(tc_tess_state);
1093}
1094
1095static void
1096tc_set_tess_state(struct pipe_context *_pipe,
1097                  const float default_outer_level[4],
1098                  const float default_inner_level[2])
1099{
1100   struct threaded_context *tc = threaded_context(_pipe);
1101   float *p = tc_add_call(tc, TC_CALL_set_tess_state, tc_tess_state)->state;
1102
1103   memcpy(p, default_outer_level, 4 * sizeof(float));
1104   memcpy(p + 4, default_inner_level, 2 * sizeof(float));
1105}
1106
/* Payload for TC_CALL_set_patch_vertices. */
struct tc_patch_vertices {
   struct tc_call_base base;
   ubyte patch_vertices;
};
1111
1112static uint16_t
1113tc_call_set_patch_vertices(struct pipe_context *pipe, void *call, uint64_t *last)
1114{
1115   uint8_t patch_vertices = to_call(call, tc_patch_vertices)->patch_vertices;
1116
1117   pipe->set_patch_vertices(pipe, patch_vertices);
1118   return call_size(tc_patch_vertices);
1119}
1120
1121static void
1122tc_set_patch_vertices(struct pipe_context *_pipe, uint8_t patch_vertices)
1123{
1124   struct threaded_context *tc = threaded_context(_pipe);
1125
1126   tc_add_call(tc, TC_CALL_set_patch_vertices,
1127               tc_patch_vertices)->patch_vertices = patch_vertices;
1128}
1129
/* Common payload for TC_CALL_set_constant_buffer; queued alone for unbinds. */
struct tc_constant_buffer_base {
   struct tc_call_base base;
   ubyte shader, index;
   bool is_null; /* true for the unbind form (no pipe_constant_buffer tail) */
};

/* Full payload: base plus the constant buffer (holds a buffer reference). */
struct tc_constant_buffer {
   struct tc_constant_buffer_base base;
   struct pipe_constant_buffer cb;
};
1140
1141static uint16_t
1142tc_call_set_constant_buffer(struct pipe_context *pipe, void *call, uint64_t *last)
1143{
1144   struct tc_constant_buffer *p = (struct tc_constant_buffer *)call;
1145
1146   if (unlikely(p->base.is_null)) {
1147      pipe->set_constant_buffer(pipe, p->base.shader, p->base.index, false, NULL);
1148      return call_size(tc_constant_buffer_base);
1149   }
1150
1151   pipe->set_constant_buffer(pipe, p->base.shader, p->base.index, true, &p->cb);
1152   return call_size(tc_constant_buffer);
1153}
1154
/* Queue a set_constant_buffer call.
 *
 * A NULL/empty cb queues the small unbind form. User-pointer buffers are
 * copied into a GPU buffer via const_uploader first, so the driver thread
 * never touches application memory.
 */
static void
tc_set_constant_buffer(struct pipe_context *_pipe,
                       enum pipe_shader_type shader, uint index,
                       bool take_ownership,
                       const struct pipe_constant_buffer *cb)
{
   struct threaded_context *tc = threaded_context(_pipe);

   if (unlikely(!cb || (!cb->buffer && !cb->user_buffer))) {
      struct tc_constant_buffer_base *p =
         tc_add_call(tc, TC_CALL_set_constant_buffer, tc_constant_buffer_base);
      p->shader = shader;
      p->index = index;
      p->is_null = true;
      tc_unbind_buffer(&tc->const_buffers[shader][index]);
      return;
   }

   struct pipe_resource *buffer;
   unsigned offset;

   if (cb->user_buffer) {
      /* This must be done before adding set_constant_buffer, because it could
       * generate e.g. transfer_unmap and flush partially-uninitialized
       * set_constant_buffer to the driver if it was done afterwards.
       */
      buffer = NULL;
      u_upload_data(tc->base.const_uploader, 0, cb->buffer_size,
                    tc->ubo_alignment, cb->user_buffer, &offset, &buffer);
      u_upload_unmap(tc->base.const_uploader);
      /* The upload produced a fresh reference the call can consume. */
      take_ownership = true;
   } else {
      buffer = cb->buffer;
      offset = cb->buffer_offset;
   }

   struct tc_constant_buffer *p =
      tc_add_call(tc, TC_CALL_set_constant_buffer, tc_constant_buffer);
   p->base.shader = shader;
   p->base.index = index;
   p->base.is_null = false;
   p->cb.user_buffer = NULL;
   p->cb.buffer_offset = offset;
   p->cb.buffer_size = cb->buffer_size;

   /* Either move the caller's reference into the call or take a new one. */
   if (take_ownership)
      p->cb.buffer = buffer;
   else
      tc_set_resource_reference(&p->cb.buffer, buffer);

   if (buffer) {
      tc_bind_buffer(&tc->const_buffers[shader][index],
                     &tc->buffer_lists[tc->next_buf_list], buffer);
   } else {
      tc_unbind_buffer(&tc->const_buffers[shader][index]);
   }
}
1212
/* Payload for TC_CALL_set_inlinable_constants. */
struct tc_inlinable_constants {
   struct tc_call_base base;
   ubyte shader;
   ubyte num_values;
   uint32_t values[MAX_INLINABLE_UNIFORMS];
};
1219
1220static uint16_t
1221tc_call_set_inlinable_constants(struct pipe_context *pipe, void *call, uint64_t *last)
1222{
1223   struct tc_inlinable_constants *p = to_call(call, tc_inlinable_constants);
1224
1225   pipe->set_inlinable_constants(pipe, p->shader, p->num_values, p->values);
1226   return call_size(tc_inlinable_constants);
1227}
1228
1229static void
1230tc_set_inlinable_constants(struct pipe_context *_pipe,
1231                           enum pipe_shader_type shader,
1232                           uint num_values, uint32_t *values)
1233{
1234   struct threaded_context *tc = threaded_context(_pipe);
1235   struct tc_inlinable_constants *p =
1236      tc_add_call(tc, TC_CALL_set_inlinable_constants, tc_inlinable_constants);
1237   p->shader = shader;
1238   p->num_values = num_values;
1239   memcpy(p->values, values, num_values * 4);
1240}
1241
/* Payload for TC_CALL_set_sample_locations; slot[] holds size bytes. */
struct tc_sample_locations {
   struct tc_call_base base;
   uint16_t size;
   uint8_t slot[0];
};
1247
1248
1249static uint16_t
1250tc_call_set_sample_locations(struct pipe_context *pipe, void *call, uint64_t *last)
1251{
1252   struct tc_sample_locations *p = (struct tc_sample_locations *)call;
1253
1254   pipe->set_sample_locations(pipe, p->size, p->slot);
1255   return p->base.num_slots;
1256}
1257
1258static void
1259tc_set_sample_locations(struct pipe_context *_pipe, size_t size, const uint8_t *locations)
1260{
1261   struct threaded_context *tc = threaded_context(_pipe);
1262   struct tc_sample_locations *p =
1263      tc_add_slot_based_call(tc, TC_CALL_set_sample_locations,
1264                             tc_sample_locations, size);
1265
1266   p->size = size;
1267   memcpy(p->slot, locations, size);
1268}
1269
/* Payload for TC_CALL_set_scissor_states. */
struct tc_scissors {
   struct tc_call_base base;
   ubyte start, count;
   struct pipe_scissor_state slot[0]; /* more will be allocated if needed */
};
1275
1276static uint16_t
1277tc_call_set_scissor_states(struct pipe_context *pipe, void *call, uint64_t *last)
1278{
1279   struct tc_scissors *p = (struct tc_scissors *)call;
1280
1281   pipe->set_scissor_states(pipe, p->start, p->count, p->slot);
1282   return p->base.num_slots;
1283}
1284
1285static void
1286tc_set_scissor_states(struct pipe_context *_pipe,
1287                      unsigned start, unsigned count,
1288                      const struct pipe_scissor_state *states)
1289{
1290   struct threaded_context *tc = threaded_context(_pipe);
1291   struct tc_scissors *p =
1292      tc_add_slot_based_call(tc, TC_CALL_set_scissor_states, tc_scissors, count);
1293
1294   p->start = start;
1295   p->count = count;
1296   memcpy(&p->slot, states, count * sizeof(states[0]));
1297}
1298
/* Payload for TC_CALL_set_viewport_states. */
struct tc_viewports {
   struct tc_call_base base;
   ubyte start, count;
   struct pipe_viewport_state slot[0]; /* more will be allocated if needed */
};
1304
1305static uint16_t
1306tc_call_set_viewport_states(struct pipe_context *pipe, void *call, uint64_t *last)
1307{
1308   struct tc_viewports *p = (struct tc_viewports *)call;
1309
1310   pipe->set_viewport_states(pipe, p->start, p->count, p->slot);
1311   return p->base.num_slots;
1312}
1313
1314static void
1315tc_set_viewport_states(struct pipe_context *_pipe,
1316                       unsigned start, unsigned count,
1317                       const struct pipe_viewport_state *states)
1318{
1319   if (!count)
1320      return;
1321
1322   struct threaded_context *tc = threaded_context(_pipe);
1323   struct tc_viewports *p =
1324      tc_add_slot_based_call(tc, TC_CALL_set_viewport_states, tc_viewports, count);
1325
1326   p->start = start;
1327   p->count = count;
1328   memcpy(&p->slot, states, count * sizeof(states[0]));
1329}
1330
/* Payload for TC_CALL_set_window_rectangles. */
struct tc_window_rects {
   struct tc_call_base base;
   bool include;
   ubyte count;
   struct pipe_scissor_state slot[0]; /* more will be allocated if needed */
};
1337
1338static uint16_t
1339tc_call_set_window_rectangles(struct pipe_context *pipe, void *call, uint64_t *last)
1340{
1341   struct tc_window_rects *p = (struct tc_window_rects *)call;
1342
1343   pipe->set_window_rectangles(pipe, p->include, p->count, p->slot);
1344   return p->base.num_slots;
1345}
1346
1347static void
1348tc_set_window_rectangles(struct pipe_context *_pipe, bool include,
1349                         unsigned count,
1350                         const struct pipe_scissor_state *rects)
1351{
1352   struct threaded_context *tc = threaded_context(_pipe);
1353   struct tc_window_rects *p =
1354      tc_add_slot_based_call(tc, TC_CALL_set_window_rectangles, tc_window_rects, count);
1355
1356   p->include = include;
1357   p->count = count;
1358   memcpy(p->slot, rects, count * sizeof(rects[0]));
1359}
1360
/* Payload for TC_CALL_set_sampler_views; slot[] holds view references. */
struct tc_sampler_views {
   struct tc_call_base base;
   ubyte shader, start, count, unbind_num_trailing_slots;
   struct pipe_sampler_view *slot[0]; /* more will be allocated if needed */
};
1366
1367static uint16_t
1368tc_call_set_sampler_views(struct pipe_context *pipe, void *call, uint64_t *last)
1369{
1370   struct tc_sampler_views *p = (struct tc_sampler_views *)call;
1371
1372   pipe->set_sampler_views(pipe, p->shader, p->start, p->count,
1373                           p->unbind_num_trailing_slots, true, p->slot);
1374   return p->base.num_slots;
1375}
1376
/* Queue a set_sampler_views call.
 *
 * views == NULL turns the whole range into trailing unbinds. Buffer-backed
 * views are tracked in tc->sampler_buffers; with take_ownership the caller's
 * references are moved into the call, otherwise new references are taken.
 */
static void
tc_set_sampler_views(struct pipe_context *_pipe,
                     enum pipe_shader_type shader,
                     unsigned start, unsigned count,
                     unsigned unbind_num_trailing_slots, bool take_ownership,
                     struct pipe_sampler_view **views)
{
   if (!count && !unbind_num_trailing_slots)
      return;

   struct threaded_context *tc = threaded_context(_pipe);
   struct tc_sampler_views *p =
      tc_add_slot_based_call(tc, TC_CALL_set_sampler_views, tc_sampler_views,
                             views ? count : 0);

   p->shader = shader;
   p->start = start;

   if (views) {
      struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];

      p->count = count;
      p->unbind_num_trailing_slots = unbind_num_trailing_slots;

      if (take_ownership) {
         /* Move the caller's references straight into the call slots. */
         memcpy(p->slot, views, sizeof(*views) * count);

         for (unsigned i = 0; i < count; i++) {
            if (views[i] && views[i]->target == PIPE_BUFFER) {
               tc_bind_buffer(&tc->sampler_buffers[shader][start + i], next,
                              views[i]->texture);
            } else {
               tc_unbind_buffer(&tc->sampler_buffers[shader][start + i]);
            }
         }
      } else {
         for (unsigned i = 0; i < count; i++) {
            /* Call slots are uninitialized memory: clear before referencing. */
            p->slot[i] = NULL;
            pipe_sampler_view_reference(&p->slot[i], views[i]);

            if (views[i] && views[i]->target == PIPE_BUFFER) {
               tc_bind_buffer(&tc->sampler_buffers[shader][start + i], next,
                              views[i]->texture);
            } else {
               tc_unbind_buffer(&tc->sampler_buffers[shader][start + i]);
            }
         }
      }

      tc_unbind_buffers(&tc->sampler_buffers[shader][start + count],
                        unbind_num_trailing_slots);
      tc->seen_sampler_buffers[shader] = true;
   } else {
      /* NULL views: fold the whole range into trailing unbinds. */
      p->count = 0;
      p->unbind_num_trailing_slots = count + unbind_num_trailing_slots;

      tc_unbind_buffers(&tc->sampler_buffers[shader][start],
                        count + unbind_num_trailing_slots);
   }
}
1437
/* Payload for TC_CALL_set_shader_images; slot[] holds image views whose
 * resources are referenced.
 */
struct tc_shader_images {
   struct tc_call_base base;
   ubyte shader, start, count;
   ubyte unbind_num_trailing_slots;
   struct pipe_image_view slot[0]; /* more will be allocated if needed */
};
1444
/* Execute a queued set_shader_images on the driver thread. */
static uint16_t
tc_call_set_shader_images(struct pipe_context *pipe, void *call, uint64_t *last)
{
   struct tc_shader_images *p = (struct tc_shader_images *)call;
   unsigned count = p->count;

   if (!p->count) {
      /* Unbind-only form: no image slots follow the base struct. */
      pipe->set_shader_images(pipe, p->shader, p->start, 0,
                              p->unbind_num_trailing_slots, NULL);
      return call_size(tc_shader_images);
   }

   pipe->set_shader_images(pipe, p->shader, p->start, p->count,
                           p->unbind_num_trailing_slots, p->slot);

   /* Release the resource references taken at enqueue time. */
   for (unsigned i = 0; i < count; i++)
      tc_drop_resource_reference(p->slot[i].resource);

   return p->base.num_slots;
}
1465
/* Queue a set_shader_images call.
 *
 * images == NULL turns the whole range into trailing unbinds. Buffer-backed
 * images are tracked in tc->image_buffers; writable buffer images also
 * extend the resource's valid range and the per-shader writeable mask.
 */
static void
tc_set_shader_images(struct pipe_context *_pipe,
                     enum pipe_shader_type shader,
                     unsigned start, unsigned count,
                     unsigned unbind_num_trailing_slots,
                     const struct pipe_image_view *images)
{
   if (!count && !unbind_num_trailing_slots)
      return;

   struct threaded_context *tc = threaded_context(_pipe);
   struct tc_shader_images *p =
      tc_add_slot_based_call(tc, TC_CALL_set_shader_images, tc_shader_images,
                             images ? count : 0);
   unsigned writable_buffers = 0;

   p->shader = shader;
   p->start = start;

   if (images) {
      p->count = count;
      p->unbind_num_trailing_slots = unbind_num_trailing_slots;

      struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];

      for (unsigned i = 0; i < count; i++) {
         struct pipe_resource *resource = images[i].resource;

         /* Take the reference now; the memcpy below rewrites slot[i] with
          * the same resource pointer, so the count stays consistent.
          */
         tc_set_resource_reference(&p->slot[i].resource, resource);

         if (resource && resource->target == PIPE_BUFFER) {
            tc_bind_buffer(&tc->image_buffers[shader][start + i], next, resource);

            if (images[i].access & PIPE_IMAGE_ACCESS_WRITE) {
               struct threaded_resource *tres = threaded_resource(resource);

               /* Writable buffer image: the driver may write this range. */
               util_range_add(&tres->b, &tres->valid_buffer_range,
                              images[i].u.buf.offset,
                              images[i].u.buf.offset + images[i].u.buf.size);
               writable_buffers |= BITFIELD_BIT(start + i);
            }
         } else {
            tc_unbind_buffer(&tc->image_buffers[shader][start + i]);
         }
      }
      memcpy(p->slot, images, count * sizeof(images[0]));

      tc_unbind_buffers(&tc->image_buffers[shader][start + count],
                        unbind_num_trailing_slots);
      tc->seen_image_buffers[shader] = true;
   } else {
      /* NULL images: fold the whole range into trailing unbinds. */
      p->count = 0;
      p->unbind_num_trailing_slots = count + unbind_num_trailing_slots;

      tc_unbind_buffers(&tc->image_buffers[shader][start],
                        count + unbind_num_trailing_slots);
   }

   tc->image_buffers_writeable_mask[shader] &= ~BITFIELD_RANGE(start, count);
   tc->image_buffers_writeable_mask[shader] |= writable_buffers;
}
1527
/* Payload for TC_CALL_set_shader_buffers; slot[] holds buffer references. */
struct tc_shader_buffers {
   struct tc_call_base base;
   ubyte shader, start, count;
   bool unbind; /* true: no slots allocated, the call only unbinds */
   unsigned writable_bitmask;
   struct pipe_shader_buffer slot[0]; /* more will be allocated if needed */
};
1535
/* Execute a queued set_shader_buffers on the driver thread. */
static uint16_t
tc_call_set_shader_buffers(struct pipe_context *pipe, void *call, uint64_t *last)
{
   struct tc_shader_buffers *p = (struct tc_shader_buffers *)call;
   unsigned count = p->count;

   if (p->unbind) {
      /* Unbind form: only the base struct was queued, no slots. */
      pipe->set_shader_buffers(pipe, p->shader, p->start, p->count, NULL, 0);
      return call_size(tc_shader_buffers);
   }

   pipe->set_shader_buffers(pipe, p->shader, p->start, p->count, p->slot,
                            p->writable_bitmask);

   /* Release the buffer references taken at enqueue time. */
   for (unsigned i = 0; i < count; i++)
      tc_drop_resource_reference(p->slot[i].buffer);

   return p->base.num_slots;
}
1555
/* Queue a set_shader_buffers call.
 *
 * buffers == NULL unbinds the range. Bound buffers are tracked in
 * tc->shader_buffers; buffers in writable_bitmask also get their valid
 * range extended, and the per-shader writeable mask is updated.
 */
static void
tc_set_shader_buffers(struct pipe_context *_pipe,
                      enum pipe_shader_type shader,
                      unsigned start, unsigned count,
                      const struct pipe_shader_buffer *buffers,
                      unsigned writable_bitmask)
{
   if (!count)
      return;

   struct threaded_context *tc = threaded_context(_pipe);
   struct tc_shader_buffers *p =
      tc_add_slot_based_call(tc, TC_CALL_set_shader_buffers, tc_shader_buffers,
                             buffers ? count : 0);

   p->shader = shader;
   p->start = start;
   p->count = count;
   p->unbind = buffers == NULL;
   p->writable_bitmask = writable_bitmask;

   if (buffers) {
      struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];

      for (unsigned i = 0; i < count; i++) {
         struct pipe_shader_buffer *dst = &p->slot[i];
         const struct pipe_shader_buffer *src = buffers + i;

         tc_set_resource_reference(&dst->buffer, src->buffer);
         dst->buffer_offset = src->buffer_offset;
         dst->buffer_size = src->buffer_size;

         if (src->buffer) {
            struct threaded_resource *tres = threaded_resource(src->buffer);

            tc_bind_buffer(&tc->shader_buffers[shader][start + i], next, &tres->b);

            if (writable_bitmask & BITFIELD_BIT(i)) {
               /* Writable SSBO: the driver may write this byte range. */
               util_range_add(&tres->b, &tres->valid_buffer_range,
                              src->buffer_offset,
                              src->buffer_offset + src->buffer_size);
            }
         } else {
            tc_unbind_buffer(&tc->shader_buffers[shader][start + i]);
         }
      }
      tc->seen_shader_buffers[shader] = true;
   } else {
      tc_unbind_buffers(&tc->shader_buffers[shader][start], count);
   }

   tc->shader_buffers_writeable_mask[shader] &= ~BITFIELD_RANGE(start, count);
   tc->shader_buffers_writeable_mask[shader] |= writable_bitmask << start;
}
1610
/* Payload for TC_CALL_set_vertex_buffers; slot[] holds buffer references. */
struct tc_vertex_buffers {
   struct tc_call_base base;
   ubyte start, count;
   ubyte unbind_num_trailing_slots;
   struct pipe_vertex_buffer slot[0]; /* more will be allocated if needed */
};
1617
/* Execute a queued set_vertex_buffers on the driver thread. */
static uint16_t
tc_call_set_vertex_buffers(struct pipe_context *pipe, void *call, uint64_t *last)
{
   struct tc_vertex_buffers *p = (struct tc_vertex_buffers *)call;
   unsigned count = p->count;

   if (!count) {
      /* Unbind form: no buffer slots follow the base struct. */
      pipe->set_vertex_buffers(pipe, p->start, 0,
                               p->unbind_num_trailing_slots, false, NULL);
      return call_size(tc_vertex_buffers);
   }

   /* User buffers must have been replaced with real resources at enqueue. */
   for (unsigned i = 0; i < count; i++)
      tc_assert(!p->slot[i].is_user_buffer);

   /* take_ownership=true: the driver consumes the stored references. */
   pipe->set_vertex_buffers(pipe, p->start, count,
                            p->unbind_num_trailing_slots, true, p->slot);
   return p->base.num_slots;
}
1637
/* Queue a set_vertex_buffers call.
 *
 * With buffers == NULL or count == 0, the whole range becomes trailing
 * unbinds. Otherwise each buffer is referenced (or the caller's reference
 * is moved in with take_ownership) and tracked in tc->vertex_buffers.
 */
static void
tc_set_vertex_buffers(struct pipe_context *_pipe,
                      unsigned start, unsigned count,
                      unsigned unbind_num_trailing_slots,
                      bool take_ownership,
                      const struct pipe_vertex_buffer *buffers)
{
   struct threaded_context *tc = threaded_context(_pipe);

   if (!count && !unbind_num_trailing_slots)
      return;

   if (count && buffers) {
      struct tc_vertex_buffers *p =
         tc_add_slot_based_call(tc, TC_CALL_set_vertex_buffers, tc_vertex_buffers, count);
      p->start = start;
      p->count = count;
      p->unbind_num_trailing_slots = unbind_num_trailing_slots;

      struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];

      if (take_ownership) {
         /* Move the caller's references straight into the call slots. */
         memcpy(p->slot, buffers, count * sizeof(struct pipe_vertex_buffer));

         for (unsigned i = 0; i < count; i++) {
            struct pipe_resource *buf = buffers[i].buffer.resource;

            if (buf) {
               tc_bind_buffer(&tc->vertex_buffers[start + i], next, buf);
            } else {
               tc_unbind_buffer(&tc->vertex_buffers[start + i]);
            }
         }
      } else {
         for (unsigned i = 0; i < count; i++) {
            struct pipe_vertex_buffer *dst = &p->slot[i];
            const struct pipe_vertex_buffer *src = buffers + i;
            struct pipe_resource *buf = src->buffer.resource;

            /* The caller is expected to have filtered out user buffers. */
            tc_assert(!src->is_user_buffer);
            dst->stride = src->stride;
            dst->is_user_buffer = false;
            tc_set_resource_reference(&dst->buffer.resource, buf);
            dst->buffer_offset = src->buffer_offset;

            if (buf) {
               tc_bind_buffer(&tc->vertex_buffers[start + i], next, buf);
            } else {
               tc_unbind_buffer(&tc->vertex_buffers[start + i]);
            }
         }
      }

      tc_unbind_buffers(&tc->vertex_buffers[start + count],
                        unbind_num_trailing_slots);
   } else {
      /* Unbind-only form: queue a zero-count call. */
      struct tc_vertex_buffers *p =
         tc_add_slot_based_call(tc, TC_CALL_set_vertex_buffers, tc_vertex_buffers, 0);
      p->start = start;
      p->count = 0;
      p->unbind_num_trailing_slots = count + unbind_num_trailing_slots;

      tc_unbind_buffers(&tc->vertex_buffers[start],
                        count + unbind_num_trailing_slots);
   }
}
1704
/* Payload for TC_CALL_set_stream_output_targets; holds target references. */
struct tc_stream_outputs {
   struct tc_call_base base;
   unsigned count;
   struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS];
   unsigned offsets[PIPE_MAX_SO_BUFFERS];
};
1711
1712static uint16_t
1713tc_call_set_stream_output_targets(struct pipe_context *pipe, void *call, uint64_t *last)
1714{
1715   struct tc_stream_outputs *p = to_call(call, tc_stream_outputs);
1716   unsigned count = p->count;
1717
1718   pipe->set_stream_output_targets(pipe, count, p->targets, p->offsets);
1719   for (unsigned i = 0; i < count; i++)
1720      tc_drop_so_target_reference(p->targets[i]);
1721
1722   return call_size(tc_stream_outputs);
1723}
1724
/* Queue a set_stream_output_targets call, referencing each target and
 * tracking its backing buffer for invalidation.
 */
static void
tc_set_stream_output_targets(struct pipe_context *_pipe,
                             unsigned count,
                             struct pipe_stream_output_target **tgs,
                             const unsigned *offsets)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct tc_stream_outputs *p =
      tc_add_call(tc, TC_CALL_set_stream_output_targets, tc_stream_outputs);
   struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];

   for (unsigned i = 0; i < count; i++) {
      /* Call slots are uninitialized memory: clear before referencing. */
      p->targets[i] = NULL;
      pipe_so_target_reference(&p->targets[i], tgs[i]);
      if (tgs[i]) {
         tc_bind_buffer(&tc->streamout_buffers[i], next, tgs[i]->buffer);
      } else {
         tc_unbind_buffer(&tc->streamout_buffers[i]);
      }
   }
   p->count = count;
   memcpy(p->offsets, offsets, count * sizeof(unsigned));

   /* Drop tracking for any previously-bound targets past the new count. */
   tc_unbind_buffers(&tc->streamout_buffers[count], PIPE_MAX_SO_BUFFERS - count);
   if (count)
      tc->seen_streamout_buffers = true;
}
1752
1753static void
1754tc_set_compute_resources(struct pipe_context *_pipe, unsigned start,
1755                         unsigned count, struct pipe_surface **resources)
1756{
1757   struct threaded_context *tc = threaded_context(_pipe);
1758   struct pipe_context *pipe = tc->pipe;
1759
1760   tc_sync(tc);
1761   pipe->set_compute_resources(pipe, start, count, resources);
1762}
1763
1764static void
1765tc_set_global_binding(struct pipe_context *_pipe, unsigned first,
1766                      unsigned count, struct pipe_resource **resources,
1767                      uint32_t **handles)
1768{
1769   struct threaded_context *tc = threaded_context(_pipe);
1770   struct pipe_context *pipe = tc->pipe;
1771
1772   tc_sync(tc);
1773   pipe->set_global_binding(pipe, first, count, resources, handles);
1774}
1775
1776
1777/********************************************************************
1778 * views
1779 */
1780
1781static struct pipe_surface *
1782tc_create_surface(struct pipe_context *_pipe,
1783                  struct pipe_resource *resource,
1784                  const struct pipe_surface *surf_tmpl)
1785{
1786   struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1787   struct pipe_surface *view =
1788         pipe->create_surface(pipe, resource, surf_tmpl);
1789
1790   if (view)
1791      view->context = _pipe;
1792   return view;
1793}
1794
1795static void
1796tc_surface_destroy(struct pipe_context *_pipe,
1797                   struct pipe_surface *surf)
1798{
1799   struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1800
1801   pipe->surface_destroy(pipe, surf);
1802}
1803
1804static struct pipe_sampler_view *
1805tc_create_sampler_view(struct pipe_context *_pipe,
1806                       struct pipe_resource *resource,
1807                       const struct pipe_sampler_view *templ)
1808{
1809   struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1810   struct pipe_sampler_view *view =
1811         pipe->create_sampler_view(pipe, resource, templ);
1812
1813   if (view)
1814      view->context = _pipe;
1815   return view;
1816}
1817
1818static void
1819tc_sampler_view_destroy(struct pipe_context *_pipe,
1820                        struct pipe_sampler_view *view)
1821{
1822   struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1823
1824   pipe->sampler_view_destroy(pipe, view);
1825}
1826
1827static struct pipe_stream_output_target *
1828tc_create_stream_output_target(struct pipe_context *_pipe,
1829                               struct pipe_resource *res,
1830                               unsigned buffer_offset,
1831                               unsigned buffer_size)
1832{
1833   struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1834   struct threaded_resource *tres = threaded_resource(res);
1835   struct pipe_stream_output_target *view;
1836
1837   util_range_add(&tres->b, &tres->valid_buffer_range, buffer_offset,
1838                  buffer_offset + buffer_size);
1839
1840   view = pipe->create_stream_output_target(pipe, res, buffer_offset,
1841                                            buffer_size);
1842   if (view)
1843      view->context = _pipe;
1844   return view;
1845}
1846
1847static void
1848tc_stream_output_target_destroy(struct pipe_context *_pipe,
1849                                struct pipe_stream_output_target *target)
1850{
1851   struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1852
1853   pipe->stream_output_target_destroy(pipe, target);
1854}
1855
1856
1857/********************************************************************
1858 * bindless
1859 */
1860
1861static uint64_t
1862tc_create_texture_handle(struct pipe_context *_pipe,
1863                         struct pipe_sampler_view *view,
1864                         const struct pipe_sampler_state *state)
1865{
1866   struct threaded_context *tc = threaded_context(_pipe);
1867   struct pipe_context *pipe = tc->pipe;
1868
1869   tc_sync(tc);
1870   return pipe->create_texture_handle(pipe, view, state);
1871}
1872
/* Queued payload for pipe_context::make_texture_handle_resident. */
struct tc_make_texture_handle_resident {
   struct tc_call_base base;
   bool resident;     /* make resident (true) or non-resident (false) */
   uint64_t handle;   /* bindless texture handle */
};
1878
1879static uint16_t
1880tc_call_make_texture_handle_resident(struct pipe_context *pipe, void *call, uint64_t *last)
1881{
1882   struct tc_make_texture_handle_resident *p =
1883      to_call(call, tc_make_texture_handle_resident);
1884
1885   pipe->make_texture_handle_resident(pipe, p->handle, p->resident);
1886   return call_size(tc_make_texture_handle_resident);
1887}
1888
1889static void
1890tc_make_texture_handle_resident(struct pipe_context *_pipe, uint64_t handle,
1891                                bool resident)
1892{
1893   struct threaded_context *tc = threaded_context(_pipe);
1894   struct tc_make_texture_handle_resident *p =
1895      tc_add_call(tc, TC_CALL_make_texture_handle_resident,
1896                  tc_make_texture_handle_resident);
1897
1898   p->handle = handle;
1899   p->resident = resident;
1900}
1901
1902static uint64_t
1903tc_create_image_handle(struct pipe_context *_pipe,
1904                       const struct pipe_image_view *image)
1905{
1906   struct threaded_context *tc = threaded_context(_pipe);
1907   struct pipe_context *pipe = tc->pipe;
1908
1909   tc_sync(tc);
1910   return pipe->create_image_handle(pipe, image);
1911}
1912
/* Queued payload for pipe_context::make_image_handle_resident. */
struct tc_make_image_handle_resident {
   struct tc_call_base base;
   bool resident;     /* make resident (true) or non-resident (false) */
   unsigned access;   /* PIPE_IMAGE_ACCESS_* flags */
   uint64_t handle;   /* bindless image handle */
};
1919
1920static uint16_t
1921tc_call_make_image_handle_resident(struct pipe_context *pipe, void *call, uint64_t *last)
1922{
1923   struct tc_make_image_handle_resident *p =
1924      to_call(call, tc_make_image_handle_resident);
1925
1926   pipe->make_image_handle_resident(pipe, p->handle, p->access, p->resident);
1927   return call_size(tc_make_image_handle_resident);
1928}
1929
1930static void
1931tc_make_image_handle_resident(struct pipe_context *_pipe, uint64_t handle,
1932                              unsigned access, bool resident)
1933{
1934   struct threaded_context *tc = threaded_context(_pipe);
1935   struct tc_make_image_handle_resident *p =
1936      tc_add_call(tc, TC_CALL_make_image_handle_resident,
1937                  tc_make_image_handle_resident);
1938
1939   p->handle = handle;
1940   p->access = access;
1941   p->resident = resident;
1942}
1943
1944
1945/********************************************************************
1946 * transfer
1947 */
1948
/* Queued payload for replacing a buffer's storage (buffer invalidation).
 * Executed by tc_call_replace_buffer_storage on the driver thread.
 */
struct tc_replace_buffer_storage {
   struct tc_call_base base;
   uint16_t num_rebinds;            /* how many binding points reference dst */
   uint32_t rebind_mask;            /* which binding categories to rebind */
   uint32_t delete_buffer_id_unique; /* see note below */
   struct pipe_resource *dst;       /* buffer whose storage is replaced */
   struct pipe_resource *src;       /* freshly allocated replacement storage */
   tc_replace_buffer_storage_func func; /* driver callback doing the swap */
};
1958
1959static uint16_t
1960tc_call_replace_buffer_storage(struct pipe_context *pipe, void *call, uint64_t *last)
1961{
1962   struct tc_replace_buffer_storage *p = to_call(call, tc_replace_buffer_storage);
1963
1964   p->func(pipe, p->dst, p->src, p->num_rebinds, p->rebind_mask, p->delete_buffer_id);
1965
1966   tc_drop_resource_reference(p->dst);
1967   tc_drop_resource_reference(p->src);
1968   return call_size(tc_replace_buffer_storage);
1969}
1970
1971/* Return true if the buffer has been invalidated or is idle. */
1972static bool
1973tc_invalidate_buffer(struct threaded_context *tc,
1974                     struct threaded_resource *tbuf)
1975{
1976   if (!tc_is_buffer_busy(tc, tbuf, PIPE_MAP_READ_WRITE)) {
1977      /* It's idle, so invalidation would be a no-op, but we can still clear
1978       * the valid range because we are technically doing invalidation, but
1979       * skipping it because it's useless.
1980       *
1981       * If the buffer is bound for write, we can't invalidate the range.
1982       */
1983      if (!tc_is_buffer_bound_for_write(tc, tbuf->buffer_id_unique))
1984         util_range_set_empty(&tbuf->valid_buffer_range);
1985      return true;
1986   }
1987
1988   struct pipe_screen *screen = tc->base.screen;
1989   struct pipe_resource *new_buf;
1990
1991   /* Shared, pinned, and sparse buffers can't be reallocated. */
1992   if (tbuf->is_shared ||
1993       tbuf->is_user_ptr ||
1994       tbuf->b.flags & PIPE_RESOURCE_FLAG_SPARSE)
1995      return false;
1996
1997   /* Allocate a new one. */
1998   new_buf = screen->resource_create(screen, &tbuf->b);
1999   if (!new_buf)
2000      return false;
2001
2002   /* Replace the "latest" pointer. */
2003   if (tbuf->latest != &tbuf->b)
2004      pipe_resource_reference(&tbuf->latest, NULL);
2005
2006   tbuf->latest = new_buf;
2007
2008   uint32_t delete_buffer_id = tbuf->buffer_id_unique;
2009
2010   /* Enqueue storage replacement of the original buffer. */
2011   struct tc_replace_buffer_storage *p =
2012      tc_add_call(tc, TC_CALL_replace_buffer_storage,
2013                  tc_replace_buffer_storage);
2014
2015   p->func = tc->replace_buffer_storage;
2016   tc_set_resource_reference(&p->dst, &tbuf->b);
2017   tc_set_resource_reference(&p->src, new_buf);
2018   p->delete_buffer_id = delete_buffer_id;
2019   p->rebind_mask = 0;
2020
2021   /* Treat the current buffer as the new buffer. */
2022   bool bound_for_write = tc_is_buffer_bound_for_write(tc, tbuf->buffer_id_unique);
2023   p->num_rebinds = tc_rebind_buffer(tc, tbuf->buffer_id_unique,
2024                                     threaded_resource(new_buf)->buffer_id_unique,
2025                                     &p->rebind_mask);
2026
2027   /* If the buffer is not bound for write, clear the valid range. */
2028   if (!bound_for_write)
2029      util_range_set_empty(&tbuf->valid_buffer_range);
2030
2031   tbuf->buffer_id_unique = threaded_resource(new_buf)->buffer_id_unique;
2032   threaded_resource(new_buf)->buffer_id_unique = 0;
2033
2034   return true;
2035}
2036
/* Rewrite the PIPE_MAP_* usage flags for a buffer map of [offset, offset+size)
 * so that as many mappings as possible avoid synchronizing with the driver
 * thread: reads of idle/uninitialized ranges become UNSYNCHRONIZED, full
 * discards become buffer invalidations, and remaining writes fall back to
 * DISCARD_RANGE staging where allowed. Returns the improved usage flags.
 */
static unsigned
tc_improve_map_buffer_flags(struct threaded_context *tc,
                            struct threaded_resource *tres, unsigned usage,
                            unsigned offset, unsigned size)
{
   /* Never invalidate inside the driver and never infer "unsynchronized". */
   unsigned tc_flags = TC_TRANSFER_MAP_NO_INVALIDATE |
                       TC_TRANSFER_MAP_NO_INFER_UNSYNCHRONIZED;

   /* Prevent a reentry. */
   if (usage & tc_flags)
      return usage;

   /* Use the staging upload if it's preferred. */
   if (usage & (PIPE_MAP_DISCARD_RANGE |
                PIPE_MAP_DISCARD_WHOLE_RESOURCE) &&
       !(usage & PIPE_MAP_PERSISTENT) &&
       tres->b.flags & PIPE_RESOURCE_FLAG_DONT_MAP_DIRECTLY &&
       tc->use_forced_staging_uploads) {
      usage &= ~(PIPE_MAP_DISCARD_WHOLE_RESOURCE |
                 PIPE_MAP_UNSYNCHRONIZED);

      return usage | tc_flags | PIPE_MAP_DISCARD_RANGE;
   }

   /* Sparse buffers can't be mapped directly and can't be reallocated
    * (fully invalidated). That may just be a radeonsi limitation, but
    * the threaded context must obey it with radeonsi.
    */
   if (tres->b.flags & PIPE_RESOURCE_FLAG_SPARSE) {
      /* We can use DISCARD_RANGE instead of full discard. This is the only
       * fast path for sparse buffers that doesn't need thread synchronization.
       */
      if (usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE)
         usage |= PIPE_MAP_DISCARD_RANGE;

      /* Allow DISCARD_WHOLE_RESOURCE and inferring UNSYNCHRONIZED in drivers.
       * The threaded context doesn't do unsynchronized mappings and invalida-
       * tions of sparse buffers, therefore a correct driver behavior won't
       * result in an incorrect behavior with the threaded context.
       */
      return usage;
   }

   usage |= tc_flags;

   /* Handle CPU reads trivially. */
   if (usage & PIPE_MAP_READ) {
      if (usage & PIPE_MAP_UNSYNCHRONIZED)
         usage |= TC_TRANSFER_MAP_THREADED_UNSYNC; /* don't sync */

      /* Drivers aren't allowed to do buffer invalidations. */
      return usage & ~PIPE_MAP_DISCARD_WHOLE_RESOURCE;
   }

   /* See if the buffer range being mapped has never been initialized or
    * the buffer is idle, in which case it can be mapped unsynchronized. */
   if (!(usage & PIPE_MAP_UNSYNCHRONIZED) &&
       ((!tres->is_shared &&
         !util_ranges_intersect(&tres->valid_buffer_range, offset, offset + size)) ||
        !tc_is_buffer_busy(tc, tres, usage)))
      usage |= PIPE_MAP_UNSYNCHRONIZED;

   if (!(usage & PIPE_MAP_UNSYNCHRONIZED)) {
      /* If discarding the entire range, discard the whole resource instead. */
      if (usage & PIPE_MAP_DISCARD_RANGE &&
          offset == 0 && size == tres->b.width0)
         usage |= PIPE_MAP_DISCARD_WHOLE_RESOURCE;

      /* Discard the whole resource if needed. */
      if (usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE) {
         if (tc_invalidate_buffer(tc, tres))
            usage |= PIPE_MAP_UNSYNCHRONIZED;
         else
            usage |= PIPE_MAP_DISCARD_RANGE; /* fallback */
      }
   }

   /* We won't need this flag anymore. */
   /* TODO: We might not need TC_TRANSFER_MAP_NO_INVALIDATE with this. */
   usage &= ~PIPE_MAP_DISCARD_WHOLE_RESOURCE;

   /* GL_AMD_pinned_memory and persistent mappings can't use staging
    * buffers. */
   if (usage & (PIPE_MAP_UNSYNCHRONIZED |
                PIPE_MAP_PERSISTENT) ||
       tres->is_user_ptr)
      usage &= ~PIPE_MAP_DISCARD_RANGE;

   /* Unsynchronized buffer mappings don't have to synchronize the thread. */
   if (usage & PIPE_MAP_UNSYNCHRONIZED) {
      usage &= ~PIPE_MAP_DISCARD_RANGE;
      usage |= TC_TRANSFER_MAP_THREADED_UNSYNC; /* notify the driver */
   }

   return usage;
}
2134
/* pipe_context::buffer_map for the threaded context.
 *
 * Three paths, in order of preference:
 *  1. DISCARD_RANGE: return a staging upload allocation; the data is copied
 *     into the real buffer at unmap / flush_region time.
 *  2. UNSYNCHRONIZED: map directly in this thread without syncing the
 *     driver thread (unless a staging-upload conflict is detected).
 *  3. Otherwise: sync the driver thread, then map directly.
 */
static void *
tc_buffer_map(struct pipe_context *_pipe,
              struct pipe_resource *resource, unsigned level,
              unsigned usage, const struct pipe_box *box,
              struct pipe_transfer **transfer)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct threaded_resource *tres = threaded_resource(resource);
   struct pipe_context *pipe = tc->pipe;

   usage = tc_improve_map_buffer_flags(tc, tres, usage, box->x, box->width);

   /* Do a staging transfer within the threaded context. The driver should
    * only get resource_copy_region.
    */
   if (usage & PIPE_MAP_DISCARD_RANGE) {
      struct threaded_transfer *ttrans = slab_alloc(&tc->pool_transfers);
      uint8_t *map;

      ttrans->staging = NULL;

      /* Over-allocate so that the returned pointer can keep the same
       * misalignment (box->x % alignment) as the destination offset.
       */
      u_upload_alloc(tc->base.stream_uploader, 0,
                     box->width + (box->x % tc->map_buffer_alignment),
                     tc->map_buffer_alignment, &ttrans->b.offset,
                     &ttrans->staging, (void**)&map);
      if (!map) {
         slab_free(&tc->pool_transfers, ttrans);
         return NULL;
      }

      ttrans->b.resource = resource;
      ttrans->b.level = 0;
      ttrans->b.usage = usage;
      ttrans->b.box = *box;
      ttrans->b.stride = 0;
      ttrans->b.layer_stride = 0;
      ttrans->valid_buffer_range = &tres->valid_buffer_range;
      *transfer = &ttrans->b;

      /* Track the staging upload so later unsynchronized direct maps can
       * detect write conflicts against it (see below).
       */
      p_atomic_inc(&tres->pending_staging_uploads);
      util_range_add(resource, &tres->pending_staging_uploads_range,
                     box->x, box->x + box->width);

      return map + (box->x % tc->map_buffer_alignment);
   }

   if (usage & PIPE_MAP_UNSYNCHRONIZED &&
       p_atomic_read(&tres->pending_staging_uploads) &&
       util_ranges_intersect(&tres->pending_staging_uploads_range, box->x, box->x + box->width)) {
      /* Write conflict detected between a staging transfer and the direct mapping we're
       * going to do. Resolve the conflict by ignoring UNSYNCHRONIZED so the direct mapping
       * will have to wait for the staging transfer completion.
       * Note: The conflict detection is only based on the mapped range, not on the actual
       * written range(s).
       */
      usage &= ~PIPE_MAP_UNSYNCHRONIZED & ~TC_TRANSFER_MAP_THREADED_UNSYNC;
      tc->use_forced_staging_uploads = false;
   }

   /* Unsynchronized buffer mappings don't have to synchronize the thread. */
   if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC)) {
      tc_sync_msg(tc, usage & PIPE_MAP_DISCARD_RANGE ? "  discard_range" :
                      usage & PIPE_MAP_READ ? "  read" : "  staging conflict");
      tc_set_driver_thread(tc);
   }

   tc->bytes_mapped_estimate += box->width;

   /* Map the most recent storage (tres->latest may point to replacement
    * storage created by tc_invalidate_buffer).
    */
   void *ret = pipe->buffer_map(pipe, tres->latest ? tres->latest : resource,
                                level, usage, box, transfer);
   threaded_transfer(*transfer)->valid_buffer_range = &tres->valid_buffer_range;

   if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC))
      tc_clear_driver_thread(tc);

   return ret;
}
2212
2213static void *
2214tc_texture_map(struct pipe_context *_pipe,
2215               struct pipe_resource *resource, unsigned level,
2216               unsigned usage, const struct pipe_box *box,
2217               struct pipe_transfer **transfer)
2218{
2219   struct threaded_context *tc = threaded_context(_pipe);
2220   struct threaded_resource *tres = threaded_resource(resource);
2221   struct pipe_context *pipe = tc->pipe;
2222
2223   tc_sync_msg(tc, "texture");
2224   tc_set_driver_thread(tc);
2225
2226   tc->bytes_mapped_estimate += box->width;
2227
2228   void *ret = pipe->texture_map(pipe, tres->latest ? tres->latest : resource,
2229                                 level, usage, box, transfer);
2230
2231   if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC))
2232      tc_clear_driver_thread(tc);
2233
2234   return ret;
2235}
2236
/* Queued payload for pipe_context::transfer_flush_region. */
struct tc_transfer_flush_region {
   struct tc_call_base base;
   struct pipe_box box;             /* region relative to the mapped box */
   struct pipe_transfer *transfer;  /* driver transfer being flushed */
};
2242
2243static uint16_t
2244tc_call_transfer_flush_region(struct pipe_context *pipe, void *call, uint64_t *last)
2245{
2246   struct tc_transfer_flush_region *p = to_call(call, tc_transfer_flush_region);
2247
2248   pipe->transfer_flush_region(pipe, p->transfer, &p->box);
2249   return call_size(tc_transfer_flush_region);
2250}
2251
/* Queued payload for pipe_context::resource_copy_region. */
struct tc_resource_copy_region {
   struct tc_call_base base;
   unsigned dst_level;              /* destination mip level */
   unsigned dstx, dsty, dstz;       /* destination origin */
   unsigned src_level;              /* source mip level */
   struct pipe_box src_box;         /* source region to copy */
   struct pipe_resource *dst;
   struct pipe_resource *src;
};
2261
/* Forward declaration: needed by tc_buffer_do_flush_region below; the
 * definition lives further down in this file.
 */
static void
tc_resource_copy_region(struct pipe_context *_pipe,
                        struct pipe_resource *dst, unsigned dst_level,
                        unsigned dstx, unsigned dsty, unsigned dstz,
                        struct pipe_resource *src, unsigned src_level,
                        const struct pipe_box *src_box);
2268
/* Flush a written sub-range of a mapped buffer.
 *
 * For a staging transfer this enqueues a copy from the staging buffer into
 * the real resource; in all cases it extends the resource's valid range to
 * cover the flushed box.
 */
static void
tc_buffer_do_flush_region(struct threaded_context *tc,
                          struct threaded_transfer *ttrans,
                          const struct pipe_box *box)
{
   struct threaded_resource *tres = threaded_resource(ttrans->b.resource);

   if (ttrans->staging) {
      struct pipe_box src_box;

      /* Source offset inside the staging buffer: the upload offset, plus
       * the intentional misalignment applied in tc_buffer_map, plus the
       * position of this box inside the originally mapped box.
       */
      u_box_1d(ttrans->b.offset + ttrans->b.box.x % tc->map_buffer_alignment +
               (box->x - ttrans->b.box.x),
               box->width, &src_box);

      /* Copy the staging buffer into the original one. */
      tc_resource_copy_region(&tc->base, ttrans->b.resource, 0, box->x, 0, 0,
                              ttrans->staging, 0, &src_box);
   }

   util_range_add(&tres->b, ttrans->valid_buffer_range,
                  box->x, box->x + box->width);
}
2291
/* pipe_context::transfer_flush_region for the threaded context.
 *
 * Buffer transfers with explicit-flush write usage are flushed via
 * tc_buffer_do_flush_region; staging transfers end there (the driver never
 * saw the map). Everything else is forwarded to the driver as a queued
 * transfer_flush_region call.
 */
static void
tc_transfer_flush_region(struct pipe_context *_pipe,
                         struct pipe_transfer *transfer,
                         const struct pipe_box *rel_box)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct threaded_transfer *ttrans = threaded_transfer(transfer);
   struct threaded_resource *tres = threaded_resource(transfer->resource);
   unsigned required_usage = PIPE_MAP_WRITE |
                             PIPE_MAP_FLUSH_EXPLICIT;

   if (tres->b.target == PIPE_BUFFER) {
      if ((transfer->usage & required_usage) == required_usage) {
         struct pipe_box box;

         /* rel_box is relative to the mapped box; make it absolute. */
         u_box_1d(transfer->box.x + rel_box->x, rel_box->width, &box);
         tc_buffer_do_flush_region(tc, ttrans, &box);
      }

      /* Staging transfers don't send the call to the driver. */
      if (ttrans->staging)
         return;
   }

   struct tc_transfer_flush_region *p =
      tc_add_call(tc, TC_CALL_transfer_flush_region, tc_transfer_flush_region);
   p->transfer = transfer;
   p->box = *rel_box;
}
2321
/* Forward declaration: tc_buffer_unmap/tc_texture_unmap may flush to bound
 * the amount of outstanding mapped memory; definition is further down.
 */
static void
tc_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence,
         unsigned flags);
2325
/* Queued payload for pipe_context::buffer_unmap. For staging transfers the
 * driver never mapped anything, so only the resource is carried (to keep it
 * alive and decrement its pending-staging counter); otherwise the driver
 * transfer is carried and unmapped on the driver thread.
 */
struct tc_buffer_unmap {
   struct tc_call_base base;
   bool was_staging_transfer;  /* selects which union member is valid */
   union {
      struct pipe_transfer *transfer;  /* !was_staging_transfer */
      struct pipe_resource *resource;  /* was_staging_transfer */
   };
};
2334
2335static uint16_t
2336tc_call_buffer_unmap(struct pipe_context *pipe, void *call, uint64_t *last)
2337{
2338   struct tc_buffer_unmap *p = to_call(call, tc_buffer_unmap);
2339
2340   if (p->was_staging_transfer) {
2341      struct threaded_resource *tres = threaded_resource(p->resource);
2342      /* Nothing to do except keeping track of staging uploads */
2343      assert(tres->pending_staging_uploads > 0);
2344      p_atomic_dec(&tres->pending_staging_uploads);
2345      tc_drop_resource_reference(p->resource);
2346   } else {
2347      pipe->buffer_unmap(pipe, p->transfer);
2348   }
2349
2350   return call_size(tc_buffer_unmap);
2351}
2352
2353static void
2354tc_buffer_unmap(struct pipe_context *_pipe, struct pipe_transfer *transfer)
2355{
2356   struct threaded_context *tc = threaded_context(_pipe);
2357   struct threaded_transfer *ttrans = threaded_transfer(transfer);
2358   struct threaded_resource *tres = threaded_resource(transfer->resource);
2359
2360   /* PIPE_MAP_THREAD_SAFE is only valid with UNSYNCHRONIZED. It can be
2361    * called from any thread and bypasses all multithreaded queues.
2362    */
2363   if (transfer->usage & PIPE_MAP_THREAD_SAFE) {
2364      assert(transfer->usage & PIPE_MAP_UNSYNCHRONIZED);
2365      assert(!(transfer->usage & (PIPE_MAP_FLUSH_EXPLICIT |
2366                                  PIPE_MAP_DISCARD_RANGE)));
2367
2368      struct pipe_context *pipe = tc->pipe;
2369      util_range_add(&tres->b, ttrans->valid_buffer_range,
2370                      transfer->box.x, transfer->box.x + transfer->box.width);
2371
2372      pipe->buffer_unmap(pipe, transfer);
2373      return;
2374   }
2375
2376   bool was_staging_transfer = false;
2377
2378   if (transfer->usage & PIPE_MAP_WRITE &&
2379       !(transfer->usage & PIPE_MAP_FLUSH_EXPLICIT))
2380      tc_buffer_do_flush_region(tc, ttrans, &transfer->box);
2381
2382   if (ttrans->staging) {
2383      was_staging_transfer = true;
2384
2385      tc_drop_resource_reference(ttrans->staging);
2386      slab_free(&tc->pool_transfers, ttrans);
2387   }
2388
2389   struct tc_buffer_unmap *p = tc_add_call(tc, TC_CALL_buffer_unmap,
2390                                           tc_buffer_unmap);
2391   if (was_staging_transfer) {
2392      tc_set_resource_reference(&p->resource, &tres->b);
2393      p->was_staging_transfer = true;
2394   } else {
2395      p->transfer = transfer;
2396      p->was_staging_transfer = false;
2397   }
2398
2399   /* tc_buffer_map directly maps the buffers, but tc_buffer_unmap
2400    * defers the unmap operation to the batch execution.
2401    * bytes_mapped_estimate is an estimation of the map/unmap bytes delta
2402    * and if it goes over an optional limit the current batch is flushed,
2403    * to reclaim some RAM. */
2404   if (!ttrans->staging && tc->bytes_mapped_limit &&
2405       tc->bytes_mapped_estimate > tc->bytes_mapped_limit) {
2406      tc_flush(_pipe, NULL, PIPE_FLUSH_ASYNC);
2407   }
2408}
2409
/* Queued payload for pipe_context::texture_unmap. */
struct tc_texture_unmap {
   struct tc_call_base base;
   struct pipe_transfer *transfer;  /* driver transfer to unmap */
};
2414
2415static uint16_t
2416tc_call_texture_unmap(struct pipe_context *pipe, void *call, uint64_t *last)
2417{
2418   struct tc_texture_unmap *p = (struct tc_texture_unmap *) call;
2419
2420   pipe->texture_unmap(pipe, p->transfer);
2421   return call_size(tc_texture_unmap);
2422}
2423
2424static void
2425tc_texture_unmap(struct pipe_context *_pipe, struct pipe_transfer *transfer)
2426{
2427   struct threaded_context *tc = threaded_context(_pipe);
2428   struct threaded_transfer *ttrans = threaded_transfer(transfer);
2429
2430   tc_add_call(tc, TC_CALL_texture_unmap, tc_texture_unmap)->transfer = transfer;
2431
2432   /* tc_texture_map directly maps the textures, but tc_texture_unmap
2433    * defers the unmap operation to the batch execution.
2434    * bytes_mapped_estimate is an estimation of the map/unmap bytes delta
2435    * and if it goes over an optional limit the current batch is flushed,
2436    * to reclaim some RAM. */
2437   if (!ttrans->staging && tc->bytes_mapped_limit &&
2438       tc->bytes_mapped_estimate > tc->bytes_mapped_limit) {
2439      tc_flush(_pipe, NULL, PIPE_FLUSH_ASYNC);
2440   }
2441}
2442
/* Queued payload for pipe_context::buffer_subdata; the uploaded bytes are
 * stored inline after the struct.
 */
struct tc_buffer_subdata {
   struct tc_call_base base;
   unsigned usage, offset, size;    /* map usage flags and target range */
   struct pipe_resource *resource;
   char slot[0]; /* more will be allocated if needed */
};
2449
2450static uint16_t
2451tc_call_buffer_subdata(struct pipe_context *pipe, void *call, uint64_t *last)
2452{
2453   struct tc_buffer_subdata *p = (struct tc_buffer_subdata *)call;
2454
2455   pipe->buffer_subdata(pipe, p->resource, p->usage, p->offset, p->size,
2456                        p->slot);
2457   tc_drop_resource_reference(p->resource);
2458   return p->base.num_slots;
2459}
2460
/* pipe_context::buffer_subdata for the threaded context.
 *
 * Small synchronized uploads are copied inline into the call queue; large
 * or unsynchronized/discard-whole ones are routed through tc_buffer_map +
 * memcpy + tc_buffer_unmap instead.
 */
static void
tc_buffer_subdata(struct pipe_context *_pipe,
                  struct pipe_resource *resource,
                  unsigned usage, unsigned offset,
                  unsigned size, const void *data)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct threaded_resource *tres = threaded_resource(resource);

   if (!size)
      return;

   usage |= PIPE_MAP_WRITE;

   /* PIPE_MAP_DIRECTLY suppresses implicit DISCARD_RANGE. */
   if (!(usage & PIPE_MAP_DIRECTLY))
      usage |= PIPE_MAP_DISCARD_RANGE;

   usage = tc_improve_map_buffer_flags(tc, tres, usage, offset, size);

   /* Unsynchronized and big transfers should use transfer_map. Also handle
    * full invalidations, because drivers aren't allowed to do them.
    */
   if (usage & (PIPE_MAP_UNSYNCHRONIZED |
                PIPE_MAP_DISCARD_WHOLE_RESOURCE) ||
       size > TC_MAX_SUBDATA_BYTES) {
      struct pipe_transfer *transfer;
      struct pipe_box box;
      uint8_t *map = NULL;

      u_box_1d(offset, size, &box);

      map = tc_buffer_map(_pipe, resource, 0, usage, &box, &transfer);
      if (map) {
         memcpy(map, data, size);
         tc_buffer_unmap(_pipe, transfer);
      }
      return;
   }

   util_range_add(&tres->b, &tres->valid_buffer_range, offset, offset + size);

   /* The upload is small. Enqueue it. */
   struct tc_buffer_subdata *p =
      tc_add_slot_based_call(tc, TC_CALL_buffer_subdata, tc_buffer_subdata, size);

   tc_set_resource_reference(&p->resource, resource);
   /* This will always be busy because if it wasn't, tc_improve_map_buffer-
    * _flags would set UNSYNCHRONIZED and we wouldn't get here.
    */
   tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], resource);
   p->usage = usage;
   p->offset = offset;
   p->size = size;
   memcpy(p->slot, data, size);
}
2517
/* Queued payload for pipe_context::texture_subdata; the uploaded bytes are
 * stored inline after the struct.
 */
struct tc_texture_subdata {
   struct tc_call_base base;
   unsigned level, usage, stride, layer_stride;
   struct pipe_box box;             /* destination region */
   struct pipe_resource *resource;
   char slot[0]; /* more will be allocated if needed */
};
2525
2526static uint16_t
2527tc_call_texture_subdata(struct pipe_context *pipe, void *call, uint64_t *last)
2528{
2529   struct tc_texture_subdata *p = (struct tc_texture_subdata *)call;
2530
2531   pipe->texture_subdata(pipe, p->resource, p->level, p->usage, &p->box,
2532                         p->slot, p->stride, p->layer_stride);
2533   tc_drop_resource_reference(p->resource);
2534   return p->base.num_slots;
2535}
2536
/* pipe_context::texture_subdata for the threaded context.
 *
 * Small uploads are copied inline into the call queue; large ones sync the
 * driver thread and call the driver directly.
 */
static void
tc_texture_subdata(struct pipe_context *_pipe,
                   struct pipe_resource *resource,
                   unsigned level, unsigned usage,
                   const struct pipe_box *box,
                   const void *data, unsigned stride,
                   unsigned layer_stride)
{
   struct threaded_context *tc = threaded_context(_pipe);
   unsigned size;

   assert(box->height >= 1);
   assert(box->depth >= 1);

   /* Size of the data as laid out in memory: full strides for all but the
    * last row/layer, plus the byte width of the last row.
    */
   size = (box->depth - 1) * layer_stride +
          (box->height - 1) * stride +
          box->width * util_format_get_blocksize(resource->format);
   if (!size)
      return;

   /* Small uploads can be enqueued, big uploads must sync. */
   if (size <= TC_MAX_SUBDATA_BYTES) {
      struct tc_texture_subdata *p =
         tc_add_slot_based_call(tc, TC_CALL_texture_subdata, tc_texture_subdata, size);

      tc_set_resource_reference(&p->resource, resource);
      p->level = level;
      p->usage = usage;
      p->box = *box;
      p->stride = stride;
      p->layer_stride = layer_stride;
      memcpy(p->slot, data, size);
   } else {
      struct pipe_context *pipe = tc->pipe;

      tc_sync(tc);
      tc_set_driver_thread(tc);
      pipe->texture_subdata(pipe, resource, level, usage, box, data,
                            stride, layer_stride);
      tc_clear_driver_thread(tc);
   }
}
2579
2580
2581/********************************************************************
2582 * miscellaneous
2583 */
2584
/* Define a threaded-context wrapper for a zero-argument pipe_context
 * function that returns a value: sync the driver thread, then forward.
 */
#define TC_FUNC_SYNC_RET0(ret_type, func) \
   static ret_type \
   tc_##func(struct pipe_context *_pipe) \
   { \
      struct threaded_context *tc = threaded_context(_pipe); \
      struct pipe_context *pipe = tc->pipe; \
      tc_sync(tc); \
      return pipe->func(pipe); \
   }

TC_FUNC_SYNC_RET0(uint64_t, get_timestamp)
2596
2597static void
2598tc_get_sample_position(struct pipe_context *_pipe,
2599                       unsigned sample_count, unsigned sample_index,
2600                       float *out_value)
2601{
2602   struct threaded_context *tc = threaded_context(_pipe);
2603   struct pipe_context *pipe = tc->pipe;
2604
2605   tc_sync(tc);
2606   pipe->get_sample_position(pipe, sample_count, sample_index,
2607                             out_value);
2608}
2609
2610static enum pipe_reset_status
2611tc_get_device_reset_status(struct pipe_context *_pipe)
2612{
2613   struct threaded_context *tc = threaded_context(_pipe);
2614   struct pipe_context *pipe = tc->pipe;
2615
2616   if (!tc->options.unsynchronized_get_device_reset_status)
2617      tc_sync(tc);
2618
2619   return pipe->get_device_reset_status(pipe);
2620}
2621
2622static void
2623tc_set_device_reset_callback(struct pipe_context *_pipe,
2624                             const struct pipe_device_reset_callback *cb)
2625{
2626   struct threaded_context *tc = threaded_context(_pipe);
2627   struct pipe_context *pipe = tc->pipe;
2628
2629   tc_sync(tc);
2630   pipe->set_device_reset_callback(pipe, cb);
2631}
2632
/* Queued payload for pipe_context::emit_string_marker; the string bytes are
 * stored inline after the struct.
 */
struct tc_string_marker {
   struct tc_call_base base;
   int len;          /* length of the marker string in slot[] */
   char slot[0]; /* more will be allocated if needed */
};
2638
2639static uint16_t
2640tc_call_emit_string_marker(struct pipe_context *pipe, void *call, uint64_t *last)
2641{
2642   struct tc_string_marker *p = (struct tc_string_marker *)call;
2643   pipe->emit_string_marker(pipe, p->slot, p->len);
2644   return p->base.num_slots;
2645}
2646
2647static void
2648tc_emit_string_marker(struct pipe_context *_pipe,
2649                      const char *string, int len)
2650{
2651   struct threaded_context *tc = threaded_context(_pipe);
2652
2653   if (len <= TC_MAX_STRING_MARKER_BYTES) {
2654      struct tc_string_marker *p =
2655         tc_add_slot_based_call(tc, TC_CALL_emit_string_marker, tc_string_marker, len);
2656
2657      memcpy(p->slot, string, len);
2658      p->len = len;
2659   } else {
2660      struct pipe_context *pipe = tc->pipe;
2661
2662      tc_sync(tc);
2663      tc_set_driver_thread(tc);
2664      pipe->emit_string_marker(pipe, string, len);
2665      tc_clear_driver_thread(tc);
2666   }
2667}
2668
2669static void
2670tc_dump_debug_state(struct pipe_context *_pipe, FILE *stream,
2671                    unsigned flags)
2672{
2673   struct threaded_context *tc = threaded_context(_pipe);
2674   struct pipe_context *pipe = tc->pipe;
2675
2676   tc_sync(tc);
2677   pipe->dump_debug_state(pipe, stream, flags);
2678}
2679
2680static void
2681tc_set_debug_callback(struct pipe_context *_pipe,
2682                      const struct pipe_debug_callback *cb)
2683{
2684   struct threaded_context *tc = threaded_context(_pipe);
2685   struct pipe_context *pipe = tc->pipe;
2686
2687   /* Drop all synchronous debug callbacks. Drivers are expected to be OK
2688    * with this. shader-db will use an environment variable to disable
2689    * the threaded context.
2690    */
2691   if (cb && cb->debug_message && !cb->async)
2692      return;
2693
2694   tc_sync(tc);
2695   pipe->set_debug_callback(pipe, cb);
2696}
2697
2698static void
2699tc_set_log_context(struct pipe_context *_pipe, struct u_log_context *log)
2700{
2701   struct threaded_context *tc = threaded_context(_pipe);
2702   struct pipe_context *pipe = tc->pipe;
2703
2704   tc_sync(tc);
2705   pipe->set_log_context(pipe, log);
2706}
2707
2708static void
2709tc_create_fence_fd(struct pipe_context *_pipe,
2710                   struct pipe_fence_handle **fence, int fd,
2711                   enum pipe_fd_type type)
2712{
2713   struct threaded_context *tc = threaded_context(_pipe);
2714   struct pipe_context *pipe = tc->pipe;
2715
2716   tc_sync(tc);
2717   pipe->create_fence_fd(pipe, fence, fd, type);
2718}
2719
/* Queued payload for fence_server_sync/fence_server_signal: carries one
 * fence reference that the call handler releases on the driver thread.
 */
struct tc_fence_call {
   struct tc_call_base base;
   struct pipe_fence_handle *fence; /* reference owned by the queued call */
};
2724
2725static uint16_t
2726tc_call_fence_server_sync(struct pipe_context *pipe, void *call, uint64_t *last)
2727{
2728   struct pipe_fence_handle *fence = to_call(call, tc_fence_call)->fence;
2729
2730   pipe->fence_server_sync(pipe, fence);
2731   pipe->screen->fence_reference(pipe->screen, &fence, NULL);
2732   return call_size(tc_fence_call);
2733}
2734
/* Queue a server-side wait on the given fence (asynchronous entry point). */
static void
tc_fence_server_sync(struct pipe_context *_pipe,
                     struct pipe_fence_handle *fence)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct pipe_screen *screen = tc->pipe->screen;
   struct tc_fence_call *call = tc_add_call(tc, TC_CALL_fence_server_sync,
                                            tc_fence_call);

   /* The freshly added call slot is uninitialized, and fence_reference
    * unreferences the previous value, so it must start out as NULL.
    */
   call->fence = NULL;
   screen->fence_reference(screen, &call->fence, fence);
}
2747
2748static uint16_t
2749tc_call_fence_server_signal(struct pipe_context *pipe, void *call, uint64_t *last)
2750{
2751   struct pipe_fence_handle *fence = to_call(call, tc_fence_call)->fence;
2752
2753   pipe->fence_server_signal(pipe, fence);
2754   pipe->screen->fence_reference(pipe->screen, &fence, NULL);
2755   return call_size(tc_fence_call);
2756}
2757
/* Queue a server-side fence signal (asynchronous entry point). */
static void
tc_fence_server_signal(struct pipe_context *_pipe,
                           struct pipe_fence_handle *fence)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct pipe_screen *screen = tc->pipe->screen;
   struct tc_fence_call *call = tc_add_call(tc, TC_CALL_fence_server_signal,
                                            tc_fence_call);

   /* The freshly added call slot is uninitialized, and fence_reference
    * unreferences the previous value, so it must start out as NULL.
    */
   call->fence = NULL;
   screen->fence_reference(screen, &call->fence, fence);
}
2770
/* Stub: video codec contexts must be created without threading. */
static struct pipe_video_codec *
tc_create_video_codec(UNUSED struct pipe_context *_pipe,
                      UNUSED const struct pipe_video_codec *templ)
{
   unreachable("Threaded context should not be enabled for video APIs");
   return NULL;
}
2778
/* Stub: video buffers must be created without threading. */
static struct pipe_video_buffer *
tc_create_video_buffer(UNUSED struct pipe_context *_pipe,
                       UNUSED const struct pipe_video_buffer *templ)
{
   unreachable("Threaded context should not be enabled for video APIs");
   return NULL;
}
2786
/* Queued payload for set_context_param. */
struct tc_context_param {
   struct tc_call_base base;
   enum pipe_context_param param;
   unsigned value;
};
2792
2793static uint16_t
2794tc_call_set_context_param(struct pipe_context *pipe, void *call, uint64_t *last)
2795{
2796   struct tc_context_param *p = to_call(call, tc_context_param);
2797
2798   if (pipe->set_context_param)
2799      pipe->set_context_param(pipe, p->param, p->value);
2800
2801   return call_size(tc_context_param);
2802}
2803
/* Forward a context parameter to the driver.
 *
 * PIPE_CONTEXT_PARAM_PIN_THREADS_TO_L3_CACHE is handled immediately on the
 * calling thread (it also pins the TC worker thread itself); everything
 * else is queued as a normal call.
 */
static void
tc_set_context_param(struct pipe_context *_pipe,
                           enum pipe_context_param param,
                           unsigned value)
{
   struct threaded_context *tc = threaded_context(_pipe);

   if (param == PIPE_CONTEXT_PARAM_PIN_THREADS_TO_L3_CACHE) {
      /* Pin the gallium thread as requested. */
      util_set_thread_affinity(tc->queue.threads[0],
                               util_get_cpu_caps()->L3_affinity_mask[value],
                               NULL, util_get_cpu_caps()->num_cpu_mask_bits);

      /* Execute this immediately (without enqueuing).
       * It's required to be thread-safe.
       */
      struct pipe_context *pipe = tc->pipe;
      if (pipe->set_context_param)
         pipe->set_context_param(pipe, param, value);
      return;
   }

   /* Only queue the call if the driver implements the hook. */
   if (tc->pipe->set_context_param) {
      struct tc_context_param *call =
         tc_add_call(tc, TC_CALL_set_context_param, tc_context_param);

      call->param = param;
      call->value = value;
   }
}
2834
2835
2836/********************************************************************
2837 * draw, launch, clear, blit, copy, flush
2838 */
2839
/* Queued payload for an asynchronous flush. */
struct tc_flush_call {
   struct tc_call_base base;
   unsigned flags;                  /* PIPE_FLUSH_* | TC_FLUSH_ASYNC */
   struct threaded_context *tc;     /* needed to flush unflushed queries */
   struct pipe_fence_handle *fence; /* reference owned by the queued call */
};
2846
/* Mark every query on tc->unflushed_queries as flushed and empty the list.
 * Invoked for non-deferred flushes, either from tc_call_flush on the driver
 * thread or from tc_flush after a sync.
 */
static void
tc_flush_queries(struct threaded_context *tc)
{
   struct threaded_query *tq, *tmp;
   LIST_FOR_EACH_ENTRY_SAFE(tq, tmp, &tc->unflushed_queries, head_unflushed) {
      list_del(&tq->head_unflushed);

      /* Memory release semantics: due to a possible race with
       * tc_get_query_result, we must ensure that the linked list changes
       * are visible before setting tq->flushed.
       */
      p_atomic_set(&tq->flushed, true);
   }
}
2861
/* Driver-thread handler for a queued flush. */
static uint16_t
tc_call_flush(struct pipe_context *pipe, void *call, uint64_t *last)
{
   struct tc_flush_call *p = to_call(call, tc_flush_call);
   struct pipe_screen *screen = pipe->screen;

   /* Pass the queued fence pointer through to the driver, then drop the
    * reference the queued call was holding.
    */
   pipe->flush(pipe, p->fence ? &p->fence : NULL, p->flags);
   screen->fence_reference(screen, &p->fence, NULL);

   /* Deferred flushes don't resolve queries. */
   if (!(p->flags & PIPE_FLUSH_DEFERRED))
      tc_flush_queries(p->tc);

   return call_size(tc_flush_call);
}
2876
/* pipe_context::flush for the threaded context.
 *
 * Deferred/async flushes are queued without synchronizing when the driver
 * supports fence creation through tc->options.create_fence; any failure
 * (or a plain flush) falls back to a fully synchronous flush.
 */
static void
tc_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence,
         unsigned flags)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct pipe_context *pipe = tc->pipe;
   struct pipe_screen *screen = pipe->screen;
   bool async = flags & (PIPE_FLUSH_DEFERRED | PIPE_FLUSH_ASYNC);

   if (async && tc->options.create_fence) {
      if (fence) {
         struct tc_batch *next = &tc->batch_slots[tc->next];

         /* Lazily allocate the per-batch token that links driver fences
          * back to this TC (shared by all fences created for this batch).
          */
         if (!next->token) {
            next->token = malloc(sizeof(*next->token));
            if (!next->token)
               goto out_of_memory;

            pipe_reference_init(&next->token->ref, 1);
            next->token->tc = tc;
         }

         screen->fence_reference(screen, fence,
                                 tc->options.create_fence(pipe, next->token));
         if (!*fence)
            goto out_of_memory;
      }

      struct tc_flush_call *p = tc_add_call(tc, TC_CALL_flush, tc_flush_call);
      p->tc = tc;
      /* The queued call borrows the caller's fence reference; it is
       * released in tc_call_flush.
       */
      p->fence = fence ? *fence : NULL;
      p->flags = flags | TC_FLUSH_ASYNC;

      if (!(flags & PIPE_FLUSH_DEFERRED))
         tc_batch_flush(tc);
      return;
   }

out_of_memory:
   /* Synchronous fallback: drain the queue and flush on this thread. */
   tc_sync_msg(tc, flags & PIPE_FLUSH_END_OF_FRAME ? "end of frame" :
                   flags & PIPE_FLUSH_DEFERRED ? "deferred fence" : "normal");

   if (!(flags & PIPE_FLUSH_DEFERRED))
      tc_flush_queries(tc);
   tc_set_driver_thread(tc);
   pipe->flush(pipe, fence, flags);
   tc_clear_driver_thread(tc);
}
2925
/* Payload for one queued non-indirect draw.
 * Note: TC repurposes info.min_index/max_index to store the draw's
 * start/count, so drivers under TC must not rely on index bounds.
 */
struct tc_draw_single {
   struct tc_call_base base;
   unsigned index_bias;
   struct pipe_draw_info info;
};
2931
/* tc_draw_single variant that also carries a non-zero drawid offset. */
struct tc_draw_single_drawid {
   struct tc_draw_single base;
   unsigned drawid_offset;
};
2936
/* Driver-thread handler for a single draw with a drawid offset.
 * These draws are never merged, unlike TC_CALL_draw_single.
 */
static uint16_t
tc_call_draw_single_drawid(struct pipe_context *pipe, void *call, uint64_t *last)
{
   struct tc_draw_single_drawid *info_drawid = to_call(call, tc_draw_single_drawid);
   struct tc_draw_single *info = &info_drawid->base;

   /* u_threaded_context stores start/count in min/max_index for single draws. */
   /* Drivers using u_threaded_context shouldn't use min/max_index. */
   struct pipe_draw_start_count_bias draw;

   draw.start = info->info.min_index;
   draw.count = info->info.max_index;
   draw.index_bias = info->index_bias;

   /* Clear TC-internal flags so the driver doesn't see them. */
   info->info.index_bounds_valid = false;
   info->info.has_user_indices = false;
   info->info.take_index_buffer_ownership = false;

   pipe->draw_vbo(pipe, &info->info, info_drawid->drawid_offset, NULL, &draw, 1);
   /* Release the index buffer reference taken at enqueue time. */
   if (info->info.index_size)
      tc_drop_resource_reference(info->info.index.resource);

   return call_size(tc_draw_single_drawid);
}
2961
/* Canonicalize a pipe_draw_info so that two mergeable draws compare equal
 * with memcmp (see is_next_call_a_mergeable_draw). Every field that may
 * legitimately differ between merged draws is forced to a fixed value.
 */
static void
simplify_draw_info(struct pipe_draw_info *info)
{
   /* Clear these fields to facilitate draw merging.
    * Drivers shouldn't use them.
    */
   info->has_user_indices = false;
   info->index_bounds_valid = false;
   info->take_index_buffer_ownership = false;
   info->index_bias_varies = false;
   info->_pad = 0;

   /* This shouldn't be set when merging single draws. */
   info->increment_draw_id = false;

   if (info->index_size) {
      /* The restart index only matters when restart is enabled. */
      if (!info->primitive_restart)
         info->restart_index = 0;
   } else {
      /* Non-indexed draws can't use primitive restart or an index buffer. */
      assert(!info->primitive_restart);
      info->primitive_restart = false;
      info->restart_index = 0;
      info->index.resource = NULL;
   }
}
2987
/* Return true if "next" is a single draw whose state matches "first" in
 * every field except start/count, i.e. the two can be issued as one
 * multi-draw. Has the side effect of simplifying next->info in place.
 */
static bool
is_next_call_a_mergeable_draw(struct tc_draw_single *first,
                              struct tc_draw_single *next)
{
   if (next->base.call_id != TC_CALL_draw_single)
      return false;

   simplify_draw_info(&next->info);

   /* Verify that min/max_index are the trailing 8 bytes of pipe_draw_info,
    * so the memcmp below covers everything except them.
    */
   STATIC_ASSERT(offsetof(struct pipe_draw_info, min_index) ==
                 sizeof(struct pipe_draw_info) - 8);
   STATIC_ASSERT(offsetof(struct pipe_draw_info, max_index) ==
                 sizeof(struct pipe_draw_info) - 4);
   /* All fields must be the same except start and count. */
   /* u_threaded_context stores start/count in min/max_index for single draws. */
   return memcmp((uint32_t*)&first->info, (uint32_t*)&next->info,
                 DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX) == 0;
}
3006
/* Driver-thread handler for a single draw. Scans ahead in the batch and
 * merges consecutive compatible single draws into one multi-draw call.
 * Returns the total number of call slots consumed (one draw's worth per
 * merged draw).
 */
static uint16_t
tc_call_draw_single(struct pipe_context *pipe, void *call, uint64_t *last_ptr)
{
   /* Draw call merging. */
   struct tc_draw_single *first = to_call(call, tc_draw_single);
   struct tc_draw_single *last = (struct tc_draw_single *)last_ptr;
   struct tc_draw_single *next = get_next_call(first, tc_draw_single);

   /* If at least 2 consecutive draw calls can be merged... */
   if (next != last &&
       next->base.call_id == TC_CALL_draw_single) {
      simplify_draw_info(&first->info);

      if (is_next_call_a_mergeable_draw(first, next)) {
         /* The maximum number of merged draws is given by the batch size. */
         struct pipe_draw_start_count_bias multi[TC_SLOTS_PER_BATCH / call_size(tc_draw_single)];
         unsigned num_draws = 2;
         bool index_bias_varies = first->index_bias != next->index_bias;

         /* u_threaded_context stores start/count in min/max_index for single draws. */
         multi[0].start = first->info.min_index;
         multi[0].count = first->info.max_index;
         multi[0].index_bias = first->index_bias;
         multi[1].start = next->info.min_index;
         multi[1].count = next->info.max_index;
         multi[1].index_bias = next->index_bias;

         /* Find how many other draws can be merged. */
         next = get_next_call(next, tc_draw_single);
         for (; next != last && is_next_call_a_mergeable_draw(first, next);
              next = get_next_call(next, tc_draw_single), num_draws++) {
            /* u_threaded_context stores start/count in min/max_index for single draws. */
            multi[num_draws].start = next->info.min_index;
            multi[num_draws].count = next->info.max_index;
            multi[num_draws].index_bias = next->index_bias;
            index_bias_varies |= first->index_bias != next->index_bias;
         }

         first->info.index_bias_varies = index_bias_varies;
         pipe->draw_vbo(pipe, &first->info, 0, NULL, multi, num_draws);

         /* Since all draws use the same index buffer, drop all references at once. */
         if (first->info.index_size)
            pipe_drop_resource_references(first->info.index.resource, num_draws);

         /* Consume all merged calls in one go. */
         return call_size(tc_draw_single) * num_draws;
      }
   }

   /* Unmergeable case: issue the draw on its own. */
   /* u_threaded_context stores start/count in min/max_index for single draws. */
   /* Drivers using u_threaded_context shouldn't use min/max_index. */
   struct pipe_draw_start_count_bias draw;

   draw.start = first->info.min_index;
   draw.count = first->info.max_index;
   draw.index_bias = first->index_bias;

   first->info.index_bounds_valid = false;
   first->info.has_user_indices = false;
   first->info.take_index_buffer_ownership = false;

   pipe->draw_vbo(pipe, &first->info, 0, NULL, &draw, 1);
   if (first->info.index_size)
      tc_drop_resource_reference(first->info.index.resource);

   return call_size(tc_draw_single);
}
3074
/* Payload for one queued indirect draw. */
struct tc_draw_indirect {
   struct tc_call_base base;
   struct pipe_draw_start_count_bias draw;
   struct pipe_draw_info info;
   struct pipe_draw_indirect_info indirect;
};
3081
/* Driver-thread handler for an indirect draw. */
static uint16_t
tc_call_draw_indirect(struct pipe_context *pipe, void *call, uint64_t *last)
{
   struct tc_draw_indirect *info = to_call(call, tc_draw_indirect);

   /* Clear TC-internal flags so the driver doesn't see them. */
   info->info.index_bounds_valid = false;
   info->info.take_index_buffer_ownership = false;

   pipe->draw_vbo(pipe, &info->info, 0, &info->indirect, &info->draw, 1);
   /* Release all references taken at enqueue time. */
   if (info->info.index_size)
      tc_drop_resource_reference(info->info.index.resource);

   tc_drop_resource_reference(info->indirect.buffer);
   tc_drop_resource_reference(info->indirect.indirect_draw_count);
   tc_drop_so_target_reference(info->indirect.count_from_stream_output);
   return call_size(tc_draw_indirect);
}
3099
/* Payload for a queued multi-draw; occupies a variable number of slots. */
struct tc_draw_multi {
   struct tc_call_base base;
   unsigned num_draws;
   struct pipe_draw_info info;
   struct pipe_draw_start_count_bias slot[]; /* variable-sized array */
};
3106
3107static uint16_t
3108tc_call_draw_multi(struct pipe_context *pipe, void *call, uint64_t *last)
3109{
3110   struct tc_draw_multi *info = (struct tc_draw_multi*)call;
3111
3112   info->info.has_user_indices = false;
3113   info->info.index_bounds_valid = false;
3114   info->info.take_index_buffer_ownership = false;
3115
3116   pipe->draw_vbo(pipe, &info->info, 0, NULL, info->slot, info->num_draws);
3117   if (info->info.index_size)
3118      tc_drop_resource_reference(info->info.index.resource);
3119
3120   return info->base.num_slots;
3121}
3122
/* Prefix of pipe_draw_info up to (excluding) the index buffer union and the
 * trailing min/max_index fields; used when the index pointer is set
 * separately from the rest of the draw info.
 */
#define DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX \
   offsetof(struct pipe_draw_info, index)
3125
/* pipe_context::draw_vbo for the threaded context.
 *
 * Queues indirect draws as tc_draw_indirect, single draws as
 * tc_draw_single[_drawid], and multi draws as tc_draw_multi (split across
 * batches as needed). User index buffers are uploaded here, before the call
 * is queued. Start/count of single draws are stashed in info.min/max_index.
 */
void
tc_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *info,
            unsigned drawid_offset,
            const struct pipe_draw_indirect_info *indirect,
            const struct pipe_draw_start_count_bias *draws,
            unsigned num_draws)
{
   /* The index union must sit directly between the copied prefix and
    * min/max_index for the partial memcpys below to be valid.
    */
   STATIC_ASSERT(DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX +
                 sizeof(intptr_t) == offsetof(struct pipe_draw_info, min_index));

   struct threaded_context *tc = threaded_context(_pipe);
   unsigned index_size = info->index_size;
   bool has_user_indices = info->has_user_indices;

   if (unlikely(tc->add_all_gfx_bindings_to_buffer_list))
      tc_add_all_gfx_bindings_to_buffer_list(tc);

   if (unlikely(indirect)) {
      assert(!has_user_indices);
      assert(num_draws == 1);

      struct tc_draw_indirect *p =
         tc_add_call(tc, TC_CALL_draw_indirect, tc_draw_indirect);
      struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];

      if (index_size) {
         /* With ownership transfer, the caller's reference is inherited
          * instead of taking a new one.
          */
         if (!info->take_index_buffer_ownership) {
            tc_set_resource_reference(&p->info.index.resource,
                                      info->index.resource);
         }
         tc_add_to_buffer_list(next, info->index.resource);
      }
      memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX);

      /* Take references on the indirect buffers; the memcpy below rewrites
       * the same pointer values, so the references stay valid.
       */
      tc_set_resource_reference(&p->indirect.buffer, indirect->buffer);
      tc_set_resource_reference(&p->indirect.indirect_draw_count,
                                indirect->indirect_draw_count);
      p->indirect.count_from_stream_output = NULL;
      pipe_so_target_reference(&p->indirect.count_from_stream_output,
                               indirect->count_from_stream_output);

      if (indirect->buffer)
         tc_add_to_buffer_list(next, indirect->buffer);
      if (indirect->indirect_draw_count)
         tc_add_to_buffer_list(next, indirect->indirect_draw_count);
      if (indirect->count_from_stream_output)
         tc_add_to_buffer_list(next, indirect->count_from_stream_output->buffer);

      memcpy(&p->indirect, indirect, sizeof(*indirect));
      /* Only start is copied; count comes from the indirect buffer. */
      p->draw.start = draws[0].start;
      return;
   }

   if (num_draws == 1) {
      /* Single draw. */
      if (index_size && has_user_indices) {
         unsigned size = draws[0].count * index_size;
         struct pipe_resource *buffer = NULL;
         unsigned offset;

         if (!size)
            return;

         /* This must be done before adding draw_vbo, because it could generate
          * e.g. transfer_unmap and flush partially-uninitialized draw_vbo
          * to the driver if it was done afterwards.
          */
         u_upload_data(tc->base.stream_uploader, 0, size, 4,
                       (uint8_t*)info->index.user + draws[0].start * index_size,
                       &offset, &buffer);
         if (unlikely(!buffer))
            return;

         /* A non-zero drawid needs the larger _drawid call variant. */
         struct tc_draw_single *p = drawid_offset > 0 ?
            &tc_add_call(tc, TC_CALL_draw_single_drawid, tc_draw_single_drawid)->base :
            tc_add_call(tc, TC_CALL_draw_single, tc_draw_single);
         memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX);
         /* The queued call owns the uploaded buffer reference. */
         p->info.index.resource = buffer;
         if (drawid_offset > 0)
            ((struct tc_draw_single_drawid*)p)->drawid_offset = drawid_offset;
         /* u_threaded_context stores start/count in min/max_index for single draws. */
         p->info.min_index = offset >> util_logbase2(index_size);
         p->info.max_index = draws[0].count;
         p->index_bias = draws[0].index_bias;
      } else {
         /* Non-indexed call or indexed with a real index buffer. */
         struct tc_draw_single *p = drawid_offset > 0 ?
            &tc_add_call(tc, TC_CALL_draw_single_drawid, tc_draw_single_drawid)->base :
            tc_add_call(tc, TC_CALL_draw_single, tc_draw_single);
         if (index_size) {
            if (!info->take_index_buffer_ownership) {
               tc_set_resource_reference(&p->info.index.resource,
                                         info->index.resource);
            }
            tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], info->index.resource);
         }
         if (drawid_offset > 0)
            ((struct tc_draw_single_drawid*)p)->drawid_offset = drawid_offset;
         memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX);
         /* u_threaded_context stores start/count in min/max_index for single draws. */
         p->info.min_index = draws[0].start;
         p->info.max_index = draws[0].count;
         p->index_bias = draws[0].index_bias;
      }
      return;
   }

   /* Slot bookkeeping for splitting multi draws across batches. */
   const int draw_overhead_bytes = sizeof(struct tc_draw_multi);
   const int one_draw_slot_bytes = sizeof(((struct tc_draw_multi*)NULL)->slot[0]);
   const int slots_for_one_draw = DIV_ROUND_UP(draw_overhead_bytes + one_draw_slot_bytes,
                                               sizeof(struct tc_call_base));
   /* Multi draw. */
   if (index_size && has_user_indices) {
      struct pipe_resource *buffer = NULL;
      unsigned buffer_offset, total_count = 0;
      unsigned index_size_shift = util_logbase2(index_size);
      uint8_t *ptr = NULL;

      /* Get the total count. */
      for (unsigned i = 0; i < num_draws; i++)
         total_count += draws[i].count;

      if (!total_count)
         return;

      /* Allocate space for all index buffers.
       *
       * This must be done before adding draw_vbo, because it could generate
       * e.g. transfer_unmap and flush partially-uninitialized draw_vbo
       * to the driver if it was done afterwards.
       */
      u_upload_alloc(tc->base.stream_uploader, 0,
                     total_count << index_size_shift, 4,
                     &buffer_offset, &buffer, (void**)&ptr);
      if (unlikely(!buffer))
         return;

      int total_offset = 0;
      while (num_draws) {
         struct tc_batch *next = &tc->batch_slots[tc->next];

         int nb_slots_left = TC_SLOTS_PER_BATCH - next->num_total_slots;
         /* If there isn't enough place for one draw, try to fill the next one */
         if (nb_slots_left < slots_for_one_draw)
            nb_slots_left = TC_SLOTS_PER_BATCH;
         const int size_left_bytes = nb_slots_left * sizeof(struct tc_call_base);

         /* How many draws can we fit in the current batch */
         const int dr = MIN2(num_draws, (size_left_bytes - draw_overhead_bytes) / one_draw_slot_bytes);

         struct tc_draw_multi *p =
            tc_add_slot_based_call(tc, TC_CALL_draw_multi, tc_draw_multi,
                                   dr);
         memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX);
         /* Each queued chunk takes its own reference on the upload buffer
          * implicitly via the pointer stored here; tc_call_draw_multi drops
          * one reference per chunk.
          */
         p->info.index.resource = buffer;
         p->num_draws = dr;

         /* Upload index buffers. */
         for (unsigned i = 0, offset = 0; i < dr; i++) {
            unsigned count = draws[i + total_offset].count;

            if (!count) {
               p->slot[i].start = 0;
               p->slot[i].count = 0;
               p->slot[i].index_bias = 0;
               continue;
            }

            unsigned size = count << index_size_shift;
            memcpy(ptr + offset,
                   (uint8_t*)info->index.user +
                   (draws[i + total_offset].start << index_size_shift), size);
            p->slot[i].start = (buffer_offset + offset) >> index_size_shift;
            p->slot[i].count = count;
            p->slot[i].index_bias = draws[i + total_offset].index_bias;
            offset += size;
         }

         total_offset += dr;
         num_draws -= dr;
      }
   } else {
      int total_offset = 0;
      bool take_index_buffer_ownership = info->take_index_buffer_ownership;
      while (num_draws) {
         struct tc_batch *next = &tc->batch_slots[tc->next];

         int nb_slots_left = TC_SLOTS_PER_BATCH - next->num_total_slots;
         /* If there isn't enough place for one draw, try to fill the next one */
         if (nb_slots_left < slots_for_one_draw)
            nb_slots_left = TC_SLOTS_PER_BATCH;
         const int size_left_bytes = nb_slots_left * sizeof(struct tc_call_base);

         /* How many draws can we fit in the current batch */
         const int dr = MIN2(num_draws, (size_left_bytes - draw_overhead_bytes) / one_draw_slot_bytes);

         /* Non-indexed call or indexed with a real index buffer. */
         struct tc_draw_multi *p =
            tc_add_slot_based_call(tc, TC_CALL_draw_multi, tc_draw_multi,
                                   dr);
         if (index_size) {
            if (!take_index_buffer_ownership) {
               tc_set_resource_reference(&p->info.index.resource,
                                         info->index.resource);
            }
            tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], info->index.resource);
         }
         /* Only the first chunk may inherit the caller's reference. */
         take_index_buffer_ownership = false;
         memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX);
         p->num_draws = dr;
         memcpy(p->slot, &draws[total_offset], sizeof(draws[0]) * dr);
         num_draws -= dr;

         total_offset += dr;
      }
   }
}
3343
/* Payload for one queued draw_vertex_state call. */
struct tc_draw_vstate_single {
   struct tc_call_base base;
   struct pipe_draw_start_count_bias draw;

   /* The following states must be together without holes because they are
    * compared by draw merging.
    */
   struct pipe_vertex_state *state;
   uint32_t partial_velem_mask;
   struct pipe_draw_vertex_state_info info;
};
3355
/* Return true if "next" is a draw_vertex_state call with the same state,
 * velem mask, and info as "first" (compared as one contiguous memcmp span
 * from "state" through "info"; see the struct's layout comment).
 */
static bool
is_next_call_a_mergeable_draw_vstate(struct tc_draw_vstate_single *first,
                                     struct tc_draw_vstate_single *next)
{
   if (next->base.call_id != TC_CALL_draw_vstate_single)
      return false;

   return !memcmp(&first->state, &next->state,
                  offsetof(struct tc_draw_vstate_single, info) +
                  sizeof(struct pipe_draw_vertex_state_info) -
                  offsetof(struct tc_draw_vstate_single, state));
}
3368
/* Driver-thread handler for a single draw_vertex_state call. Merges
 * consecutive compatible calls into one multi-draw, mirroring
 * tc_call_draw_single. Returns the number of call slots consumed.
 */
static uint16_t
tc_call_draw_vstate_single(struct pipe_context *pipe, void *call, uint64_t *last_ptr)
{
   /* Draw call merging. */
   struct tc_draw_vstate_single *first = to_call(call, tc_draw_vstate_single);
   struct tc_draw_vstate_single *last = (struct tc_draw_vstate_single *)last_ptr;
   struct tc_draw_vstate_single *next = get_next_call(first, tc_draw_vstate_single);

   /* If at least 2 consecutive draw calls can be merged... */
   if (next != last &&
       is_next_call_a_mergeable_draw_vstate(first, next)) {
      /* The maximum number of merged draws is given by the batch size. */
      struct pipe_draw_start_count_bias draws[TC_SLOTS_PER_BATCH /
                                              call_size(tc_draw_vstate_single)];
      unsigned num_draws = 2;

      draws[0] = first->draw;
      draws[1] = next->draw;

      /* Find how many other draws can be merged. */
      next = get_next_call(next, tc_draw_vstate_single);
      for (; next != last &&
           is_next_call_a_mergeable_draw_vstate(first, next);
           next = get_next_call(next, tc_draw_vstate_single),
           num_draws++)
         draws[num_draws] = next->draw;

      pipe->draw_vertex_state(pipe, first->state, first->partial_velem_mask,
                              first->info, draws, num_draws);
      /* Since all draws use the same state, drop all references at once. */
      tc_drop_vertex_state_references(first->state, num_draws);

      return call_size(tc_draw_vstate_single) * num_draws;
   }

   /* Unmergeable case: issue the draw on its own. */
   pipe->draw_vertex_state(pipe, first->state, first->partial_velem_mask,
                           first->info, &first->draw, 1);
   tc_drop_vertex_state_references(first->state, 1);
   return call_size(tc_draw_vstate_single);
}
3409
3410struct tc_draw_vstate_multi {
3411   struct tc_call_base base;
3412   uint32_t partial_velem_mask;
3413   struct pipe_draw_vertex_state_info info;
3414   unsigned num_draws;
3415   struct pipe_vertex_state *state;
3416   struct pipe_draw_start_count_bias slot[0];
3417};
3418
/* Driver-thread handler for a multi-draw_vertex_state call. */
static uint16_t
tc_call_draw_vstate_multi(struct pipe_context *pipe, void *call, uint64_t *last)
{
   struct tc_draw_vstate_multi *info = (struct tc_draw_vstate_multi*)call;

   pipe->draw_vertex_state(pipe, info->state, info->partial_velem_mask,
                           info->info, info->slot, info->num_draws);
   /* Each queued multi call holds exactly one vertex-state reference
    * (taken in tc_draw_vertex_state), regardless of num_draws.
    */
   tc_drop_vertex_state_references(info->state, 1);
   /* Variable-sized call: the slot count was recorded at enqueue time. */
   return info->base.num_slots;
}
3429
/* pipe_context::draw_vertex_state for the threaded context.
 *
 * Single draws are queued as tc_draw_vstate_single; multi draws are queued
 * as tc_draw_vstate_multi chunks split across batches. The vertex-state
 * reference is either duplicated per chunk or, with take_vertex_state_ownership,
 * the caller's reference is inherited by the first chunk only.
 */
static void
tc_draw_vertex_state(struct pipe_context *_pipe,
                     struct pipe_vertex_state *state,
                     uint32_t partial_velem_mask,
                     struct pipe_draw_vertex_state_info info,
                     const struct pipe_draw_start_count_bias *draws,
                     unsigned num_draws)
{
   struct threaded_context *tc = threaded_context(_pipe);

   if (unlikely(tc->add_all_gfx_bindings_to_buffer_list))
      tc_add_all_gfx_bindings_to_buffer_list(tc);

   if (num_draws == 1) {
      /* Single draw. */
      struct tc_draw_vstate_single *p =
         tc_add_call(tc, TC_CALL_draw_vstate_single, tc_draw_vstate_single);
      p->partial_velem_mask = partial_velem_mask;
      p->draw = draws[0];
      p->info.mode = info.mode;
      /* Ownership is resolved here; the driver never takes it. */
      p->info.take_vertex_state_ownership = false;

      /* This should be always 0 for simplicity because we assume that
       * index_bias doesn't vary.
       */
      assert(draws[0].index_bias == 0);

      if (!info.take_vertex_state_ownership)
         tc_set_vertex_state_reference(&p->state, state);
      else
         p->state = state;
      return;
   }

   /* Slot bookkeeping for splitting multi draws across batches. */
   const int draw_overhead_bytes = sizeof(struct tc_draw_vstate_multi);
   const int one_draw_slot_bytes = sizeof(((struct tc_draw_vstate_multi*)NULL)->slot[0]);
   const int slots_for_one_draw = DIV_ROUND_UP(draw_overhead_bytes + one_draw_slot_bytes,
                                               sizeof(struct tc_call_base));
   /* Multi draw. */
   int total_offset = 0;
   bool take_vertex_state_ownership = info.take_vertex_state_ownership;
   while (num_draws) {
      struct tc_batch *next = &tc->batch_slots[tc->next];

      int nb_slots_left = TC_SLOTS_PER_BATCH - next->num_total_slots;
      /* If there isn't enough place for one draw, try to fill the next one */
      if (nb_slots_left < slots_for_one_draw)
         nb_slots_left = TC_SLOTS_PER_BATCH;
      const int size_left_bytes = nb_slots_left * sizeof(struct tc_call_base);

      /* How many draws can we fit in the current batch */
      const int dr = MIN2(num_draws, (size_left_bytes - draw_overhead_bytes) / one_draw_slot_bytes);

      /* Non-indexed call or indexed with a real index buffer. */
      struct tc_draw_vstate_multi *p =
         tc_add_slot_based_call(tc, TC_CALL_draw_vstate_multi, tc_draw_vstate_multi, dr);

      if (!take_vertex_state_ownership)
         tc_set_vertex_state_reference(&p->state, state);
      else
         p->state = state;

      /* Only the first chunk may inherit the caller's reference. */
      take_vertex_state_ownership = false;
      p->partial_velem_mask = partial_velem_mask;
      p->info.mode = info.mode;
      p->info.take_vertex_state_ownership = false;
      p->num_draws = dr;
      memcpy(p->slot, &draws[total_offset], sizeof(draws[0]) * dr);
      num_draws -= dr;

      total_offset += dr;
   }
}
3503
/* Queued payload for launch_grid. */
struct tc_launch_grid_call {
   struct tc_call_base base;
   struct pipe_grid_info info;
};
3508
3509static uint16_t
3510tc_call_launch_grid(struct pipe_context *pipe, void *call, uint64_t *last)
3511{
3512   struct pipe_grid_info *p = &to_call(call, tc_launch_grid_call)->info;
3513
3514   pipe->launch_grid(pipe, p);
3515   tc_drop_resource_reference(p->indirect);
3516   return call_size(tc_launch_grid_call);
3517}
3518
3519static void
3520tc_launch_grid(struct pipe_context *_pipe,
3521               const struct pipe_grid_info *info)
3522{
3523   struct threaded_context *tc = threaded_context(_pipe);
3524   struct tc_launch_grid_call *p = tc_add_call(tc, TC_CALL_launch_grid,
3525                                               tc_launch_grid_call);
3526   assert(info->input == NULL);
3527
3528   if (unlikely(tc->add_all_compute_bindings_to_buffer_list))
3529      tc_add_all_compute_bindings_to_buffer_list(tc);
3530
3531   tc_set_resource_reference(&p->info.indirect, info->indirect);
3532   memcpy(&p->info, info, sizeof(*info));
3533
3534   if (info->indirect)
3535      tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], info->indirect);
3536}
3537
3538static uint16_t
3539tc_call_resource_copy_region(struct pipe_context *pipe, void *call, uint64_t *last)
3540{
3541   struct tc_resource_copy_region *p = to_call(call, tc_resource_copy_region);
3542
3543   pipe->resource_copy_region(pipe, p->dst, p->dst_level, p->dstx, p->dsty,
3544                              p->dstz, p->src, p->src_level, &p->src_box);
3545   tc_drop_resource_reference(p->dst);
3546   tc_drop_resource_reference(p->src);
3547   return call_size(tc_resource_copy_region);
3548}
3549
/* Queue a resource_copy_region call. Takes references on src and dst; for
 * buffer copies, also records both in the current buffer list and extends
 * the destination's valid range. */
static void
tc_resource_copy_region(struct pipe_context *_pipe,
                        struct pipe_resource *dst, unsigned dst_level,
                        unsigned dstx, unsigned dsty, unsigned dstz,
                        struct pipe_resource *src, unsigned src_level,
                        const struct pipe_box *src_box)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct threaded_resource *tdst = threaded_resource(dst);
   struct tc_resource_copy_region *p =
      tc_add_call(tc, TC_CALL_resource_copy_region,
                  tc_resource_copy_region);

   tc_set_resource_reference(&p->dst, dst);
   p->dst_level = dst_level;
   p->dstx = dstx;
   p->dsty = dsty;
   p->dstz = dstz;
   tc_set_resource_reference(&p->src, src);
   p->src_level = src_level;
   p->src_box = *src_box;

   if (dst->target == PIPE_BUFFER) {
      struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];

      /* Track both buffers so later maps can tell they are busy. */
      tc_add_to_buffer_list(next, src);
      tc_add_to_buffer_list(next, dst);

      /* The copied range now contains valid data. */
      util_range_add(&tdst->b, &tdst->valid_buffer_range,
                     dstx, dstx + src_box->width);
   }
}
3582
/* Deferred-call payload for pipe_context::blit; info.src/dst.resource hold
 * references dropped after execution. */
struct tc_blit_call {
   struct tc_call_base base;
   struct pipe_blit_info info;
};
3587
3588static uint16_t
3589tc_call_blit(struct pipe_context *pipe, void *call, uint64_t *last)
3590{
3591   struct pipe_blit_info *blit = &to_call(call, tc_blit_call)->info;
3592
3593   pipe->blit(pipe, blit);
3594   tc_drop_resource_reference(blit->dst.resource);
3595   tc_drop_resource_reference(blit->src.resource);
3596   return call_size(tc_blit_call);
3597}
3598
3599static void
3600tc_blit(struct pipe_context *_pipe, const struct pipe_blit_info *info)
3601{
3602   struct threaded_context *tc = threaded_context(_pipe);
3603   struct tc_blit_call *blit = tc_add_call(tc, TC_CALL_blit, tc_blit_call);
3604
3605   tc_set_resource_reference(&blit->info.dst.resource, info->dst.resource);
3606   tc_set_resource_reference(&blit->info.src.resource, info->src.resource);
3607   memcpy(&blit->info, info, sizeof(*info));
3608}
3609
/* Deferred-call payload for pipe_context::generate_mipmap; res holds a
 * reference dropped after execution. */
struct tc_generate_mipmap {
   struct tc_call_base base;
   enum pipe_format format;
   unsigned base_level;
   unsigned last_level;
   unsigned first_layer;
   unsigned last_layer;
   struct pipe_resource *res;
};
3619
3620static uint16_t
3621tc_call_generate_mipmap(struct pipe_context *pipe, void *call, uint64_t *last)
3622{
3623   struct tc_generate_mipmap *p = to_call(call, tc_generate_mipmap);
3624   ASSERTED bool result = pipe->generate_mipmap(pipe, p->res, p->format,
3625                                                    p->base_level,
3626                                                    p->last_level,
3627                                                    p->first_layer,
3628                                                    p->last_layer);
3629   assert(result);
3630   tc_drop_resource_reference(p->res);
3631   return call_size(tc_generate_mipmap);
3632}
3633
3634static bool
3635tc_generate_mipmap(struct pipe_context *_pipe,
3636                   struct pipe_resource *res,
3637                   enum pipe_format format,
3638                   unsigned base_level,
3639                   unsigned last_level,
3640                   unsigned first_layer,
3641                   unsigned last_layer)
3642{
3643   struct threaded_context *tc = threaded_context(_pipe);
3644   struct pipe_context *pipe = tc->pipe;
3645   struct pipe_screen *screen = pipe->screen;
3646   unsigned bind = PIPE_BIND_SAMPLER_VIEW;
3647
3648   if (util_format_is_depth_or_stencil(format))
3649      bind = PIPE_BIND_DEPTH_STENCIL;
3650   else
3651      bind = PIPE_BIND_RENDER_TARGET;
3652
3653   if (!screen->is_format_supported(screen, format, res->target,
3654                                    res->nr_samples, res->nr_storage_samples,
3655                                    bind))
3656      return false;
3657
3658   struct tc_generate_mipmap *p =
3659      tc_add_call(tc, TC_CALL_generate_mipmap, tc_generate_mipmap);
3660
3661   tc_set_resource_reference(&p->res, res);
3662   p->format = format;
3663   p->base_level = base_level;
3664   p->last_level = last_level;
3665   p->first_layer = first_layer;
3666   p->last_layer = last_layer;
3667   return true;
3668}
3669
/* Generic one-resource payload, shared by flush_resource and
 * invalidate_resource; resource holds a reference dropped after execution. */
struct tc_resource_call {
   struct tc_call_base base;
   struct pipe_resource *resource;
};
3674
3675static uint16_t
3676tc_call_flush_resource(struct pipe_context *pipe, void *call, uint64_t *last)
3677{
3678   struct pipe_resource *resource = to_call(call, tc_resource_call)->resource;
3679
3680   pipe->flush_resource(pipe, resource);
3681   tc_drop_resource_reference(resource);
3682   return call_size(tc_resource_call);
3683}
3684
3685static void
3686tc_flush_resource(struct pipe_context *_pipe, struct pipe_resource *resource)
3687{
3688   struct threaded_context *tc = threaded_context(_pipe);
3689   struct tc_resource_call *call = tc_add_call(tc, TC_CALL_flush_resource,
3690                                               tc_resource_call);
3691
3692   tc_set_resource_reference(&call->resource, resource);
3693}
3694
3695static uint16_t
3696tc_call_invalidate_resource(struct pipe_context *pipe, void *call, uint64_t *last)
3697{
3698   struct pipe_resource *resource = to_call(call, tc_resource_call)->resource;
3699
3700   pipe->invalidate_resource(pipe, resource);
3701   tc_drop_resource_reference(resource);
3702   return call_size(tc_resource_call);
3703}
3704
/* Invalidate a resource's contents. Buffers are handled immediately on the
 * application thread via tc_invalidate_buffer; other resources are queued
 * as a deferred driver call. */
static void
tc_invalidate_resource(struct pipe_context *_pipe,
                       struct pipe_resource *resource)
{
   struct threaded_context *tc = threaded_context(_pipe);

   if (resource->target == PIPE_BUFFER) {
      tc_invalidate_buffer(tc, threaded_resource(resource));
      return;
   }

   struct tc_resource_call *call = tc_add_call(tc, TC_CALL_invalidate_resource,
                                               tc_resource_call);
   tc_set_resource_reference(&call->resource, resource);
}
3720
/* Deferred-call payload for pipe_context::clear. Note: the API's double
 * depth argument is stored as float here. Small fields are ordered first
 * to minimize padding. */
struct tc_clear {
   struct tc_call_base base;
   bool scissor_state_set; /* whether scissor_state below is valid */
   uint8_t stencil;
   uint16_t buffers;
   float depth;
   struct pipe_scissor_state scissor_state;
   union pipe_color_union color;
};
3730
3731static uint16_t
3732tc_call_clear(struct pipe_context *pipe, void *call, uint64_t *last)
3733{
3734   struct tc_clear *p = to_call(call, tc_clear);
3735
3736   pipe->clear(pipe, p->buffers, p->scissor_state_set ? &p->scissor_state : NULL, &p->color, p->depth, p->stencil);
3737   return call_size(tc_clear);
3738}
3739
3740static void
3741tc_clear(struct pipe_context *_pipe, unsigned buffers, const struct pipe_scissor_state *scissor_state,
3742         const union pipe_color_union *color, double depth,
3743         unsigned stencil)
3744{
3745   struct threaded_context *tc = threaded_context(_pipe);
3746   struct tc_clear *p = tc_add_call(tc, TC_CALL_clear, tc_clear);
3747
3748   p->buffers = buffers;
3749   if (scissor_state)
3750      p->scissor_state = *scissor_state;
3751   p->scissor_state_set = !!scissor_state;
3752   p->color = *color;
3753   p->depth = depth;
3754   p->stencil = stencil;
3755}
3756
/* Deferred-call payload for pipe_context::clear_render_target; dst holds a
 * surface reference dropped after execution. */
struct tc_clear_render_target {
   struct tc_call_base base;
   bool render_condition_enabled;
   unsigned dstx;
   unsigned dsty;
   unsigned width;
   unsigned height;
   union pipe_color_union color;
   struct pipe_surface *dst;
};
3767
3768static uint16_t
3769tc_call_clear_render_target(struct pipe_context *pipe, void *call, uint64_t *last)
3770{
3771   struct tc_clear_render_target *p = to_call(call, tc_clear_render_target);
3772
3773   pipe->clear_render_target(pipe, p->dst, &p->color, p->dstx, p->dsty, p->width, p->height,
3774                             p->render_condition_enabled);
3775   tc_drop_surface_reference(p->dst);
3776   return call_size(tc_clear_render_target);
3777}
3778
/* Queue a clear_render_target call, taking a reference on the surface. */
static void
tc_clear_render_target(struct pipe_context *_pipe,
                       struct pipe_surface *dst,
                       const union pipe_color_union *color,
                       unsigned dstx, unsigned dsty,
                       unsigned width, unsigned height,
                       bool render_condition_enabled)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct tc_clear_render_target *p = tc_add_call(tc, TC_CALL_clear_render_target, tc_clear_render_target);
   /* p->dst must start as NULL so pipe_surface_reference doesn't
    * unreference a stale pointer left in the reused call-slot memory. */
   p->dst = NULL;
   pipe_surface_reference(&p->dst, dst);
   p->color = *color;
   p->dstx = dstx;
   p->dsty = dsty;
   p->width = width;
   p->height = height;
   p->render_condition_enabled = render_condition_enabled;
}
3798
3799
/* Deferred-call payload for pipe_context::clear_depth_stencil; dst holds a
 * surface reference dropped after execution. Note: the API's double depth
 * argument is stored as float. */
struct tc_clear_depth_stencil {
   struct tc_call_base base;
   bool render_condition_enabled;
   float depth;
   unsigned clear_flags;
   unsigned stencil;
   unsigned dstx;
   unsigned dsty;
   unsigned width;
   unsigned height;
   struct pipe_surface *dst;
};
3812
3813
3814static uint16_t
3815tc_call_clear_depth_stencil(struct pipe_context *pipe, void *call, uint64_t *last)
3816{
3817   struct tc_clear_depth_stencil *p = to_call(call, tc_clear_depth_stencil);
3818
3819   pipe->clear_depth_stencil(pipe, p->dst, p->clear_flags, p->depth, p->stencil,
3820                             p->dstx, p->dsty, p->width, p->height,
3821                             p->render_condition_enabled);
3822   tc_drop_surface_reference(p->dst);
3823   return call_size(tc_clear_depth_stencil);
3824}
3825
/* Queue a clear_depth_stencil call, taking a reference on the surface. */
static void
tc_clear_depth_stencil(struct pipe_context *_pipe,
                       struct pipe_surface *dst, unsigned clear_flags,
                       double depth, unsigned stencil, unsigned dstx,
                       unsigned dsty, unsigned width, unsigned height,
                       bool render_condition_enabled)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct tc_clear_depth_stencil *p = tc_add_call(tc, TC_CALL_clear_depth_stencil, tc_clear_depth_stencil);
   /* p->dst must start as NULL so pipe_surface_reference doesn't
    * unreference a stale pointer left in the reused call-slot memory. */
   p->dst = NULL;
   pipe_surface_reference(&p->dst, dst);
   p->clear_flags = clear_flags;
   p->depth = depth;
   p->stencil = stencil;
   p->dstx = dstx;
   p->dsty = dsty;
   p->width = width;
   p->height = height;
   p->render_condition_enabled = render_condition_enabled;
}
3846
/* Deferred-call payload for pipe_context::clear_buffer; the clear value is
 * stored inline (up to 16 bytes) and res holds a reference dropped after
 * execution. */
struct tc_clear_buffer {
   struct tc_call_base base;
   uint8_t clear_value_size;
   unsigned offset;
   unsigned size;
   char clear_value[16];
   struct pipe_resource *res;
};
3855
3856static uint16_t
3857tc_call_clear_buffer(struct pipe_context *pipe, void *call, uint64_t *last)
3858{
3859   struct tc_clear_buffer *p = to_call(call, tc_clear_buffer);
3860
3861   pipe->clear_buffer(pipe, p->res, p->offset, p->size, p->clear_value,
3862                      p->clear_value_size);
3863   tc_drop_resource_reference(p->res);
3864   return call_size(tc_clear_buffer);
3865}
3866
3867static void
3868tc_clear_buffer(struct pipe_context *_pipe, struct pipe_resource *res,
3869                unsigned offset, unsigned size,
3870                const void *clear_value, int clear_value_size)
3871{
3872   struct threaded_context *tc = threaded_context(_pipe);
3873   struct threaded_resource *tres = threaded_resource(res);
3874   struct tc_clear_buffer *p =
3875      tc_add_call(tc, TC_CALL_clear_buffer, tc_clear_buffer);
3876
3877   tc_set_resource_reference(&p->res, res);
3878   tc_add_to_buffer_list(&tc->buffer_lists[tc->next_buf_list], res);
3879   p->offset = offset;
3880   p->size = size;
3881   memcpy(p->clear_value, clear_value, clear_value_size);
3882   p->clear_value_size = clear_value_size;
3883
3884   util_range_add(&tres->b, &tres->valid_buffer_range, offset, offset + size);
3885}
3886
/* Deferred-call payload for pipe_context::clear_texture; one texel worth of
 * clear data is stored inline (up to 16 bytes) and res holds a reference
 * dropped after execution. */
struct tc_clear_texture {
   struct tc_call_base base;
   unsigned level;
   struct pipe_box box;
   char data[16];
   struct pipe_resource *res;
};
3894
3895static uint16_t
3896tc_call_clear_texture(struct pipe_context *pipe, void *call, uint64_t *last)
3897{
3898   struct tc_clear_texture *p = to_call(call, tc_clear_texture);
3899
3900   pipe->clear_texture(pipe, p->res, p->level, &p->box, p->data);
3901   tc_drop_resource_reference(p->res);
3902   return call_size(tc_clear_texture);
3903}
3904
3905static void
3906tc_clear_texture(struct pipe_context *_pipe, struct pipe_resource *res,
3907                 unsigned level, const struct pipe_box *box, const void *data)
3908{
3909   struct threaded_context *tc = threaded_context(_pipe);
3910   struct tc_clear_texture *p =
3911      tc_add_call(tc, TC_CALL_clear_texture, tc_clear_texture);
3912
3913   tc_set_resource_reference(&p->res, res);
3914   p->level = level;
3915   p->box = *box;
3916   memcpy(p->data, data,
3917          util_format_get_blocksize(res->format));
3918}
3919
/* Deferred-call payload for pipe_context::resource_commit; res holds a
 * reference dropped after execution. */
struct tc_resource_commit {
   struct tc_call_base base;
   bool commit;
   unsigned level;
   struct pipe_box box;
   struct pipe_resource *res;
};
3927
3928static uint16_t
3929tc_call_resource_commit(struct pipe_context *pipe, void *call, uint64_t *last)
3930{
3931   struct tc_resource_commit *p = to_call(call, tc_resource_commit);
3932
3933   pipe->resource_commit(pipe, p->res, p->level, &p->box, p->commit);
3934   tc_drop_resource_reference(p->res);
3935   return call_size(tc_resource_commit);
3936}
3937
3938static bool
3939tc_resource_commit(struct pipe_context *_pipe, struct pipe_resource *res,
3940                   unsigned level, struct pipe_box *box, bool commit)
3941{
3942   struct threaded_context *tc = threaded_context(_pipe);
3943   struct tc_resource_commit *p =
3944      tc_add_call(tc, TC_CALL_resource_commit, tc_resource_commit);
3945
3946   tc_set_resource_reference(&p->res, res);
3947   p->level = level;
3948   p->box = *box;
3949   p->commit = commit;
3950   return true; /* we don't care about the return value for this call */
3951}
3952
3953static unsigned
3954tc_init_intel_perf_query_info(struct pipe_context *_pipe)
3955{
3956   struct threaded_context *tc = threaded_context(_pipe);
3957   struct pipe_context *pipe = tc->pipe;
3958
3959   return pipe->init_intel_perf_query_info(pipe);
3960}
3961
3962static void
3963tc_get_intel_perf_query_info(struct pipe_context *_pipe,
3964                             unsigned query_index,
3965                             const char **name,
3966                             uint32_t *data_size,
3967                             uint32_t *n_counters,
3968                             uint32_t *n_active)
3969{
3970   struct threaded_context *tc = threaded_context(_pipe);
3971   struct pipe_context *pipe = tc->pipe;
3972
3973   tc_sync(tc); /* n_active vs begin/end_intel_perf_query */
3974   pipe->get_intel_perf_query_info(pipe, query_index, name, data_size,
3975         n_counters, n_active);
3976}
3977
3978static void
3979tc_get_intel_perf_query_counter_info(struct pipe_context *_pipe,
3980                                     unsigned query_index,
3981                                     unsigned counter_index,
3982                                     const char **name,
3983                                     const char **desc,
3984                                     uint32_t *offset,
3985                                     uint32_t *data_size,
3986                                     uint32_t *type_enum,
3987                                     uint32_t *data_type_enum,
3988                                     uint64_t *raw_max)
3989{
3990   struct threaded_context *tc = threaded_context(_pipe);
3991   struct pipe_context *pipe = tc->pipe;
3992
3993   pipe->get_intel_perf_query_counter_info(pipe, query_index, counter_index,
3994         name, desc, offset, data_size, type_enum, data_type_enum, raw_max);
3995}
3996
3997static struct pipe_query *
3998tc_new_intel_perf_query_obj(struct pipe_context *_pipe, unsigned query_index)
3999{
4000   struct threaded_context *tc = threaded_context(_pipe);
4001   struct pipe_context *pipe = tc->pipe;
4002
4003   return pipe->new_intel_perf_query_obj(pipe, query_index);
4004}
4005
4006static uint16_t
4007tc_call_begin_intel_perf_query(struct pipe_context *pipe, void *call, uint64_t *last)
4008{
4009   (void)pipe->begin_intel_perf_query(pipe, to_call(call, tc_query_call)->query);
4010   return call_size(tc_query_call);
4011}
4012
4013static bool
4014tc_begin_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q)
4015{
4016   struct threaded_context *tc = threaded_context(_pipe);
4017
4018   tc_add_call(tc, TC_CALL_begin_intel_perf_query, tc_query_call)->query = q;
4019
4020   /* assume success, begin failure can be signaled from get_intel_perf_query_data */
4021   return true;
4022}
4023
4024static uint16_t
4025tc_call_end_intel_perf_query(struct pipe_context *pipe, void *call, uint64_t *last)
4026{
4027   pipe->end_intel_perf_query(pipe, to_call(call, tc_query_call)->query);
4028   return call_size(tc_query_call);
4029}
4030
4031static void
4032tc_end_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q)
4033{
4034   struct threaded_context *tc = threaded_context(_pipe);
4035
4036   tc_add_call(tc, TC_CALL_end_intel_perf_query, tc_query_call)->query = q;
4037}
4038
4039static void
4040tc_delete_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q)
4041{
4042   struct threaded_context *tc = threaded_context(_pipe);
4043   struct pipe_context *pipe = tc->pipe;
4044
4045   tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */
4046   pipe->delete_intel_perf_query(pipe, q);
4047}
4048
4049static void
4050tc_wait_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q)
4051{
4052   struct threaded_context *tc = threaded_context(_pipe);
4053   struct pipe_context *pipe = tc->pipe;
4054
4055   tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */
4056   pipe->wait_intel_perf_query(pipe, q);
4057}
4058
4059static bool
4060tc_is_intel_perf_query_ready(struct pipe_context *_pipe, struct pipe_query *q)
4061{
4062   struct threaded_context *tc = threaded_context(_pipe);
4063   struct pipe_context *pipe = tc->pipe;
4064
4065   tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */
4066   return pipe->is_intel_perf_query_ready(pipe, q);
4067}
4068
4069static bool
4070tc_get_intel_perf_query_data(struct pipe_context *_pipe,
4071                             struct pipe_query *q,
4072                             size_t data_size,
4073                             uint32_t *data,
4074                             uint32_t *bytes_written)
4075{
4076   struct threaded_context *tc = threaded_context(_pipe);
4077   struct pipe_context *pipe = tc->pipe;
4078
4079   tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */
4080   return pipe->get_intel_perf_query_data(pipe, q, data_size, data, bytes_written);
4081}
4082
4083/********************************************************************
4084 * callback
4085 */
4086
/* Deferred-call payload for pipe_context::callback: an arbitrary function
 * pointer plus its user data. */
struct tc_callback_call {
   struct tc_call_base base;
   void (*fn)(void *data);
   void *data;
};
4092
4093static uint16_t
4094tc_call_callback(UNUSED struct pipe_context *pipe, void *call, uint64_t *last)
4095{
4096   struct tc_callback_call *p = to_call(call, tc_callback_call);
4097
4098   p->fn(p->data);
4099   return call_size(tc_callback_call);
4100}
4101
4102static void
4103tc_callback(struct pipe_context *_pipe, void (*fn)(void *), void *data,
4104            bool asap)
4105{
4106   struct threaded_context *tc = threaded_context(_pipe);
4107
4108   if (asap && tc_is_sync(tc)) {
4109      fn(data);
4110      return;
4111   }
4112
4113   struct tc_callback_call *p =
4114      tc_add_call(tc, TC_CALL_callback, tc_callback_call);
4115   p->fn = fn;
4116   p->data = data;
4117}
4118
4119
4120/********************************************************************
4121 * create & destroy
4122 */
4123
/* Destroy the threaded context: tear down the uploaders, drain the queue,
 * destroy the wrapped driver context, and free all fences. The ordering
 * below is deliberate — do not reorder without care. */
static void
tc_destroy(struct pipe_context *_pipe)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct pipe_context *pipe = tc->pipe;

   /* The const uploader may alias the stream uploader; destroy it only
    * when it's a distinct object. */
   if (tc->base.const_uploader &&
       tc->base.stream_uploader != tc->base.const_uploader)
      u_upload_destroy(tc->base.const_uploader);

   if (tc->base.stream_uploader)
      u_upload_destroy(tc->base.stream_uploader);

   /* Drain all queued calls before tearing down the queue. */
   tc_sync(tc);

   /* The queue may be uninitialized if creation failed early (fail path in
    * threaded_context_create also lands here). */
   if (util_queue_is_initialized(&tc->queue)) {
      util_queue_destroy(&tc->queue);

      for (unsigned i = 0; i < TC_MAX_BATCHES; i++) {
         util_queue_fence_destroy(&tc->batch_slots[i].fence);
         assert(!tc->batch_slots[i].token);
      }
   }

   slab_destroy_child(&tc->pool_transfers);
   /* After tc_sync, the current batch must be empty. */
   assert(tc->batch_slots[tc->next].num_total_slots == 0);
   pipe->destroy(pipe);

   /* Signal any still-pending buffer-list fences before destroying them. */
   for (unsigned i = 0; i < TC_MAX_BUFFER_LISTS; i++) {
      if (!util_queue_fence_is_signalled(&tc->buffer_lists[i].driver_flushed_fence))
         util_queue_fence_signal(&tc->buffer_lists[i].driver_flushed_fence);
      util_queue_fence_destroy(&tc->buffer_lists[i].driver_flushed_fence);
   }

   FREE(tc);
}
4160
/* Dispatch table mapping each TC_CALL_* id to its tc_call_* executor,
 * generated via the CALL X-macro from u_threaded_context_calls.h. */
static const tc_execute execute_func[TC_NUM_CALLS] = {
#define CALL(name) tc_call_##name,
#include "u_threaded_context_calls.h"
#undef CALL
};
4166
4167void tc_driver_internal_flush_notify(struct threaded_context *tc)
4168{
4169   /* Allow drivers to call this function even for internal contexts that
4170    * don't have tc. It simplifies drivers.
4171    */
4172   if (!tc)
4173      return;
4174
4175   /* Signal fences set by tc_batch_execute. */
4176   for (unsigned i = 0; i < tc->num_signal_fences_next_flush; i++)
4177      util_queue_fence_signal(tc->signal_fences_next_flush[i]);
4178
4179   tc->num_signal_fences_next_flush = 0;
4180}
4181
4182/**
4183 * Wrap an existing pipe_context into a threaded_context.
4184 *
4185 * \param pipe                 pipe_context to wrap
4186 * \param parent_transfer_pool parent slab pool set up for creating pipe_-
4187 *                             transfer objects; the driver should have one
4188 *                             in pipe_screen.
4189 * \param replace_buffer  callback for replacing a pipe_resource's storage
4190 *                        with another pipe_resource's storage.
4191 * \param options         optional TC options/callbacks
4192 * \param out  if successful, the threaded_context will be returned here in
4193 *             addition to the return value if "out" != NULL
4194 */
4195struct pipe_context *
4196threaded_context_create(struct pipe_context *pipe,
4197                        struct slab_parent_pool *parent_transfer_pool,
4198                        tc_replace_buffer_storage_func replace_buffer,
4199                        const struct threaded_context_options *options,
4200                        struct threaded_context **out)
4201{
4202   struct threaded_context *tc;
4203
4204   if (!pipe)
4205      return NULL;
4206
4207   util_cpu_detect();
4208
4209   if (!debug_get_bool_option("GALLIUM_THREAD", util_get_cpu_caps()->nr_cpus > 1))
4210      return pipe;
4211
4212   tc = CALLOC_STRUCT(threaded_context);
4213   if (!tc) {
4214      pipe->destroy(pipe);
4215      return NULL;
4216   }
4217
4218   if (options)
4219      tc->options = *options;
4220
4221   pipe = trace_context_create_threaded(pipe->screen, pipe, &replace_buffer, &tc->options);
4222
4223   /* The driver context isn't wrapped, so set its "priv" to NULL. */
4224   pipe->priv = NULL;
4225
4226   tc->pipe = pipe;
4227   tc->replace_buffer_storage = replace_buffer;
4228   tc->map_buffer_alignment =
4229      pipe->screen->get_param(pipe->screen, PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT);
4230   tc->ubo_alignment =
4231      MAX2(pipe->screen->get_param(pipe->screen, PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT), 64);
4232   tc->base.priv = pipe; /* priv points to the wrapped driver context */
4233   tc->base.screen = pipe->screen;
4234   tc->base.destroy = tc_destroy;
4235   tc->base.callback = tc_callback;
4236
4237   tc->base.stream_uploader = u_upload_clone(&tc->base, pipe->stream_uploader);
4238   if (pipe->stream_uploader == pipe->const_uploader)
4239      tc->base.const_uploader = tc->base.stream_uploader;
4240   else
4241      tc->base.const_uploader = u_upload_clone(&tc->base, pipe->const_uploader);
4242
4243   if (!tc->base.stream_uploader || !tc->base.const_uploader)
4244      goto fail;
4245
4246   tc->use_forced_staging_uploads = true;
4247
4248   /* The queue size is the number of batches "waiting". Batches are removed
4249    * from the queue before being executed, so keep one tc_batch slot for that
4250    * execution. Also, keep one unused slot for an unflushed batch.
4251    */
4252   if (!util_queue_init(&tc->queue, "gdrv", TC_MAX_BATCHES - 2, 1, 0, NULL))
4253      goto fail;
4254
4255   for (unsigned i = 0; i < TC_MAX_BATCHES; i++) {
4256#if !defined(NDEBUG) && TC_DEBUG >= 1
4257      tc->batch_slots[i].sentinel = TC_SENTINEL;
4258#endif
4259      tc->batch_slots[i].tc = tc;
4260      util_queue_fence_init(&tc->batch_slots[i].fence);
4261   }
4262   for (unsigned i = 0; i < TC_MAX_BUFFER_LISTS; i++)
4263      util_queue_fence_init(&tc->buffer_lists[i].driver_flushed_fence);
4264
4265   list_inithead(&tc->unflushed_queries);
4266
4267   slab_create_child(&tc->pool_transfers, parent_transfer_pool);
4268
4269   /* If you have different limits in each shader stage, set the maximum. */
4270   struct pipe_screen *screen = pipe->screen;;
4271   tc->max_vertex_buffers =
4272      screen->get_param(screen, PIPE_CAP_MAX_VERTEX_BUFFERS);
4273   tc->max_const_buffers =
4274      screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
4275                               PIPE_SHADER_CAP_MAX_CONST_BUFFERS);
4276   tc->max_shader_buffers =
4277      screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
4278                               PIPE_SHADER_CAP_MAX_SHADER_BUFFERS);
4279   tc->max_images =
4280      screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
4281                               PIPE_SHADER_CAP_MAX_SHADER_IMAGES);
4282   tc->max_samplers =
4283      screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
4284                               PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS);
4285
4286   tc->base.set_context_param = tc_set_context_param; /* always set this */
4287
4288#define CTX_INIT(_member) \
4289   tc->base._member = tc->pipe->_member ? tc_##_member : NULL
4290
4291   CTX_INIT(flush);
4292   CTX_INIT(draw_vbo);
4293   CTX_INIT(draw_vertex_state);
4294   CTX_INIT(launch_grid);
4295   CTX_INIT(resource_copy_region);
4296   CTX_INIT(blit);
4297   CTX_INIT(clear);
4298   CTX_INIT(clear_render_target);
4299   CTX_INIT(clear_depth_stencil);
4300   CTX_INIT(clear_buffer);
4301   CTX_INIT(clear_texture);
4302   CTX_INIT(flush_resource);
4303   CTX_INIT(generate_mipmap);
4304   CTX_INIT(render_condition);
4305   CTX_INIT(create_query);
4306   CTX_INIT(create_batch_query);
4307   CTX_INIT(destroy_query);
4308   CTX_INIT(begin_query);
4309   CTX_INIT(end_query);
4310   CTX_INIT(get_query_result);
4311   CTX_INIT(get_query_result_resource);
4312   CTX_INIT(set_active_query_state);
4313   CTX_INIT(create_blend_state);
4314   CTX_INIT(bind_blend_state);
4315   CTX_INIT(delete_blend_state);
4316   CTX_INIT(create_sampler_state);
4317   CTX_INIT(bind_sampler_states);
4318   CTX_INIT(delete_sampler_state);
4319   CTX_INIT(create_rasterizer_state);
4320   CTX_INIT(bind_rasterizer_state);
4321   CTX_INIT(delete_rasterizer_state);
4322   CTX_INIT(create_depth_stencil_alpha_state);
4323   CTX_INIT(bind_depth_stencil_alpha_state);
4324   CTX_INIT(delete_depth_stencil_alpha_state);
4325   CTX_INIT(create_fs_state);
4326   CTX_INIT(bind_fs_state);
4327   CTX_INIT(delete_fs_state);
4328   CTX_INIT(create_vs_state);
4329   CTX_INIT(bind_vs_state);
4330   CTX_INIT(delete_vs_state);
4331   CTX_INIT(create_gs_state);
4332   CTX_INIT(bind_gs_state);
4333   CTX_INIT(delete_gs_state);
4334   CTX_INIT(create_tcs_state);
4335   CTX_INIT(bind_tcs_state);
4336   CTX_INIT(delete_tcs_state);
4337   CTX_INIT(create_tes_state);
4338   CTX_INIT(bind_tes_state);
4339   CTX_INIT(delete_tes_state);
4340   CTX_INIT(create_compute_state);
4341   CTX_INIT(bind_compute_state);
4342   CTX_INIT(delete_compute_state);
4343   CTX_INIT(create_vertex_elements_state);
4344   CTX_INIT(bind_vertex_elements_state);
4345   CTX_INIT(delete_vertex_elements_state);
4346   CTX_INIT(set_blend_color);
4347   CTX_INIT(set_stencil_ref);
4348   CTX_INIT(set_sample_mask);
4349   CTX_INIT(set_min_samples);
4350   CTX_INIT(set_clip_state);
4351   CTX_INIT(set_constant_buffer);
4352   CTX_INIT(set_inlinable_constants);
4353   CTX_INIT(set_framebuffer_state);
4354   CTX_INIT(set_polygon_stipple);
4355   CTX_INIT(set_sample_locations);
4356   CTX_INIT(set_scissor_states);
4357   CTX_INIT(set_viewport_states);
4358   CTX_INIT(set_window_rectangles);
4359   CTX_INIT(set_sampler_views);
4360   CTX_INIT(set_tess_state);
4361   CTX_INIT(set_patch_vertices);
4362   CTX_INIT(set_shader_buffers);
4363   CTX_INIT(set_shader_images);
4364   CTX_INIT(set_vertex_buffers);
4365   CTX_INIT(create_stream_output_target);
4366   CTX_INIT(stream_output_target_destroy);
4367   CTX_INIT(set_stream_output_targets);
4368   CTX_INIT(create_sampler_view);
4369   CTX_INIT(sampler_view_destroy);
4370   CTX_INIT(create_surface);
4371   CTX_INIT(surface_destroy);
4372   CTX_INIT(buffer_map);
4373   CTX_INIT(texture_map);
4374   CTX_INIT(transfer_flush_region);
4375   CTX_INIT(buffer_unmap);
4376   CTX_INIT(texture_unmap);
4377   CTX_INIT(buffer_subdata);
4378   CTX_INIT(texture_subdata);
4379   CTX_INIT(texture_barrier);
4380   CTX_INIT(memory_barrier);
4381   CTX_INIT(resource_commit);
4382   CTX_INIT(create_video_codec);
4383   CTX_INIT(create_video_buffer);
4384   CTX_INIT(set_compute_resources);
4385   CTX_INIT(set_global_binding);
4386   CTX_INIT(get_sample_position);
4387   CTX_INIT(invalidate_resource);
4388   CTX_INIT(get_device_reset_status);
4389   CTX_INIT(set_device_reset_callback);
4390   CTX_INIT(dump_debug_state);
4391   CTX_INIT(set_log_context);
4392   CTX_INIT(emit_string_marker);
4393   CTX_INIT(set_debug_callback);
4394   CTX_INIT(create_fence_fd);
4395   CTX_INIT(fence_server_sync);
4396   CTX_INIT(fence_server_signal);
4397   CTX_INIT(get_timestamp);
4398   CTX_INIT(create_texture_handle);
4399   CTX_INIT(delete_texture_handle);
4400   CTX_INIT(make_texture_handle_resident);
4401   CTX_INIT(create_image_handle);
4402   CTX_INIT(delete_image_handle);
4403   CTX_INIT(make_image_handle_resident);
4404   CTX_INIT(set_frontend_noop);
4405   CTX_INIT(init_intel_perf_query_info);
4406   CTX_INIT(get_intel_perf_query_info);
4407   CTX_INIT(get_intel_perf_query_counter_info);
4408   CTX_INIT(new_intel_perf_query_obj);
4409   CTX_INIT(begin_intel_perf_query);
4410   CTX_INIT(end_intel_perf_query);
4411   CTX_INIT(delete_intel_perf_query);
4412   CTX_INIT(wait_intel_perf_query);
4413   CTX_INIT(is_intel_perf_query_ready);
4414   CTX_INIT(get_intel_perf_query_data);
4415#undef CTX_INIT
4416
4417   if (out)
4418      *out = tc;
4419
4420   tc_begin_next_buffer_list(tc);
4421   return &tc->base;
4422
4423fail:
4424   tc_destroy(&tc->base);
4425   return NULL;
4426}
4427
4428void
4429threaded_context_init_bytes_mapped_limit(struct threaded_context *tc, unsigned divisor)
4430{
4431   uint64_t total_ram;
4432   if (os_get_total_physical_memory(&total_ram)) {
4433      tc->bytes_mapped_limit = total_ram / divisor;
4434      if (sizeof(void*) == 4)
4435         tc->bytes_mapped_limit = MIN2(tc->bytes_mapped_limit, 512*1024*1024UL);
4436   }
4437}
4438