u_helpers.c revision 361fc4cb
/**************************************************************************
 *
 * Copyright 2012 Marek Olšák <maraeo@gmail.com>
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS AND/OR THEIR SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

#include "util/u_cpu_detect.h"
#include "util/u_helpers.h"
#include "util/u_inlines.h"
#include "util/u_upload_mgr.h"
#include "util/u_thread.h"
#include "util/os_time.h"
#include <inttypes.h>

/**
 * This function is used to copy an array of pipe_vertex_buffer structures,
 * while properly referencing the pipe_vertex_buffer::buffer member.
 *
 * enabled_buffers is updated such that the bits corresponding to the indices
 * of disabled buffers are set to 0 and the enabled ones are set to 1.
 *
 * \sa util_copy_framebuffer_state
 */
void util_set_vertex_buffers_mask(struct pipe_vertex_buffer *dst,
                                  uint32_t *enabled_buffers,
                                  const struct pipe_vertex_buffer *src,
                                  unsigned start_slot, unsigned count)
{
   unsigned i;
   uint32_t bitmask = 0;

   dst += start_slot;

   if (src) {
      for (i = 0; i < count; i++) {
         if (src[i].buffer.resource)
            bitmask |= 1u << i;

         pipe_vertex_buffer_unreference(&dst[i]);

         if (!src[i].is_user_buffer)
            pipe_resource_reference(&dst[i].buffer.resource, src[i].buffer.resource);
      }

      /* Copy over the other members of pipe_vertex_buffer. */
      memcpy(dst, src, count * sizeof(struct pipe_vertex_buffer));

      *enabled_buffers &= ~(((1ull << count) - 1) << start_slot);
      *enabled_buffers |= bitmask << start_slot;
   }
   else {
      /* Unreference the buffers. */
      for (i = 0; i < count; i++)
         pipe_vertex_buffer_unreference(&dst[i]);

      *enabled_buffers &= ~(((1ull << count) - 1) << start_slot);
   }
}

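/* Example usage (illustrative sketch, not part of this file): a driver's
 * pipe_context::set_vertex_buffers hook can forward to this helper and keep
 * a bitmask of enabled slots. The mydrv_* names and dirty flag below are
 * hypothetical.
 *
 *    static void
 *    mydrv_set_vertex_buffers(struct pipe_context *pctx, unsigned start_slot,
 *                             unsigned count,
 *                             const struct pipe_vertex_buffer *buffers)
 *    {
 *       struct mydrv_context *ctx = mydrv_context(pctx);
 *
 *       util_set_vertex_buffers_mask(ctx->vertex_buffers, &ctx->enabled_vbs,
 *                                    buffers, start_slot, count);
 *       ctx->dirty |= MYDRV_DIRTY_VERTEX_BUFFERS;
 *    }
 */
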
/**
 * Same as util_set_vertex_buffers_mask, but it keeps track of the number
 * of bound buffers in *dst_count instead of an enabled-buffer bitmask.
 */
void util_set_vertex_buffers_count(struct pipe_vertex_buffer *dst,
                                   unsigned *dst_count,
                                   const struct pipe_vertex_buffer *src,
                                   unsigned start_slot, unsigned count)
{
   unsigned i;
   uint32_t enabled_buffers = 0;

   for (i = 0; i < *dst_count; i++) {
      if (dst[i].buffer.resource)
         enabled_buffers |= (1ull << i);
   }

   util_set_vertex_buffers_mask(dst, &enabled_buffers, src, start_slot,
                                count);

   *dst_count = util_last_bit(enabled_buffers);
}

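/* Example usage (sketch): drivers that track a bound-buffer count instead of
 * a bitmask can call the _count variant the same way; ctx and its members
 * are hypothetical driver state.
 *
 *    util_set_vertex_buffers_count(ctx->vertex_buffers,
 *                                  &ctx->num_vertex_buffers,
 *                                  buffers, start_slot, count);
 */
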
/**
 * Given a draw info with a user index buffer, upload the indices and return
 * the uploaded buffer and an offset adjusted so that info->start can be
 * used unchanged.
 */
bool
util_upload_index_buffer(struct pipe_context *pipe,
                         const struct pipe_draw_info *info,
                         struct pipe_resource **out_buffer,
                         unsigned *out_offset)
{
   unsigned start_offset = info->start * info->index_size;

   u_upload_data(pipe->stream_uploader, start_offset,
                 info->count * info->index_size, 4,
                 (char*)info->index.user + start_offset,
                 out_offset, out_buffer);
   u_upload_unmap(pipe->stream_uploader);
   *out_offset -= start_offset;
   return *out_buffer != NULL;
}

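/* Example usage (sketch): in a driver's draw_vbo, a user index buffer can be
 * uploaded to a real resource before drawing. "ibuffer" and "ioffset" are
 * local names for illustration; because the returned offset is rebased,
 * info->start stays usable as-is.
 *
 *    if (info->index_size && info->has_user_indices) {
 *       struct pipe_resource *ibuffer = NULL;
 *       unsigned ioffset;
 *
 *       if (!util_upload_index_buffer(pctx, info, &ibuffer, &ioffset))
 *          return; // out of memory
 *
 *       // ... emit the draw using ibuffer at offset ioffset ...
 *
 *       pipe_resource_reference(&ibuffer, NULL);
 *    }
 */
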
/**
 * Called by MakeCurrent. Used to notify the driver that the application
 * thread may have changed.
 *
 * The function pins the current thread and driver threads to a group of
 * CPU cores that share the same L3 cache. This is needed for good multi-
 * threading performance on AMD Zen CPUs.
 *
 * \param upper_thread  thread in the state tracker that also needs to be
 *                      pinned.
 */
void
util_pin_driver_threads_to_random_L3(struct pipe_context *ctx,
                                     thrd_t *upper_thread)
{
   /* If pinning has no effect, don't do anything. */
   if (util_cpu_caps.nr_cpus == util_cpu_caps.cores_per_L3)
      return;

   unsigned num_L3_caches = util_cpu_caps.nr_cpus /
                            util_cpu_caps.cores_per_L3;

   /* Get a semi-random number. */
   int64_t t = os_time_get_nano();
   unsigned cache = (t ^ (t >> 8) ^ (t >> 16)) % num_L3_caches;

   /* Tell the driver to pin its threads to the selected L3 cache. */
   if (ctx->set_context_param) {
      ctx->set_context_param(ctx, PIPE_CONTEXT_PARAM_PIN_THREADS_TO_L3_CACHE,
                             cache);
   }

   /* Do the same for the upper level thread if there is any (e.g. glthread) */
   if (upper_thread)
      util_pin_thread_to_L3(*upper_thread, cache, util_cpu_caps.cores_per_L3);
}

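/* Example usage (sketch): a state tracker would typically call this from its
 * MakeCurrent path. The st and glthread names below are assumptions about
 * the caller, not requirements of this helper.
 *
 *    util_pin_driver_threads_to_random_L3(st->pipe, NULL);
 *
 *    // or, when an application-side worker thread should be pinned too:
 *    util_pin_driver_threads_to_random_L3(st->pipe, &glthread->thread);
 */
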
/* This is a helper for hardware bring-up. Don't remove. */
struct pipe_query *
util_begin_pipestat_query(struct pipe_context *ctx)
{
   struct pipe_query *q =
      ctx->create_query(ctx, PIPE_QUERY_PIPELINE_STATISTICS, 0);
   if (!q)
      return NULL;

   ctx->begin_query(ctx, q);
   return q;
}

/* This is a helper for hardware bring-up. Don't remove. */
void
util_end_pipestat_query(struct pipe_context *ctx, struct pipe_query *q,
                        FILE *f)
{
   static unsigned counter;
   struct pipe_query_data_pipeline_statistics stats;

   ctx->end_query(ctx, q);
   ctx->get_query_result(ctx, q, true, (void*)&stats);
   ctx->destroy_query(ctx, q);

   fprintf(f,
           "Draw call %u:\n"
           "    ia_vertices    = %"PRIu64"\n"
           "    ia_primitives  = %"PRIu64"\n"
           "    vs_invocations = %"PRIu64"\n"
           "    gs_invocations = %"PRIu64"\n"
           "    gs_primitives  = %"PRIu64"\n"
           "    c_invocations  = %"PRIu64"\n"
           "    c_primitives   = %"PRIu64"\n"
           "    ps_invocations = %"PRIu64"\n"
           "    hs_invocations = %"PRIu64"\n"
           "    ds_invocations = %"PRIu64"\n"
           "    cs_invocations = %"PRIu64"\n",
           p_atomic_inc_return(&counter),
           stats.ia_vertices,
           stats.ia_primitives,
           stats.vs_invocations,
           stats.gs_invocations,
           stats.gs_primitives,
           stats.c_invocations,
           stats.c_primitives,
           stats.ps_invocations,
           stats.hs_invocations,
           stats.ds_invocations,
           stats.cs_invocations);
}

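/* Example usage (sketch): wrap a suspect draw call during bring-up to dump
 * its pipeline statistics. "info" is a pipe_draw_info prepared by the caller.
 *
 *    struct pipe_query *q = util_begin_pipestat_query(pipe);
 *
 *    pipe->draw_vbo(pipe, &info);
 *
 *    if (q)
 *       util_end_pipestat_query(pipe, q, stderr);
 */
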
/* This is a helper for hardware bring-up. Don't remove. */
void
util_wait_for_idle(struct pipe_context *ctx)
{
   struct pipe_fence_handle *fence = NULL;

   ctx->flush(ctx, &fence, 0);
   ctx->screen->fence_finish(ctx->screen, NULL, fence, PIPE_TIMEOUT_INFINITE);
   /* Release the fence reference created by flush. */
   ctx->screen->fence_reference(ctx->screen, &fence, NULL);
}

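/* Example usage (sketch): during bring-up, forcing the GPU to go idle before
 * and after a suspect operation helps isolate which submission hangs.
 * do_suspect_operation is a hypothetical stand-in for the code under test.
 *
 *    util_wait_for_idle(ctx);
 *    do_suspect_operation(ctx);
 *    util_wait_for_idle(ctx);
 */
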
void
util_throttle_init(struct util_throttle *t, uint64_t max_mem_usage)
{
   t->max_mem_usage = max_mem_usage;
}

void
util_throttle_deinit(struct pipe_screen *screen, struct util_throttle *t)
{
   for (unsigned i = 0; i < ARRAY_SIZE(t->ring); i++)
      screen->fence_reference(screen, &t->ring[i].fence, NULL);
}

static uint64_t
util_get_throttle_total_memory_usage(struct util_throttle *t)
{
   uint64_t total_usage = 0;

   for (unsigned i = 0; i < ARRAY_SIZE(t->ring); i++)
      total_usage += t->ring[i].mem_usage;
   return total_usage;
}

static void util_dump_throttle_ring(struct util_throttle *t)
{
   printf("Throttle:\n");
   for (unsigned i = 0; i < ARRAY_SIZE(t->ring); i++) {
      printf("  ring[%u]: fence = %s, mem_usage = %"PRIu64"%s%s\n",
             i, t->ring[i].fence ? "yes" : " no",
             t->ring[i].mem_usage,
             t->flush_index == i ? " [flush]" : "",
             t->wait_index == i ? " [wait]" : "");
   }
}

/**
 * Notify util_throttle that the next operation allocates memory.
 * util_throttle tracks memory usage and waits for fences until its tracked
 * memory usage decreases.
 *
 * Example:
 *   util_throttle_memory_usage(..., w*h*d*Bpp);
 *   TexSubImage(..., w, h, d, ...);
 *
 * This means that TexSubImage can't allocate more memory than the maximum
 * limit set during initialization.
 */
void
util_throttle_memory_usage(struct pipe_context *pipe,
                           struct util_throttle *t, uint64_t memory_size)
{
   (void)util_dump_throttle_ring; /* silence warning */

   if (!t->max_mem_usage)
      return;

   struct pipe_screen *screen = pipe->screen;
   struct pipe_fence_handle **fence = NULL;
   unsigned ring_size = ARRAY_SIZE(t->ring);
   uint64_t total = util_get_throttle_total_memory_usage(t);

   /* If there is not enough memory, walk the list of fences and find
    * the latest one that we need to wait for.
    */
   while (t->wait_index != t->flush_index &&
          total && total + memory_size > t->max_mem_usage) {
      assert(t->ring[t->wait_index].fence);

      /* Release an older fence if we need to wait for a newer one. */
      if (fence)
         screen->fence_reference(screen, fence, NULL);

      fence = &t->ring[t->wait_index].fence;
      t->ring[t->wait_index].mem_usage = 0;
      t->wait_index = (t->wait_index + 1) % ring_size;

      total = util_get_throttle_total_memory_usage(t);
   }

   /* Wait for the fence to decrease memory usage. */
   if (fence) {
      screen->fence_finish(screen, pipe, *fence, PIPE_TIMEOUT_INFINITE);
      screen->fence_reference(screen, fence, NULL);
   }

   /* Flush and get a fence if we've exhausted memory usage for the current
    * slot.
    */
   if (t->ring[t->flush_index].mem_usage &&
       t->ring[t->flush_index].mem_usage + memory_size >
       t->max_mem_usage / (ring_size / 2)) {
      struct pipe_fence_handle **fence =
         &t->ring[t->flush_index].fence;

      /* Expect that the current flush slot doesn't have a fence yet. */
      assert(!*fence);

      pipe->flush(pipe, fence, PIPE_FLUSH_ASYNC);
      t->flush_index = (t->flush_index + 1) % ring_size;

      /* Vacate the next slot if it's occupied. This should be rare. */
      if (t->flush_index == t->wait_index) {
         struct pipe_fence_handle **fence =
            &t->ring[t->wait_index].fence;

         t->ring[t->wait_index].mem_usage = 0;
         t->wait_index = (t->wait_index + 1) % ring_size;

         assert(*fence);
         screen->fence_finish(screen, pipe, *fence, PIPE_TIMEOUT_INFINITE);
         screen->fence_reference(screen, fence, NULL);
      }

      assert(!t->ring[t->flush_index].mem_usage);
      assert(!t->ring[t->flush_index].fence);
   }

   t->ring[t->flush_index].mem_usage += memory_size;
}
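
/* Example usage (sketch): a typical throttle lifecycle in a driver or state
 * tracker. The 512 MB limit and the ctx->throttle member are illustrative
 * choices, not requirements.
 *
 *    util_throttle_init(&ctx->throttle, 512 * 1024 * 1024);
 *
 *    // Before each operation that allocates GPU memory:
 *    util_throttle_memory_usage(pipe, &ctx->throttle,
 *                               (uint64_t)width * height * depth * Bpp);
 *    // ... perform the upload (e.g. a texture subimage copy) ...
 *
 *    // At context destruction:
 *    util_throttle_deinit(pipe->screen, &ctx->throttle);
 */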