u_helpers.c revision 361fc4cb
/**************************************************************************
 *
 * Copyright 2012 Marek Olšák <maraeo@gmail.com>
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS AND/OR THEIR SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

#include "util/u_cpu_detect.h"
#include "util/u_helpers.h"
#include "util/u_inlines.h"
#include "util/u_upload_mgr.h"
#include "util/u_thread.h"
#include "util/os_time.h"
#include <inttypes.h>

/**
 * This function is used to copy an array of pipe_vertex_buffer structures,
 * while properly referencing the pipe_vertex_buffer::buffer member.
 *
 * enabled_buffers is updated such that the bits corresponding to the indices
 * of disabled buffers are set to 0 and the enabled ones are set to 1.
 *
 * \sa util_copy_framebuffer_state
 */
void util_set_vertex_buffers_mask(struct pipe_vertex_buffer *dst,
                                  uint32_t *enabled_buffers,
                                  const struct pipe_vertex_buffer *src,
                                  unsigned start_slot, unsigned count)
{
   unsigned i;
   uint32_t bitmask = 0;

   dst += start_slot;

   if (src) {
      for (i = 0; i < count; i++) {
         if (src[i].buffer.resource)
            bitmask |= 1 << i;

         pipe_vertex_buffer_unreference(&dst[i]);

         if (!src[i].is_user_buffer)
            pipe_resource_reference(&dst[i].buffer.resource, src[i].buffer.resource);
      }

      /* Copy over the other members of pipe_vertex_buffer. */
      memcpy(dst, src, count * sizeof(struct pipe_vertex_buffer));

      *enabled_buffers &= ~(((1ull << count) - 1) << start_slot);
      *enabled_buffers |= bitmask << start_slot;
   }
   else {
      /* Unreference the buffers. */
      for (i = 0; i < count; i++)
         pipe_vertex_buffer_unreference(&dst[i]);

      *enabled_buffers &= ~(((1ull << count) - 1) << start_slot);
   }
}

/**
 * Same as util_set_vertex_buffers_mask, but it keeps track of the number
 * of bound buffers in *dst_count instead of a bitmask.
 */
void util_set_vertex_buffers_count(struct pipe_vertex_buffer *dst,
                                   unsigned *dst_count,
                                   const struct pipe_vertex_buffer *src,
                                   unsigned start_slot, unsigned count)
{
   unsigned i;
   uint32_t enabled_buffers = 0;

   for (i = 0; i < *dst_count; i++) {
      if (dst[i].buffer.resource)
         enabled_buffers |= (1ull << i);
   }

   util_set_vertex_buffers_mask(dst, &enabled_buffers, src, start_slot,
                                count);

   *dst_count = util_last_bit(enabled_buffers);
}
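/*
 * Illustrative sketch (not part of the original file): a driver's
 * pipe_context::set_vertex_buffers callback would typically forward to
 * util_set_vertex_buffers_mask as below.  "struct my_context",
 * "my_set_vertex_buffers", "vertex_buffers" and "enabled_vb_mask" are
 * hypothetical driver-side names; the real field names depend on the driver.
 *
 *    static void
 *    my_set_vertex_buffers(struct pipe_context *pctx, unsigned start_slot,
 *                          unsigned count,
 *                          const struct pipe_vertex_buffer *buffers)
 *    {
 *       struct my_context *ctx = (struct my_context *)pctx;
 *
 *       util_set_vertex_buffers_mask(ctx->vertex_buffers,
 *                                    &ctx->enabled_vb_mask,
 *                                    buffers, start_slot, count);
 *    }
 */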
/**
 * Upload the indices of a user index buffer and return the uploaded
 * hardware buffer and an offset into it. The offset is adjusted so that
 * info->start can be used unchanged with the returned buffer.
 */
bool
util_upload_index_buffer(struct pipe_context *pipe,
                         const struct pipe_draw_info *info,
                         struct pipe_resource **out_buffer,
                         unsigned *out_offset)
{
   unsigned start_offset = info->start * info->index_size;

   u_upload_data(pipe->stream_uploader, start_offset,
                 info->count * info->index_size, 4,
                 (char*)info->index.user + start_offset,
                 out_offset, out_buffer);
   u_upload_unmap(pipe->stream_uploader);
   *out_offset -= start_offset;
   return *out_buffer != NULL;
}
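/*
 * Illustrative sketch (not part of the original file): a driver's draw_vbo
 * callback might use util_upload_index_buffer when the hardware cannot read
 * user index buffers directly.  "my_draw_vbo" is a hypothetical driver
 * function.
 *
 *    static void
 *    my_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 *    {
 *       struct pipe_resource *indexbuf = NULL;
 *       unsigned offset = 0;
 *
 *       if (info->index_size && info->has_user_indices) {
 *          if (!util_upload_index_buffer(ctx, info, &indexbuf, &offset))
 *             return;
 *
 *          ... emit the draw reading indices from "indexbuf" at byte offset
 *          "offset" (info->start and info->count stay unchanged), then
 *          release the buffer with pipe_resource_reference(&indexbuf, NULL) ...
 *       }
 *    }
 */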
/**
 * Called by MakeCurrent. Used to notify the driver that the application
 * thread may have changed.
 *
 * The function pins the current thread and driver threads to a group of
 * CPU cores that share the same L3 cache. This is needed for good multi-
 * threading performance on AMD Zen CPUs.
 *
 * \param upper_thread  thread in the state tracker that also needs to be
 *                      pinned.
 */
void
util_pin_driver_threads_to_random_L3(struct pipe_context *ctx,
                                     thrd_t *upper_thread)
{
   /* If pinning has no effect, don't do anything. */
   if (util_cpu_caps.nr_cpus == util_cpu_caps.cores_per_L3)
      return;

   unsigned num_L3_caches = util_cpu_caps.nr_cpus /
                            util_cpu_caps.cores_per_L3;

   /* Get a semi-random number. */
   int64_t t = os_time_get_nano();
   unsigned cache = (t ^ (t >> 8) ^ (t >> 16)) % num_L3_caches;

   /* Tell the driver to pin its threads to the selected L3 cache. */
   if (ctx->set_context_param) {
      ctx->set_context_param(ctx, PIPE_CONTEXT_PARAM_PIN_THREADS_TO_L3_CACHE,
                             cache);
   }

   /* Do the same for the upper level thread if there is any (e.g. glthread) */
   if (upper_thread)
      util_pin_thread_to_L3(*upper_thread, cache, util_cpu_caps.cores_per_L3);
}

/* This is a helper for hardware bring-up. Don't remove. */
struct pipe_query *
util_begin_pipestat_query(struct pipe_context *ctx)
{
   struct pipe_query *q =
      ctx->create_query(ctx, PIPE_QUERY_PIPELINE_STATISTICS, 0);
   if (!q)
      return NULL;

   ctx->begin_query(ctx, q);
   return q;
}

/* This is a helper for hardware bring-up. Don't remove. */
void
util_end_pipestat_query(struct pipe_context *ctx, struct pipe_query *q,
                        FILE *f)
{
   static unsigned counter;
   struct pipe_query_data_pipeline_statistics stats;

   ctx->end_query(ctx, q);
   ctx->get_query_result(ctx, q, true, (void*)&stats);
   ctx->destroy_query(ctx, q);

   fprintf(f,
           "Draw call %u:\n"
           "    ia_vertices    = %"PRIu64"\n"
           "    ia_primitives  = %"PRIu64"\n"
           "    vs_invocations = %"PRIu64"\n"
           "    gs_invocations = %"PRIu64"\n"
           "    gs_primitives  = %"PRIu64"\n"
           "    c_invocations  = %"PRIu64"\n"
           "    c_primitives   = %"PRIu64"\n"
           "    ps_invocations = %"PRIu64"\n"
           "    hs_invocations = %"PRIu64"\n"
           "    ds_invocations = %"PRIu64"\n"
           "    cs_invocations = %"PRIu64"\n",
           p_atomic_inc_return(&counter),
           stats.ia_vertices,
           stats.ia_primitives,
           stats.vs_invocations,
           stats.gs_invocations,
           stats.gs_primitives,
           stats.c_invocations,
           stats.c_primitives,
           stats.ps_invocations,
           stats.hs_invocations,
           stats.ds_invocations,
           stats.cs_invocations);
}

/* This is a helper for hardware bring-up. Don't remove. */
void
util_wait_for_idle(struct pipe_context *ctx)
{
   struct pipe_fence_handle *fence = NULL;

   ctx->flush(ctx, &fence, 0);
   ctx->screen->fence_finish(ctx->screen, NULL, fence, PIPE_TIMEOUT_INFINITE);
}
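/*
 * Illustrative sketch (not part of the original file): during hardware
 * bring-up the pipestat helpers above are meant to be used as a pair around
 * a single draw call, e.g.:
 *
 *    struct pipe_query *q = util_begin_pipestat_query(ctx);
 *    ... emit one draw call ...
 *    if (q)
 *       util_end_pipestat_query(ctx, q, stderr);
 *
 * util_wait_for_idle can additionally be called after the draw to make sure
 * the GPU has finished before the numbers are inspected.
 */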
void
util_throttle_init(struct util_throttle *t, uint64_t max_mem_usage)
{
   t->max_mem_usage = max_mem_usage;
}

void
util_throttle_deinit(struct pipe_screen *screen, struct util_throttle *t)
{
   for (unsigned i = 0; i < ARRAY_SIZE(t->ring); i++)
      screen->fence_reference(screen, &t->ring[i].fence, NULL);
}

static uint64_t
util_get_throttle_total_memory_usage(struct util_throttle *t)
{
   uint64_t total_usage = 0;

   for (unsigned i = 0; i < ARRAY_SIZE(t->ring); i++)
      total_usage += t->ring[i].mem_usage;
   return total_usage;
}

static void util_dump_throttle_ring(struct util_throttle *t)
{
   printf("Throttle:\n");
   for (unsigned i = 0; i < ARRAY_SIZE(t->ring); i++) {
      printf("  ring[%u]: fence = %s, mem_usage = %"PRIu64"%s%s\n",
             i, t->ring[i].fence ? "yes" : " no",
             t->ring[i].mem_usage,
             t->flush_index == i ? " [flush]" : "",
             t->wait_index == i ? " [wait]" : "");
   }
}

/**
 * Notify util_throttle that the next operation allocates memory.
 * util_throttle tracks memory usage and waits for fences until its tracked
 * memory usage decreases.
 *
 * Example:
 *   util_throttle_memory_usage(..., w*h*d*Bpp);
 *   TexSubImage(..., w, h, d, ...);
 *
 * This means that TexSubImage can't allocate more memory than the maximum
 * limit set during initialization.
 */
void
util_throttle_memory_usage(struct pipe_context *pipe,
                           struct util_throttle *t, uint64_t memory_size)
{
   (void)util_dump_throttle_ring; /* silence warning */

   if (!t->max_mem_usage)
      return;

   struct pipe_screen *screen = pipe->screen;
   struct pipe_fence_handle **fence = NULL;
   unsigned ring_size = ARRAY_SIZE(t->ring);
   uint64_t total = util_get_throttle_total_memory_usage(t);

   /* If there is not enough memory, walk the list of fences and find
    * the latest one that we need to wait for.
    */
   while (t->wait_index != t->flush_index &&
          total && total + memory_size > t->max_mem_usage) {
      assert(t->ring[t->wait_index].fence);

      /* Release an older fence if we need to wait for a newer one. */
      if (fence)
         screen->fence_reference(screen, fence, NULL);

      fence = &t->ring[t->wait_index].fence;
      t->ring[t->wait_index].mem_usage = 0;
      t->wait_index = (t->wait_index + 1) % ring_size;

      total = util_get_throttle_total_memory_usage(t);
   }

   /* Wait for the fence to decrease memory usage. */
   if (fence) {
      screen->fence_finish(screen, pipe, *fence, PIPE_TIMEOUT_INFINITE);
      screen->fence_reference(screen, fence, NULL);
   }

   /* Flush and get a fence if we've exhausted memory usage for the current
    * slot.
    */
   if (t->ring[t->flush_index].mem_usage &&
       t->ring[t->flush_index].mem_usage + memory_size >
       t->max_mem_usage / (ring_size / 2)) {
      struct pipe_fence_handle **fence =
         &t->ring[t->flush_index].fence;

      /* Expect that the current flush slot doesn't have a fence yet. */
      assert(!*fence);

      pipe->flush(pipe, fence, PIPE_FLUSH_ASYNC);
      t->flush_index = (t->flush_index + 1) % ring_size;

      /* Vacate the next slot if it's occupied. This should be rare. */
      if (t->flush_index == t->wait_index) {
         struct pipe_fence_handle **fence =
            &t->ring[t->wait_index].fence;

         t->ring[t->wait_index].mem_usage = 0;
         t->wait_index = (t->wait_index + 1) % ring_size;

         assert(*fence);
         screen->fence_finish(screen, pipe, *fence, PIPE_TIMEOUT_INFINITE);
         screen->fence_reference(screen, fence, NULL);
      }

      assert(!t->ring[t->flush_index].mem_usage);
      assert(!t->ring[t->flush_index].fence);
   }

   t->ring[t->flush_index].mem_usage += memory_size;
}
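/*
 * Illustrative sketch (not part of the original file): a typical way a
 * driver could wire up util_throttle around texture uploads.  The names
 * "ctx", "pctx" and the 256 MB limit are hypothetical; the throttle struct
 * is assumed to be embedded in the driver's context.
 *
 *    // at context creation
 *    util_throttle_init(&ctx->throttle, 256 * 1024 * 1024);
 *
 *    // before each upload that allocates staging memory
 *    util_throttle_memory_usage(pctx, &ctx->throttle,
 *                               (uint64_t)width * height * depth * bpp);
 *    ... do the upload ...
 *
 *    // at context destruction
 *    util_throttle_deinit(pctx->screen, &ctx->throttle);
 */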