/*
 * Copyright © 2016 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */

/* Job queue with execution in one or more separate threads.
 *
 * Jobs can be added from any thread. After that, util_queue_fence_wait can
 * be used to wait for completion of the job.
 */

#ifndef U_QUEUE_H
#define U_QUEUE_H

#include <string.h>

#include "simple_mtx.h"
#include "util/futex.h"
#include "util/list.h"
#include "util/macros.h"
#include "util/os_time.h"
#include "util/u_atomic.h"
#include "util/u_thread.h"

#ifdef __cplusplus
extern "C" {
#endif

#define UTIL_QUEUE_INIT_USE_MINIMUM_PRIORITY      (1 << 0)
#define UTIL_QUEUE_INIT_RESIZE_IF_FULL            (1 << 1)
#define UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY  (1 << 2)
#define UTIL_QUEUE_INIT_SCALE_THREADS             (1 << 3)

#if UTIL_FUTEX_SUPPORTED
#define UTIL_QUEUE_FENCE_FUTEX
#else
#define UTIL_QUEUE_FENCE_STANDARD
#endif

#ifdef UTIL_QUEUE_FENCE_FUTEX
/* Job completion fence.
 * Put this into your job structure.
 */
struct util_queue_fence {
   /* The fence can be in one of three states:
    *  0 - signaled
    *  1 - unsignaled
    *  2 - unsignaled, may have waiters
    */
   uint32_t val;
};

static inline void
util_queue_fence_init(struct util_queue_fence *fence)
{
   fence->val = 0;
}

static inline void
util_queue_fence_destroy(struct util_queue_fence *fence)
{
   assert(p_atomic_read_relaxed(&fence->val) == 0);
   /* no-op */
}

static inline void
util_queue_fence_signal(struct util_queue_fence *fence)
{
   uint32_t val = p_atomic_xchg(&fence->val, 0);

   assert(val != 0);

   if (val == 2)
      futex_wake(&fence->val, INT_MAX);
}

/**
 * Move \p fence back into the unsignaled state.
 *
 * \warning The caller must ensure that no other thread may currently be
 *          waiting (or about to wait) on the fence.
 */
static inline void
util_queue_fence_reset(struct util_queue_fence *fence)
{
#ifdef NDEBUG
   fence->val = 1;
#else
   uint32_t v = p_atomic_xchg(&fence->val, 1);
   assert(v == 0);
#endif
}

static inline bool
util_queue_fence_is_signalled(struct util_queue_fence *fence)
{
   return p_atomic_read_relaxed(&fence->val) == 0;
}
#endif

#ifdef UTIL_QUEUE_FENCE_STANDARD
/* Job completion fence.
 * Put this into your job structure.
 */
struct util_queue_fence {
   mtx_t mutex;
   cnd_t cond;
   int signalled;
};

void util_queue_fence_init(struct util_queue_fence *fence);
void util_queue_fence_destroy(struct util_queue_fence *fence);
void util_queue_fence_signal(struct util_queue_fence *fence);

/**
 * Move \p fence back into the unsignaled state.
 *
 * \warning The caller must ensure that no other thread may currently be
 *          waiting (or about to wait) on the fence.
 */
static inline void
util_queue_fence_reset(struct util_queue_fence *fence)
{
   assert(fence->signalled);
   fence->signalled = 0;
}

static inline bool
util_queue_fence_is_signalled(struct util_queue_fence *fence)
{
   return fence->signalled != 0;
}
#endif

void
_util_queue_fence_wait(struct util_queue_fence *fence);

static inline void
util_queue_fence_wait(struct util_queue_fence *fence)
{
   if (unlikely(!util_queue_fence_is_signalled(fence)))
      _util_queue_fence_wait(fence);
}
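/* Fence life cycle, as a minimal illustrative sketch ("worker" stands for
 * whatever thread eventually calls util_queue_fence_signal, e.g. a queue
 * thread; it is not a name defined by this header):
 *
 *    struct util_queue_fence fence;
 *
 *    util_queue_fence_init(&fence);    // starts out signaled
 *    util_queue_fence_reset(&fence);   // unsignaled; only legal while no
 *                                      // thread waits on the fence
 *    // ... worker: util_queue_fence_signal(&fence); ...
 *    util_queue_fence_wait(&fence);    // blocks until the worker signals
 *    util_queue_fence_destroy(&fence); // fence must be signaled again here
 */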
bool
_util_queue_fence_wait_timeout(struct util_queue_fence *fence,
                               int64_t abs_timeout);

/**
 * Wait for the fence to be signaled with a timeout.
 *
 * \param fence the fence
 * \param abs_timeout the absolute timeout in nanoseconds, relative to the
 *                    clock provided by os_time_get_nano.
 *
 * \return true if the fence was signaled, false if the timeout occurred.
 */
static inline bool
util_queue_fence_wait_timeout(struct util_queue_fence *fence,
                              int64_t abs_timeout)
{
   if (util_queue_fence_is_signalled(fence))
      return true;

   if (abs_timeout == (int64_t)OS_TIMEOUT_INFINITE) {
      _util_queue_fence_wait(fence);
      return true;
   }

   return _util_queue_fence_wait_timeout(fence, abs_timeout);
}
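/* Example (illustrative): waiting for at most one millisecond. The timeout
 * is an absolute point in time on the os_time_get_nano() clock, so a
 * relative timeout must be converted first:
 *
 *    int64_t abs_timeout = os_time_get_nano() + 1000000; // now + 1 ms
 *
 *    if (!util_queue_fence_wait_timeout(&fence, abs_timeout)) {
 *       // timed out; the job has not signaled the fence yet
 *    }
 */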
typedef void (*util_queue_execute_func)(void *job, void *gdata, int thread_index);

struct util_queue_job {
   void *job;
   void *global_data;
   size_t job_size;
   struct util_queue_fence *fence;
   util_queue_execute_func execute;
   util_queue_execute_func cleanup;
};

/* Put this into your context. */
struct util_queue {
   char name[14]; /* 13 characters = the thread name without the index */
   simple_mtx_t finish_lock; /* for util_queue_finish; also protects threads/num_threads */
   mtx_t lock;
   cnd_t has_queued_cond;
   cnd_t has_space_cond;
   thrd_t *threads;
   unsigned flags;
   int num_queued;
   unsigned max_threads;
   unsigned num_threads; /* decreasing this number will terminate threads */
   int max_jobs;
   int write_idx, read_idx; /* ring buffer pointers */
   size_t total_jobs_size;  /* memory use of all jobs in the queue */
   struct util_queue_job *jobs;
   void *global_data;

   /* for cleanup at exit(), protected by exit_mutex */
   struct list_head head;
};

bool util_queue_init(struct util_queue *queue,
                     const char *name,
                     unsigned max_jobs,
                     unsigned num_threads,
                     unsigned flags,
                     void *global_data);
void util_queue_destroy(struct util_queue *queue);

/* The optional cleanup callback is called after the fence is signaled: */
void util_queue_add_job(struct util_queue *queue,
                        void *job,
                        struct util_queue_fence *fence,
                        util_queue_execute_func execute,
                        util_queue_execute_func cleanup,
                        const size_t job_size);
void util_queue_drop_job(struct util_queue *queue,
                         struct util_queue_fence *fence);
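/* Minimal usage sketch. "struct my_job" and "my_execute" are hypothetical
 * names for the caller's code, not part of this header. util_queue_add_job
 * resets the fence itself before queuing the job (see u_queue.c), so the
 * fence only has to be initialized once:
 *
 *    struct my_job {
 *       struct util_queue_fence fence;
 *       // ... job payload ...
 *    };
 *
 *    static void my_execute(void *job, void *gdata, int thread_index)
 *    {
 *       struct my_job *j = (struct my_job *)job;
 *       // ... runs on one of the queue threads ...
 *    }
 *
 *    struct util_queue queue;
 *    struct my_job job;
 *
 *    util_queue_init(&queue, "myqueue", 8, 1, 0, NULL);
 *    util_queue_fence_init(&job.fence);
 *    util_queue_add_job(&queue, &job, &job.fence, my_execute, NULL, 0);
 *    util_queue_fence_wait(&job.fence); // blocks until my_execute has run
 *    util_queue_fence_destroy(&job.fence);
 *    util_queue_destroy(&queue);
 */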
void util_queue_finish(struct util_queue *queue);

/* Adjust the number of active threads. The new number of threads can't be
 * greater than the initial number of threads at the creation of the queue,
 * and it can't be less than 1.
 */
void
util_queue_adjust_num_threads(struct util_queue *queue, unsigned num_threads);

int64_t util_queue_get_thread_time_nano(struct util_queue *queue,
                                        unsigned thread_index);

/* util_queue needs to be cleared to zeroes for this to work */
static inline bool
util_queue_is_initialized(struct util_queue *queue)
{
   return queue->threads != NULL;
}

/* Convenient structure for monitoring the queue externally and passing
 * the structure between Mesa components. The queue doesn't use it directly.
 */
struct util_queue_monitoring
{
   /* For querying the thread busyness. */
   struct util_queue *queue;

   /* Counters updated by the user of the queue. */
   unsigned num_offloaded_items;
   unsigned num_direct_items;
   unsigned num_syncs;
};

#ifdef __cplusplus
}
#endif

#endif /* U_QUEUE_H */