/*
 * Copyright © 2016 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */

/* Job queue with execution in a separate thread.
 *
 * Jobs can be added from any thread. Once a job has been added, the wait
 * call can be used to block until that job has completed.
 */
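
/* A minimal usage sketch (illustrative only: the my_job type and callback
 * below are not part of this API, and error handling is omitted):
 *
 *    struct my_job {
 *       struct util_queue_fence fence;
 *       int input, result;
 *    };
 *
 *    static void my_job_execute(void *data, int thread_index)
 *    {
 *       struct my_job *job = (struct my_job *)data;
 *       job->result = job->input * 2;   // the actual work
 *    }
 *
 *    struct util_queue queue;
 *    struct my_job job = { .input = 21 };
 *
 *    util_queue_init(&queue, "mesa_job", 32, 1, 0);  // 32 jobs, 1 thread
 *    util_queue_fence_init(&job.fence);
 *    util_queue_add_job(&queue, &job, &job.fence, my_job_execute, NULL);
 *
 *    util_queue_fence_wait(&job.fence);  // blocks until the job is done
 *    util_queue_fence_destroy(&job.fence);
 *    util_queue_destroy(&queue);
 */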

#ifndef U_QUEUE_H
#define U_QUEUE_H

#include <string.h>

#include "util/futex.h"
#include "util/list.h"
#include "util/macros.h"
#include "util/os_time.h"
#include "util/u_atomic.h"
#include "util/u_thread.h"

#ifdef __cplusplus
extern "C" {
#endif

#define UTIL_QUEUE_INIT_USE_MINIMUM_PRIORITY      (1 << 0)
#define UTIL_QUEUE_INIT_RESIZE_IF_FULL            (1 << 1)
#define UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY  (1 << 2)

#if defined(__GNUC__) && defined(HAVE_LINUX_FUTEX_H)
#define UTIL_QUEUE_FENCE_FUTEX
#else
#define UTIL_QUEUE_FENCE_STANDARD
#endif

#ifdef UTIL_QUEUE_FENCE_FUTEX
/* Job completion fence.
 * Put this into your job structure.
 */
struct util_queue_fence {
   /* The fence can be in one of three states:
    *  0 - signalled
    *  1 - unsignalled
    *  2 - unsignalled, may have waiters
    */
   uint32_t val;
};

static inline void
util_queue_fence_init(struct util_queue_fence *fence)
{
   fence->val = 0;
}

static inline void
util_queue_fence_destroy(struct util_queue_fence *fence)
{
   assert(fence->val == 0);
   /* no-op */
}

static inline void
util_queue_fence_signal(struct util_queue_fence *fence)
{
   uint32_t val = p_atomic_xchg(&fence->val, 0);

   /* Signalling an already-signalled fence is a caller bug. */
   assert(val != 0);

   /* Only enter the kernel if there may be waiters (state 2). */
   if (val == 2)
      futex_wake(&fence->val, INT_MAX);
}

/**
 * Move \p fence back into the unsignalled state.
 *
 * \warning The caller must ensure that no other thread may currently be
 *          waiting (or about to wait) on the fence.
 */
static inline void
util_queue_fence_reset(struct util_queue_fence *fence)
{
#ifdef NDEBUG
   fence->val = 1;
#else
   /* In debug builds, verify that the fence was signalled before the reset. */
   uint32_t v = p_atomic_xchg(&fence->val, 1);
   assert(v == 0);
#endif
}

static inline bool
util_queue_fence_is_signalled(struct util_queue_fence *fence)
{
   return fence->val == 0;
}
#endif

#ifdef UTIL_QUEUE_FENCE_STANDARD
/* Job completion fence.
 * Put this into your job structure.
 */
struct util_queue_fence {
   mtx_t mutex;
   cnd_t cond;
   int signalled;
};

void util_queue_fence_init(struct util_queue_fence *fence);
void util_queue_fence_destroy(struct util_queue_fence *fence);
void util_queue_fence_signal(struct util_queue_fence *fence);

/**
 * Move \p fence back into the unsignalled state.
 *
 * \warning The caller must ensure that no other thread may currently be
 *          waiting (or about to wait) on the fence.
 */
static inline void
util_queue_fence_reset(struct util_queue_fence *fence)
{
   assert(fence->signalled);
   fence->signalled = 0;
}

static inline bool
util_queue_fence_is_signalled(struct util_queue_fence *fence)
{
   return fence->signalled != 0;
}
#endif

void
_util_queue_fence_wait(struct util_queue_fence *fence);

static inline void
util_queue_fence_wait(struct util_queue_fence *fence)
{
   if (unlikely(!util_queue_fence_is_signalled(fence)))
      _util_queue_fence_wait(fence);
}
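
/* Typical fence lifecycle, as a sketch (assuming one producer that submits
 * work and one worker that completes it; "..." elides unrelated code):
 *
 *    util_queue_fence_init(&fence);     // fence starts out signalled
 *    ...
 *    util_queue_fence_reset(&fence);    // producer: mark work as pending
 *    ...                                // hand the work to a worker
 *    util_queue_fence_wait(&fence);     // producer: block until completion
 *
 *    util_queue_fence_signal(&fence);   // worker: wake all waiters
 *
 *    util_queue_fence_destroy(&fence);  // must be signalled at this point
 */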

bool
_util_queue_fence_wait_timeout(struct util_queue_fence *fence,
                               int64_t abs_timeout);

/**
 * Wait for the fence to be signalled, with a timeout.
 *
 * \param fence the fence
 * \param abs_timeout the absolute timeout in nanoseconds, relative to the
 *                    clock provided by os_time_get_nano.
 *
 * \return true if the fence was signalled, false if the timeout occurred.
 */
static inline bool
util_queue_fence_wait_timeout(struct util_queue_fence *fence,
                              int64_t abs_timeout)
{
   if (util_queue_fence_is_signalled(fence))
      return true;

   if (abs_timeout == (int64_t)OS_TIMEOUT_INFINITE) {
      _util_queue_fence_wait(fence);
      return true;
   }

   return _util_queue_fence_wait_timeout(fence, abs_timeout);
}
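
/* Example (a sketch): wait for at most one second. Since abs_timeout is an
 * absolute deadline on the os_time_get_nano() clock, a relative timeout is
 * converted by adding it to the current time:
 *
 *    int64_t deadline = os_time_get_nano() + 1000000000ll;
 *
 *    if (!util_queue_fence_wait_timeout(&fence, deadline)) {
 *       // timed out: the job has not completed yet
 *    }
 */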

/* Job callback. Called with the job pointer that was passed to
 * util_queue_add_job; thread_index identifies the queue thread running it.
 */
typedef void (*util_queue_execute_func)(void *job, int thread_index);

struct util_queue_job {
   void *job;
   struct util_queue_fence *fence;
   util_queue_execute_func execute;
   util_queue_execute_func cleanup;
};

/* Put this into your context. */
struct util_queue {
   char name[14]; /* 13 characters + NUL; the thread name without the index */
   mtx_t finish_lock; /* for util_queue_finish; also protects threads/num_threads */
   mtx_t lock;
   cnd_t has_queued_cond;
   cnd_t has_space_cond;
   thrd_t *threads;
   unsigned flags;
   int num_queued;
   unsigned max_threads;
   unsigned num_threads; /* decreasing this number will terminate threads */
   int max_jobs;
   int write_idx, read_idx; /* ring buffer pointers */
   struct util_queue_job *jobs;

   /* for cleanup at exit(), protected by exit_mutex */
   struct list_head head;
};

bool util_queue_init(struct util_queue *queue,
                     const char *name,
                     unsigned max_jobs,
                     unsigned num_threads,
                     unsigned flags);
void util_queue_destroy(struct util_queue *queue);

/* The optional cleanup callback is called after the fence has been
 * signalled:
 */
void util_queue_add_job(struct util_queue *queue,
                        void *job,
                        struct util_queue_fence *fence,
                        util_queue_execute_func execute,
                        util_queue_execute_func cleanup);
/* Remove the job from the queue if it hasn't started executing yet;
 * otherwise wait for it to finish.
 */
void util_queue_drop_job(struct util_queue *queue,
                         struct util_queue_fence *fence);
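
/* Example (a sketch, reusing the my_job type from the sketch at the top of
 * this file): for fire-and-forget jobs, the cleanup callback can free a
 * heap-allocated job once the fence has been signalled. Nothing may wait on
 * the fence afterwards, since it is freed together with the job:
 *
 *    static void my_job_cleanup(void *data, int thread_index)
 *    {
 *       free(data);   // runs on the worker thread, after execute
 *    }
 *
 *    struct my_job *job = calloc(1, sizeof(*job));
 *    util_queue_fence_init(&job->fence);
 *    util_queue_add_job(&queue, job, &job->fence, my_job_execute,
 *                       my_job_cleanup);
 */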

/* Wait until all currently queued jobs have finished executing. */
void util_queue_finish(struct util_queue *queue);

/* Adjust the number of active threads. The new number of threads can't be
 * greater than the initial number of threads at the creation of the queue,
 * and it can't be less than 1.
 */
void
util_queue_adjust_num_threads(struct util_queue *queue, unsigned num_threads);
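
/* Example (a sketch): temporarily throttle the queue to a single worker
 * thread, then restore the initial thread count (assuming max_threads still
 * holds the count passed to util_queue_init):
 *
 *    util_queue_adjust_num_threads(&queue, 1);
 *    ...
 *    util_queue_adjust_num_threads(&queue, queue.max_threads);
 */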

int64_t util_queue_get_thread_time_nano(struct util_queue *queue,
                                        unsigned thread_index);

/* util_queue needs to be cleared to zeroes for this to work */
static inline bool
util_queue_is_initialized(struct util_queue *queue)
{
   return queue->threads != NULL;
}
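
/* Example (a sketch): since util_queue_is_initialized only checks the
 * threads pointer, the structure must be zero-initialized before first use:
 *
 *    struct util_queue queue = {0};
 *
 *    if (!util_queue_is_initialized(&queue))
 *       util_queue_init(&queue, "mesa_lazy", 32, 1, 0);
 */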

/* Convenient structure for monitoring the queue externally and passing
 * the structure between Mesa components. The queue doesn't use it directly.
 */
struct util_queue_monitoring
{
   /* For querying the thread busyness. */
   struct util_queue *queue;

   /* Counters updated by the user of the queue. */
   unsigned num_offloaded_items;
   unsigned num_direct_items;
   unsigned num_syncs;
};

#ifdef __cplusplus
}
#endif

#endif