/*
 * Copyright © 2016 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */
2601e04c3fSmrg
/* Job queue with execution in a separate thread.
 *
 * Jobs can be added from any thread. After that, the wait call can be used
 * to wait for completion of the job.
 */
3201e04c3fSmrg
3301e04c3fSmrg#ifndef U_QUEUE_H
3401e04c3fSmrg#define U_QUEUE_H
3501e04c3fSmrg
3601e04c3fSmrg#include <string.h>
3701e04c3fSmrg
387ec681f3Smrg#include "simple_mtx.h"
3901e04c3fSmrg#include "util/futex.h"
4001e04c3fSmrg#include "util/list.h"
4101e04c3fSmrg#include "util/macros.h"
4201e04c3fSmrg#include "util/os_time.h"
4301e04c3fSmrg#include "util/u_atomic.h"
4401e04c3fSmrg#include "util/u_thread.h"
4501e04c3fSmrg
4601e04c3fSmrg#ifdef __cplusplus
4701e04c3fSmrgextern "C" {
4801e04c3fSmrg#endif
4901e04c3fSmrg
/* Flags for util_queue_init().
 * NOTE(review): the semantics are implemented in u_queue.c; the names suggest
 * minimum worker-thread priority, growing the ring instead of blocking when
 * full, pinning threads to all cores, and scaling the thread count on demand
 * — confirm against the implementation.
 */
#define UTIL_QUEUE_INIT_USE_MINIMUM_PRIORITY      (1 << 0)
#define UTIL_QUEUE_INIT_RESIZE_IF_FULL            (1 << 1)
#define UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY  (1 << 2)
#define UTIL_QUEUE_INIT_SCALE_THREADS             (1 << 3)

/* Select the fence implementation: lock-free futex-based when the platform
 * supports it, otherwise a portable mutex/condvar fallback.
 */
#if UTIL_FUTEX_SUPPORTED
#define UTIL_QUEUE_FENCE_FUTEX
#else
#define UTIL_QUEUE_FENCE_STANDARD
#endif
6001e04c3fSmrg
#ifdef UTIL_QUEUE_FENCE_FUTEX
/* Job completion fence (futex implementation).
 * Put this into your job structure.
 *
 * Waiters and signalers communicate solely through the atomic 'val' word.
 */
struct util_queue_fence {
   /* The fence can be in one of three states:
    *  0 - signaled
    *  1 - unsignaled
    *  2 - unsignaled, may have waiters
    */
   uint32_t val;
};
7301e04c3fSmrg
/* Initialize the fence in the signaled state (val == 0). */
static inline void
util_queue_fence_init(struct util_queue_fence *fence)
{
   fence->val = 0;
}
7901e04c3fSmrg
/* Destroy the fence. The fence must be in the signaled state; destroying an
 * unsignaled fence could leave waiters blocked forever.
 */
static inline void
util_queue_fence_destroy(struct util_queue_fence *fence)
{
   assert(p_atomic_read_relaxed(&fence->val) == 0);
   /* no-op */
}
8601e04c3fSmrg
8701e04c3fSmrgstatic inline void
8801e04c3fSmrgutil_queue_fence_signal(struct util_queue_fence *fence)
8901e04c3fSmrg{
9001e04c3fSmrg   uint32_t val = p_atomic_xchg(&fence->val, 0);
9101e04c3fSmrg
9201e04c3fSmrg   assert(val != 0);
9301e04c3fSmrg
9401e04c3fSmrg   if (val == 2)
9501e04c3fSmrg      futex_wake(&fence->val, INT_MAX);
9601e04c3fSmrg}
9701e04c3fSmrg
9801e04c3fSmrg/**
9901e04c3fSmrg * Move \p fence back into unsignalled state.
10001e04c3fSmrg *
10101e04c3fSmrg * \warning The caller must ensure that no other thread may currently be
10201e04c3fSmrg *          waiting (or about to wait) on the fence.
10301e04c3fSmrg */
10401e04c3fSmrgstatic inline void
10501e04c3fSmrgutil_queue_fence_reset(struct util_queue_fence *fence)
10601e04c3fSmrg{
10701e04c3fSmrg#ifdef NDEBUG
10801e04c3fSmrg   fence->val = 1;
10901e04c3fSmrg#else
11001e04c3fSmrg   uint32_t v = p_atomic_xchg(&fence->val, 1);
11101e04c3fSmrg   assert(v == 0);
11201e04c3fSmrg#endif
11301e04c3fSmrg}
11401e04c3fSmrg
/* Non-blocking check: return true if the fence is in the signaled state. */
static inline bool
util_queue_fence_is_signalled(struct util_queue_fence *fence)
{
   return p_atomic_read_relaxed(&fence->val) == 0;
}
12001e04c3fSmrg#endif
12101e04c3fSmrg
#ifdef UTIL_QUEUE_FENCE_STANDARD
/* Job completion fence (portable mutex/condvar implementation).
 * Put this into your job structure.
 */
struct util_queue_fence {
   mtx_t mutex;   /* presumably guards 'signalled' — see u_queue.c */
   cnd_t cond;    /* presumably broadcast on signal — see u_queue.c */
   int signalled; /* 0 = unsignaled, nonzero = signaled */
};
13101e04c3fSmrg
/* Out-of-line implementations for the portable fence (see u_queue.c). */
void util_queue_fence_init(struct util_queue_fence *fence);
void util_queue_fence_destroy(struct util_queue_fence *fence);
void util_queue_fence_signal(struct util_queue_fence *fence);
13501e04c3fSmrg
/**
 * Move \p fence back into unsignalled state.
 *
 * \warning The caller must ensure that no other thread may currently be
 *          waiting (or about to wait) on the fence.
 */
static inline void
util_queue_fence_reset(struct util_queue_fence *fence)
{
   /* Resetting an unsignaled fence is a caller bug (see warning above). */
   assert(fence->signalled);
   fence->signalled = 0;
}
14801e04c3fSmrg
/* Non-blocking check: return true if the fence is in the signaled state. */
static inline bool
util_queue_fence_is_signalled(struct util_queue_fence *fence)
{
   return fence->signalled != 0;
}
15401e04c3fSmrg#endif
15501e04c3fSmrg
/* Slow path: block until the fence is signaled (see u_queue.c). */
void
_util_queue_fence_wait(struct util_queue_fence *fence);

/* Wait for the fence to be signaled. The inline fast path avoids the
 * out-of-line call when the fence is already signaled.
 */
static inline void
util_queue_fence_wait(struct util_queue_fence *fence)
{
   if (unlikely(!util_queue_fence_is_signalled(fence)))
      _util_queue_fence_wait(fence);
}
16501e04c3fSmrg
16601e04c3fSmrgbool
16701e04c3fSmrg_util_queue_fence_wait_timeout(struct util_queue_fence *fence,
16801e04c3fSmrg                               int64_t abs_timeout);
16901e04c3fSmrg
17001e04c3fSmrg/**
17101e04c3fSmrg * Wait for the fence to be signaled with a timeout.
17201e04c3fSmrg *
17301e04c3fSmrg * \param fence the fence
17401e04c3fSmrg * \param abs_timeout the absolute timeout in nanoseconds, relative to the
17501e04c3fSmrg *                    clock provided by os_time_get_nano.
17601e04c3fSmrg *
17701e04c3fSmrg * \return true if the fence was signaled, false if the timeout occurred.
17801e04c3fSmrg */
17901e04c3fSmrgstatic inline bool
18001e04c3fSmrgutil_queue_fence_wait_timeout(struct util_queue_fence *fence,
18101e04c3fSmrg                              int64_t abs_timeout)
18201e04c3fSmrg{
18301e04c3fSmrg   if (util_queue_fence_is_signalled(fence))
18401e04c3fSmrg      return true;
18501e04c3fSmrg
18601e04c3fSmrg   if (abs_timeout == (int64_t)OS_TIMEOUT_INFINITE) {
18701e04c3fSmrg      _util_queue_fence_wait(fence);
18801e04c3fSmrg      return true;
18901e04c3fSmrg   }
19001e04c3fSmrg
19101e04c3fSmrg   return _util_queue_fence_wait_timeout(fence, abs_timeout);
19201e04c3fSmrg}
19301e04c3fSmrg
/* Job callback. 'job' and 'gdata' are the per-job and per-queue user
 * pointers; 'thread_index' identifies the worker thread running the job.
 */
typedef void (*util_queue_execute_func)(void *job, void *gdata, int thread_index);

/* One entry of the queue's ring buffer. */
struct util_queue_job {
   void *job;                       /* user pointer handed to the callbacks */
   void *global_data;               /* per-queue pointer from util_queue_init */
   size_t job_size;                 /* accounted in util_queue::total_jobs_size */
   struct util_queue_fence *fence;  /* completion fence for this job */
   util_queue_execute_func execute;
   util_queue_execute_func cleanup; /* optional; called after the fence is signaled */
};
20401e04c3fSmrg
/* The job queue. Put this into your context. */
struct util_queue {
   char name[14]; /* 13 characters = the thread name without the index */
   simple_mtx_t finish_lock; /* for util_queue_finish and protects threads/num_threads */
   mtx_t lock;            /* NOTE(review): presumably guards the ring state below */
   cnd_t has_queued_cond; /* presumably: a job was added — see u_queue.c */
   cnd_t has_space_cond;  /* presumably: a ring slot was freed — see u_queue.c */
   thrd_t *threads;       /* worker threads; NULL until util_queue_init */
   unsigned flags;        /* UTIL_QUEUE_INIT_* bits */
   int num_queued;
   unsigned max_threads;
   unsigned num_threads; /* decreasing this number will terminate threads */
   int max_jobs;
   int write_idx, read_idx; /* ring buffer pointers */
   size_t total_jobs_size;  /* memory use of all jobs in the queue */
   struct util_queue_job *jobs;
   void *global_data; /* passed as 'gdata' to every job callback */

   /* for cleanup at exit(), protected by exit_mutex */
   struct list_head head;
};
22601e04c3fSmrg
/* Create a queue with 'num_threads' workers and a ring of 'max_jobs'
 * entries. 'name' prefixes the worker thread names, 'flags' is a mask of
 * UTIL_QUEUE_INIT_* bits, and 'global_data' is later passed as 'gdata' to
 * every job callback. Returns false on failure.
 */
bool util_queue_init(struct util_queue *queue,
                     const char *name,
                     unsigned max_jobs,
                     unsigned num_threads,
                     unsigned flags,
                     void *global_data);
void util_queue_destroy(struct util_queue *queue);

/* optional cleanup callback is called after fence is signaled: */
void util_queue_add_job(struct util_queue *queue,
                        void *job,
                        struct util_queue_fence *fence,
                        util_queue_execute_func execute,
                        util_queue_execute_func cleanup,
                        const size_t job_size);
/* NOTE(review): by its name, removes a not-yet-started job identified by
 * 'fence' — confirm exact semantics in u_queue.c.
 */
void util_queue_drop_job(struct util_queue *queue,
                         struct util_queue_fence *fence);

/* Wait until all currently queued jobs have completed (see u_queue.c). */
void util_queue_finish(struct util_queue *queue);

/* Adjust the number of active threads. The new number of threads can't be
 * greater than the initial number of threads at the creation of the queue,
 * and it can't be less than 1.
 */
void
util_queue_adjust_num_threads(struct util_queue *queue, unsigned num_threads);

/* Accumulated CPU time of the given worker thread, in nanoseconds. */
int64_t util_queue_get_thread_time_nano(struct util_queue *queue,
                                        unsigned thread_index);
25601e04c3fSmrg
/* Return true if util_queue_init succeeded on this queue ('threads' is set).
 * util_queue needs to be cleared to zeroes for this to work.
 */
static inline bool
util_queue_is_initialized(struct util_queue *queue)
{
   return queue->threads != NULL;
}
26301e04c3fSmrg
/* Convenient structure for monitoring the queue externally and passing
 * the structure between Mesa components. The queue doesn't use it directly.
 */
struct util_queue_monitoring
{
   /* For querying the thread busyness. */
   struct util_queue *queue;

   /* Counters updated by the user of the queue. */
   unsigned num_offloaded_items; /* items executed via the queue */
   unsigned num_direct_items;    /* items executed synchronously */
   unsigned num_syncs;           /* number of synchronization points */
};
27701e04c3fSmrg
27801e04c3fSmrg#ifdef __cplusplus
27901e04c3fSmrg}
28001e04c3fSmrg#endif
28101e04c3fSmrg
28201e04c3fSmrg#endif
283