1/*
2 * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
3 * Copyright © 2015 Advanced Micro Devices, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
16 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
18 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * The above copyright notice and this permission notice (including the
24 * next paragraph) shall be included in all copies or substantial portions
25 * of the Software.
26 */
27
28#ifndef AMDGPU_CS_H
29#define AMDGPU_CS_H
30
31#include "amdgpu_bo.h"
32#include "util/u_memory.h"
33#include "drm-uapi/amdgpu_drm.h"
34
/* Submission size limit, expressed in dwords.
 *
 * Smaller submits mean the GPU gets busy sooner and there is less
 * waiting for buffers and fences. Proof:
 *   http://www.phoronix.com/scan.php?page=article&item=mesa-111-si&num=1
 */
#define IB_MAX_SUBMIT_DWORDS (20 * 1024)
40
41struct amdgpu_ctx {
42   struct amdgpu_winsys *ws;
43   amdgpu_context_handle ctx;
44   amdgpu_bo_handle user_fence_bo;
45   uint64_t *user_fence_cpu_address_base;
46   int refcount;
47   unsigned initial_num_total_rejected_cs;
48   unsigned num_rejected_cs;
49};
50
51struct amdgpu_cs_buffer {
52   struct amdgpu_winsys_bo *bo;
53   union {
54      struct {
55         uint32_t priority_usage;
56      } real;
57      struct {
58         uint32_t real_idx; /* index of underlying real BO */
59      } slab;
60   } u;
61   enum radeon_bo_usage usage;
62};
63
/* The kinds of IB a CS carries.
 *
 * IB_NUM is not an IB kind: it is the number of kinds and is used as an
 * array length (see amdgpu_cs_context::ib).
 */
enum ib_type {
   IB_PREAMBLE = 0,
   IB_MAIN,
   IB_NUM, /* count of the entries above */
};
69
70struct amdgpu_ib {
71   struct radeon_cmdbuf *rcs; /* pointer to the driver-owned data */
72
73   /* A buffer out of which new IBs are allocated. */
74   struct pb_buffer        *big_ib_buffer;
75   uint8_t                 *ib_mapped;
76   unsigned                used_ib_space;
77
78   /* The maximum seen size from cs_check_space. If the driver does
79    * cs_check_space and flush, the newly allocated IB should have at least
80    * this size.
81    */
82   unsigned                max_check_space_size;
83
84   unsigned                max_ib_size;
85   uint32_t                *ptr_ib_size;
86   bool                    ptr_ib_size_inside_ib;
87   enum ib_type            ib_type;
88};
89
/* An array of fence handles with two counters.
 * NOTE(review): `num` / `max` look like "entries in use" / "entries
 * allocated" -- confirm against the code that grows the list.
 */
struct amdgpu_fence_list {
   struct pipe_fence_handle **list;
   unsigned num;
   unsigned max;
};
95
96struct amdgpu_cs_context {
97   struct drm_amdgpu_cs_chunk_ib ib[IB_NUM];
98   uint32_t                    *ib_main_addr; /* the beginning of IB before chaining */
99
100   /* Buffers. */
101   unsigned                    max_real_buffers;
102   unsigned                    num_real_buffers;
103   struct amdgpu_cs_buffer     *real_buffers;
104
105   unsigned                    num_slab_buffers;
106   unsigned                    max_slab_buffers;
107   struct amdgpu_cs_buffer     *slab_buffers;
108
109   unsigned                    num_sparse_buffers;
110   unsigned                    max_sparse_buffers;
111   struct amdgpu_cs_buffer     *sparse_buffers;
112
113   int16_t                     *buffer_indices_hashlist;
114
115   struct amdgpu_winsys_bo     *last_added_bo;
116   unsigned                    last_added_bo_index;
117   unsigned                    last_added_bo_usage;
118   uint32_t                    last_added_bo_priority_usage;
119
120   struct amdgpu_fence_list    fence_dependencies;
121   struct amdgpu_fence_list    syncobj_dependencies;
122   struct amdgpu_fence_list    syncobj_to_signal;
123
124   struct pipe_fence_handle    *fence;
125
126   /* the error returned from cs_flush for non-async submissions */
127   int                         error_code;
128
129   /* TMZ: will this command be submitted using the TMZ flag */
130   bool secure;
131};
132
/* Number of entries in amdgpu_cs::buffer_indices_hashlist. */
#define BUFFER_HASHLIST_SIZE 4096
134
135struct amdgpu_cs {
136   struct amdgpu_ib main; /* must be first because this is inherited */
137   struct amdgpu_winsys *ws;
138   struct amdgpu_ctx *ctx;
139   enum ring_type ring_type;
140   struct drm_amdgpu_cs_chunk_fence fence_chunk;
141
142   /* We flip between these two CS. While one is being consumed
143    * by the kernel in another thread, the other one is being filled
144    * by the pipe driver. */
145   struct amdgpu_cs_context csc1;
146   struct amdgpu_cs_context csc2;
147   /* The currently-used CS. */
148   struct amdgpu_cs_context *csc;
149   /* The CS being currently-owned by the other thread. */
150   struct amdgpu_cs_context *cst;
151   /* buffer_indices_hashlist[hash(bo)] returns -1 if the bo
152    * isn't part of any buffer lists or the index where the bo could be found.
153    * Since 1) hash collisions of 2 different bo can happen and 2) we use a
154    * single hashlist for the 3 buffer list, this is only a hint.
155    * amdgpu_lookup_buffer uses this hint to speed up buffers look up.
156    */
157   int16_t buffer_indices_hashlist[BUFFER_HASHLIST_SIZE];
158
159   /* Flush CS. */
160   void (*flush_cs)(void *ctx, unsigned flags, struct pipe_fence_handle **fence);
161   void *flush_data;
162   bool stop_exec_on_failure;
163   bool noop;
164   bool has_chaining;
165
166   struct util_queue_fence flush_completed;
167   struct pipe_fence_handle *next_fence;
168   struct pb_buffer *preamble_ib_bo;
169};
170
171struct amdgpu_fence {
172   struct pipe_reference reference;
173   /* If ctx == NULL, this fence is syncobj-based. */
174   uint32_t syncobj;
175
176   struct amdgpu_winsys *ws;
177   struct amdgpu_ctx *ctx;  /* submission context */
178   struct amdgpu_cs_fence fence;
179   uint64_t *user_fence_cpu_address;
180
181   /* If the fence has been submitted. This is unsignalled for deferred fences
182    * (cs->next_fence) and while an IB is still being submitted in the submit
183    * thread. */
184   struct util_queue_fence submitted;
185
186   volatile int signalled;              /* bool (int for atomicity) */
187};
188
189static inline bool amdgpu_fence_is_syncobj(struct amdgpu_fence *fence)
190{
191   return fence->ctx == NULL;
192}
193
194static inline void amdgpu_ctx_unref(struct amdgpu_ctx *ctx)
195{
196   if (p_atomic_dec_zero(&ctx->refcount)) {
197      amdgpu_cs_ctx_free(ctx->ctx);
198      amdgpu_bo_free(ctx->user_fence_bo);
199      FREE(ctx);
200   }
201}
202
203static inline void amdgpu_fence_reference(struct pipe_fence_handle **dst,
204                                          struct pipe_fence_handle *src)
205{
206   struct amdgpu_fence **adst = (struct amdgpu_fence **)dst;
207   struct amdgpu_fence *asrc = (struct amdgpu_fence *)src;
208
209   if (pipe_reference(&(*adst)->reference, &asrc->reference)) {
210      struct amdgpu_fence *fence = *adst;
211
212      if (amdgpu_fence_is_syncobj(fence))
213         amdgpu_cs_destroy_syncobj(fence->ws->dev, fence->syncobj);
214      else
215         amdgpu_ctx_unref(fence->ctx);
216
217      util_queue_fence_destroy(&fence->submitted);
218      FREE(fence);
219   }
220   *adst = asrc;
221}
222
/* Looks `bo` up in the buffer lists of `cs`.
 *
 * The inline helpers in this header treat a return value of -1 as "bo is
 * not in the CS" and any other value as an index into the list matching
 * the kind of bo (real_buffers, sparse_buffers or slab_buffers).
 */
int amdgpu_lookup_buffer_any_type(struct amdgpu_cs_context *cs,
                                  struct amdgpu_winsys_bo *bo);
224
225static inline struct amdgpu_cs *
226amdgpu_cs(struct radeon_cmdbuf *rcs)
227{
228   struct amdgpu_cs *cs = (struct amdgpu_cs*)rcs->priv;
229   assert(!cs || cs->main.ib_type == IB_MAIN);
230   return cs;
231}
232
/* Given a pointer to the member `container_member` of an object of type
 * `container_type`, yields a pointer to the enclosing object.
 *
 * The whole expansion is parenthesized so the result can be used directly
 * in a larger expression, e.g. get_container(p, struct foo, m)->field.
 * Without the outer parentheses the cast would bind to the result of `->`
 * instead of to the pointer.
 */
#define get_container(member_ptr, container_type, container_member) \
   ((container_type *)((char *)(member_ptr) - offsetof(container_type, container_member)))
235
236static inline bool
237amdgpu_bo_is_referenced_by_cs(struct amdgpu_cs *cs,
238                              struct amdgpu_winsys_bo *bo)
239{
240   return amdgpu_lookup_buffer_any_type(cs->csc, bo) != -1;
241}
242
243static inline bool
244amdgpu_bo_is_referenced_by_cs_with_usage(struct amdgpu_cs *cs,
245                                         struct amdgpu_winsys_bo *bo,
246                                         enum radeon_bo_usage usage)
247{
248   int index;
249   struct amdgpu_cs_buffer *buffer;
250
251   index = amdgpu_lookup_buffer_any_type(cs->csc, bo);
252   if (index == -1)
253      return false;
254
255   buffer = bo->bo ? &cs->csc->real_buffers[index] :
256            bo->base.usage & RADEON_FLAG_SPARSE ? &cs->csc->sparse_buffers[index] :
257            &cs->csc->slab_buffers[index];
258
259   return (buffer->usage & usage) != 0;
260}
261
/* Functions declared here for other parts of the winsys; their definitions
 * are not part of this header.
 */
bool amdgpu_fence_wait(struct pipe_fence_handle *fence,
                       uint64_t timeout, bool absolute);
void amdgpu_add_fences(struct amdgpu_winsys_bo *bo, unsigned num_fences,
                       struct pipe_fence_handle **fences);
void amdgpu_cs_sync_flush(struct radeon_cmdbuf *rcs);
void amdgpu_cs_init_functions(struct amdgpu_screen_winsys *ws);
269
270#endif
271