1/*
2 * Copyright © 2014-2017 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#ifndef VC5_CL_H
25#define VC5_CL_H
26
#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

#include "util/u_math.h"
#include "util/macros.h"
31
32struct v3d_bo;
33struct v3d_job;
34struct v3d_cl;
35
36/**
37 * Undefined structure, used for typechecking that you're passing the pointers
38 * to these functions correctly.
39 */
40struct v3d_cl_out;
41
/**
 * A (BO, offset) pair handed to the CL packing functions when a packet field
 * needs to refer to a location inside a buffer object.
 */
struct v3d_cl_reloc {
        /* Referenced BO; the packing helpers in this header accept NULL. */
        struct v3d_bo *bo;
        /* Offset that gets added to the BO's own offset. */
        uint32_t offset;
};
47
/* Declared early because __gen_emit_reloc is defined in terms of it; the
 * function body appears further down in this header. */
static inline void
cl_pack_emit_reloc(struct v3d_cl *cl, const struct v3d_cl_reloc *reloc);
49
/*
 * Glue names consumed by generated packet-packing code (see the comment on
 * cl_pack_emit_reloc()):
 *  - __gen_user_data:      type of the context passed to the pack functions.
 *  - __gen_address_type:   type used to describe an address field.
 *  - __gen_address_offset: value of an address, i.e. the BO's offset (0 when
 *                          there is no BO) plus the reloc's own offset.
 *  - __gen_emit_reloc:     callback invoked for an address field.
 */
#define __gen_user_data         struct v3d_cl
#define __gen_address_type      struct v3d_cl_reloc
#define __gen_address_offset(reloc)                              \
        (((reloc)->bo ? (reloc)->bo->offset : 0) + (reloc)->offset)
#define __gen_emit_reloc        cl_pack_emit_reloc
55
/** State of one command list (CL) while it is being written. */
struct v3d_cl {
        /* Start of the CL's memory; cl_offset() measures from here. */
        void *base;
        /* Job associated with this CL; the reloc helpers pass it to
         * v3d_job_add_bo(). */
        struct v3d_job *job;
        /* Current write position. */
        struct v3d_cl_out *next;
        /* BO that cl_get_address() reports as containing this CL. */
        struct v3d_bo *bo;
        /* Upper bound on cl_offset(), asserted by cl_end(). */
        uint32_t size;
};
63
/* Out-of-line CL entry points; their definitions are not in this header. */
void v3d_init_cl(struct v3d_job *job, struct v3d_cl *cl);
void v3d_destroy_cl(struct v3d_cl *cl);
void v3d_dump_cl(void *cl, uint32_t size, bool is_render);

/* NOTE(review): presumably looks up the index of bo's handle within job --
 * confirm against the implementation. */
uint32_t v3d_gem_hindex(struct v3d_job *job, struct v3d_bo *bo);
68
69struct PACKED unaligned_16 { uint16_t x; };
70struct PACKED unaligned_32 { uint32_t x; };
71
72static inline uint32_t cl_offset(struct v3d_cl *cl)
73{
74        return (char *)cl->next - (char *)cl->base;
75}
76
77static inline struct v3d_cl_reloc cl_get_address(struct v3d_cl *cl)
78{
79        return (struct v3d_cl_reloc){ .bo = cl->bo, .offset = cl_offset(cl) };
80}
81
/** Moves the write pointer stored in *cl forward by n bytes. */
static inline void
cl_advance(struct v3d_cl_out **cl, uint32_t n)
{
        char *bytes = (char *)*cl;

        *cl = (struct v3d_cl_out *)(bytes + n);
}
87
88static inline struct v3d_cl_out *
89cl_start(struct v3d_cl *cl)
90{
91        return cl->next;
92}
93
94static inline void
95cl_end(struct v3d_cl *cl, struct v3d_cl_out *next)
96{
97        cl->next = next;
98        assert(cl_offset(cl) <= cl->size);
99}
100
101
102static inline void
103put_unaligned_32(struct v3d_cl_out *ptr, uint32_t val)
104{
105        struct unaligned_32 *p = (void *)ptr;
106        p->x = val;
107}
108
109static inline void
110put_unaligned_16(struct v3d_cl_out *ptr, uint16_t val)
111{
112        struct unaligned_16 *p = (void *)ptr;
113        p->x = val;
114}
115
/** Writes one byte at the write pointer *cl and advances it by 1. */
static inline void
cl_u8(struct v3d_cl_out **cl, uint8_t n)
{
        uint8_t *dst = (uint8_t *)*cl;

        dst[0] = n;
        cl_advance(cl, sizeof(uint8_t));
}
122
/**
 * Writes a 16-bit value at the write pointer *cl using put_unaligned_16()
 * and advances the pointer by 2 bytes.
 */
static inline void
cl_u16(struct v3d_cl_out **cl, uint16_t n)
{
        struct v3d_cl_out *dst = *cl;

        put_unaligned_16(dst, n);
        cl_advance(cl, sizeof(uint16_t));
}
129
/**
 * Writes a 32-bit value at the write pointer *cl using put_unaligned_32()
 * and advances the pointer by 4 bytes.
 */
static inline void
cl_u32(struct v3d_cl_out **cl, uint32_t n)
{
        struct v3d_cl_out *dst = *cl;

        put_unaligned_32(dst, n);
        cl_advance(cl, sizeof(uint32_t));
}
136
/**
 * Writes a 32-bit value with a plain uint32_t store and advances *cl by 4
 * bytes.  Unlike cl_u32() this does not go through the unaligned helper, so
 * the caller is responsible for *cl being suitably aligned.
 */
static inline void
cl_aligned_u32(struct v3d_cl_out **cl, uint32_t n)
{
        uint32_t *dst = (uint32_t *)*cl;

        *dst = n;
        cl_advance(cl, sizeof(*dst));
}
143
144static inline void
145cl_aligned_reloc(struct v3d_cl *cl,
146                 struct v3d_cl_out **cl_out,
147                 struct v3d_bo *bo, uint32_t offset)
148{
149        cl_aligned_u32(cl_out, bo->offset + offset);
150        v3d_job_add_bo(cl->job, bo);
151}
152
/**
 * Writes the pointer value ptr into the CL at *cl and advances the write
 * pointer by sizeof(void *).
 *
 * The value is copied with memcpy() rather than stored through a cast
 * pointer: the CL is written at byte granularity (see cl_u8()), so *cl is not
 * guaranteed to be pointer-aligned, and a direct pointer-typed store to a
 * misaligned address is undefined behavior.
 */
static inline void
cl_ptr(struct v3d_cl_out **cl, void *ptr)
{
        memcpy(*cl, &ptr, sizeof(ptr));
        cl_advance(cl, sizeof(ptr));
}
159
/**
 * Writes a float into the CL: the value is converted with fui() and emitted
 * through cl_u32().
 */
static inline void
cl_f(struct v3d_cl_out **cl, float f)
{
        const uint32_t bits = fui(f);

        cl_u32(cl, bits);
}
165
/**
 * Writes a float into the CL: the value is converted with fui() and emitted
 * through cl_aligned_u32(), so *cl must be suitably aligned for that store.
 */
static inline void
cl_aligned_f(struct v3d_cl_out **cl, float f)
{
        const uint32_t bits = fui(f);

        cl_aligned_u32(cl, bits);
}
171
172/**
173 * Reference to a BO with its associated offset, used in the pack process.
174 */
175static inline struct v3d_cl_reloc
176cl_address(struct v3d_bo *bo, uint32_t offset)
177{
178        struct v3d_cl_reloc reloc = {
179                .bo = bo,
180                .offset = offset,
181        };
182        return reloc;
183}
184
/* Space-reservation helpers, defined outside this header.
 * NOTE(review): going by the names, these make sure `size` more bytes can be
 * written to the CL (the second variant presumably by branching to a new
 * buffer) -- confirm against the implementation. */
uint32_t v3d_cl_ensure_space(struct v3d_cl *cl, uint32_t size, uint32_t align);
void v3d_cl_ensure_space_with_branch(struct v3d_cl *cl, uint32_t size);
187
/* Name builders for a packet's generated identifiers.  Each pastes a suffix
 * onto the packet name (nothing for the struct itself) and wraps the result
 * in V3DX(). */
#define cl_packet_struct(packet) V3DX(packet)
#define cl_packet_header(packet) V3DX(packet ## _header)
#define cl_packet_length(packet) V3DX(packet ## _length)
#define cl_packet_pack(packet)   V3DX(packet ## _pack)
192
/**
 * Claims size bytes at the write pointer: returns the position *cl had on
 * entry and leaves *cl advanced past the claimed bytes.
 */
static inline void *
cl_get_emit_space(struct v3d_cl_out **cl, size_t size)
{
        struct v3d_cl_out *const reserved = *cl;

        cl_advance(cl, size);
        return reserved;
}
200
/* Macro for setting up an emit of a CL struct.  A temporary unpacked struct
 * called `name` is created and initialized with the packet's header, and the
 * statement following the macro gets to set its fields:
 *
 * cl_emit(bcl, FLAT_SHADE_FLAGS, flags) {
 *     flags.flat_shade_flags = 1 << 2;
 * }
 *
 * or default values only can be emitted with just:
 *
 * cl_emit(bcl, FLAT_SHADE_FLAGS, flags);
 *
 * The trick here is that we make a for loop that will execute the body
 * (either the block or the ';' after the macro invocation) exactly once.
 * After the body has run, the loop's increment expression packs the struct
 * at the CL's write pointer, advances the CL by the packet length and clears
 * _loop_terminate so that the loop exits.
 *
 * Notes:
 *  - `cl` is evaluated exactly once, after the body has run.
 *  - Leaving the body with `break` skips the increment expression, so
 *    nothing is emitted.
 *  - The last line of the macro deliberately has no line continuation, so
 *    whatever follows the definition is never spliced into it.
 */
#define cl_emit(cl, packet, name)                                \
        for (struct cl_packet_struct(packet) name = {            \
                cl_packet_header(packet)                         \
        },                                                       \
        *_loop_terminate = &name;                                \
        __builtin_expect(_loop_terminate != NULL, 1);            \
        ({                                                       \
                struct v3d_cl *_emit_cl = (cl);                  \
                struct v3d_cl_out *_emit_out = cl_start(_emit_cl); \
                cl_packet_pack(packet)(_emit_cl, (uint8_t *)_emit_out, &name); \
                cl_advance(&_emit_out, cl_packet_length(packet)); \
                cl_end(_emit_cl, _emit_out);                     \
                _loop_terminate = NULL;                          \
        }))
228
/* Like cl_emit(), but the bytes written to the CL are the freshly packed
 * struct ORed byte-by-byte with `prepacked`, which must be indexable for at
 * least cl_packet_length(packet) elements.
 *
 * `cl` and `prepacked` are each evaluated exactly once, after the body has
 * run.  `prepacked` is captured before any of the macro's scratch variables
 * are declared, and those variables carry an _emit_ prefix, so a caller
 * expression that mentions e.g. a variable named `packed` still refers to
 * the caller's object.
 */
#define cl_emit_with_prepacked(cl, packet, prepacked, name)      \
        for (struct cl_packet_struct(packet) name = {            \
                cl_packet_header(packet)                         \
        },                                                       \
        *_loop_terminate = &name;                                \
        __builtin_expect(_loop_terminate != NULL, 1);            \
        ({                                                       \
                __typeof__(&(prepacked)[0]) _emit_pre = (prepacked); \
                struct v3d_cl *_emit_cl = (cl);                  \
                struct v3d_cl_out *_emit_out = cl_start(_emit_cl); \
                uint8_t _emit_packed[cl_packet_length(packet)];  \
                cl_packet_pack(packet)(_emit_cl, _emit_packed, &name); \
                for (int _i = 0; _i < cl_packet_length(packet); _i++) \
                        ((uint8_t *)_emit_out)[_i] =             \
                                _emit_packed[_i] | _emit_pre[_i]; \
                cl_advance(&_emit_out, cl_packet_length(packet)); \
                cl_end(_emit_cl, _emit_out);                     \
                _loop_terminate = NULL;                          \
        }))
245
/* Copies `size` bytes of already-packed data from `packet` to the CL's write
 * pointer and advances the CL past them.
 *
 * Each argument is evaluated exactly once and is parenthesized where it is
 * used.  The write goes through cl_start()/cl_end(), like cl_emit(), so the
 * CL-size assertion in cl_end() covers prepacked emits too.
 */
#define cl_emit_prepacked_sized(cl, packet, size) do {           \
        struct v3d_cl *_pp_cl = (cl);                            \
        const size_t _pp_size = (size);                          \
        struct v3d_cl_out *_pp_out = cl_start(_pp_cl);           \
        memcpy(_pp_out, (packet), _pp_size);                     \
        cl_advance(&_pp_out, _pp_size);                          \
        cl_end(_pp_cl, _pp_out);                                 \
} while (0)
250
/* Emits the prepacked object that `packet` points to; the number of bytes
 * copied is the size of the pointed-to object. */
#define cl_emit_prepacked(cl, packet)                            \
        cl_emit_prepacked_sized(cl, packet, sizeof(*(packet)))
253
/* Packs a packet struct into caller-provided memory instead of a CL.  Uses
 * the same run-the-body-once for loop as cl_emit(): `name` is the temporary
 * unpacked struct the body fills in, and once the body has run the struct is
 * packed to `packed` with a NULL user-data pointer.
 *
 * `packed` is evaluated exactly once, and is parenthesized before the cast
 * so that an argument such as `buf + n` is converted as a whole.
 */
#define v3dx_pack(packed, packet, name)                          \
        for (struct cl_packet_struct(packet) name = {            \
                cl_packet_header(packet)                         \
        },                                                       \
        *_loop_terminate = &name;                                \
        __builtin_expect(_loop_terminate != NULL, 1);            \
        ({                                                       \
                uint8_t *_pack_dst = (uint8_t *)(packed);        \
                cl_packet_pack(packet)(NULL, _pack_dst, &name);  \
                VG(VALGRIND_CHECK_MEM_IS_DEFINED(_pack_dst,      \
                                                 cl_packet_length(packet))); \
                _loop_terminate = NULL;                          \
        }))
266
267/**
268 * Helper function called by the XML-generated pack functions for filling in
269 * an address field in shader records.
270 *
271 * Since we have a private address space as of VC5, our BOs can have lifelong
272 * offsets, and all the kernel needs to know is which BOs need to be paged in
273 * for this exec.
274 */
275static inline void
276cl_pack_emit_reloc(struct v3d_cl *cl, const struct v3d_cl_reloc *reloc)
277{
278        if (reloc->bo)
279                v3d_job_add_bo(cl->job, reloc->bo);
280}
281
282#endif /* VC5_CL_H */
283