/*
 * Copyright © 2014-2017 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
23b8e80941Smrg
24b8e80941Smrg#ifndef VC5_CL_H
25b8e80941Smrg#define VC5_CL_H
26b8e80941Smrg
27b8e80941Smrg#include <stdint.h>
28b8e80941Smrg
29b8e80941Smrg#include "util/u_math.h"
30b8e80941Smrg#include "util/macros.h"
31b8e80941Smrg
/* Forward declarations.  struct v3d_cl is completed further down in this
 * header; struct v3d_bo and struct v3d_job are not defined here.
 */
struct v3d_bo;
struct v3d_job;
struct v3d_cl;

/**
 * Intentionally never-defined struct.  Pointers to it serve as the write
 * cursor type of the cl_*() helpers, so the compiler can check that the
 * right kind of pointer is being handed to them.
 */
struct v3d_cl_out;
41b8e80941Smrg
/**
 * A BO reference consumed by the CL packing functions: a buffer object plus
 * a byte offset.  \c bo may be NULL, in which case only \c offset contributes
 * to the address (see __gen_address_offset()).
 */
struct v3d_cl_reloc {
        /** Referenced buffer object, or NULL. */
        struct v3d_bo *bo;
        /** Offset added to the BO's own offset. */
        uint32_t offset;
};
47b8e80941Smrg
/* Defined at the bottom of this header; declared early so that
 * __gen_emit_reloc can name it.
 */
static inline void cl_pack_emit_reloc(struct v3d_cl *cl,
                                      const struct v3d_cl_reloc *reloc);

/* Customization points, presumably consumed by the XML-generated pack
 * functions (confirm against the generated headers):
 *  - user data handed to every pack function is a struct v3d_cl,
 *  - addresses are struct v3d_cl_reloc,
 *  - an address resolves to the BO's offset (0 when there is no BO) plus
 *    the reloc's own offset,
 *  - relocations are reported through cl_pack_emit_reloc().
 */
#define __gen_user_data         struct v3d_cl
#define __gen_address_type      struct v3d_cl_reloc
#define __gen_address_offset(reloc)                              \
        (((reloc)->bo ? (reloc)->bo->offset : 0) +               \
         (reloc)->offset)
#define __gen_emit_reloc        cl_pack_emit_reloc
55b8e80941Smrg
/**
 * State of one command list being written.
 */
struct v3d_cl {
        /** Start of the buffer; cl_offset() is measured from here. */
        void *base;
        /** Job passed to v3d_job_add_bo() when a BO is referenced. */
        struct v3d_job *job;
        /** Write cursor: where the next emitted byte goes. */
        struct v3d_cl_out *next;
        /** BO reported by cl_get_address() for this CL. */
        struct v3d_bo *bo;
        /** Upper bound on cl_offset(), asserted by cl_end(). */
        uint32_t size;
};
63b8e80941Smrg
/* CL management entry points.  Only declared here; their definitions are
 * not part of this header.
 */
void
v3d_init_cl(struct v3d_job *job, struct v3d_cl *cl);

void
v3d_destroy_cl(struct v3d_cl *cl);

void
v3d_dump_cl(void *cl, uint32_t size, bool is_render);

uint32_t
v3d_gem_hindex(struct v3d_job *job, struct v3d_bo *bo);
68b8e80941Smrg
69b8e80941Smrgstruct PACKED unaligned_16 { uint16_t x; };
70b8e80941Smrgstruct PACKED unaligned_32 { uint32_t x; };
71b8e80941Smrg
72b8e80941Smrgstatic inline uint32_t cl_offset(struct v3d_cl *cl)
73b8e80941Smrg{
74b8e80941Smrg        return (char *)cl->next - (char *)cl->base;
75b8e80941Smrg}
76b8e80941Smrg
77b8e80941Smrgstatic inline struct v3d_cl_reloc cl_get_address(struct v3d_cl *cl)
78b8e80941Smrg{
79b8e80941Smrg        return (struct v3d_cl_reloc){ .bo = cl->bo, .offset = cl_offset(cl) };
80b8e80941Smrg}
81b8e80941Smrg
/**
 * Moves the write cursor \p *cl forward by \p n bytes.
 */
static inline void
cl_advance(struct v3d_cl_out **cl, uint32_t n)
{
        char *bytes = (char *)*cl;

        bytes += n;
        *cl = (struct v3d_cl_out *)bytes;
}
87b8e80941Smrg
88b8e80941Smrgstatic inline struct v3d_cl_out *
89b8e80941Smrgcl_start(struct v3d_cl *cl)
90b8e80941Smrg{
91b8e80941Smrg        return cl->next;
92b8e80941Smrg}
93b8e80941Smrg
94b8e80941Smrgstatic inline void
95b8e80941Smrgcl_end(struct v3d_cl *cl, struct v3d_cl_out *next)
96b8e80941Smrg{
97b8e80941Smrg        cl->next = next;
98b8e80941Smrg        assert(cl_offset(cl) <= cl->size);
99b8e80941Smrg}
100b8e80941Smrg
101b8e80941Smrg
102b8e80941Smrgstatic inline void
103b8e80941Smrgput_unaligned_32(struct v3d_cl_out *ptr, uint32_t val)
104b8e80941Smrg{
105b8e80941Smrg        struct unaligned_32 *p = (void *)ptr;
106b8e80941Smrg        p->x = val;
107b8e80941Smrg}
108b8e80941Smrg
109b8e80941Smrgstatic inline void
110b8e80941Smrgput_unaligned_16(struct v3d_cl_out *ptr, uint16_t val)
111b8e80941Smrg{
112b8e80941Smrg        struct unaligned_16 *p = (void *)ptr;
113b8e80941Smrg        p->x = val;
114b8e80941Smrg}
115b8e80941Smrg
/**
 * Writes one byte at the cursor and advances the cursor past it.
 */
static inline void
cl_u8(struct v3d_cl_out **cl, uint8_t n)
{
        uint8_t *dst = (uint8_t *)*cl;

        *dst = n;
        cl_advance(cl, sizeof(n));
}
122b8e80941Smrg
/**
 * Writes a 16-bit value at the cursor using the unaligned store helper and
 * advances the cursor by two bytes.
 */
static inline void
cl_u16(struct v3d_cl_out **cl, uint16_t n)
{
        struct v3d_cl_out *dst = *cl;

        put_unaligned_16(dst, n);
        cl_advance(cl, sizeof(n));
}
129b8e80941Smrg
/**
 * Writes a 32-bit value at the cursor using the unaligned store helper and
 * advances the cursor by four bytes.
 */
static inline void
cl_u32(struct v3d_cl_out **cl, uint32_t n)
{
        struct v3d_cl_out *dst = *cl;

        put_unaligned_32(dst, n);
        cl_advance(cl, sizeof(n));
}
136b8e80941Smrg
/**
 * Writes a 32-bit value at the cursor with a plain uint32_t store (no
 * unaligned wrapper, unlike cl_u32()) and advances the cursor by four bytes.
 */
static inline void
cl_aligned_u32(struct v3d_cl_out **cl, uint32_t n)
{
        uint32_t *word = (uint32_t *)*cl;

        *word = n;
        cl_advance(cl, sizeof(*word));
}
143b8e80941Smrg
144b8e80941Smrgstatic inline void
145b8e80941Smrgcl_aligned_reloc(struct v3d_cl *cl,
146b8e80941Smrg                 struct v3d_cl_out **cl_out,
147b8e80941Smrg                 struct v3d_bo *bo, uint32_t offset)
148b8e80941Smrg{
149b8e80941Smrg        cl_aligned_u32(cl_out, bo->offset + offset);
150b8e80941Smrg        v3d_job_add_bo(cl->job, bo);
151b8e80941Smrg}
152b8e80941Smrg
/**
 * Stores the host pointer \p ptr at the cursor and advances the cursor by
 * the size of a pointer.
 */
static inline void
cl_ptr(struct v3d_cl_out **cl, void *ptr)
{
        struct v3d_cl_out **slot = (struct v3d_cl_out **)*cl;

        *slot = ptr;
        cl_advance(cl, sizeof(ptr));
}
159b8e80941Smrg
/**
 * Emits a float: converts \p f with fui() and writes the resulting 32-bit
 * value with cl_u32().
 */
static inline void
cl_f(struct v3d_cl_out **cl, float f)
{
        const uint32_t bits = fui(f);

        cl_u32(cl, bits);
}
165b8e80941Smrg
/**
 * Emits a float: converts \p f with fui() and writes the resulting 32-bit
 * value with cl_aligned_u32().
 */
static inline void
cl_aligned_f(struct v3d_cl_out **cl, float f)
{
        const uint32_t bits = fui(f);

        cl_aligned_u32(cl, bits);
}
171b8e80941Smrg
172b8e80941Smrg/**
173b8e80941Smrg * Reference to a BO with its associated offset, used in the pack process.
174b8e80941Smrg */
175b8e80941Smrgstatic inline struct v3d_cl_reloc
176b8e80941Smrgcl_address(struct v3d_bo *bo, uint32_t offset)
177b8e80941Smrg{
178b8e80941Smrg        struct v3d_cl_reloc reloc = {
179b8e80941Smrg                .bo = bo,
180b8e80941Smrg                .offset = offset,
181b8e80941Smrg        };
182b8e80941Smrg        return reloc;
183b8e80941Smrg}
184b8e80941Smrg
/* Space-reservation entry points.  Only declared here; their definitions are
 * not part of this header.
 */
uint32_t
v3d_cl_ensure_space(struct v3d_cl *cl, uint32_t size, uint32_t align);

void
v3d_cl_ensure_space_with_branch(struct v3d_cl *cl, uint32_t size);
187b8e80941Smrg
/* Name builders for the per-packet symbols.  Each one pastes a suffix onto
 * the packet name and passes the result through V3DX(), which is not defined
 * in this header.
 */
#define cl_packet_struct(pkt) V3DX(pkt)
#define cl_packet_header(pkt) V3DX(pkt ## _header)
#define cl_packet_length(pkt) V3DX(pkt ## _length)
#define cl_packet_pack(pkt)   V3DX(pkt ## _pack)
192b8e80941Smrg
/**
 * Reserves \p size bytes at the cursor: returns the cursor's position on
 * entry and leaves the cursor advanced past the reserved bytes.
 */
static inline void *
cl_get_emit_space(struct v3d_cl_out **cl, size_t size)
{
        struct v3d_cl_out *reserved = *cl;

        cl_advance(cl, size);

        return reserved;
}
200b8e80941Smrg
/* Emits one packet into a CL.  A temporary unpacked struct called `name` is
 * declared, pre-filled with the packet's header defaults, and is in scope
 * for the statement that follows the macro.  Fill in fields there:
 *
 * cl_emit(bcl, FLAT_SHADE_FLAGS, flags) {
 *     flags.flat_shade_flags = 1 << 2;
 * }
 *
 * or emit the defaults alone with an empty statement:
 *
 * cl_emit(bcl, FLAT_SHADE_FLAGS, flags);
 *
 * Mechanics: this is a for loop whose body (the block, or the lone ';')
 * runs exactly once.  The packing happens in the loop's increment clause,
 * i.e. after the body: the struct is packed at the CL's cursor, the cursor
 * moves forward by the packet length, and _loop_terminate is cleared so the
 * condition fails on the next check.
 */
#define cl_emit(cl, packet, name)                                        \
        for (struct cl_packet_struct(packet) name = {                    \
                     cl_packet_header(packet)                            \
             },                                                          \
             *_loop_terminate = &name;                                   \
             __builtin_expect(_loop_terminate != NULL, 1);               \
             ({                                                          \
                     struct v3d_cl_out *_cl_cursor = cl_start(cl);       \
                     cl_packet_pack(packet)(cl, (uint8_t *)_cl_cursor,   \
                                            &name);                      \
                     cl_advance(&_cl_cursor, cl_packet_length(packet));  \
                     cl_end(cl, _cl_cursor);                             \
                     _loop_terminate = NULL;                             \
             }))

/* Same usage as cl_emit(), except that the freshly packed bytes are not
 * written directly: the packet is packed into a temporary array, and every
 * byte stored in the CL is that byte ORed with the byte at the same index of
 * `prepacked`.  `prepacked` is indexed from 0 up to the packet length.
 */
#define cl_emit_with_prepacked(cl, packet, prepacked, name)              \
        for (struct cl_packet_struct(packet) name = {                    \
                     cl_packet_header(packet)                            \
             },                                                          \
             *_loop_terminate = &name;                                   \
             __builtin_expect(_loop_terminate != NULL, 1);               \
             ({                                                          \
                     struct v3d_cl_out *_cl_cursor = cl_start(cl);       \
                     uint8_t *_cl_dst = (uint8_t *)_cl_cursor;           \
                     uint8_t _cl_fresh[cl_packet_length(packet)];        \
                     cl_packet_pack(packet)(cl, _cl_fresh, &name);       \
                     for (int _i = 0; _i < cl_packet_length(packet); _i++) \
                             _cl_dst[_i] = _cl_fresh[_i] | (prepacked)[_i]; \
                     cl_advance(&_cl_cursor, cl_packet_length(packet));  \
                     cl_end(cl, _cl_cursor);                             \
                     _loop_terminate = NULL;                             \
             }))

/* Copies `size` already-packed bytes from `packet` to the CL's cursor and
 * moves the cursor past them.
 *
 * `cl` must be a struct v3d_cl pointer.  `cl` and `size` are each evaluated
 * exactly once, so arguments with side effects are safe.  The cursor is
 * committed through cl_end(), so this path gets the same "did not run past
 * cl->size" assertion as cl_emit().
 */
#define cl_emit_prepacked_sized(cl, packet, size) do {                   \
        struct v3d_cl *_cl_dst = (cl);                                   \
        const size_t _cl_len = (size);                                   \
        struct v3d_cl_out *_cl_cursor = cl_start(_cl_dst);               \
        memcpy(_cl_cursor, (packet), _cl_len);                           \
        cl_advance(&_cl_cursor, _cl_len);                                \
        cl_end(_cl_dst, _cl_cursor);                                     \
} while (0)
250b8e80941Smrg
/* Convenience form of cl_emit_prepacked_sized() for when `packet` points to
 * an object of known size: the byte count is sizeof(*(packet)).
 */
#define cl_emit_prepacked(cl, packet)                                    \
        cl_emit_prepacked_sized(cl, packet, sizeof(*(packet)))
253b8e80941Smrg
/* Packs one packet into the caller-supplied buffer `packed` instead of into
 * a CL.  Usage mirrors cl_emit(): `name` is a temporary unpacked struct,
 * pre-filled with the packet's header defaults, that the following statement
 * may modify; packing happens once that statement has run.  The pack
 * function receives NULL as its user-data argument.
 *
 * `packed` is evaluated once and parenthesized before the byte-pointer cast,
 * so an argument such as `buf + 1` is offset in units of buf's own element
 * type rather than in bytes.
 */
#define v3dx_pack(packed, packet, name)                                  \
        for (struct cl_packet_struct(packet) name = {                    \
                     cl_packet_header(packet)                            \
             },                                                          \
             *_loop_terminate = &name;                                   \
             __builtin_expect(_loop_terminate != NULL, 1);               \
             ({                                                          \
                     uint8_t *_v3dx_dst = (uint8_t *)(packed);           \
                     cl_packet_pack(packet)(NULL, _v3dx_dst, &name);     \
                     VG(VALGRIND_CHECK_MEM_IS_DEFINED(_v3dx_dst,         \
                                                      cl_packet_length(packet))); \
                     _loop_terminate = NULL;                             \
             }))

267b8e80941Smrg/**
268b8e80941Smrg * Helper function called by the XML-generated pack functions for filling in
269b8e80941Smrg * an address field in shader records.
270b8e80941Smrg *
271b8e80941Smrg * Since we have a private address space as of VC5, our BOs can have lifelong
272b8e80941Smrg * offsets, and all the kernel needs to know is which BOs need to be paged in
273b8e80941Smrg * for this exec.
274b8e80941Smrg */
275b8e80941Smrgstatic inline void
276b8e80941Smrgcl_pack_emit_reloc(struct v3d_cl *cl, const struct v3d_cl_reloc *reloc)
277b8e80941Smrg{
278b8e80941Smrg        if (reloc->bo)
279b8e80941Smrg                v3d_job_add_bo(cl->job, reloc->bo);
280b8e80941Smrg}
281b8e80941Smrg
282b8e80941Smrg#endif /* VC5_CL_H */
283