/*
 * Copyright © 2014 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef VC4_CL_H
#define VC4_CL_H

#include <assert.h>
#include <stdint.h>
#include <string.h>

#include "util/u_math.h"
#include "util/macros.h"

/* Forward declarations; this header only handles these types by pointer. */
struct vc4_bo;
struct vc4_job;
struct vc4_cl;

/**
 * Undefined structure, used for typechecking that you're passing the pointers
 * to these functions correctly.
 */
struct vc4_cl_out;

/** A reference to a BO used in the CL packing functions */
struct vc4_cl_reloc {
        /** Buffer object being referenced. */
        struct vc4_bo *bo;
        /** Offset associated with the reference (see cl_address()). */
        uint32_t offset;
};

/*
 * Declared ahead of its definition so that __gen_emit_reloc can name it
 * before the generated pack header is included.
 */
static inline void cl_pack_emit_reloc(struct vc4_cl *cl,
                                      const struct vc4_cl_reloc *reloc);

50b8e80941Smrg#define __gen_user_data struct vc4_cl
51b8e80941Smrg#define __gen_address_type struct vc4_cl_reloc
52b8e80941Smrg#define __gen_address_offset(reloc) ((reloc)->offset)
53b8e80941Smrg#define __gen_emit_reloc cl_pack_emit_reloc
54b8e80941Smrg
55b8e80941Smrg#include "kernel/vc4_packet.h"
56b8e80941Smrg#include "broadcom/cle/v3d_packet_v21_pack.h"
57848b8605Smrg
/** State of one command list being written. */
struct vc4_cl {
        /** Start of the buffer; cl_offset() is measured from here. */
        void *base;
        /** Job handed to vc4_gem_hindex() by cl_pack_emit_reloc(). */
        struct vc4_job *job;
        /** Current write position (see cl_start() / cl_end()). */
        struct vc4_cl_out *next;
        /** Where the next relocation's handle index will be stored. */
        struct vc4_cl_out *reloc_next;
        /** Upper bound that cl_end() asserts cl_offset() against. */
        uint32_t size;
#ifndef NDEBUG
        /**
         * Debug builds only: relocations reserved by cl_start_shader_reloc()
         * that have not been emitted yet.
         */
        uint32_t reloc_count;
#endif
};

/* Defined outside this header. */
void vc4_init_cl(struct vc4_job *job, struct vc4_cl *cl);
void vc4_reset_cl(struct vc4_cl *cl);

/*
 * Returns the 32-bit value that the relocation helpers in this header store
 * in front of a record for the given BO.
 */
uint32_t vc4_gem_hindex(struct vc4_job *job, struct vc4_bo *bo);

73b8e80941Smrgstruct PACKED unaligned_16 { uint16_t x; };
74b8e80941Smrgstruct PACKED unaligned_32 { uint32_t x; };
75b8e80941Smrg
76b8e80941Smrgstatic inline uint32_t cl_offset(struct vc4_cl *cl)
77b8e80941Smrg{
78b8e80941Smrg        return (char *)cl->next - (char *)cl->base;
79b8e80941Smrg}
80848b8605Smrg
/**
 * Moves the write cursor *cl forward by n bytes.
 *
 * struct vc4_cl_out is never defined, so the arithmetic is done on a
 * char pointer.
 */
static inline void
cl_advance(struct vc4_cl_out **cl, uint32_t n)
{
        *cl = (struct vc4_cl_out *)((char *)*cl + n);
}

87b8e80941Smrgstatic inline struct vc4_cl_out *
88b8e80941Smrgcl_start(struct vc4_cl *cl)
89b8e80941Smrg{
90b8e80941Smrg        return cl->next;
91848b8605Smrg}
92848b8605Smrg
93848b8605Smrgstatic inline void
94b8e80941Smrgcl_end(struct vc4_cl *cl, struct vc4_cl_out *next)
95848b8605Smrg{
96b8e80941Smrg        cl->next = next;
97b8e80941Smrg        assert(cl_offset(cl) <= cl->size);
98b8e80941Smrg}
99848b8605Smrg
100b8e80941Smrg
/**
 * Stores a 32-bit value at ptr in host byte order; ptr needs no particular
 * alignment.
 *
 * memcpy() is used instead of a store through a cast pointer so that the
 * access is well defined regardless of alignment and does not rely on a
 * compiler-specific packing attribute.
 */
static inline void
put_unaligned_32(struct vc4_cl_out *ptr, uint32_t val)
{
        memcpy(ptr, &val, sizeof(val));
}

/**
 * Stores a 16-bit value at ptr in host byte order; ptr needs no particular
 * alignment.
 *
 * memcpy() is used instead of a store through a cast pointer so that the
 * access is well defined regardless of alignment and does not rely on a
 * compiler-specific packing attribute.
 */
static inline void
put_unaligned_16(struct vc4_cl_out *ptr, uint16_t val)
{
        memcpy(ptr, &val, sizeof(val));
}

/** Writes one byte at the cursor and advances the cursor past it. */
static inline void
cl_u8(struct vc4_cl_out **cl, uint8_t n)
{
        uint8_t *dst = (uint8_t *)*cl;

        *dst = n;
        cl_advance(cl, 1);
}

/**
 * Writes a 16-bit value at the cursor and advances the cursor by two bytes.
 * The cursor does not need to be 2-byte aligned.
 */
static inline void
cl_u16(struct vc4_cl_out **cl, uint16_t n)
{
        put_unaligned_16(*cl, n);
        cl_advance(cl, 2);
}

/**
 * Writes a 32-bit value at the cursor and advances the cursor by four bytes.
 * The cursor does not need to be 4-byte aligned; see cl_aligned_u32() for
 * the variant that stores through a plain uint32_t pointer.
 */
static inline void
cl_u32(struct vc4_cl_out **cl, uint32_t n)
{
        put_unaligned_32(*cl, n);
        cl_advance(cl, 4);
}

/**
 * Writes a 32-bit value at the cursor with a direct uint32_t store and
 * advances the cursor by four bytes.
 *
 * The caller is responsible for the cursor being suitably aligned for a
 * uint32_t access; use cl_u32() when that is not guaranteed.
 */
static inline void
cl_aligned_u32(struct vc4_cl_out **cl, uint32_t n)
{
        uint32_t *dst = (uint32_t *)*cl;

        *dst = n;
        cl_advance(cl, 4);
}

/**
 * Writes a host pointer at the cursor with a direct pointer-sized store and
 * advances the cursor by sizeof(void *).
 *
 * The caller is responsible for the cursor being suitably aligned for a
 * pointer access.
 */
static inline void
cl_ptr(struct vc4_cl_out **cl, void *ptr)
{
        struct vc4_cl_out **dst = (struct vc4_cl_out **)*cl;

        *dst = ptr;
        cl_advance(cl, sizeof(void *));
}

/**
 * Writes a float at the cursor as the 32-bit value produced by fui() and
 * advances the cursor by four bytes.  The cursor may be unaligned.
 * NOTE(review): fui() presumably yields the float's raw bit pattern;
 * confirm in util/u_math.h.
 */
static inline void
cl_f(struct vc4_cl_out **cl, float f)
{
        cl_u32(cl, fui(f));
}

/**
 * Same as cl_f(), but stores through cl_aligned_u32(), so the cursor must be
 * suitably aligned for a uint32_t access.
 */
static inline void
cl_aligned_f(struct vc4_cl_out **cl, float f)
{
        cl_aligned_u32(cl, fui(f));
}

162b8e80941Smrgstatic inline struct vc4_cl_out *
163848b8605Smrgcl_start_shader_reloc(struct vc4_cl *cl, uint32_t n)
164848b8605Smrg{
165848b8605Smrg        assert(cl->reloc_count == 0);
166b8e80941Smrg#ifndef NDEBUG
167848b8605Smrg        cl->reloc_count = n;
168b8e80941Smrg#endif
169b8e80941Smrg        cl->reloc_next = cl->next;
170b8e80941Smrg
171b8e80941Smrg        /* Reserve the space where hindex will be written. */
172b8e80941Smrg        cl_advance(&cl->next, n * 4);
173848b8605Smrg
174b8e80941Smrg        return cl->next;
175848b8605Smrg}
176848b8605Smrg
177848b8605Smrgstatic inline void
178b8e80941Smrgcl_reloc(struct vc4_job *job, struct vc4_cl *cl, struct vc4_cl_out **cl_out,
179848b8605Smrg         struct vc4_bo *bo, uint32_t offset)
180848b8605Smrg{
181b8e80941Smrg        *(uint32_t *)cl->reloc_next = vc4_gem_hindex(job, bo);
182b8e80941Smrg        cl_advance(&cl->reloc_next, 4);
183b8e80941Smrg
184b8e80941Smrg#ifndef NDEBUG
185b8e80941Smrg        cl->reloc_count--;
186b8e80941Smrg#endif
187b8e80941Smrg
188b8e80941Smrg        cl_u32(cl_out, offset);
189b8e80941Smrg}
190b8e80941Smrg
191b8e80941Smrgstatic inline void
192b8e80941Smrgcl_aligned_reloc(struct vc4_job *job, struct vc4_cl *cl,
193b8e80941Smrg                 struct vc4_cl_out **cl_out,
194b8e80941Smrg                 struct vc4_bo *bo, uint32_t offset)
195b8e80941Smrg{
196b8e80941Smrg        *(uint32_t *)cl->reloc_next = vc4_gem_hindex(job, bo);
197b8e80941Smrg        cl_advance(&cl->reloc_next, 4);
198848b8605Smrg
199b8e80941Smrg#ifndef NDEBUG
200848b8605Smrg        cl->reloc_count--;
201b8e80941Smrg#endif
202b8e80941Smrg
203b8e80941Smrg        cl_aligned_u32(cl_out, offset);
204b8e80941Smrg}
205b8e80941Smrg
206b8e80941Smrg/**
207b8e80941Smrg * Reference to a BO with its associated offset, used in the pack process.
208b8e80941Smrg */
209b8e80941Smrgstatic inline struct vc4_cl_reloc
210b8e80941Smrgcl_address(struct vc4_bo *bo, uint32_t offset)
211b8e80941Smrg{
212b8e80941Smrg        struct vc4_cl_reloc reloc = {
213b8e80941Smrg                .bo = bo,
214b8e80941Smrg                .offset = offset,
215b8e80941Smrg        };
216b8e80941Smrg        return reloc;
217b8e80941Smrg}
218b8e80941Smrg
/*
 * Defined outside this header.
 * NOTE(review): presumably makes sure at least size more bytes can be
 * written to the CL; confirm against the implementation.
 */
void cl_ensure_space(struct vc4_cl *cl, uint32_t size);

/*
 * Map a bare packet name (e.g. FLAT_SHADE_FLAGS) to the V3D21_-prefixed
 * identifiers for that packet: its header initializer, its length, its pack
 * function and its unpacked struct tag.
 */
#define cl_packet_header(packet) V3D21_ ## packet ## _header
#define cl_packet_length(packet) V3D21_ ## packet ## _length
#define cl_packet_pack(packet)   V3D21_ ## packet ## _pack
#define cl_packet_struct(packet) V3D21_ ## packet

/**
 * Claims size bytes at the cursor: returns the cursor's position on entry
 * and leaves the cursor advanced past the claimed bytes.
 */
static inline void *
cl_get_emit_space(struct vc4_cl_out **cl, size_t size)
{
        void *claimed = *cl;

        cl_advance(cl, size);
        return claimed;
}

/* Macro for setting up an emit of a CL struct.  A temporary unpacked struct
 * is created, which you get to set fields in of the form:
 *
 * cl_emit(bcl, FLAT_SHADE_FLAGS, flags) {
 *     .flags.flat_shade_flags = 1 << 2,
 * }
 *
 * or default values only can be emitted with just:
 *
 * cl_emit(bcl, FLAT_SHADE_FLAGS, flags);
 *
 * The trick here is that we make a for loop that will execute the body
 * (either the block or the ';' after the macro invocation) exactly once.
 * Also, *dst is actually of the wrong type, it's the
 * uint8_t[cl_packet_length()] in the CL, not a cl_packet_struct(packet).
 *
 * The loop's third expression runs after the body: it packs the struct at
 * the CL's write position, advances the CL by the packet length, and clears
 * _loop_terminate so the loop condition fails on the next check.
 *
 * The cl argument is evaluated exactly once, and the macro's internal
 * temporaries use _cl_emit_-prefixed names so they cannot capture a caller
 * variable that happens to be called cl_out.
 */
#define cl_emit(cl, packet, name)                                \
        for (struct cl_packet_struct(packet) name = {            \
                cl_packet_header(packet)                         \
        },                                                       \
        *_loop_terminate = &name;                                \
        __builtin_expect(_loop_terminate != NULL, 1);            \
        ({                                                       \
                struct vc4_cl *_cl_emit_cl = (cl);               \
                struct vc4_cl_out *_cl_emit_out =                \
                        cl_start(_cl_emit_cl);                   \
                cl_packet_pack(packet)(_cl_emit_cl,              \
                                       (uint8_t *)_cl_emit_out,  \
                                       &name);                   \
                VG(VALGRIND_CHECK_MEM_IS_DEFINED(_cl_emit_out,   \
                                                 cl_packet_length(packet))); \
                cl_advance(&_cl_emit_out, cl_packet_length(packet)); \
                cl_end(_cl_emit_cl, _cl_emit_out);               \
                _loop_terminate = NULL;                          \
        }))

/*
 * Copies an already-packed packet into the CL at its write position and
 * advances the write position by the packet's size.
 *
 * packet must be a pointer to the packed object; sizeof(*(packet)) decides
 * how many bytes are copied.  Both arguments are parenthesized so that
 * expressions such as "p + 1" are sized and copied correctly, and cl is
 * evaluated exactly once.
 */
#define cl_emit_prepacked(cl, packet) do {                       \
        struct vc4_cl *_cl_prepacked = (cl);                     \
        memcpy(_cl_prepacked->next, (packet), sizeof(*(packet))); \
        cl_advance(&_cl_prepacked->next, sizeof(*(packet)));     \
} while (0)

271b8e80941Smrg/**
272b8e80941Smrg * Helper function called by the XML-generated pack functions for filling in
273b8e80941Smrg * an address field in shader records.
274b8e80941Smrg *
275b8e80941Smrg * Relocations for shader recs and texturing involve the packet (or uniforms
276b8e80941Smrg * stream) being preceded by the handles to the BOs, and the offset within the
277b8e80941Smrg * BO being in the stream (the output of this function).
278b8e80941Smrg */
279b8e80941Smrgstatic inline void
280b8e80941Smrgcl_pack_emit_reloc(struct vc4_cl *cl, const struct vc4_cl_reloc *reloc)
281b8e80941Smrg{
282b8e80941Smrg        *(uint32_t *)cl->reloc_next = vc4_gem_hindex(cl->job, reloc->bo);
283b8e80941Smrg        cl_advance(&cl->reloc_next, 4);
284b8e80941Smrg
285b8e80941Smrg#ifndef NDEBUG
286b8e80941Smrg        cl->reloc_count--;
287b8e80941Smrg#endif
288848b8605Smrg}
289848b8605Smrg
#endif /* VC4_CL_H */