1b8e80941Smrg/*
2b8e80941Smrg * © Copyright 2018 Alyssa Rosenzweig
3b8e80941Smrg *
4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5b8e80941Smrg * copy of this software and associated documentation files (the "Software"),
6b8e80941Smrg * to deal in the Software without restriction, including without limitation
7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the
9b8e80941Smrg * Software is furnished to do so, subject to the following conditions:
10b8e80941Smrg *
11b8e80941Smrg * The above copyright notice and this permission notice (including the next
12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the
13b8e80941Smrg * Software.
14b8e80941Smrg *
15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20b8e80941Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21b8e80941Smrg * SOFTWARE.
22b8e80941Smrg *
23b8e80941Smrg */
24b8e80941Smrg
25b8e80941Smrg#ifndef __BUILDER_H__
26b8e80941Smrg#define __BUILDER_H__
27b8e80941Smrg
28b8e80941Smrg#define _LARGEFILE64_SOURCE 1
29b8e80941Smrg#define CACHE_LINE_SIZE 1024 /* TODO */
30b8e80941Smrg#include <sys/mman.h>
31b8e80941Smrg#include <assert.h>
32b8e80941Smrg#include "pan_resource.h"
33b8e80941Smrg#include "pan_job.h"
34b8e80941Smrg
35b8e80941Smrg#include "pipe/p_compiler.h"
36b8e80941Smrg#include "pipe/p_config.h"
37b8e80941Smrg#include "pipe/p_context.h"
38b8e80941Smrg#include "pipe/p_defines.h"
39b8e80941Smrg#include "pipe/p_format.h"
40b8e80941Smrg#include "pipe/p_screen.h"
41b8e80941Smrg#include "pipe/p_state.h"
42b8e80941Smrg#include "util/u_blitter.h"
43b8e80941Smrg#include "util/hash_table.h"
44b8e80941Smrg
45b8e80941Smrg#include "midgard/midgard_compile.h"
46b8e80941Smrg
/* Forward declare to avoid an extra header dependency. The tag has to match
 * the type of the `primconvert` member of struct panfrost_context, which is
 * `struct primconvert_context` (no underscore between "prim" and "convert");
 * a differently spelled tag would declare an unrelated type that nothing in
 * this header uses. */
struct primconvert_context;
49b8e80941Smrg
/* Capacity of the vertex_jobs/tiler_jobs arrays (and their u_* counterparts)
 * in struct panfrost_context. */
50b8e80941Smrg#define MAX_DRAW_CALLS 4096
/* NOTE(review): not referenced anywhere in this header; presumably an upper
 * bound on varyings used by the draw code -- confirm against its users. */
51b8e80941Smrg#define MAX_VARYINGS   4096
52b8e80941Smrg
/* Dirty-state bit flags.
 * NOTE(review): presumably OR'd into panfrost_context::dirty -- confirm
 * against the users of these macros.
 * Bits 0 (CLEAR) and 7 (VIEWPORT) are commented out below and bit 1 is not
 * assigned to any flag. */
53b8e80941Smrg//#define PAN_DIRTY_CLEAR	     (1 << 0)
54b8e80941Smrg#define PAN_DIRTY_RASTERIZER (1 << 2)
55b8e80941Smrg#define PAN_DIRTY_FS	     (1 << 3)
/* Alias: expands to PAN_DIRTY_FS, so both names denote the same bit */
56b8e80941Smrg#define PAN_DIRTY_FRAG_CORE  (PAN_DIRTY_FS) /* Dirty writes are tied */
57b8e80941Smrg#define PAN_DIRTY_VS	     (1 << 4)
58b8e80941Smrg#define PAN_DIRTY_VERTEX     (1 << 5)
59b8e80941Smrg#define PAN_DIRTY_VERT_BUF   (1 << 6)
60b8e80941Smrg//#define PAN_DIRTY_VIEWPORT   (1 << 7)
61b8e80941Smrg#define PAN_DIRTY_SAMPLERS   (1 << 8)
62b8e80941Smrg#define PAN_DIRTY_TEXTURES   (1 << 9)
63b8e80941Smrg
/* Set the bits of `bit` in `lval` when `cond` is true, clear them otherwise.
 *
 *   lval  modifiable lvalue holding the flags; only one branch runs, so it
 *         is evaluated once per use
 *   bit   mask of the bit(s) to set or clear
 *   cond  condition selecting set (non-zero) or clear (zero)
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement: it can be used as the unbraced body of an if/else
 * without capturing the caller's `else`. The caller supplies the trailing
 * semicolon. */
#define SET_BIT(lval, bit, cond) \
        do { \
                if (cond) \
                        (lval) |= (bit); \
                else \
                        (lval) &= ~(bit); \
        } while (0)
69b8e80941Smrg
/* State for one constant buffer; struct panfrost_context holds an array of
 * these indexed up to PIPE_SHADER_TYPES. */
70b8e80941Smrgstruct panfrost_constant_buffer {
        /* NOTE(review): presumably set when the contents changed and need to
         * be re-emitted -- confirm against the code that reads it */
71b8e80941Smrg        bool dirty;
        /* NOTE(review): presumably the size of `buffer` in bytes -- confirm */
72b8e80941Smrg        size_t size;
        /* CPU pointer to the data; ownership is not visible from this header */
73b8e80941Smrg        void *buffer;
74b8e80941Smrg};
75b8e80941Smrg
/* Driver-side query object: the Gallium query parameters plus the memory
 * that receives the result. */
76b8e80941Smrgstruct panfrost_query {
77b8e80941Smrg        /* Passthrough from Gallium */
78b8e80941Smrg        unsigned type;
79b8e80941Smrg        unsigned index;
80b8e80941Smrg
81b8e80941Smrg        /* Memory for the GPU to write back the value of the query */
82b8e80941Smrg        struct panfrost_transfer transfer;
83b8e80941Smrg};
84b8e80941Smrg
/* Fence object: a Gallium reference count paired with a file descriptor. */
85b8e80941Smrgstruct panfrost_fence {
86b8e80941Smrg        struct pipe_reference reference;
        /* NOTE(review): presumably a sync/fence file descriptor owned by this
         * object -- confirm against the fence create/destroy code */
87b8e80941Smrg        int fd;
88b8e80941Smrg};
89b8e80941Smrg
/* Capacity of panfrost_transient_pool::entries */
90b8e80941Smrg#define PANFROST_MAX_TRANSIENT_ENTRIES 64
91b8e80941Smrg
/* A pool of equally sized memory entries together with a write cursor
 * (entry_index, entry_offset) into it. */
92b8e80941Smrgstruct panfrost_transient_pool {
93b8e80941Smrg        /* Memory blocks in the pool */
94b8e80941Smrg        struct panfrost_memory_entry *entries[PANFROST_MAX_TRANSIENT_ENTRIES];
95b8e80941Smrg
96b8e80941Smrg        /* Number of entries we own */
97b8e80941Smrg        unsigned entry_count;
98b8e80941Smrg
99b8e80941Smrg        /* Current entry that we are writing to, zero-indexed, strictly less than entry_count */
100b8e80941Smrg        unsigned entry_index;
101b8e80941Smrg
102b8e80941Smrg        /* Number of bytes into the current entry we are */
103b8e80941Smrg        off_t entry_offset;
104b8e80941Smrg
105b8e80941Smrg        /* Entry size (all entries must be homogeneous) */
106b8e80941Smrg        size_t entry_size;
107b8e80941Smrg};
108b8e80941Smrg
/* Driver context. The Gallium pipe_context is embedded as the FIRST member
 * so that pan_context() can convert a struct pipe_context pointer to a
 * struct panfrost_context pointer with a plain cast; do not move `base`. */
109b8e80941Smrgstruct panfrost_context {
110b8e80941Smrg        /* Gallium context */
111b8e80941Smrg        struct pipe_context base;
112b8e80941Smrg
113b8e80941Smrg        /* Bound job and map of panfrost_job_key to jobs */
114b8e80941Smrg        struct panfrost_job *job;
115b8e80941Smrg        struct hash_table *jobs;
116b8e80941Smrg
117b8e80941Smrg        /* panfrost_resource -> panfrost_job */
118b8e80941Smrg        struct hash_table *write_jobs;
119b8e80941Smrg
120b8e80941Smrg        /* Bit mask for supported PIPE_DRAW for this hardware */
121b8e80941Smrg        unsigned draw_modes;
122b8e80941Smrg
123b8e80941Smrg        struct pipe_framebuffer_state pipe_framebuffer;
124b8e80941Smrg
125b8e80941Smrg        /* The number of concurrent FBOs allowed depends on the number of pools
126b8e80941Smrg         * used; pools are ringed for parallelism opportunities */
127b8e80941Smrg
128b8e80941Smrg        struct panfrost_transient_pool transient_pools[2];
        /* NOTE(review): presumably the index of the transient pool currently
         * in use -- confirm against the code that advances it */
129b8e80941Smrg        int cmdstream_i;
130b8e80941Smrg
        /* Memory regions, one per purpose. NOTE(review): the purposes are
         * only visible through the member names here -- see the allocation
         * code for sizes and usage */
131b8e80941Smrg        struct panfrost_memory cmdstream_persistent;
132b8e80941Smrg        struct panfrost_memory shaders;
133b8e80941Smrg        struct panfrost_memory scratchpad;
134b8e80941Smrg        struct panfrost_memory tiler_heap;
135b8e80941Smrg        struct panfrost_memory varying_mem;
136b8e80941Smrg        struct panfrost_memory misc_0;
137b8e80941Smrg        struct panfrost_memory misc_1;
138b8e80941Smrg        struct panfrost_memory depth_stencil_buffer;
139b8e80941Smrg
        /* NOTE(review): presumably the currently active occlusion query, or
         * NULL when none is active -- confirm */
140b8e80941Smrg        struct panfrost_query *occlusion_query;
141b8e80941Smrg
142b8e80941Smrg        /* Each draw has corresponding vertex and tiler payloads */
143b8e80941Smrg        struct midgard_payload_vertex_tiler payload_vertex;
144b8e80941Smrg        struct midgard_payload_vertex_tiler payload_tiler;
145b8e80941Smrg
146b8e80941Smrg        /* The fragment shader binary itself is pointed here (for the tripipe) but
147b8e80941Smrg         * also everything else in the shader core, including blending, the
148b8e80941Smrg         * stencil/depth tests, etc. Refer to the presentations. */
149b8e80941Smrg
150b8e80941Smrg        struct mali_shader_meta fragment_shader_core;
151b8e80941Smrg
152b8e80941Smrg        /* A frame is composed of a starting set value job, a number of vertex
153b8e80941Smrg         * and tiler jobs, linked to the fragment job at the end. See the
154b8e80941Smrg         * presentations for more information how this works */
155b8e80941Smrg
156b8e80941Smrg        unsigned draw_count;
157b8e80941Smrg
        /* Jobs as mali_ptr values; the vertex/tiler arrays hold at most
         * MAX_DRAW_CALLS entries each */
158b8e80941Smrg        mali_ptr set_value_job;
159b8e80941Smrg        mali_ptr vertex_jobs[MAX_DRAW_CALLS];
160b8e80941Smrg        mali_ptr tiler_jobs[MAX_DRAW_CALLS];
161b8e80941Smrg
        /* Pointers to the job descriptor headers of the same jobs.
         * NOTE(review): the u_ prefix presumably marks the CPU-side mapping
         * of the mali_ptr above -- confirm */
162b8e80941Smrg        struct mali_job_descriptor_header *u_set_value_job;
163b8e80941Smrg        struct mali_job_descriptor_header *u_vertex_jobs[MAX_DRAW_CALLS];
164b8e80941Smrg        struct mali_job_descriptor_header *u_tiler_jobs[MAX_DRAW_CALLS];
165b8e80941Smrg
166b8e80941Smrg        unsigned vertex_job_count;
167b8e80941Smrg        unsigned tiler_job_count;
168b8e80941Smrg
169b8e80941Smrg        /* Per-draw dirty flags are set up like any other driver */
170b8e80941Smrg        int dirty;
171b8e80941Smrg
172b8e80941Smrg        unsigned vertex_count;
173b8e80941Smrg
174b8e80941Smrg        union mali_attr attributes[PIPE_MAX_ATTRIBS];
175b8e80941Smrg
176b8e80941Smrg        unsigned varying_height;
177b8e80941Smrg
        /* Framebuffer descriptors in both formats; see require_sfbd below
         * for which one is used */
178b8e80941Smrg        struct mali_single_framebuffer vt_framebuffer_sfbd;
179b8e80941Smrg        struct bifrost_framebuffer vt_framebuffer_mfbd;
180b8e80941Smrg
181b8e80941Smrg        /* TODO: Multiple uniform buffers (index =/= 0), finer updates? */
182b8e80941Smrg
183b8e80941Smrg        struct panfrost_constant_buffer constant_buffer[PIPE_SHADER_TYPES];
184b8e80941Smrg
185b8e80941Smrg        /* CSOs */
186b8e80941Smrg        struct panfrost_rasterizer *rasterizer;
187b8e80941Smrg
188b8e80941Smrg        struct panfrost_shader_variants *vs;
189b8e80941Smrg        struct panfrost_shader_variants *fs;
190b8e80941Smrg
191b8e80941Smrg        struct panfrost_vertex_state *vertex;
192b8e80941Smrg
193b8e80941Smrg        struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS];
        /* NOTE(review): presumably a bitmask of the valid slots in
         * vertex_buffers -- confirm */
194b8e80941Smrg        uint32_t vb_mask;
195b8e80941Smrg
        /* Bound samplers and sampler views: first index up to
         * PIPE_SHADER_TYPES, with a matching count array for each */
196b8e80941Smrg        struct panfrost_sampler_state *samplers[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
197b8e80941Smrg        unsigned sampler_count[PIPE_SHADER_TYPES];
198b8e80941Smrg
199b8e80941Smrg        struct panfrost_sampler_view *sampler_views[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_SAMPLER_VIEWS];
200b8e80941Smrg        unsigned sampler_view_count[PIPE_SHADER_TYPES];
201b8e80941Smrg
202b8e80941Smrg        struct primconvert_context *primconvert;
203b8e80941Smrg        struct blitter_context *blitter;
204b8e80941Smrg
205b8e80941Smrg        struct panfrost_blend_state *blend;
206b8e80941Smrg
207b8e80941Smrg        struct pipe_viewport_state pipe_viewport;
208b8e80941Smrg        struct pipe_scissor_state scissor;
209b8e80941Smrg        struct pipe_blend_color blend_color;
210b8e80941Smrg        struct pipe_depth_stencil_alpha_state *depth_stencil;
211b8e80941Smrg        struct pipe_stencil_ref stencil_ref;
212b8e80941Smrg
213b8e80941Smrg        /* True for t6xx, false for t8xx. */
214b8e80941Smrg        bool is_t6xx;
215b8e80941Smrg
216b8e80941Smrg        /* If set, we'll require the use of single render-target framebuffer
217b8e80941Smrg         * descriptors (SFBD), for older hardware -- specifically, <T760 hardware. If
218b8e80941Smrg         * false, we'll use the MFBD no matter what. New hardware -does- retain support
219b8e80941Smrg         * for SFBD, and in theory we could flip between them on a per-RT basis, but
220b8e80941Smrg         * there's no real advantage to doing so */
221b8e80941Smrg        bool require_sfbd;
222b8e80941Smrg
        /* NOTE(review): presumably a synchronisation object handle associated
         * with submitted work -- confirm against the submit path */
223b8e80941Smrg	uint32_t out_sync;
224b8e80941Smrg};
225b8e80941Smrg
226b8e80941Smrg/* Corresponds to the CSO: the Gallium rasterizer state plus a driver-side
 * bitmask */
227b8e80941Smrg
228b8e80941Smrgstruct panfrost_rasterizer {
229b8e80941Smrg        struct pipe_rasterizer_state base;
230b8e80941Smrg
231b8e80941Smrg        /* Bitmask of front face, etc */
232b8e80941Smrg        unsigned tiler_gl_enables;
233b8e80941Smrg};
234b8e80941Smrg
/* Blend CSO: the Gallium blend state plus its compiled form, which is either
 * a fixed-function equation or a blend shader (see has_blend_shader). */
235b8e80941Smrgstruct panfrost_blend_state {
236b8e80941Smrg        struct pipe_blend_state base;
237b8e80941Smrg
238b8e80941Smrg        /* Whether a blend shader is in use */
239b8e80941Smrg        bool has_blend_shader;
240b8e80941Smrg
241b8e80941Smrg        /* Compiled fixed function command */
242b8e80941Smrg        struct mali_blend_equation equation;
243b8e80941Smrg
244b8e80941Smrg        /* Compiled blend shader */
245b8e80941Smrg        mali_ptr blend_shader;
        /* NOTE(review): presumably the work register count of the blend
         * shader -- confirm against the blend shader compile path */
246b8e80941Smrg        int blend_work_count;
247b8e80941Smrg};
248b8e80941Smrg
249b8e80941Smrg/* Variants bundle together to form the backing CSO, bundling multiple
250b8e80941Smrg * shaders with varying emulated features baked in (alpha test
251b8e80941Smrg * parameters, etc). This is the capacity of
 * panfrost_shader_variants::variants. */
252b8e80941Smrg#define MAX_SHADER_VARIANTS 8
253b8e80941Smrg
254b8e80941Smrg/* A shader state corresponds to the actual, current variant of the shader */
255b8e80941Smrgstruct panfrost_shader_state {
        /* Note: a pointer, unlike the by-value `base` of
         * struct panfrost_shader_variants */
256b8e80941Smrg        struct pipe_shader_state *base;
257b8e80941Smrg
258b8e80941Smrg        /* Compiled, mapped descriptor, ready for the hardware */
259b8e80941Smrg        bool compiled;
260b8e80941Smrg        struct mali_shader_meta *tripipe;
261b8e80941Smrg        mali_ptr tripipe_gpu;
262b8e80941Smrg
263b8e80941Smrg        /* Miscellaneous shader properties */
264b8e80941Smrg        int uniform_count;
265b8e80941Smrg        bool can_discard;
266b8e80941Smrg        bool writes_point_size;
267b8e80941Smrg        bool reads_point_coord;
268b8e80941Smrg
269b8e80941Smrg        unsigned general_varying_stride;
270b8e80941Smrg        struct mali_attr_meta varyings[PIPE_MAX_ATTRIBS];
271b8e80941Smrg
        /* NOTE(review): sysval_count is presumably the number of valid
         * entries in sysval[] -- confirm */
272b8e80941Smrg        unsigned sysval_count;
273b8e80941Smrg        unsigned sysval[MAX_SYSVAL_COUNT];
274b8e80941Smrg
275b8e80941Smrg        /* Information on this particular shader variant */
276b8e80941Smrg        struct pipe_alpha_state alpha_state;
277b8e80941Smrg};
278b8e80941Smrg
279b8e80941Smrg/* A collection of variants (the CSO) */
280b8e80941Smrgstruct panfrost_shader_variants {
281b8e80941Smrg        struct pipe_shader_state base;
282b8e80941Smrg
        /* Storage for up to MAX_SHADER_VARIANTS variants.
         * NOTE(review): variant_count is presumably the number of slots in
         * use -- confirm */
283b8e80941Smrg        struct panfrost_shader_state variants[MAX_SHADER_VARIANTS];
284b8e80941Smrg        unsigned variant_count;
285b8e80941Smrg
286b8e80941Smrg        /* The current active variant */
287b8e80941Smrg        unsigned active_variant;
288b8e80941Smrg};
289b8e80941Smrg
/* Vertex element state: the Gallium vertex elements (pipe) alongside the
 * hardware attribute descriptors (hw), both sized PIPE_MAX_ATTRIBS. */
290b8e80941Smrgstruct panfrost_vertex_state {
        /* NOTE(review): presumably the number of valid entries in pipe[] and
         * hw[] -- confirm */
291b8e80941Smrg        unsigned num_elements;
292b8e80941Smrg
293b8e80941Smrg        struct pipe_vertex_element pipe[PIPE_MAX_ATTRIBS];
294b8e80941Smrg        struct mali_attr_meta hw[PIPE_MAX_ATTRIBS];
295b8e80941Smrg};
296b8e80941Smrg
/* Sampler CSO: the Gallium sampler state (base) paired with the hardware
 * sampler descriptor (hw). */
297b8e80941Smrgstruct panfrost_sampler_state {
298b8e80941Smrg        struct pipe_sampler_state base;
299b8e80941Smrg        struct mali_sampler_descriptor hw;
300b8e80941Smrg};
301b8e80941Smrg
302b8e80941Smrg/* Misnomer: Sampler view corresponds to textures, not samplers. Pairs the
 * Gallium sampler view (base) with the hardware texture descriptor (hw). */
303b8e80941Smrg
304b8e80941Smrgstruct panfrost_sampler_view {
305b8e80941Smrg        struct pipe_sampler_view base;
306b8e80941Smrg        struct mali_texture_descriptor hw;
307b8e80941Smrg};
308b8e80941Smrg
/* Convert a Gallium context pointer into the driver context that embeds it.
 * This is a plain pointer cast with no checking; it relies on `base` being
 * the first member of struct panfrost_context. */
static inline struct panfrost_context *
pan_context(struct pipe_context *pcontext)
{
        struct panfrost_context *ctx = (struct panfrost_context *) pcontext;

        return ctx;
}
314b8e80941Smrg
/* Convert a Gallium screen pointer into the driver screen type. This is a
 * plain pointer cast with no checking.
 * NOTE(review): struct panfrost_screen is defined outside this header; the
 * cast assumes it embeds struct pipe_screen as its first member -- confirm. */
static inline struct panfrost_screen *
pan_screen(struct pipe_screen *p)
{
        struct panfrost_screen *screen = (struct panfrost_screen *) p;

        return screen;
}
320b8e80941Smrg
/* Entry points declared here and implemented elsewhere in the driver. Their
 * behaviour is not visible from this header; the notes below are hedged
 * accordingly. */

/* Create a context for `screen`.
 * NOTE(review): the result is presumably the `base` member of a newly
 * allocated struct panfrost_context -- confirm in the implementation. */
321b8e80941Smrgstruct pipe_context *
322b8e80941Smrgpanfrost_create_context(struct pipe_screen *screen, void *priv, unsigned flags);
323b8e80941Smrg
324b8e80941Smrgvoid
325b8e80941Smrgpanfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data);
326b8e80941Smrg
/* Returns a struct panfrost_transfer by value; the two flags select the job
 * kind. NOTE(review): exact meaning of is_elided_tiler not visible here. */
327b8e80941Smrgstruct panfrost_transfer
328b8e80941Smrgpanfrost_vertex_tiler_job(struct panfrost_context *ctx, bool is_tiler, bool is_elided_tiler);
329b8e80941Smrg
330b8e80941Smrgunsigned
331b8e80941Smrgpanfrost_get_default_swizzle(unsigned components);
332b8e80941Smrg
/* NOTE(review): the parameter list looks like Gallium's pipe_context flush
 * hook (context, fence out-parameter, flags) -- confirm where it is
 * installed. */
333b8e80941Smrgvoid
334b8e80941Smrgpanfrost_flush(
335b8e80941Smrg        struct pipe_context *pipe,
336b8e80941Smrg        struct pipe_fence_handle **fence,
337b8e80941Smrg        unsigned flags);
338b8e80941Smrg
339b8e80941Smrgbool
340b8e80941Smrgpanfrost_is_scanout(struct panfrost_context *ctx);
341b8e80941Smrg
/* SFBD and MFBD flavours of the same operations; compare
 * panfrost_context::require_sfbd. Each *_fragment function returns a
 * mali_ptr; each panfrost_emit_* function returns a framebuffer descriptor
 * by value. */
342b8e80941Smrgmali_ptr
343b8e80941Smrgpanfrost_sfbd_fragment(struct panfrost_context *ctx, bool flip_y);
344b8e80941Smrg
345b8e80941Smrgmali_ptr
346b8e80941Smrgpanfrost_mfbd_fragment(struct panfrost_context *ctx, bool flip_y);
347b8e80941Smrg
348b8e80941Smrgstruct bifrost_framebuffer
349b8e80941Smrgpanfrost_emit_mfbd(struct panfrost_context *ctx);
350b8e80941Smrg
351b8e80941Smrgstruct mali_single_framebuffer
352b8e80941Smrgpanfrost_emit_sfbd(struct panfrost_context *ctx);
353b8e80941Smrg
354b8e80941Smrgmali_ptr
355b8e80941Smrgpanfrost_fragment_job(struct panfrost_context *ctx);
356b8e80941Smrg
/* NOTE(review): presumably compiles `src` and fills in `meta` and `state`;
 * the encoding of `type` is not visible here -- confirm in the
 * implementation. */
357b8e80941Smrgvoid
358b8e80941Smrgpanfrost_shader_compile(struct panfrost_context *ctx, struct mali_shader_meta *meta, const char *src, int type, struct panfrost_shader_state *state);
359b8e80941Smrg
360b8e80941Smrg#endif
361