1b8e80941Smrg/*
2b8e80941Smrg * Copyright (c) 2011-2013 Luc Verhaegen <libv@skynet.be>
3b8e80941Smrg * Copyright (c) 2017-2019 Lima Project
4b8e80941Smrg *
5b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a
6b8e80941Smrg * copy of this software and associated documentation files (the "Software"),
7b8e80941Smrg * to deal in the Software without restriction, including without limitation
8b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sub license,
9b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the
10b8e80941Smrg * Software is furnished to do so, subject to the following conditions:
11b8e80941Smrg *
12b8e80941Smrg * The above copyright notice and this permission notice (including the
13b8e80941Smrg * next paragraph) shall be included in all copies or substantial portions
14b8e80941Smrg * of the Software.
15b8e80941Smrg *
16b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22b8e80941Smrg * DEALINGS IN THE SOFTWARE.
23b8e80941Smrg *
24b8e80941Smrg */
25b8e80941Smrg
26b8e80941Smrg#include "util/u_math.h"
27b8e80941Smrg#include "util/u_format.h"
28b8e80941Smrg#include "util/u_debug.h"
29b8e80941Smrg#include "util/u_half.h"
30b8e80941Smrg#include "util/u_helpers.h"
31b8e80941Smrg#include "util/u_inlines.h"
32b8e80941Smrg#include "util/u_pack_color.h"
33b8e80941Smrg#include "util/hash_table.h"
34b8e80941Smrg#include "util/u_upload_mgr.h"
35b8e80941Smrg#include "util/u_prim.h"
36b8e80941Smrg#include "util/u_vbuf.h"
37b8e80941Smrg
38b8e80941Smrg#include "lima_context.h"
39b8e80941Smrg#include "lima_screen.h"
40b8e80941Smrg#include "lima_resource.h"
41b8e80941Smrg#include "lima_program.h"
42b8e80941Smrg#include "lima_bo.h"
43b8e80941Smrg#include "lima_submit.h"
44b8e80941Smrg#include "lima_texture.h"
45b8e80941Smrg#include "lima_util.h"
46b8e80941Smrg#include "lima_fence.h"
47b8e80941Smrg
48b8e80941Smrg#include <drm-uapi/lima_drm.h>
49b8e80941Smrg
/* GP frame register block: six consecutive 32-bit words (24 bytes).
 * Going by the field names these are three start/end pairs; the byte
 * offset of every word is noted next to it. */
struct lima_gp_frame_reg {
   uint32_t vs_cmd_start;      /* 0x00 */
   uint32_t vs_cmd_end;        /* 0x04 */

   uint32_t plbu_cmd_start;    /* 0x08 */
   uint32_t plbu_cmd_end;      /* 0x0c */

   uint32_t tile_heap_start;   /* 0x10 */
   uint32_t tile_heap_end;     /* 0x14 */
};
58b8e80941Smrg
/* PP frame register block: 23 consecutive 32-bit words (92 bytes).
 * Field order is the register order, so fields must not be reordered.
 * Byte offsets are noted next to each word. */
struct lima_pp_frame_reg {
   uint32_t plbu_array_address;       /* 0x00 */
   uint32_t render_address;           /* 0x04 */
   uint32_t unused_0;                 /* 0x08 */
   uint32_t flags;                    /* 0x0c */

   /* clear values */
   uint32_t clear_value_depth;        /* 0x10 */
   uint32_t clear_value_stencil;      /* 0x14 */
   uint32_t clear_value_color;        /* 0x18 */
   uint32_t clear_value_color_1;      /* 0x1c */
   uint32_t clear_value_color_2;      /* 0x20 */
   uint32_t clear_value_color_3;      /* 0x24 */

   uint32_t width;                    /* 0x28 */
   uint32_t height;                   /* 0x2c */

   uint32_t fragment_stack_address;   /* 0x30 */
   uint32_t fragment_stack_size;      /* 0x34 */
   uint32_t unused_1;                 /* 0x38 */
   uint32_t unused_2;                 /* 0x3c */

   uint32_t one;                      /* 0x40 */
   uint32_t supersampled_height;      /* 0x44 */
   uint32_t dubya;                    /* 0x48 */
   uint32_t onscreen;                 /* 0x4c */
   uint32_t blocking;                 /* 0x50 */
   uint32_t scale;                    /* 0x54 */
   uint32_t foureight;                /* 0x58 */
};
84b8e80941Smrg
/* PP "wb" register block: 12 consecutive 32-bit words (48 bytes).
 * Field order is the register order. Byte offsets are noted next to
 * each word. */
struct lima_pp_wb_reg {
   uint32_t type;                /* 0x00 */
   uint32_t address;             /* 0x04 */
   uint32_t pixel_format;        /* 0x08 */
   uint32_t downsample_factor;   /* 0x0c */
   uint32_t pixel_layout;        /* 0x10 */
   uint32_t pitch;               /* 0x14 */
   uint32_t mrt_bits;            /* 0x18 */
   uint32_t mrt_pitch;           /* 0x1c */
   uint32_t zero;                /* 0x20 */
   uint32_t unused0;             /* 0x24 */
   uint32_t unused1;             /* 0x28 */
   uint32_t unused2;             /* 0x2c */
};
99b8e80941Smrg
/* Render state record: 16 consecutive 32-bit words (64 bytes).
 *
 * Instances are filled on the CPU and memcpy'd verbatim into GPU-visible
 * memory, so the field order and the total size are part of the layout
 * and must not change. Byte offsets are noted next to each word. */
struct lima_render_state {
   uint32_t blend_color_bg;      /* 0x00 */
   uint32_t blend_color_ra;      /* 0x04 */
   uint32_t alpha_blend;         /* 0x08 */

   uint32_t depth_test;          /* 0x0c */
   uint32_t depth_range;         /* 0x10 */

   uint32_t stencil_front;       /* 0x14 */
   uint32_t stencil_back;        /* 0x18 */
   uint32_t stencil_test;        /* 0x1c */

   uint32_t multi_sample;        /* 0x20 */

   /* shader address; users in this file OR (first word of the shader
    * & 0x1f) into the low bits */
   uint32_t shader_address;      /* 0x24 */
   uint32_t varying_types;       /* 0x28 */
   uint32_t uniforms_address;    /* 0x2c */
   uint32_t textures_address;    /* 0x30 */
   uint32_t aux0;                /* 0x34 */
   uint32_t aux1;                /* 0x38 */
   uint32_t varyings_address;    /* 0x3c */
};
118b8e80941Smrg
/* Pixel format codes.
 * NOTE(review): presumably the values written to
 * lima_pp_wb_reg.pixel_format - the users are outside this part of the
 * file, confirm there. */
#define LIMA_PIXEL_FORMAT_B8G8R8A8     0x03   /* 8-bit B, G, R, A */
#define LIMA_PIXEL_FORMAT_Z16          0x0e   /* 16-bit depth */
#define LIMA_PIXEL_FORMAT_Z24S8        0x0f   /* 24-bit depth + 8-bit stencil */
122b8e80941Smrg
/* plbu commands
 *
 * Helpers that append two-word commands to ctx->plbu_cmd_array. All of
 * them expect a variable named `ctx` (struct lima_context *) in scope.
 *
 *   PLBU_CMD_BEGIN(max)  opens a scope, asks util_dynarray_grow_cap() for
 *                        max * 4 bytes and starts counting written words.
 *   PLBU_CMD(v1, v2)     stores one command (two 32-bit words).
 *   PLBU_CMD_END()       asserts that at most `max` words were written,
 *                        advances the array size by the bytes written and
 *                        closes the scope opened by PLBU_CMD_BEGIN.
 *
 * BEGIN/END must be used as a pair inside one function. Locals declared
 * between them live in the scope opened by BEGIN and must not be called
 * i, max_n or plbu_cmd.
 *
 * Macro arguments are parenthesized, and every left shift is done on
 * uint32_t: arguments such as 16-bit bit-fields or plain ints promote to
 * signed int, and shifting a set bit into the sign position of an int is
 * undefined behavior. The stored 32-bit words are unchanged by the cast.
 */
#define PLBU_CMD_BEGIN(max) { \
   int i = 0, max_n = (max); \
   uint32_t *plbu_cmd = util_dynarray_grow_cap(&ctx->plbu_cmd_array, max_n * 4);

#define PLBU_CMD_END() \
   assert(i <= max_n); \
   ctx->plbu_cmd_array.size += i * 4; \
}

#define PLBU_CMD(v1, v2) \
   do { \
      plbu_cmd[i++] = (v1); \
      plbu_cmd[i++] = (v2); \
   } while (0)

#define PLBU_CMD_BLOCK_STEP(shift_min, shift_h, shift_w) \
   PLBU_CMD(((uint32_t)(shift_min) << 28) | ((uint32_t)(shift_h) << 16) | (shift_w), \
            0x1000010C)
#define PLBU_CMD_TILED_DIMENSIONS(tiled_w, tiled_h) \
   PLBU_CMD(((uint32_t)((tiled_w) - 1) << 24) | ((uint32_t)((tiled_h) - 1) << 8), \
            0x10000109)
#define PLBU_CMD_BLOCK_STRIDE(block_w) PLBU_CMD(block_w, 0x30000000)
#define PLBU_CMD_ARRAY_ADDRESS(gp_stream, block_num) \
   PLBU_CMD(gp_stream, 0x28000000 | ((block_num) - 1) | 1)
#define PLBU_CMD_VIEWPORT_X(v) PLBU_CMD(v, 0x10000107)
#define PLBU_CMD_VIEWPORT_W(v) PLBU_CMD(v, 0x10000108)
#define PLBU_CMD_VIEWPORT_Y(v) PLBU_CMD(v, 0x10000105)
#define PLBU_CMD_VIEWPORT_H(v) PLBU_CMD(v, 0x10000106)
#define PLBU_CMD_ARRAYS_SEMAPHORE_BEGIN() PLBU_CMD(0x00010002, 0x60000000)
#define PLBU_CMD_ARRAYS_SEMAPHORE_END() PLBU_CMD(0x00010001, 0x60000000)
#define PLBU_CMD_PRIMITIVE_SETUP(low_prim, cull, index_size) \
   PLBU_CMD(((low_prim) ? 0x00003200 : 0x00002200) | (cull) | \
            ((uint32_t)(index_size) << 9), 0x1000010B)
#define PLBU_CMD_RSW_VERTEX_ARRAY(rsw, gl_pos) \
   PLBU_CMD(rsw, 0x80000000 | ((gl_pos) >> 4))
/* word 0: low two bits of minx in bits 30-31, maxy - 1 from bit 15, miny
 * in the low bits; word 1: maxx - 1 from bit 13, minx >> 2 in the low bits */
#define PLBU_CMD_SCISSORS(minx, maxx, miny, maxy) \
   PLBU_CMD(((uint32_t)(minx) << 30) | ((uint32_t)((maxy) - 1) << 15) | (miny), \
            0x70000000 | ((uint32_t)((maxx) - 1) << 13) | ((minx) >> 2))
#define PLBU_CMD_UNKNOWN1() PLBU_CMD(0x00000000, 0x1000010A)
#define PLBU_CMD_UNKNOWN2() PLBU_CMD(0x00000200, 0x1000010B)
#define PLBU_CMD_LOW_PRIM_SIZE(v) PLBU_CMD(v, 0x1000010D)
#define PLBU_CMD_DEPTH_RANGE_NEAR(v) PLBU_CMD(v, 0x1000010E)
#define PLBU_CMD_DEPTH_RANGE_FAR(v) PLBU_CMD(v, 0x1000010F)
#define PLBU_CMD_INDEXED_DEST(gl_pos) PLBU_CMD(gl_pos, 0x10000100)
#define PLBU_CMD_INDICES(va) PLBU_CMD(va, 0x10000101)
/* count is split: its low 8 bits go to the top byte of word 0, the rest
 * (count >> 8) to the low bits of word 1 */
#define PLBU_CMD_DRAW_ARRAYS(mode, start, count) \
   PLBU_CMD(((uint32_t)(count) << 24) | (start), \
            (((mode) & 0x1F) << 16) | ((count) >> 8))
#define PLBU_CMD_DRAW_ELEMENTS(mode, start, count) \
   PLBU_CMD(((uint32_t)(count) << 24) | (start), \
            0x00200000 | (((mode) & 0x1F) << 16) | ((count) >> 8))
171b8e80941Smrg
/* vs commands
 *
 * Helpers that append two-word commands to ctx->vs_cmd_array. All of
 * them expect a variable named `ctx` (struct lima_context *) in scope.
 *
 *   VS_CMD_BEGIN(max)  opens a scope, asks util_dynarray_grow_cap() for
 *                      max * 4 bytes and starts counting written words.
 *   VS_CMD(v1, v2)     stores one command (two 32-bit words).
 *   VS_CMD_END()       asserts that at most `max` words were written,
 *                      advances the array size by the bytes written and
 *                      closes the scope opened by VS_CMD_BEGIN.
 *
 * BEGIN/END must be used as a pair inside one function. Locals declared
 * between them must not be called i, max_n or vs_cmd.
 *
 * Macro arguments are parenthesized, and every left shift is done on
 * uint32_t so that a signed int argument can never be shifted into its
 * sign bit (undefined behavior). The stored 32-bit words are unchanged.
 */
#define VS_CMD_BEGIN(max) { \
   int i = 0, max_n = (max); \
   uint32_t *vs_cmd = util_dynarray_grow_cap(&ctx->vs_cmd_array, max_n * 4);

#define VS_CMD_END() \
   assert(i <= max_n); \
   ctx->vs_cmd_array.size += i * 4; \
}

#define VS_CMD(v1, v2) \
   do { \
      vs_cmd[i++] = (v1); \
      vs_cmd[i++] = (v2); \
   } while (0)

#define VS_CMD_ARRAYS_SEMAPHORE_BEGIN_1() VS_CMD(0x00028000, 0x50000000)
#define VS_CMD_ARRAYS_SEMAPHORE_BEGIN_2() VS_CMD(0x00000001, 0x50000000)
#define VS_CMD_ARRAYS_SEMAPHORE_END(index_draw) \
   VS_CMD((index_draw) ? 0x00018000 : 0x00000000, 0x50000000)
#define VS_CMD_UNIFORMS_ADDRESS(addr, size) \
   VS_CMD(addr, 0x30000000 | ((uint32_t)(size) << 12))
#define VS_CMD_SHADER_ADDRESS(addr, size) \
   VS_CMD(addr, 0x40000000 | ((uint32_t)(size) << 12))
#define VS_CMD_SHADER_INFO(prefetch, size) \
   VS_CMD(((uint32_t)(prefetch) << 20) | ((uint32_t)(((size) >> 4) - 1) << 10), \
          0x10000040)
#define VS_CMD_VARYING_ATTRIBUTE_COUNT(nv, na) \
   VS_CMD(((uint32_t)((nv) - 1) << 8) | ((uint32_t)((na) - 1) << 24), 0x10000042)
#define VS_CMD_UNKNOWN1() VS_CMD(0x00000003, 0x10000041)
#define VS_CMD_UNKNOWN2() VS_CMD(0x00000000, 0x60000000)
#define VS_CMD_ATTRIBUTES_ADDRESS(addr, na) \
   VS_CMD(addr, 0x20000000 | ((uint32_t)(na) << 17))
#define VS_CMD_VARYINGS_ADDRESS(addr, nv) \
   VS_CMD(addr, 0x20000008 | ((uint32_t)(nv) << 17))
/* num is split: its low 8 bits go to the top byte of word 0, the rest
 * (num >> 8) to word 1 */
#define VS_CMD_DRAW(num, index_draw) \
   VS_CMD(((uint32_t)(num) << 24) | ((index_draw) ? 1 : 0), ((num) >> 8))
208b8e80941Smrg
209b8e80941Smrgstatic inline bool
210b8e80941Smrglima_ctx_dirty(struct lima_context *ctx)
211b8e80941Smrg{
212b8e80941Smrg   return ctx->plbu_cmd_array.size;
213b8e80941Smrg}
214b8e80941Smrg
215b8e80941Smrgstatic bool
216b8e80941Smrglima_fb_need_reload(struct lima_context *ctx)
217b8e80941Smrg{
218b8e80941Smrg   /* Depth buffer is always discarded */
219b8e80941Smrg   if (!ctx->framebuffer.base.nr_cbufs)
220b8e80941Smrg      return false;
221b8e80941Smrg   if (ctx->damage.region) {
222b8e80941Smrg      /* for EGL_KHR_partial_update we just want to reload the
223b8e80941Smrg       * region not aligned to tile boundary */
224b8e80941Smrg      if (!ctx->damage.aligned)
225b8e80941Smrg         return true;
226b8e80941Smrg   }
227b8e80941Smrg   else {
228b8e80941Smrg      struct lima_surface *surf = lima_surface(ctx->framebuffer.base.cbufs[0]);
229b8e80941Smrg      if (surf->reload)
230b8e80941Smrg         return true;
231b8e80941Smrg   }
232b8e80941Smrg
233b8e80941Smrg   return false;
234b8e80941Smrg}
235b8e80941Smrg
236b8e80941Smrgstatic void
237b8e80941Smrglima_pack_reload_plbu_cmd(struct lima_context *ctx)
238b8e80941Smrg{
239b8e80941Smrg   #define lima_reload_render_state_offset 0x0000
240b8e80941Smrg   #define lima_reload_gl_pos_offset       0x0040
241b8e80941Smrg   #define lima_reload_varying_offset      0x0080
242b8e80941Smrg   #define lima_reload_tex_desc_offset     0x00c0
243b8e80941Smrg   #define lima_reload_tex_array_offset    0x0100
244b8e80941Smrg   #define lima_reload_buffer_size         0x0140
245b8e80941Smrg
246b8e80941Smrg   void *cpu;
247b8e80941Smrg   unsigned offset;
248b8e80941Smrg   struct pipe_resource *pres = NULL;
249b8e80941Smrg   u_upload_alloc(ctx->uploader, 0, lima_reload_buffer_size,
250b8e80941Smrg                  0x40, &offset, &pres, &cpu);
251b8e80941Smrg
252b8e80941Smrg   struct lima_resource *res = lima_resource(pres);
253b8e80941Smrg   uint32_t va = res->bo->va + offset;
254b8e80941Smrg
255b8e80941Smrg   struct lima_screen *screen = lima_screen(ctx->base.screen);
256b8e80941Smrg
257b8e80941Smrg   uint32_t reload_shader_first_instr_size =
258b8e80941Smrg      ((uint32_t *)(screen->pp_buffer->map + pp_reload_program_offset))[0] & 0x1f;
259b8e80941Smrg   uint32_t reload_shader_va = screen->pp_buffer->va + pp_reload_program_offset;
260b8e80941Smrg
261b8e80941Smrg   struct lima_render_state reload_render_state = {
262b8e80941Smrg      .alpha_blend = 0xf03b1ad2,
263b8e80941Smrg      .depth_test = 0x0000000e,
264b8e80941Smrg      .depth_range = 0xffff0000,
265b8e80941Smrg      .stencil_front = 0x00000007,
266b8e80941Smrg      .stencil_back = 0x00000007,
267b8e80941Smrg      .multi_sample = 0x0000f007,
268b8e80941Smrg      .shader_address = reload_shader_va | reload_shader_first_instr_size,
269b8e80941Smrg      .varying_types = 0x00000001,
270b8e80941Smrg      .textures_address = va + lima_reload_tex_array_offset,
271b8e80941Smrg      .aux0 = 0x00004021,
272b8e80941Smrg      .varyings_address = va + lima_reload_varying_offset,
273b8e80941Smrg   };
274b8e80941Smrg   memcpy(cpu + lima_reload_render_state_offset, &reload_render_state,
275b8e80941Smrg          sizeof(reload_render_state));
276b8e80941Smrg
277b8e80941Smrg   struct lima_context_framebuffer *fb = &ctx->framebuffer;
278b8e80941Smrg   uint32_t *td = cpu + lima_reload_tex_desc_offset;
279b8e80941Smrg   memset(td, 0, lima_tex_desc_size);
280b8e80941Smrg   lima_texture_desc_set_res(ctx, td, fb->base.cbufs[0]->texture, 0, 0);
281b8e80941Smrg   td[1] = 0x00000480;
282b8e80941Smrg   td[2] |= 0x00093800;
283b8e80941Smrg   td[4] = 0x00000000;
284b8e80941Smrg   td[5] = 0x00000000;
285b8e80941Smrg
286b8e80941Smrg   uint32_t *ta = cpu + lima_reload_tex_array_offset;
287b8e80941Smrg   ta[0] = va + lima_reload_tex_desc_offset;
288b8e80941Smrg
289b8e80941Smrg   float reload_gl_pos[] = {
290b8e80941Smrg      fb->base.width, 0,               0, 1,
291b8e80941Smrg      0,              0,               0, 1,
292b8e80941Smrg      0,              fb->base.height, 0, 1,
293b8e80941Smrg   };
294b8e80941Smrg   memcpy(cpu + lima_reload_gl_pos_offset, reload_gl_pos,
295b8e80941Smrg          sizeof(reload_gl_pos));
296b8e80941Smrg
297b8e80941Smrg   float reload_varying[] = {
298b8e80941Smrg      fb->base.width, 0,               0, 0,
299b8e80941Smrg      0,              fb->base.height, 0, 0,
300b8e80941Smrg   };
301b8e80941Smrg   memcpy(cpu + lima_reload_varying_offset, reload_varying,
302b8e80941Smrg          sizeof(reload_varying));
303b8e80941Smrg
304b8e80941Smrg   lima_submit_add_bo(ctx->pp_submit, res->bo, LIMA_SUBMIT_BO_READ);
305b8e80941Smrg   pipe_resource_reference(&pres, NULL);
306b8e80941Smrg
307b8e80941Smrg   PLBU_CMD_BEGIN(20);
308b8e80941Smrg
309b8e80941Smrg   PLBU_CMD_VIEWPORT_X(0);
310b8e80941Smrg   PLBU_CMD_VIEWPORT_W(fui(fb->base.width));
311b8e80941Smrg   PLBU_CMD_VIEWPORT_Y(0);
312b8e80941Smrg   PLBU_CMD_VIEWPORT_H(fui(fb->base.height));
313b8e80941Smrg
314b8e80941Smrg   PLBU_CMD_RSW_VERTEX_ARRAY(
315b8e80941Smrg      va + lima_reload_render_state_offset,
316b8e80941Smrg      va + lima_reload_gl_pos_offset);
317b8e80941Smrg
318b8e80941Smrg   PLBU_CMD_UNKNOWN2();
319b8e80941Smrg   PLBU_CMD_UNKNOWN1();
320b8e80941Smrg
321b8e80941Smrg   PLBU_CMD_INDICES(screen->pp_buffer->va + pp_shared_index_offset);
322b8e80941Smrg   PLBU_CMD_INDEXED_DEST(va + lima_reload_gl_pos_offset);
323b8e80941Smrg   PLBU_CMD_DRAW_ELEMENTS(0xf, 0, 3);
324b8e80941Smrg
325b8e80941Smrg   PLBU_CMD_END();
326b8e80941Smrg}
327b8e80941Smrg
328b8e80941Smrgstatic void
329b8e80941Smrglima_pack_clear_plbu_cmd(struct lima_context *ctx)
330b8e80941Smrg{
331b8e80941Smrg   #define lima_clear_render_state_offset 0x0000
332b8e80941Smrg   #define lima_clear_shader_offset       0x0040
333b8e80941Smrg   #define lima_clear_buffer_size         0x0080
334b8e80941Smrg
335b8e80941Smrg   void *cpu;
336b8e80941Smrg   unsigned offset;
337b8e80941Smrg   struct pipe_resource *pres = NULL;
338b8e80941Smrg   u_upload_alloc(ctx->uploader, 0, lima_clear_buffer_size,
339b8e80941Smrg                  0x40, &offset, &pres, &cpu);
340b8e80941Smrg
341b8e80941Smrg   struct lima_resource *res = lima_resource(pres);
342b8e80941Smrg   uint32_t va = res->bo->va + offset;
343b8e80941Smrg
344b8e80941Smrg   struct lima_screen *screen = lima_screen(ctx->base.screen);
345b8e80941Smrg   uint32_t gl_pos_va = screen->pp_buffer->va + pp_clear_gl_pos_offset;
346b8e80941Smrg
347b8e80941Smrg   /* const0 clear_color, mov.v1 $0 ^const0.xxxx, stop */
348b8e80941Smrg   uint32_t clear_shader[] = {
349b8e80941Smrg      0x00021025, 0x0000000c,
350b8e80941Smrg      (ctx->clear.color_16pc << 12) | 0x000007cf,
351b8e80941Smrg      ctx->clear.color_16pc >> 12,
352b8e80941Smrg      ctx->clear.color_16pc >> 44,
353b8e80941Smrg   };
354b8e80941Smrg   memcpy(cpu + lima_clear_shader_offset, &clear_shader,
355b8e80941Smrg          sizeof(clear_shader));
356b8e80941Smrg
357b8e80941Smrg   uint32_t clear_shader_va = va + lima_clear_shader_offset;
358b8e80941Smrg   uint32_t clear_shader_first_instr_size = clear_shader[0] & 0x1f;
359b8e80941Smrg
360b8e80941Smrg   struct lima_render_state clear_render_state = {
361b8e80941Smrg      .blend_color_bg = 0x00800080,
362b8e80941Smrg      .blend_color_ra = 0x00ff0080,
363b8e80941Smrg      .alpha_blend = 0xfc321892,
364b8e80941Smrg      .depth_test = 0x0000003e,
365b8e80941Smrg      .depth_range = 0xffff0000,
366b8e80941Smrg      .stencil_front = 0x00000007,
367b8e80941Smrg      .stencil_back = 0x00000007,
368b8e80941Smrg      .multi_sample = 0x0000f007,
369b8e80941Smrg      .shader_address = clear_shader_va | clear_shader_first_instr_size,
370b8e80941Smrg   };
371b8e80941Smrg   memcpy(cpu + lima_clear_render_state_offset, &clear_render_state,
372b8e80941Smrg          sizeof(clear_render_state));
373b8e80941Smrg
374b8e80941Smrg   PLBU_CMD_BEGIN(22);
375b8e80941Smrg
376b8e80941Smrg   PLBU_CMD_VIEWPORT_X(0);
377b8e80941Smrg   PLBU_CMD_VIEWPORT_W(0x45800000);
378b8e80941Smrg   PLBU_CMD_VIEWPORT_Y(0);
379b8e80941Smrg   PLBU_CMD_VIEWPORT_H(0x45800000);
380b8e80941Smrg
381b8e80941Smrg   struct pipe_scissor_state *scissor = &ctx->scissor;
382b8e80941Smrg   PLBU_CMD_SCISSORS(scissor->minx, scissor->maxx, scissor->miny, scissor->maxy);
383b8e80941Smrg
384b8e80941Smrg   PLBU_CMD_RSW_VERTEX_ARRAY(va + lima_clear_render_state_offset, gl_pos_va);
385b8e80941Smrg
386b8e80941Smrg   PLBU_CMD_UNKNOWN2();
387b8e80941Smrg   PLBU_CMD_UNKNOWN1();
388b8e80941Smrg
389b8e80941Smrg   PLBU_CMD_INDICES(screen->pp_buffer->va + pp_shared_index_offset);
390b8e80941Smrg   PLBU_CMD_INDEXED_DEST(gl_pos_va);
391b8e80941Smrg   PLBU_CMD_DRAW_ELEMENTS(0xf, 0, 3);
392b8e80941Smrg
393b8e80941Smrg   PLBU_CMD_END();
394b8e80941Smrg}
395b8e80941Smrg
396b8e80941Smrgstatic void
397b8e80941Smrglima_pack_head_plbu_cmd(struct lima_context *ctx)
398b8e80941Smrg{
399b8e80941Smrg   /* first draw need create a PLBU command header */
400b8e80941Smrg   if (lima_ctx_dirty(ctx))
401b8e80941Smrg      return;
402b8e80941Smrg
403b8e80941Smrg   struct lima_context_framebuffer *fb = &ctx->framebuffer;
404b8e80941Smrg
405b8e80941Smrg   PLBU_CMD_BEGIN(10);
406b8e80941Smrg
407b8e80941Smrg   PLBU_CMD_UNKNOWN2();
408b8e80941Smrg   PLBU_CMD_BLOCK_STEP(fb->shift_min, fb->shift_h, fb->shift_w);
409b8e80941Smrg   PLBU_CMD_TILED_DIMENSIONS(fb->tiled_w, fb->tiled_h);
410b8e80941Smrg   PLBU_CMD_BLOCK_STRIDE(fb->block_w);
411b8e80941Smrg
412b8e80941Smrg   PLBU_CMD_ARRAY_ADDRESS(
413b8e80941Smrg      ctx->plb_gp_stream->va + ctx->plb_index * ctx->plb_gp_size,
414b8e80941Smrg      fb->block_w * fb->block_h);
415b8e80941Smrg
416b8e80941Smrg   PLBU_CMD_END();
417b8e80941Smrg
418b8e80941Smrg   if (lima_fb_need_reload(ctx))
419b8e80941Smrg      lima_pack_reload_plbu_cmd(ctx);
420b8e80941Smrg}
421b8e80941Smrg
422b8e80941Smrgstatic bool
423b8e80941Smrglima_is_scissor_zero(struct lima_context *ctx)
424b8e80941Smrg{
425b8e80941Smrg   if (!ctx->rasterizer || !ctx->rasterizer->base.scissor)
426b8e80941Smrg      return false;
427b8e80941Smrg
428b8e80941Smrg   struct pipe_scissor_state *scissor = &ctx->scissor;
429b8e80941Smrg   return
430b8e80941Smrg      scissor->minx == scissor->maxx
431b8e80941Smrg      && scissor->miny == scissor->maxy;
432b8e80941Smrg}
433b8e80941Smrg
434b8e80941Smrgstatic bool
435b8e80941Smrglima_is_scissor_full_fb(struct lima_context *ctx)
436b8e80941Smrg{
437b8e80941Smrg   if (!ctx->rasterizer || !ctx->rasterizer->base.scissor)
438b8e80941Smrg      return true;
439b8e80941Smrg
440b8e80941Smrg   struct pipe_scissor_state *scissor = &ctx->scissor;
441b8e80941Smrg   struct lima_context_framebuffer *fb = &ctx->framebuffer;
442b8e80941Smrg   return
443b8e80941Smrg      scissor->minx == 0 && scissor->maxx == fb->base.width &&
444b8e80941Smrg      scissor->miny == 0 && scissor->maxy == fb->base.height;
445b8e80941Smrg}
446b8e80941Smrg
/* Helper for hilbert_coords(): transform (*x, *y) inside an n x n square.
 *
 * Nothing happens unless ry is 0. When ry is 0 and rx is 1 both
 * coordinates are first mirrored (v -> n - 1 - v); in every ry == 0 case
 * x and y are then exchanged. */
static void
hilbert_rotate(int n, int *x, int *y, int rx, int ry)
{
   if (ry != 0)
      return;

   if (rx == 1) {
      *x = (n - 1) - *x;
      *y = (n - 1) - *y;
   }

   /* exchange the two coordinates */
   const int old_x = *x;
   *x = *y;
   *y = old_x;
}
462b8e80941Smrg
/* Convert a 1D index d into 2D coordinates (*x, *y).
 *
 * d is consumed two bits at a time, lowest bits first; each pair picks a
 * quadrant (rx, ry) at the current level, the coordinates gathered so far
 * are transformed by hilbert_rotate() and the quadrant offset is added.
 * Levels are processed while (1 << level) < n, so n does not have to be
 * a power of two. */
static void
hilbert_coords(int n, int d, int *x, int *y)
{
   int rest = d;

   *x = 0;
   *y = 0;

   for (int level = 0; (1 << level) < n; level++, rest /= 4) {
      const int side = 1 << level;
      const int rx = (rest / 2) & 1;
      const int ry = (rest ^ rx) & 1;

      hilbert_rotate(side, x, y, rx, ry);

      *x += rx << level;
      *y += ry << level;
   }
}
483b8e80941Smrg
/* Compute where each PP's stream starts and how large the buffer holding
 * all of them has to be.
 *
 * num_pp   number of streams; off[] must have room for that many entries
 * tiled_w, tiled_h   tile grid dimensions
 * off      output: byte offset of every stream from the buffer start
 *
 * Returns the total size in bytes.
 *
 * Each stream is sized for 16 bytes per tile plus 8 trailing bytes:
 *  - tiled_w * tiled_h may not divide evenly by num_pp; the first
 *    (tiles % num_pp) streams each get room for one extra tile
 *  - every stream start is aligned to 0x20
 */
static int
lima_get_pp_stream_size(int num_pp, int tiled_w, int tiled_h, uint32_t *off)
{
   const int tiles = tiled_w * tiled_h;
   const int per_stream = tiles / num_pp * 16 + 8;
   int leftover = tiles % num_pp;
   int total = 0;

   for (int pp = 0; pp < num_pp; pp++) {
      off[pp] = total;

      total += per_stream;
      if (leftover != 0) {
         /* room for one of the tiles that did not divide evenly */
         total += 16;
         leftover--;
      }
      total = align(total, 0x20);
   }

   return total;
}
510b8e80941Smrg
511b8e80941Smrgstatic bool
512b8e80941Smrginside_damage_region(int x, int y, struct lima_damage_state *ds)
513b8e80941Smrg{
514b8e80941Smrg   if (!ds->region)
515b8e80941Smrg      return true;
516b8e80941Smrg
517b8e80941Smrg   for (int i = 0; i < ds->num_region; i++) {
518b8e80941Smrg      struct pipe_scissor_state *ss = ds->region + i;
519b8e80941Smrg      if (x >= ss->minx && x < ss->maxx &&
520b8e80941Smrg          y >= ss->miny && y < ss->maxy)
521b8e80941Smrg         return true;
522b8e80941Smrg   }
523b8e80941Smrg
524b8e80941Smrg   return false;
525b8e80941Smrg}
526b8e80941Smrg
527b8e80941Smrgstatic void
528b8e80941Smrglima_update_pp_stream(struct lima_context *ctx, int off_x, int off_y,
529b8e80941Smrg                      int tiled_w, int tiled_h)
530b8e80941Smrg{
531b8e80941Smrg   struct lima_pp_stream_state *ps = &ctx->pp_stream;
532b8e80941Smrg   struct lima_context_framebuffer *fb = &ctx->framebuffer;
533b8e80941Smrg   struct lima_screen *screen = lima_screen(ctx->base.screen);
534b8e80941Smrg   int i, num_pp = screen->num_pp;
535b8e80941Smrg
536b8e80941Smrg   /* use hilbert_coords to generates 1D to 2D relationship.
537b8e80941Smrg    * 1D for pp stream index and 2D for plb block x/y on framebuffer.
538b8e80941Smrg    * if multi pp, interleave the 1D index to make each pp's render target
539b8e80941Smrg    * close enough which should result close workload
540b8e80941Smrg    */
541b8e80941Smrg   int max = MAX2(tiled_w, tiled_h);
542b8e80941Smrg   int dim = util_logbase2_ceil(max);
543b8e80941Smrg   int count = 1 << (dim + dim);
544b8e80941Smrg   int index = 0;
545b8e80941Smrg   uint32_t *stream[4];
546b8e80941Smrg   int si[4] = {0};
547b8e80941Smrg
548b8e80941Smrg   for (i = 0; i < num_pp; i++)
549b8e80941Smrg      stream[i] = ps->bo->map + ps->bo_offset + ps->offset[i];
550b8e80941Smrg
551b8e80941Smrg   for (i = 0; i < count; i++) {
552b8e80941Smrg      int x, y;
553b8e80941Smrg      hilbert_coords(max, i, &x, &y);
554b8e80941Smrg      if (x < tiled_w && y < tiled_h) {
555b8e80941Smrg         x += off_x;
556b8e80941Smrg         y += off_y;
557b8e80941Smrg
558b8e80941Smrg         if (!inside_damage_region(x, y, &ctx->damage))
559b8e80941Smrg            continue;
560b8e80941Smrg
561b8e80941Smrg         int pp = index % num_pp;
562b8e80941Smrg         int offset = ((y >> fb->shift_h) * fb->block_w +
563b8e80941Smrg                       (x >> fb->shift_w)) * LIMA_CTX_PLB_BLK_SIZE;
564b8e80941Smrg         int plb_va = ctx->plb[ctx->plb_index]->va + offset;
565b8e80941Smrg
566b8e80941Smrg         stream[pp][si[pp]++] = 0;
567b8e80941Smrg         stream[pp][si[pp]++] = 0xB8000000 | x | (y << 8);
568b8e80941Smrg         stream[pp][si[pp]++] = 0xE0000002 | ((plb_va >> 3) & ~0xE0000003);
569b8e80941Smrg         stream[pp][si[pp]++] = 0xB0000000;
570b8e80941Smrg
571b8e80941Smrg         index++;
572b8e80941Smrg      }
573b8e80941Smrg   }
574b8e80941Smrg
575b8e80941Smrg   for (i = 0; i < num_pp; i++) {
576b8e80941Smrg      stream[i][si[i]++] = 0;
577b8e80941Smrg      stream[i][si[i]++] = 0xBC000000;
578b8e80941Smrg
579b8e80941Smrg      lima_dump_command_stream_print(
580b8e80941Smrg         stream[i], si[i] * 4, false, "pp plb stream %d at va %x\n",
581b8e80941Smrg         i, ps->bo->va + ps->bo_offset + ps->offset[i]);
582b8e80941Smrg   }
583b8e80941Smrg}
584b8e80941Smrg
585b8e80941Smrgstatic void
586b8e80941Smrglima_update_damage_pp_stream(struct lima_context *ctx)
587b8e80941Smrg{
588b8e80941Smrg   struct lima_damage_state *ds = &ctx->damage;
589b8e80941Smrg   struct pipe_scissor_state max = ds->region[0];
590b8e80941Smrg
591b8e80941Smrg   /* find a max region to cover all the damage region */
592b8e80941Smrg   for (int i = 1; i < ds->num_region; i++) {
593b8e80941Smrg      struct pipe_scissor_state *ss = ds->region + i;
594b8e80941Smrg      max.minx = MIN2(max.minx, ss->minx);
595b8e80941Smrg      max.miny = MIN2(max.miny, ss->miny);
596b8e80941Smrg      max.maxx = MAX2(max.maxx, ss->maxx);
597b8e80941Smrg      max.maxy = MAX2(max.maxy, ss->maxy);
598b8e80941Smrg   }
599b8e80941Smrg
600b8e80941Smrg   int tiled_w = max.maxx - max.minx;
601b8e80941Smrg   int tiled_h = max.maxy - max.miny;
602b8e80941Smrg   struct lima_screen *screen = lima_screen(ctx->base.screen);
603b8e80941Smrg   int size = lima_get_pp_stream_size(
604b8e80941Smrg      screen->num_pp, tiled_w, tiled_h, ctx->pp_stream.offset);
605b8e80941Smrg
606b8e80941Smrg   void *cpu;
607b8e80941Smrg   unsigned offset;
608b8e80941Smrg   struct pipe_resource *pres = NULL;
609b8e80941Smrg   u_upload_alloc(ctx->uploader, 0, size, 0x40, &offset, &pres, &cpu);
610b8e80941Smrg
611b8e80941Smrg   struct lima_resource *res = lima_resource(pres);
612b8e80941Smrg   ctx->pp_stream.bo = res->bo;
613b8e80941Smrg   ctx->pp_stream.bo_offset = offset;
614b8e80941Smrg
615b8e80941Smrg   lima_update_pp_stream(ctx, max.minx, max.miny, tiled_w, tiled_h);
616b8e80941Smrg
617b8e80941Smrg   lima_submit_add_bo(ctx->pp_submit, res->bo, LIMA_SUBMIT_BO_READ);
618b8e80941Smrg   pipe_resource_reference(&pres, NULL);
619b8e80941Smrg}
620b8e80941Smrg
621b8e80941Smrgstatic void
622b8e80941Smrglima_update_full_pp_stream(struct lima_context *ctx)
623b8e80941Smrg{
624b8e80941Smrg   struct lima_context_framebuffer *fb = &ctx->framebuffer;
625b8e80941Smrg   struct lima_ctx_plb_pp_stream_key key = {
626b8e80941Smrg      .plb_index = ctx->plb_index,
627b8e80941Smrg      .tiled_w = fb->tiled_w,
628b8e80941Smrg      .tiled_h = fb->tiled_h,
629b8e80941Smrg   };
630b8e80941Smrg
631b8e80941Smrg   struct hash_entry *entry =
632b8e80941Smrg      _mesa_hash_table_search(ctx->plb_pp_stream, &key);
633b8e80941Smrg   struct lima_ctx_plb_pp_stream *s = entry->data;
634b8e80941Smrg
635b8e80941Smrg   if (s->bo) {
636b8e80941Smrg      ctx->pp_stream.bo = s->bo;
637b8e80941Smrg      ctx->pp_stream.bo_offset = 0;
638b8e80941Smrg      memcpy(ctx->pp_stream.offset, s->offset, sizeof(s->offset));
639b8e80941Smrg   }
640b8e80941Smrg   else {
641b8e80941Smrg      struct lima_screen *screen = lima_screen(ctx->base.screen);
642b8e80941Smrg      int size = lima_get_pp_stream_size(
643b8e80941Smrg         screen->num_pp, fb->tiled_w, fb->tiled_h, s->offset);
644b8e80941Smrg      s->bo = lima_bo_create(screen, size, 0);
645b8e80941Smrg      lima_bo_map(s->bo);
646b8e80941Smrg
647b8e80941Smrg      ctx->pp_stream.bo = s->bo;
648b8e80941Smrg      ctx->pp_stream.bo_offset = 0;
649b8e80941Smrg      memcpy(ctx->pp_stream.offset, s->offset, sizeof(s->offset));
650b8e80941Smrg
651b8e80941Smrg      lima_update_pp_stream(ctx, 0, 0, fb->tiled_w, fb->tiled_h);
652b8e80941Smrg   }
653b8e80941Smrg
654b8e80941Smrg   lima_submit_add_bo(ctx->pp_submit, s->bo, LIMA_SUBMIT_BO_READ);
655b8e80941Smrg}
656b8e80941Smrg
657b8e80941Smrgstatic void
658b8e80941Smrglima_update_submit_bo(struct lima_context *ctx)
659b8e80941Smrg{
660b8e80941Smrg   if (lima_ctx_dirty(ctx))
661b8e80941Smrg      return;
662b8e80941Smrg
663b8e80941Smrg   struct lima_screen *screen = lima_screen(ctx->base.screen);
664b8e80941Smrg   lima_submit_add_bo(ctx->gp_submit, ctx->plb_gp_stream, LIMA_SUBMIT_BO_READ);
665b8e80941Smrg   lima_submit_add_bo(ctx->gp_submit, ctx->plb[ctx->plb_index], LIMA_SUBMIT_BO_WRITE);
666b8e80941Smrg   lima_submit_add_bo(ctx->gp_submit, ctx->gp_tile_heap[ctx->plb_index], LIMA_SUBMIT_BO_WRITE);
667b8e80941Smrg
668b8e80941Smrg   lima_dump_command_stream_print(
669b8e80941Smrg      ctx->plb_gp_stream->map + ctx->plb_index * ctx->plb_gp_size,
670b8e80941Smrg      ctx->plb_gp_size, false, "gp plb stream at va %x\n",
671b8e80941Smrg      ctx->plb_gp_stream->va + ctx->plb_index * ctx->plb_gp_size);
672b8e80941Smrg
673b8e80941Smrg   if (ctx->damage.region)
674b8e80941Smrg      lima_update_damage_pp_stream(ctx);
675b8e80941Smrg   else if (ctx->plb_pp_stream)
676b8e80941Smrg      lima_update_full_pp_stream(ctx);
677b8e80941Smrg   else
678b8e80941Smrg      ctx->pp_stream.bo = NULL;
679b8e80941Smrg
680b8e80941Smrg   if (ctx->framebuffer.base.nr_cbufs) {
681b8e80941Smrg      struct lima_resource *res = lima_resource(ctx->framebuffer.base.cbufs[0]->texture);
682b8e80941Smrg      lima_submit_add_bo(ctx->pp_submit, res->bo, LIMA_SUBMIT_BO_WRITE);
683b8e80941Smrg   }
684b8e80941Smrg   if (ctx->framebuffer.base.zsbuf) {
685b8e80941Smrg      struct lima_resource *res = lima_resource(ctx->framebuffer.base.zsbuf->texture);
686b8e80941Smrg      lima_submit_add_bo(ctx->pp_submit, res->bo, LIMA_SUBMIT_BO_WRITE);
687b8e80941Smrg   }
688b8e80941Smrg   lima_submit_add_bo(ctx->pp_submit, ctx->plb[ctx->plb_index], LIMA_SUBMIT_BO_READ);
689b8e80941Smrg   lima_submit_add_bo(ctx->pp_submit, ctx->gp_tile_heap[ctx->plb_index], LIMA_SUBMIT_BO_READ);
690b8e80941Smrg   lima_submit_add_bo(ctx->pp_submit, screen->pp_buffer, LIMA_SUBMIT_BO_READ);
691b8e80941Smrg}
692b8e80941Smrg
693b8e80941Smrgstatic void
694b8e80941Smrglima_clear(struct pipe_context *pctx, unsigned buffers,
695b8e80941Smrg           const union pipe_color_union *color, double depth, unsigned stencil)
696b8e80941Smrg{
697b8e80941Smrg   struct lima_context *ctx = lima_context(pctx);
698b8e80941Smrg   bool full_fb_clear = lima_is_scissor_full_fb(ctx);
699b8e80941Smrg
700b8e80941Smrg   if (full_fb_clear) {
701b8e80941Smrg      lima_flush(ctx);
702b8e80941Smrg
703b8e80941Smrg      /* no need to reload if cleared */
704b8e80941Smrg      if (ctx->framebuffer.base.nr_cbufs && (buffers & PIPE_CLEAR_COLOR0)) {
705b8e80941Smrg         struct lima_surface *surf = lima_surface(ctx->framebuffer.base.cbufs[0]);
706b8e80941Smrg         surf->reload = false;
707b8e80941Smrg      }
708b8e80941Smrg   }
709b8e80941Smrg
710b8e80941Smrg   struct lima_context_clear *clear = &ctx->clear;
711b8e80941Smrg   clear->buffers = buffers;
712b8e80941Smrg
713b8e80941Smrg   if (buffers & PIPE_CLEAR_COLOR0) {
714b8e80941Smrg      clear->color_8pc =
715b8e80941Smrg         ((uint32_t)float_to_ubyte(color->f[3]) << 24) |
716b8e80941Smrg         ((uint32_t)float_to_ubyte(color->f[2]) << 16) |
717b8e80941Smrg         ((uint32_t)float_to_ubyte(color->f[1]) << 8) |
718b8e80941Smrg         float_to_ubyte(color->f[0]);
719b8e80941Smrg
720b8e80941Smrg      clear->color_16pc =
721b8e80941Smrg         ((uint64_t)float_to_ushort(color->f[3]) << 48) |
722b8e80941Smrg         ((uint64_t)float_to_ushort(color->f[2]) << 32) |
723b8e80941Smrg         ((uint64_t)float_to_ushort(color->f[1]) << 16) |
724b8e80941Smrg         float_to_ushort(color->f[0]);
725b8e80941Smrg   }
726b8e80941Smrg
727b8e80941Smrg   if (buffers & PIPE_CLEAR_DEPTH)
728b8e80941Smrg      clear->depth = util_pack_z(PIPE_FORMAT_Z24X8_UNORM, depth);
729b8e80941Smrg
730b8e80941Smrg   if (buffers & PIPE_CLEAR_STENCIL)
731b8e80941Smrg      clear->stencil = stencil;
732b8e80941Smrg
733b8e80941Smrg   lima_update_submit_bo(ctx);
734b8e80941Smrg
735b8e80941Smrg   lima_pack_head_plbu_cmd(ctx);
736b8e80941Smrg
737b8e80941Smrg   /* partial clear */
738b8e80941Smrg   if (!full_fb_clear)
739b8e80941Smrg      lima_pack_clear_plbu_cmd(ctx);
740b8e80941Smrg
741b8e80941Smrg   ctx->dirty |= LIMA_CONTEXT_DIRTY_CLEAR;
742b8e80941Smrg}
743b8e80941Smrg
/*
 * Attribute type codes produced by lima_pipe_format_to_attrib_type().
 * The integer codes from I16 to U16N are consecutive, so only the first
 * one is spelled out.
 */
enum lima_attrib_type {
   LIMA_ATTRIB_FLOAT = 0,
   /* todo: the codes between FLOAT and I16 are not identified yet */
   LIMA_ATTRIB_I16   = 4,
   LIMA_ATTRIB_U16,        /* 5 */
   LIMA_ATTRIB_I8,         /* 6 */
   LIMA_ATTRIB_U8,         /* 7 */
   LIMA_ATTRIB_I8N,        /* 8 */
   LIMA_ATTRIB_U8N,        /* 9 */
   LIMA_ATTRIB_I16N,       /* 10 */
   LIMA_ATTRIB_U16N,       /* 11 */
   /* todo: where is the 32 bit int */
   /* todo: the codes between U16N and FIXED are not identified yet */
   LIMA_ATTRIB_FIXED = 0x101
};
759b8e80941Smrg
760b8e80941Smrgstatic enum lima_attrib_type
761b8e80941Smrglima_pipe_format_to_attrib_type(enum pipe_format format)
762b8e80941Smrg{
763b8e80941Smrg   const struct util_format_description *desc = util_format_description(format);
764b8e80941Smrg   int i = util_format_get_first_non_void_channel(format);
765b8e80941Smrg   const struct util_format_channel_description *c = desc->channel + i;
766b8e80941Smrg
767b8e80941Smrg   switch (c->type) {
768b8e80941Smrg   case UTIL_FORMAT_TYPE_FLOAT:
769b8e80941Smrg      return LIMA_ATTRIB_FLOAT;
770b8e80941Smrg   case UTIL_FORMAT_TYPE_FIXED:
771b8e80941Smrg      return LIMA_ATTRIB_FIXED;
772b8e80941Smrg   case UTIL_FORMAT_TYPE_SIGNED:
773b8e80941Smrg      if (c->size == 8) {
774b8e80941Smrg         if (c->normalized)
775b8e80941Smrg            return LIMA_ATTRIB_I8N;
776b8e80941Smrg         else
777b8e80941Smrg            return LIMA_ATTRIB_I8;
778b8e80941Smrg      }
779b8e80941Smrg      else if (c->size == 16) {
780b8e80941Smrg         if (c->normalized)
781b8e80941Smrg            return LIMA_ATTRIB_I16N;
782b8e80941Smrg         else
783b8e80941Smrg            return LIMA_ATTRIB_I16;
784b8e80941Smrg      }
785b8e80941Smrg      break;
786b8e80941Smrg   case UTIL_FORMAT_TYPE_UNSIGNED:
787b8e80941Smrg      if (c->size == 8) {
788b8e80941Smrg         if (c->normalized)
789b8e80941Smrg            return LIMA_ATTRIB_U8N;
790b8e80941Smrg         else
791b8e80941Smrg            return LIMA_ATTRIB_U8;
792b8e80941Smrg      }
793b8e80941Smrg      else if (c->size == 16) {
794b8e80941Smrg         if (c->normalized)
795b8e80941Smrg            return LIMA_ATTRIB_U16N;
796b8e80941Smrg         else
797b8e80941Smrg            return LIMA_ATTRIB_U16;
798b8e80941Smrg      }
799b8e80941Smrg      break;
800b8e80941Smrg   }
801b8e80941Smrg
802b8e80941Smrg   return LIMA_ATTRIB_FLOAT;
803b8e80941Smrg}
804b8e80941Smrg
805b8e80941Smrgstatic void
806b8e80941Smrglima_pack_vs_cmd(struct lima_context *ctx, const struct pipe_draw_info *info)
807b8e80941Smrg{
808b8e80941Smrg   VS_CMD_BEGIN(24);
809b8e80941Smrg
810b8e80941Smrg   if (!info->index_size) {
811b8e80941Smrg      VS_CMD_ARRAYS_SEMAPHORE_BEGIN_1();
812b8e80941Smrg      VS_CMD_ARRAYS_SEMAPHORE_BEGIN_2();
813b8e80941Smrg   }
814b8e80941Smrg
815b8e80941Smrg   int uniform_size = ctx->vs->uniform_pending_offset + ctx->vs->constant_size + 32;
816b8e80941Smrg   VS_CMD_UNIFORMS_ADDRESS(
817b8e80941Smrg      lima_ctx_buff_va(ctx, lima_ctx_buff_gp_uniform, LIMA_CTX_BUFF_SUBMIT_GP),
818b8e80941Smrg      align(uniform_size, 16));
819b8e80941Smrg
820b8e80941Smrg   VS_CMD_SHADER_ADDRESS(ctx->vs->bo->va, ctx->vs->shader_size);
821b8e80941Smrg   VS_CMD_SHADER_INFO(ctx->vs->prefetch, ctx->vs->shader_size);
822b8e80941Smrg
823b8e80941Smrg   int num_varryings = ctx->vs->num_varying;
824b8e80941Smrg   int num_attributes = ctx->vertex_elements->num_elements;
825b8e80941Smrg   VS_CMD_VARYING_ATTRIBUTE_COUNT(num_varryings, num_attributes);
826b8e80941Smrg
827b8e80941Smrg   VS_CMD_UNKNOWN1();
828b8e80941Smrg
829b8e80941Smrg   VS_CMD_ATTRIBUTES_ADDRESS(
830b8e80941Smrg      lima_ctx_buff_va(ctx, lima_ctx_buff_gp_attribute_info, LIMA_CTX_BUFF_SUBMIT_GP),
831b8e80941Smrg      num_attributes);
832b8e80941Smrg
833b8e80941Smrg   VS_CMD_VARYINGS_ADDRESS(
834b8e80941Smrg      lima_ctx_buff_va(ctx, lima_ctx_buff_gp_varying_info, LIMA_CTX_BUFF_SUBMIT_GP),
835b8e80941Smrg      num_varryings);
836b8e80941Smrg
837b8e80941Smrg   unsigned num = info->index_size ? (ctx->max_index - ctx->min_index + 1) : info->count;
838b8e80941Smrg   VS_CMD_DRAW(num, info->index_size);
839b8e80941Smrg
840b8e80941Smrg   VS_CMD_UNKNOWN2();
841b8e80941Smrg
842b8e80941Smrg   VS_CMD_ARRAYS_SEMAPHORE_END(info->index_size);
843b8e80941Smrg
844b8e80941Smrg   VS_CMD_END();
845b8e80941Smrg}
846b8e80941Smrg
847b8e80941Smrgstatic void
848b8e80941Smrglima_pack_plbu_cmd(struct lima_context *ctx, const struct pipe_draw_info *info)
849b8e80941Smrg{
850b8e80941Smrg   lima_pack_head_plbu_cmd(ctx);
851b8e80941Smrg
852b8e80941Smrg   /* If it's zero scissor, we skip adding all other commands */
853b8e80941Smrg   if (lima_is_scissor_zero(ctx))
854b8e80941Smrg      return;
855b8e80941Smrg
856b8e80941Smrg   PLBU_CMD_BEGIN(30);
857b8e80941Smrg
858b8e80941Smrg   PLBU_CMD_VIEWPORT_X(fui(ctx->viewport.x));
859b8e80941Smrg   PLBU_CMD_VIEWPORT_W(fui(ctx->viewport.width));
860b8e80941Smrg   PLBU_CMD_VIEWPORT_Y(fui(ctx->viewport.y));
861b8e80941Smrg   PLBU_CMD_VIEWPORT_H(fui(ctx->viewport.height));
862b8e80941Smrg
863b8e80941Smrg   if (!info->index_size)
864b8e80941Smrg      PLBU_CMD_ARRAYS_SEMAPHORE_BEGIN();
865b8e80941Smrg
866b8e80941Smrg   bool low_prim = info->mode < PIPE_PRIM_TRIANGLES;
867b8e80941Smrg   int cf = ctx->rasterizer->base.cull_face;
868b8e80941Smrg   int ccw = ctx->rasterizer->base.front_ccw;
869b8e80941Smrg   uint32_t cull = 0;
870b8e80941Smrg   if (cf != PIPE_FACE_NONE) {
871b8e80941Smrg      if (cf & PIPE_FACE_FRONT)
872b8e80941Smrg         cull |= ccw ? 0x00040000 : 0x00020000;
873b8e80941Smrg      if (cf & PIPE_FACE_BACK)
874b8e80941Smrg         cull |= ccw ? 0x00020000 : 0x00040000;
875b8e80941Smrg   }
876b8e80941Smrg   PLBU_CMD_PRIMITIVE_SETUP(low_prim, cull, info->index_size);
877b8e80941Smrg
878b8e80941Smrg   uint32_t gl_position_va =
879b8e80941Smrg      lima_ctx_buff_va(ctx, lima_ctx_buff_sh_gl_pos,
880b8e80941Smrg                       LIMA_CTX_BUFF_SUBMIT_GP | LIMA_CTX_BUFF_SUBMIT_PP);
881b8e80941Smrg   PLBU_CMD_RSW_VERTEX_ARRAY(
882b8e80941Smrg      lima_ctx_buff_va(ctx, lima_ctx_buff_pp_plb_rsw, LIMA_CTX_BUFF_SUBMIT_PP),
883b8e80941Smrg      gl_position_va);
884b8e80941Smrg
885b8e80941Smrg   /* TODO
886b8e80941Smrg    * - we should set it only for the first draw that enabled the scissor and for
887b8e80941Smrg    *   latter draw only if scissor is dirty
888b8e80941Smrg    */
889b8e80941Smrg   if (ctx->rasterizer->base.scissor) {
890b8e80941Smrg      struct pipe_scissor_state *scissor = &ctx->scissor;
891b8e80941Smrg      PLBU_CMD_SCISSORS(scissor->minx, scissor->maxx, scissor->miny, scissor->maxy);
892b8e80941Smrg   }
893b8e80941Smrg
894b8e80941Smrg   PLBU_CMD_UNKNOWN1();
895b8e80941Smrg
896b8e80941Smrg   PLBU_CMD_DEPTH_RANGE_NEAR(fui(ctx->viewport.near));
897b8e80941Smrg   PLBU_CMD_DEPTH_RANGE_FAR(fui(ctx->viewport.far));
898b8e80941Smrg
899b8e80941Smrg   if (low_prim) {
900b8e80941Smrg      uint32_t v = info->mode == PIPE_PRIM_POINTS ?
901b8e80941Smrg         fui(ctx->rasterizer->base.point_size) : fui(ctx->rasterizer->base.line_width);
902b8e80941Smrg      PLBU_CMD_LOW_PRIM_SIZE(v);
903b8e80941Smrg   }
904b8e80941Smrg
905b8e80941Smrg   if (info->index_size) {
906b8e80941Smrg      PLBU_CMD_INDEXED_DEST(gl_position_va);
907b8e80941Smrg
908b8e80941Smrg      struct pipe_resource *indexbuf = NULL;
909b8e80941Smrg      unsigned index_offset = 0;
910b8e80941Smrg      struct lima_resource *res;
911b8e80941Smrg      if (info->has_user_indices) {
912b8e80941Smrg         util_upload_index_buffer(&ctx->base, info, &indexbuf, &index_offset);
913b8e80941Smrg         res = lima_resource(indexbuf);
914b8e80941Smrg      }
915b8e80941Smrg      else
916b8e80941Smrg         res = lima_resource(info->index.resource);
917b8e80941Smrg
918b8e80941Smrg      lima_submit_add_bo(ctx->gp_submit, res->bo, LIMA_SUBMIT_BO_READ);
919b8e80941Smrg      PLBU_CMD_INDICES(res->bo->va + info->start * info->index_size + index_offset);
920b8e80941Smrg
921b8e80941Smrg      if (indexbuf)
922b8e80941Smrg         pipe_resource_reference(&indexbuf, NULL);
923b8e80941Smrg   }
924b8e80941Smrg   else {
925b8e80941Smrg      /* can this make the attribute info static? */
926b8e80941Smrg      PLBU_CMD_DRAW_ARRAYS(info->mode, info->start, info->count);
927b8e80941Smrg   }
928b8e80941Smrg
929b8e80941Smrg   PLBU_CMD_ARRAYS_SEMAPHORE_END();
930b8e80941Smrg
931b8e80941Smrg   if (info->index_size)
932b8e80941Smrg      PLBU_CMD_DRAW_ELEMENTS(info->mode, ctx->min_index, info->count);
933b8e80941Smrg
934b8e80941Smrg   PLBU_CMD_END();
935b8e80941Smrg}
936b8e80941Smrg
937b8e80941Smrgstatic int
938b8e80941Smrglima_blend_func(enum pipe_blend_func pipe)
939b8e80941Smrg{
940b8e80941Smrg   switch (pipe) {
941b8e80941Smrg   case PIPE_BLEND_ADD:
942b8e80941Smrg      return 2;
943b8e80941Smrg   case PIPE_BLEND_SUBTRACT:
944b8e80941Smrg      return 0;
945b8e80941Smrg   case PIPE_BLEND_REVERSE_SUBTRACT:
946b8e80941Smrg      return 1;
947b8e80941Smrg   case PIPE_BLEND_MIN:
948b8e80941Smrg      return 4;
949b8e80941Smrg   case PIPE_BLEND_MAX:
950b8e80941Smrg      return 5;
951b8e80941Smrg   }
952b8e80941Smrg   return -1;
953b8e80941Smrg}
954b8e80941Smrg
955b8e80941Smrgstatic int
956b8e80941Smrglima_blend_factor(enum pipe_blendfactor pipe)
957b8e80941Smrg{
958b8e80941Smrg   switch (pipe) {
959b8e80941Smrg   case PIPE_BLENDFACTOR_ONE:
960b8e80941Smrg      return 11;
961b8e80941Smrg   case PIPE_BLENDFACTOR_SRC_COLOR:
962b8e80941Smrg      return 0;
963b8e80941Smrg   case PIPE_BLENDFACTOR_SRC_ALPHA:
964b8e80941Smrg      return 16;
965b8e80941Smrg   case PIPE_BLENDFACTOR_DST_ALPHA:
966b8e80941Smrg      return 17;
967b8e80941Smrg   case PIPE_BLENDFACTOR_DST_COLOR:
968b8e80941Smrg      return 1;
969b8e80941Smrg   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
970b8e80941Smrg      return 7;
971b8e80941Smrg   case PIPE_BLENDFACTOR_CONST_COLOR:
972b8e80941Smrg      return 2;
973b8e80941Smrg   case PIPE_BLENDFACTOR_CONST_ALPHA:
974b8e80941Smrg      return 18;
975b8e80941Smrg   case PIPE_BLENDFACTOR_ZERO:
976b8e80941Smrg      return 3;
977b8e80941Smrg   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
978b8e80941Smrg      return 8;
979b8e80941Smrg   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
980b8e80941Smrg      return 24;
981b8e80941Smrg   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
982b8e80941Smrg      return 25;
983b8e80941Smrg   case PIPE_BLENDFACTOR_INV_DST_COLOR:
984b8e80941Smrg      return 9;
985b8e80941Smrg   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
986b8e80941Smrg      return 10;
987b8e80941Smrg   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
988b8e80941Smrg      return 26;
989b8e80941Smrg   case PIPE_BLENDFACTOR_SRC1_COLOR:
990b8e80941Smrg   case PIPE_BLENDFACTOR_SRC1_ALPHA:
991b8e80941Smrg   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
992b8e80941Smrg   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
993b8e80941Smrg      return -1; /* not support */
994b8e80941Smrg   }
995b8e80941Smrg   return -1;
996b8e80941Smrg}
997b8e80941Smrg
998b8e80941Smrgstatic int
999b8e80941Smrglima_calculate_alpha_blend(enum pipe_blend_func rgb_func, enum pipe_blend_func alpha_func,
1000b8e80941Smrg                           enum pipe_blendfactor rgb_src_factor, enum pipe_blendfactor rgb_dst_factor,
1001b8e80941Smrg                           enum pipe_blendfactor alpha_src_factor, enum pipe_blendfactor alpha_dst_factor)
1002b8e80941Smrg{
1003b8e80941Smrg   return lima_blend_func(rgb_func) |
1004b8e80941Smrg      (lima_blend_func(alpha_func) << 3) |
1005b8e80941Smrg      (lima_blend_factor(rgb_src_factor) << 6) |
1006b8e80941Smrg      (lima_blend_factor(rgb_dst_factor) << 11) |
1007b8e80941Smrg      ((lima_blend_factor(alpha_src_factor) & 0xF) << 16) |
1008b8e80941Smrg      ((lima_blend_factor(alpha_dst_factor) & 0xF) << 20) |
1009b8e80941Smrg      0x0C000000; /* need check if this GLESv1 glAlphaFunc */
1010b8e80941Smrg}
1011b8e80941Smrg
1012b8e80941Smrgstatic int
1013b8e80941Smrglima_stencil_op(enum pipe_stencil_op pipe)
1014b8e80941Smrg{
1015b8e80941Smrg   switch (pipe) {
1016b8e80941Smrg   case PIPE_STENCIL_OP_KEEP:
1017b8e80941Smrg      return 0;
1018b8e80941Smrg   case PIPE_STENCIL_OP_ZERO:
1019b8e80941Smrg      return 2;
1020b8e80941Smrg   case PIPE_STENCIL_OP_REPLACE:
1021b8e80941Smrg      return 1;
1022b8e80941Smrg   case PIPE_STENCIL_OP_INCR:
1023b8e80941Smrg      return 6;
1024b8e80941Smrg   case PIPE_STENCIL_OP_DECR:
1025b8e80941Smrg      return 7;
1026b8e80941Smrg   case PIPE_STENCIL_OP_INCR_WRAP:
1027b8e80941Smrg      return 4;
1028b8e80941Smrg   case PIPE_STENCIL_OP_DECR_WRAP:
1029b8e80941Smrg      return 5;
1030b8e80941Smrg   case PIPE_STENCIL_OP_INVERT:
1031b8e80941Smrg      return 3;
1032b8e80941Smrg   }
1033b8e80941Smrg   return -1;
1034b8e80941Smrg}
1035b8e80941Smrg
1036b8e80941Smrgstatic int
1037b8e80941Smrglima_calculate_depth_test(struct pipe_depth_state *depth, struct pipe_rasterizer_state *rst)
1038b8e80941Smrg{
1039b8e80941Smrg   enum pipe_compare_func func = (depth->enabled ? depth->func : PIPE_FUNC_ALWAYS);
1040b8e80941Smrg
1041b8e80941Smrg   int offset_scale = 0;
1042b8e80941Smrg
1043b8e80941Smrg   //TODO: implement polygon offset
1044b8e80941Smrg#if 0
1045b8e80941Smrg   if (rst->offset_scale < -32)
1046b8e80941Smrg      offset_scale = -32;
1047b8e80941Smrg   else if (rst->offset_scale > 31)
1048b8e80941Smrg      offset_scale = 31;
1049b8e80941Smrg   else
1050b8e80941Smrg      offset_scale = rst->offset_scale * 4;
1051b8e80941Smrg
1052b8e80941Smrg   if (offset_scale < 0)
1053b8e80941Smrg      offset_scale = 0x100 + offset_scale;
1054b8e80941Smrg#endif
1055b8e80941Smrg
1056b8e80941Smrg   return (depth->enabled && depth->writemask) |
1057b8e80941Smrg      ((int)func << 1) |
1058b8e80941Smrg      (offset_scale << 16) |
1059b8e80941Smrg      0x30; /* find out what is this */
1060b8e80941Smrg}
1061b8e80941Smrg
1062b8e80941Smrgstatic void
1063b8e80941Smrglima_pack_render_state(struct lima_context *ctx, const struct pipe_draw_info *info)
1064b8e80941Smrg{
1065b8e80941Smrg   struct lima_render_state *render =
1066b8e80941Smrg      lima_ctx_buff_alloc(ctx, lima_ctx_buff_pp_plb_rsw,
1067b8e80941Smrg                          sizeof(*render), true);
1068b8e80941Smrg
1069b8e80941Smrg   /* do hw support RGBA independ blend?
1070b8e80941Smrg    * PIPE_CAP_INDEP_BLEND_ENABLE
1071b8e80941Smrg    *
1072b8e80941Smrg    * how to handle the no cbuf only zbuf case?
1073b8e80941Smrg    */
1074b8e80941Smrg   struct pipe_rt_blend_state *rt = ctx->blend->base.rt;
1075b8e80941Smrg   render->blend_color_bg = float_to_ubyte(ctx->blend_color.color[2]) |
1076b8e80941Smrg      (float_to_ubyte(ctx->blend_color.color[1]) << 16);
1077b8e80941Smrg   render->blend_color_ra = float_to_ubyte(ctx->blend_color.color[0]) |
1078b8e80941Smrg      (float_to_ubyte(ctx->blend_color.color[3]) << 16);
1079b8e80941Smrg
1080b8e80941Smrg   if (rt->blend_enable) {
1081b8e80941Smrg      render->alpha_blend = lima_calculate_alpha_blend(rt->rgb_func, rt->alpha_func,
1082b8e80941Smrg         rt->rgb_src_factor, rt->rgb_dst_factor,
1083b8e80941Smrg         rt->alpha_src_factor, rt->alpha_dst_factor);
1084b8e80941Smrg   }
1085b8e80941Smrg   else {
1086b8e80941Smrg      /*
1087b8e80941Smrg       * Special handling for blending disabled.
1088b8e80941Smrg       * Binary driver is generating the same alpha_value,
1089b8e80941Smrg       * as when we would just enable blending, without changing/setting any blend equation/params.
1090b8e80941Smrg       * Normaly in this case mesa would set all rt fields (func/factor) to zero.
1091b8e80941Smrg       */
1092b8e80941Smrg      render->alpha_blend = lima_calculate_alpha_blend(PIPE_BLEND_ADD, PIPE_BLEND_ADD,
1093b8e80941Smrg         PIPE_BLENDFACTOR_ONE, PIPE_BLENDFACTOR_ZERO,
1094b8e80941Smrg         PIPE_BLENDFACTOR_ONE, PIPE_BLENDFACTOR_ZERO);
1095b8e80941Smrg   }
1096b8e80941Smrg
1097b8e80941Smrg   render->alpha_blend |= (rt->colormask & PIPE_MASK_RGBA) << 28;
1098b8e80941Smrg
1099b8e80941Smrg   struct pipe_rasterizer_state *rst = &ctx->rasterizer->base;
1100b8e80941Smrg   struct pipe_depth_state *depth = &ctx->zsa->base.depth;
1101b8e80941Smrg   render->depth_test = lima_calculate_depth_test(depth, rst);
1102b8e80941Smrg
1103b8e80941Smrg   /* overlap with plbu? any place can remove one? */
1104b8e80941Smrg   render->depth_range = float_to_ushort(ctx->viewport.near) |
1105b8e80941Smrg      (float_to_ushort(ctx->viewport.far) << 16);
1106b8e80941Smrg
1107b8e80941Smrg#if 0
1108b8e80941Smrg   struct pipe_stencil_state *stencil = ctx->zsa->base.stencil;
1109b8e80941Smrg   struct pipe_stencil_ref *ref = &ctx->stencil_ref;
1110b8e80941Smrg   render->stencil_front = stencil[0].func |
1111b8e80941Smrg      (lima_stencil_op(stencil[0].fail_op) << 3) |
1112b8e80941Smrg      (lima_stencil_op(stencil[0].zfail_op) << 6) |
1113b8e80941Smrg      (lima_stencil_op(stencil[0].zpass_op) << 9) |
1114b8e80941Smrg      (ref->ref_value[0] << 16) |
1115b8e80941Smrg      (stencil[0].valuemask << 24);
1116b8e80941Smrg   render->stencil_back = stencil[1].func |
1117b8e80941Smrg      (lima_stencil_op(stencil[1].fail_op) << 3) |
1118b8e80941Smrg      (lima_stencil_op(stencil[1].zfail_op) << 6) |
1119b8e80941Smrg      (lima_stencil_op(stencil[1].zpass_op) << 9) |
1120b8e80941Smrg      (ref->ref_value[1] << 16) |
1121b8e80941Smrg      (stencil[1].valuemask << 24);
1122b8e80941Smrg#else
1123b8e80941Smrg   render->stencil_front = 0xff000007;
1124b8e80941Smrg   render->stencil_back = 0xff000007;
1125b8e80941Smrg#endif
1126b8e80941Smrg
1127b8e80941Smrg   /* seems not correct? */
1128b8e80941Smrg   //struct pipe_alpha_state *alpha = &ctx->zsa->base.alpha;
1129b8e80941Smrg   render->stencil_test = 0;
1130b8e80941Smrg   //(stencil->enabled ? 0xFF : 0x00) | (float_to_ubyte(alpha->ref_value) << 16)
1131b8e80941Smrg
1132b8e80941Smrg   /* need more investigation */
1133b8e80941Smrg   if (info->mode == PIPE_PRIM_POINTS)
1134b8e80941Smrg      render->multi_sample = 0x0000F007;
1135b8e80941Smrg   else if (info->mode < PIPE_PRIM_TRIANGLES)
1136b8e80941Smrg      render->multi_sample = 0x0000F407;
1137b8e80941Smrg   else
1138b8e80941Smrg      render->multi_sample = 0x0000F807;
1139b8e80941Smrg   if (ctx->framebuffer.base.samples)
1140b8e80941Smrg      render->multi_sample |= 0x68;
1141b8e80941Smrg
1142b8e80941Smrg   render->shader_address =
1143b8e80941Smrg      ctx->fs->bo->va | (((uint32_t *)ctx->fs->bo->map)[0] & 0x1F);
1144b8e80941Smrg
1145b8e80941Smrg   /* seems not needed */
1146b8e80941Smrg   render->uniforms_address = 0x00000000;
1147b8e80941Smrg
1148b8e80941Smrg   render->textures_address = 0x00000000;
1149b8e80941Smrg
1150b8e80941Smrg   /* more investigation */
1151b8e80941Smrg   render->aux0 = 0x00000300 | (ctx->vs->varying_stride >> 3);
1152b8e80941Smrg   render->aux1 = 0x00003000;
1153b8e80941Smrg
1154b8e80941Smrg   if (ctx->tex_stateobj.num_samplers) {
1155b8e80941Smrg      render->textures_address =
1156b8e80941Smrg         lima_ctx_buff_va(ctx, lima_ctx_buff_pp_tex_desc, LIMA_CTX_BUFF_SUBMIT_PP);
1157b8e80941Smrg      render->aux0 |= ctx->tex_stateobj.num_samplers << 14;
1158b8e80941Smrg      render->aux0 |= 0x20;
1159b8e80941Smrg   }
1160b8e80941Smrg
1161b8e80941Smrg   if (ctx->const_buffer[PIPE_SHADER_FRAGMENT].buffer) {
1162b8e80941Smrg      render->uniforms_address =
1163b8e80941Smrg         lima_ctx_buff_va(ctx, lima_ctx_buff_pp_uniform_array, LIMA_CTX_BUFF_SUBMIT_PP);
1164b8e80941Smrg      render->uniforms_address |= ((ctx->buffer_state[lima_ctx_buff_pp_uniform].size) / 4 - 1);
1165b8e80941Smrg      render->aux0 |= 0x80;
1166b8e80941Smrg      render->aux1 |= 0x10000;
1167b8e80941Smrg   }
1168b8e80941Smrg
1169b8e80941Smrg   if (ctx->vs->num_varying > 1) {
1170b8e80941Smrg      render->varying_types = 0x00000000;
1171b8e80941Smrg      render->varyings_address =
1172b8e80941Smrg         lima_ctx_buff_va(ctx, lima_ctx_buff_sh_varying, LIMA_CTX_BUFF_SUBMIT_PP);
1173b8e80941Smrg      for (int i = 1; i < ctx->vs->num_varying; i++) {
1174b8e80941Smrg         int val;
1175b8e80941Smrg
1176b8e80941Smrg         struct lima_varying_info *v = ctx->vs->varying + i;
1177b8e80941Smrg         if (v->component_size == 4)
1178b8e80941Smrg            val = v->components > 2 ? 0 : 1;
1179b8e80941Smrg         else
1180b8e80941Smrg            val = v->components > 2 ? 2 : 3;
1181b8e80941Smrg
1182b8e80941Smrg         int index = i - 1;
1183b8e80941Smrg         if (index < 10)
1184b8e80941Smrg            render->varying_types |= val << (3 * index);
1185b8e80941Smrg         else if (index == 10) {
1186b8e80941Smrg            render->varying_types |= val << 30;
1187b8e80941Smrg            render->varyings_address |= val >> 2;
1188b8e80941Smrg         }
1189b8e80941Smrg         else if (index == 11)
1190b8e80941Smrg            render->varyings_address |= val << 1;
1191b8e80941Smrg      }
1192b8e80941Smrg   }
1193b8e80941Smrg   else {
1194b8e80941Smrg      render->varying_types = 0x00000000;
1195b8e80941Smrg      render->varyings_address = 0x00000000;
1196b8e80941Smrg   }
1197b8e80941Smrg
1198b8e80941Smrg   lima_dump_command_stream_print(
1199b8e80941Smrg      render, sizeof(*render), false, "add render state at va %x\n",
1200b8e80941Smrg      lima_ctx_buff_va(ctx, lima_ctx_buff_pp_plb_rsw, 0));
1201b8e80941Smrg}
1202b8e80941Smrg
1203b8e80941Smrgstatic void
1204b8e80941Smrglima_update_gp_attribute_info(struct lima_context *ctx, const struct pipe_draw_info *info)
1205b8e80941Smrg{
1206b8e80941Smrg   struct lima_vertex_element_state *ve = ctx->vertex_elements;
1207b8e80941Smrg   struct lima_context_vertex_buffer *vb = &ctx->vertex_buffers;
1208b8e80941Smrg
1209b8e80941Smrg   uint32_t *attribute =
1210b8e80941Smrg      lima_ctx_buff_alloc(ctx, lima_ctx_buff_gp_attribute_info,
1211b8e80941Smrg                          ve->num_elements * 8, true);
1212b8e80941Smrg
1213b8e80941Smrg   int n = 0;
1214b8e80941Smrg   for (int i = 0; i < ve->num_elements; i++) {
1215b8e80941Smrg      struct pipe_vertex_element *pve = ve->pipe + i;
1216b8e80941Smrg
1217b8e80941Smrg      assert(pve->vertex_buffer_index < vb->count);
1218b8e80941Smrg      assert(vb->enabled_mask & (1 << pve->vertex_buffer_index));
1219b8e80941Smrg
1220b8e80941Smrg      struct pipe_vertex_buffer *pvb = vb->vb + pve->vertex_buffer_index;
1221b8e80941Smrg      struct lima_resource *res = lima_resource(pvb->buffer.resource);
1222b8e80941Smrg
1223b8e80941Smrg      lima_submit_add_bo(ctx->gp_submit, res->bo, LIMA_SUBMIT_BO_READ);
1224b8e80941Smrg
1225b8e80941Smrg      unsigned start = info->index_size ? ctx->min_index : info->start;
1226b8e80941Smrg      attribute[n++] = res->bo->va + pvb->buffer_offset + pve->src_offset
1227b8e80941Smrg         + start * pvb->stride;
1228b8e80941Smrg      attribute[n++] = (pvb->stride << 11) |
1229b8e80941Smrg         (lima_pipe_format_to_attrib_type(pve->src_format) << 2) |
1230b8e80941Smrg         (util_format_get_nr_components(pve->src_format) - 1);
1231b8e80941Smrg   }
1232b8e80941Smrg
1233b8e80941Smrg   lima_dump_command_stream_print(
1234b8e80941Smrg      attribute, n * 4, false, "update attribute info at va %x\n",
1235b8e80941Smrg      lima_ctx_buff_va(ctx, lima_ctx_buff_gp_attribute_info, 0));
1236b8e80941Smrg}
1237b8e80941Smrg
1238b8e80941Smrgstatic void
1239b8e80941Smrglima_update_gp_uniform(struct lima_context *ctx)
1240b8e80941Smrg{
1241b8e80941Smrg   struct lima_context_constant_buffer *ccb =
1242b8e80941Smrg      ctx->const_buffer + PIPE_SHADER_VERTEX;
1243b8e80941Smrg   struct lima_vs_shader_state *vs = ctx->vs;
1244b8e80941Smrg
1245b8e80941Smrg   int size = vs->uniform_pending_offset + vs->constant_size + 32;
1246b8e80941Smrg   void *vs_const_buff =
1247b8e80941Smrg      lima_ctx_buff_alloc(ctx, lima_ctx_buff_gp_uniform, size, true);
1248b8e80941Smrg
1249b8e80941Smrg   if (ccb->buffer)
1250b8e80941Smrg      memcpy(vs_const_buff, ccb->buffer, ccb->size);
1251b8e80941Smrg
1252b8e80941Smrg   memcpy(vs_const_buff + vs->uniform_pending_offset,
1253b8e80941Smrg          ctx->viewport.transform.scale,
1254b8e80941Smrg          sizeof(ctx->viewport.transform.scale));
1255b8e80941Smrg   memcpy(vs_const_buff + vs->uniform_pending_offset + 16,
1256b8e80941Smrg          ctx->viewport.transform.translate,
1257b8e80941Smrg          sizeof(ctx->viewport.transform.translate));
1258b8e80941Smrg
1259b8e80941Smrg   if (vs->constant)
1260b8e80941Smrg      memcpy(vs_const_buff + vs->uniform_pending_offset + 32,
1261b8e80941Smrg             vs->constant, vs->constant_size);
1262b8e80941Smrg
1263b8e80941Smrg   lima_dump_command_stream_print(
1264b8e80941Smrg      vs_const_buff, size, true,
1265b8e80941Smrg      "update gp uniform at va %x\n",
1266b8e80941Smrg      lima_ctx_buff_va(ctx, lima_ctx_buff_gp_uniform, 0));
1267b8e80941Smrg}
1268b8e80941Smrg
1269b8e80941Smrgstatic void
1270b8e80941Smrglima_update_pp_uniform(struct lima_context *ctx)
1271b8e80941Smrg{
1272b8e80941Smrg   const float *const_buff = ctx->const_buffer[PIPE_SHADER_FRAGMENT].buffer;
1273b8e80941Smrg   size_t const_buff_size = ctx->const_buffer[PIPE_SHADER_FRAGMENT].size / sizeof(float);
1274b8e80941Smrg
1275b8e80941Smrg   if (!const_buff)
1276b8e80941Smrg      return;
1277b8e80941Smrg
1278b8e80941Smrg   uint16_t *fp16_const_buff =
1279b8e80941Smrg      lima_ctx_buff_alloc(ctx, lima_ctx_buff_pp_uniform,
1280b8e80941Smrg                          const_buff_size * sizeof(uint16_t), true);
1281b8e80941Smrg
1282b8e80941Smrg   uint32_t *array =
1283b8e80941Smrg      lima_ctx_buff_alloc(ctx, lima_ctx_buff_pp_uniform_array, 4, true);
1284b8e80941Smrg
1285b8e80941Smrg   for (int i = 0; i < const_buff_size; i++)
1286b8e80941Smrg       fp16_const_buff[i] = util_float_to_half(const_buff[i]);
1287b8e80941Smrg
1288b8e80941Smrg   *array = lima_ctx_buff_va(ctx, lima_ctx_buff_pp_uniform, LIMA_CTX_BUFF_SUBMIT_PP);
1289b8e80941Smrg
1290b8e80941Smrg   lima_dump_command_stream_print(
1291b8e80941Smrg      fp16_const_buff, const_buff_size * 2, false, "add pp uniform data at va %x\n",
1292b8e80941Smrg      lima_ctx_buff_va(ctx, lima_ctx_buff_pp_uniform, 0));
1293b8e80941Smrg   lima_dump_command_stream_print(
1294b8e80941Smrg      array, 4, false, "add pp uniform info at va %x\n",
1295b8e80941Smrg      lima_ctx_buff_va(ctx, lima_ctx_buff_pp_uniform_array, 0));
1296b8e80941Smrg}
1297b8e80941Smrg
1298b8e80941Smrgstatic void
1299b8e80941Smrglima_update_varying(struct lima_context *ctx, const struct pipe_draw_info *info)
1300b8e80941Smrg{
1301b8e80941Smrg   struct lima_vs_shader_state *vs = ctx->vs;
1302b8e80941Smrg
1303b8e80941Smrg   uint32_t *varying =
1304b8e80941Smrg      lima_ctx_buff_alloc(ctx, lima_ctx_buff_gp_varying_info,
1305b8e80941Smrg                          vs->num_varying * 8, true);
1306b8e80941Smrg   int n = 0;
1307b8e80941Smrg
1308b8e80941Smrg   /* should be LIMA_SUBMIT_BO_WRITE for GP, but each draw will use
1309b8e80941Smrg    * different part of this bo, so no need to set exclusive constraint */
1310b8e80941Smrg   lima_ctx_buff_alloc(ctx, lima_ctx_buff_sh_gl_pos,
1311b8e80941Smrg                       4 * 4 * info->count, false);
1312b8e80941Smrg
1313b8e80941Smrg   /* for gl_Position */
1314b8e80941Smrg   varying[n++] =
1315b8e80941Smrg      lima_ctx_buff_va(ctx, lima_ctx_buff_sh_gl_pos,
1316b8e80941Smrg                       LIMA_CTX_BUFF_SUBMIT_GP | LIMA_CTX_BUFF_SUBMIT_PP);
1317b8e80941Smrg   varying[n++] = 0x8020;
1318b8e80941Smrg
1319b8e80941Smrg   int offset = 0;
1320b8e80941Smrg   for (int i = 1; i < vs->num_varying; i++) {
1321b8e80941Smrg      struct lima_varying_info *v = vs->varying + i;
1322b8e80941Smrg      int size = v->component_size * 4;
1323b8e80941Smrg
1324b8e80941Smrg      /* does component_size == 2 need to be 16 aligned? */
1325b8e80941Smrg      if (v->component_size == 4)
1326b8e80941Smrg         offset = align(offset, 16);
1327b8e80941Smrg
1328b8e80941Smrg      v->offset = offset;
1329b8e80941Smrg      offset += size;
1330b8e80941Smrg   }
1331b8e80941Smrg   vs->varying_stride = align(offset, 16);
1332b8e80941Smrg
1333b8e80941Smrg   if (vs->num_varying > 1)
1334b8e80941Smrg      lima_ctx_buff_alloc(ctx, lima_ctx_buff_sh_varying,
1335b8e80941Smrg                          vs->varying_stride * info->count, false);
1336b8e80941Smrg
1337b8e80941Smrg   for (int i = 1; i < vs->num_varying; i++) {
1338b8e80941Smrg      struct lima_varying_info *v = vs->varying + i;
1339b8e80941Smrg      varying[n++] =
1340b8e80941Smrg         lima_ctx_buff_va(ctx, lima_ctx_buff_sh_varying, LIMA_CTX_BUFF_SUBMIT_GP) +
1341b8e80941Smrg         v->offset;
1342b8e80941Smrg      varying[n++] = (vs->varying_stride << 11) | (v->components - 1) |
1343b8e80941Smrg         (v->component_size == 2 ? 0x0C : 0);
1344b8e80941Smrg   }
1345b8e80941Smrg
1346b8e80941Smrg   lima_dump_command_stream_print(
1347b8e80941Smrg      varying, n * 4, false, "update varying info at va %x\n",
1348b8e80941Smrg      lima_ctx_buff_va(ctx, lima_ctx_buff_gp_varying_info, 0));
1349b8e80941Smrg}
1350b8e80941Smrg
1351b8e80941Smrgstatic void
1352b8e80941Smrglima_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
1353b8e80941Smrg{
1354b8e80941Smrg   /* check if draw mode and vertex/index count match,
1355b8e80941Smrg    * otherwise gp will hang */
1356b8e80941Smrg   if (!u_trim_pipe_prim(info->mode, (unsigned*)&info->count)) {
1357b8e80941Smrg      debug_printf("draw mode and vertex/index count mismatch\n");
1358b8e80941Smrg      return;
1359b8e80941Smrg   }
1360b8e80941Smrg
1361b8e80941Smrg   struct lima_context *ctx = lima_context(pctx);
1362b8e80941Smrg
1363b8e80941Smrg   if (!ctx->vs || !ctx->fs) {
1364b8e80941Smrg      debug_warn_once("no shader, skip draw\n");
1365b8e80941Smrg      return;
1366b8e80941Smrg   }
1367b8e80941Smrg
1368b8e80941Smrg   if (!lima_update_vs_state(ctx) || !lima_update_fs_state(ctx))
1369b8e80941Smrg      return;
1370b8e80941Smrg
1371b8e80941Smrg   lima_dump_command_stream_print(
1372b8e80941Smrg      ctx->vs->bo->map, ctx->vs->shader_size, false,
1373b8e80941Smrg      "add vs at va %x\n", ctx->vs->bo->va);
1374b8e80941Smrg
1375b8e80941Smrg   lima_dump_command_stream_print(
1376b8e80941Smrg      ctx->fs->bo->map, ctx->fs->shader_size, false,
1377b8e80941Smrg      "add fs at va %x\n", ctx->fs->bo->va);
1378b8e80941Smrg
1379b8e80941Smrg   lima_submit_add_bo(ctx->gp_submit, ctx->vs->bo, LIMA_SUBMIT_BO_READ);
1380b8e80941Smrg   lima_submit_add_bo(ctx->pp_submit, ctx->fs->bo, LIMA_SUBMIT_BO_READ);
1381b8e80941Smrg
1382b8e80941Smrg   lima_update_submit_bo(ctx);
1383b8e80941Smrg
1384b8e80941Smrg   /* Mali Utgard GPU always need min/max index info for index draw,
1385b8e80941Smrg    * compute it if upper layer does not do for us */
1386b8e80941Smrg   if (info->index_size && info->max_index == ~0u)
1387b8e80941Smrg      u_vbuf_get_minmax_index(pctx, info, &ctx->min_index, &ctx->max_index);
1388b8e80941Smrg   else {
1389b8e80941Smrg      ctx->min_index = info->min_index;
1390b8e80941Smrg      ctx->max_index = info->max_index;
1391b8e80941Smrg   }
1392b8e80941Smrg
1393b8e80941Smrg   lima_update_gp_attribute_info(ctx, info);
1394b8e80941Smrg
1395b8e80941Smrg   if ((ctx->dirty & LIMA_CONTEXT_DIRTY_CONST_BUFF &&
1396b8e80941Smrg        ctx->const_buffer[PIPE_SHADER_VERTEX].dirty) ||
1397b8e80941Smrg       ctx->dirty & LIMA_CONTEXT_DIRTY_VIEWPORT ||
1398b8e80941Smrg       ctx->dirty & LIMA_CONTEXT_DIRTY_SHADER_VERT) {
1399b8e80941Smrg      lima_update_gp_uniform(ctx);
1400b8e80941Smrg      ctx->const_buffer[PIPE_SHADER_VERTEX].dirty = false;
1401b8e80941Smrg   }
1402b8e80941Smrg
1403b8e80941Smrg   lima_update_varying(ctx, info);
1404b8e80941Smrg
1405b8e80941Smrg   /* If it's zero scissor, don't build vs cmd list */
1406b8e80941Smrg   if (!lima_is_scissor_zero(ctx))
1407b8e80941Smrg      lima_pack_vs_cmd(ctx, info);
1408b8e80941Smrg
1409b8e80941Smrg   if (ctx->dirty & LIMA_CONTEXT_DIRTY_CONST_BUFF &&
1410b8e80941Smrg       ctx->const_buffer[PIPE_SHADER_FRAGMENT].dirty) {
1411b8e80941Smrg      lima_update_pp_uniform(ctx);
1412b8e80941Smrg      ctx->const_buffer[PIPE_SHADER_FRAGMENT].dirty = false;
1413b8e80941Smrg   }
1414b8e80941Smrg
1415b8e80941Smrg   if (ctx->dirty & LIMA_CONTEXT_DIRTY_TEXTURES)
1416b8e80941Smrg      lima_update_textures(ctx);
1417b8e80941Smrg
1418b8e80941Smrg   lima_pack_render_state(ctx, info);
1419b8e80941Smrg   lima_pack_plbu_cmd(ctx, info);
1420b8e80941Smrg
1421b8e80941Smrg   ctx->dirty = 0;
1422b8e80941Smrg}
1423b8e80941Smrg
1424b8e80941Smrgstatic void
1425b8e80941Smrglima_finish_plbu_cmd(struct lima_context *ctx)
1426b8e80941Smrg{
1427b8e80941Smrg   int i = 0;
1428b8e80941Smrg   uint32_t *plbu_cmd = util_dynarray_grow_cap(&ctx->plbu_cmd_array, 2 * 4);
1429b8e80941Smrg
1430b8e80941Smrg   plbu_cmd[i++] = 0x00000000;
1431b8e80941Smrg   plbu_cmd[i++] = 0x50000000; /* END */
1432b8e80941Smrg
1433b8e80941Smrg   ctx->plbu_cmd_array.size += i * 4;
1434b8e80941Smrg}
1435b8e80941Smrg
1436b8e80941Smrgstatic void
1437b8e80941Smrglima_pack_wb_zsbuf_reg(struct lima_context *ctx, uint32_t *wb_reg, int wb_idx)
1438b8e80941Smrg{
1439b8e80941Smrg   struct lima_context_framebuffer *fb = &ctx->framebuffer;
1440b8e80941Smrg   struct lima_resource *res = lima_resource(fb->base.zsbuf->texture);
1441b8e80941Smrg   int level = fb->base.zsbuf->u.tex.level;
1442b8e80941Smrg
1443b8e80941Smrg   uint32_t format;
1444b8e80941Smrg
1445b8e80941Smrg   switch (fb->base.zsbuf->format) {
1446b8e80941Smrg   case PIPE_FORMAT_Z16_UNORM:
1447b8e80941Smrg      format = LIMA_PIXEL_FORMAT_Z16;
1448b8e80941Smrg      break;
1449b8e80941Smrg   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
1450b8e80941Smrg   case PIPE_FORMAT_Z24X8_UNORM:
1451b8e80941Smrg   default:
1452b8e80941Smrg      /* Assume Z24S8 */
1453b8e80941Smrg      format = LIMA_PIXEL_FORMAT_Z24S8;
1454b8e80941Smrg      break;
1455b8e80941Smrg   }
1456b8e80941Smrg
1457b8e80941Smrg   struct lima_pp_wb_reg *wb = (void *)wb_reg;
1458b8e80941Smrg   wb[wb_idx].type = 0x01; /* 1 for depth, stencil */
1459b8e80941Smrg   wb[wb_idx].address = res->bo->va + res->levels[level].offset;
1460b8e80941Smrg   wb[wb_idx].pixel_format = format;
1461b8e80941Smrg   if (res->tiled) {
1462b8e80941Smrg      wb[wb_idx].pixel_layout = 0x2;
1463b8e80941Smrg      wb[wb_idx].pitch = fb->tiled_w;
1464b8e80941Smrg   } else {
1465b8e80941Smrg      wb[wb_idx].pixel_layout = 0x0;
1466b8e80941Smrg      wb[wb_idx].pitch = res->levels[level].stride / 8;
1467b8e80941Smrg   }
1468b8e80941Smrg   wb[wb_idx].mrt_bits = 0;
1469b8e80941Smrg}
1470b8e80941Smrg
1471b8e80941Smrgstatic void
1472b8e80941Smrglima_pack_wb_cbuf_reg(struct lima_context *ctx, uint32_t *wb_reg, int wb_idx)
1473b8e80941Smrg{
1474b8e80941Smrg   struct lima_context_framebuffer *fb = &ctx->framebuffer;
1475b8e80941Smrg   struct lima_resource *res = lima_resource(fb->base.cbufs[0]->texture);
1476b8e80941Smrg   int level = fb->base.cbufs[0]->u.tex.level;
1477b8e80941Smrg
1478b8e80941Smrg   bool swap_channels = false;
1479b8e80941Smrg   switch (fb->base.cbufs[0]->format) {
1480b8e80941Smrg   case PIPE_FORMAT_R8G8B8A8_UNORM:
1481b8e80941Smrg   case PIPE_FORMAT_R8G8B8X8_UNORM:
1482b8e80941Smrg      swap_channels = true;
1483b8e80941Smrg      break;
1484b8e80941Smrg   default:
1485b8e80941Smrg      break;
1486b8e80941Smrg   }
1487b8e80941Smrg
1488b8e80941Smrg   struct lima_pp_wb_reg *wb = (void *)wb_reg;
1489b8e80941Smrg   wb[wb_idx].type = 0x02; /* 2 for color buffer */
1490b8e80941Smrg   wb[wb_idx].address = res->bo->va + res->levels[level].offset;
1491b8e80941Smrg   wb[wb_idx].pixel_format = LIMA_PIXEL_FORMAT_B8G8R8A8;
1492b8e80941Smrg   if (res->tiled) {
1493b8e80941Smrg      wb[wb_idx].pixel_layout = 0x2;
1494b8e80941Smrg      wb[wb_idx].pitch = fb->tiled_w;
1495b8e80941Smrg   } else {
1496b8e80941Smrg      wb[wb_idx].pixel_layout = 0x0;
1497b8e80941Smrg      wb[wb_idx].pitch = res->levels[level].stride / 8;
1498b8e80941Smrg   }
1499b8e80941Smrg   wb[wb_idx].mrt_bits = swap_channels ? 0x4 : 0x0;
1500b8e80941Smrg}
1501b8e80941Smrg
1502b8e80941Smrg
1503b8e80941Smrgstatic void
1504b8e80941Smrglima_pack_pp_frame_reg(struct lima_context *ctx, uint32_t *frame_reg,
1505b8e80941Smrg                       uint32_t *wb_reg)
1506b8e80941Smrg{
1507b8e80941Smrg   struct lima_context_framebuffer *fb = &ctx->framebuffer;
1508b8e80941Smrg   struct lima_pp_frame_reg *frame = (void *)frame_reg;
1509b8e80941Smrg   struct lima_screen *screen = lima_screen(ctx->base.screen);
1510b8e80941Smrg   int wb_idx = 0;
1511b8e80941Smrg
1512b8e80941Smrg   frame->render_address = screen->pp_buffer->va + pp_frame_rsw_offset;
1513b8e80941Smrg   frame->flags = 0x02;
1514b8e80941Smrg   frame->clear_value_depth = ctx->clear.depth;
1515b8e80941Smrg   frame->clear_value_stencil = ctx->clear.stencil;
1516b8e80941Smrg   frame->clear_value_color = ctx->clear.color_8pc;
1517b8e80941Smrg   frame->clear_value_color_1 = ctx->clear.color_8pc;
1518b8e80941Smrg   frame->clear_value_color_2 = ctx->clear.color_8pc;
1519b8e80941Smrg   frame->clear_value_color_3 = ctx->clear.color_8pc;
1520b8e80941Smrg   frame->one = 1;
1521b8e80941Smrg
1522b8e80941Smrg   frame->width = fb->base.width - 1;
1523b8e80941Smrg   frame->height = fb->base.height - 1;
1524b8e80941Smrg
1525b8e80941Smrg   /* frame->fragment_stack_address is overwritten per-pp in the kernel
1526b8e80941Smrg    * by the values of pp_frame.fragment_stack_address[i] */
1527b8e80941Smrg
1528b8e80941Smrg   /* These are "stack size" and "stack offset" shifted,
1529b8e80941Smrg    * here they are assumed to be always the same. */
1530b8e80941Smrg   uint32_t fs_stack_size = ctx->fs ? ctx->fs->stack_size : 0;
1531b8e80941Smrg   frame->fragment_stack_size = fs_stack_size << 16 | fs_stack_size;
1532b8e80941Smrg
1533b8e80941Smrg   /* related with MSAA and different value when r4p0/r7p0 */
1534b8e80941Smrg   frame->supersampled_height = fb->base.height * 2 - 1;
1535b8e80941Smrg   frame->scale = 0xE0C;
1536b8e80941Smrg
1537b8e80941Smrg   frame->dubya = 0x77;
1538b8e80941Smrg   frame->onscreen = 1;
1539b8e80941Smrg   frame->blocking = (fb->shift_min << 28) | (fb->shift_h << 16) | fb->shift_w;
1540b8e80941Smrg   frame->foureight = 0x8888;
1541b8e80941Smrg
1542b8e80941Smrg   if (fb->base.nr_cbufs)
1543b8e80941Smrg      lima_pack_wb_cbuf_reg(ctx, wb_reg, wb_idx++);
1544b8e80941Smrg
1545b8e80941Smrg   /* Mali4x0 can use on-tile buffer for depth/stencil, so to save some
1546b8e80941Smrg    * memory bandwidth don't write depth/stencil back to memory if we're
1547b8e80941Smrg    * rendering to scanout
1548b8e80941Smrg    */
1549b8e80941Smrg   if (!lima_is_scanout(ctx) && fb->base.zsbuf)
1550b8e80941Smrg      lima_pack_wb_zsbuf_reg(ctx, wb_reg, wb_idx++);
1551b8e80941Smrg}
1552b8e80941Smrg
1553b8e80941Smrgstatic void
1554b8e80941Smrg_lima_flush(struct lima_context *ctx, bool end_of_frame)
1555b8e80941Smrg{
1556b8e80941Smrg   lima_finish_plbu_cmd(ctx);
1557b8e80941Smrg
1558b8e80941Smrg   int vs_cmd_size = ctx->vs_cmd_array.size;
1559b8e80941Smrg   int plbu_cmd_size = ctx->plbu_cmd_array.size;
1560b8e80941Smrg   uint32_t vs_cmd_va = 0;
1561b8e80941Smrg   uint32_t plbu_cmd_va;
1562b8e80941Smrg
1563b8e80941Smrg   if (vs_cmd_size) {
1564b8e80941Smrg      void *vs_cmd =
1565b8e80941Smrg         lima_ctx_buff_alloc(ctx, lima_ctx_buff_gp_vs_cmd, vs_cmd_size, true);
1566b8e80941Smrg      memcpy(vs_cmd, util_dynarray_begin(&ctx->vs_cmd_array), vs_cmd_size);
1567b8e80941Smrg      util_dynarray_clear(&ctx->vs_cmd_array);
1568b8e80941Smrg      vs_cmd_va = lima_ctx_buff_va(ctx, lima_ctx_buff_gp_vs_cmd,
1569b8e80941Smrg                                   LIMA_CTX_BUFF_SUBMIT_GP);
1570b8e80941Smrg
1571b8e80941Smrg      lima_dump_command_stream_print(
1572b8e80941Smrg         vs_cmd, vs_cmd_size, false, "flush vs cmd at va %x\n", vs_cmd_va);
1573b8e80941Smrg   }
1574b8e80941Smrg
1575b8e80941Smrg   void *plbu_cmd =
1576b8e80941Smrg      lima_ctx_buff_alloc(ctx, lima_ctx_buff_gp_plbu_cmd, plbu_cmd_size, true);
1577b8e80941Smrg   memcpy(plbu_cmd, util_dynarray_begin(&ctx->plbu_cmd_array), plbu_cmd_size);
1578b8e80941Smrg   util_dynarray_clear(&ctx->plbu_cmd_array);
1579b8e80941Smrg   plbu_cmd_va = lima_ctx_buff_va(ctx, lima_ctx_buff_gp_plbu_cmd,
1580b8e80941Smrg                                  LIMA_CTX_BUFF_SUBMIT_GP);
1581b8e80941Smrg
1582b8e80941Smrg   lima_dump_command_stream_print(
1583b8e80941Smrg      plbu_cmd, plbu_cmd_size, false, "flush plbu cmd at va %x\n", plbu_cmd_va);
1584b8e80941Smrg
1585b8e80941Smrg   struct lima_screen *screen = lima_screen(ctx->base.screen);
1586b8e80941Smrg   struct drm_lima_gp_frame gp_frame;
1587b8e80941Smrg   struct lima_gp_frame_reg *gp_frame_reg = (void *)gp_frame.frame;
1588b8e80941Smrg   gp_frame_reg->vs_cmd_start = vs_cmd_va;
1589b8e80941Smrg   gp_frame_reg->vs_cmd_end = vs_cmd_va + vs_cmd_size;
1590b8e80941Smrg   gp_frame_reg->plbu_cmd_start = plbu_cmd_va;
1591b8e80941Smrg   gp_frame_reg->plbu_cmd_end = plbu_cmd_va + plbu_cmd_size;
1592b8e80941Smrg   gp_frame_reg->tile_heap_start = ctx->gp_tile_heap[ctx->plb_index]->va;
1593b8e80941Smrg   gp_frame_reg->tile_heap_end = ctx->gp_tile_heap[ctx->plb_index]->va + gp_tile_heap_size;
1594b8e80941Smrg
1595b8e80941Smrg   lima_dump_command_stream_print(
1596b8e80941Smrg      &gp_frame, sizeof(gp_frame), false, "add gp frame\n");
1597b8e80941Smrg
1598b8e80941Smrg   if (!lima_submit_start(ctx->gp_submit, &gp_frame, sizeof(gp_frame)))
1599b8e80941Smrg      fprintf(stderr, "gp submit error\n");
1600b8e80941Smrg
1601b8e80941Smrg   if (lima_dump_command_stream) {
1602b8e80941Smrg      if (lima_submit_wait(ctx->gp_submit, PIPE_TIMEOUT_INFINITE)) {
1603b8e80941Smrg         if (ctx->buffer_state[lima_ctx_buff_sh_gl_pos].res) {
1604b8e80941Smrg            float *pos = lima_ctx_buff_map(ctx, lima_ctx_buff_sh_gl_pos);
1605b8e80941Smrg            lima_dump_command_stream_print(
1606b8e80941Smrg               pos, 4 * 4 * 16, true, "gl_pos dump at va %x\n",
1607b8e80941Smrg               lima_ctx_buff_va(ctx, lima_ctx_buff_sh_gl_pos, 0));
1608b8e80941Smrg         }
1609b8e80941Smrg
1610b8e80941Smrg         uint32_t *plb = lima_bo_map(ctx->plb[ctx->plb_index]);
1611b8e80941Smrg         lima_dump_command_stream_print(
1612b8e80941Smrg            plb, LIMA_CTX_PLB_BLK_SIZE, false, "plb dump at va %x\n",
1613b8e80941Smrg            ctx->plb[ctx->plb_index]->va);
1614b8e80941Smrg      }
1615b8e80941Smrg      else {
1616b8e80941Smrg         fprintf(stderr, "gp submit wait error\n");
1617b8e80941Smrg         exit(1);
1618b8e80941Smrg      }
1619b8e80941Smrg   }
1620b8e80941Smrg
1621b8e80941Smrg   struct lima_pp_stream_state *ps = &ctx->pp_stream;
1622b8e80941Smrg   if (screen->gpu_type == DRM_LIMA_PARAM_GPU_ID_MALI400) {
1623b8e80941Smrg      struct drm_lima_m400_pp_frame pp_frame = {0};
1624b8e80941Smrg      lima_pack_pp_frame_reg(ctx, pp_frame.frame, pp_frame.wb);
1625b8e80941Smrg      pp_frame.num_pp = screen->num_pp;
1626b8e80941Smrg
1627b8e80941Smrg      for (int i = 0; i < screen->num_pp; i++) {
1628b8e80941Smrg         pp_frame.plbu_array_address[i] = ps->bo->va + ps->bo_offset + ps->offset[i];
1629b8e80941Smrg         pp_frame.fragment_stack_address[i] = screen->pp_buffer->va +
1630b8e80941Smrg            pp_stack_offset + pp_stack_pp_size * i;
1631b8e80941Smrg      }
1632b8e80941Smrg
1633b8e80941Smrg      lima_dump_command_stream_print(
1634b8e80941Smrg         &pp_frame, sizeof(pp_frame), false, "add pp frame\n");
1635b8e80941Smrg
1636b8e80941Smrg      if (!lima_submit_start(ctx->pp_submit, &pp_frame, sizeof(pp_frame)))
1637b8e80941Smrg         fprintf(stderr, "pp submit error\n");
1638b8e80941Smrg   }
1639b8e80941Smrg   else {
1640b8e80941Smrg      struct drm_lima_m450_pp_frame pp_frame = {0};
1641b8e80941Smrg      lima_pack_pp_frame_reg(ctx, pp_frame.frame, pp_frame.wb);
1642b8e80941Smrg      pp_frame.num_pp = screen->num_pp;
1643b8e80941Smrg
1644b8e80941Smrg      for (int i = 0; i < screen->num_pp; i++)
1645b8e80941Smrg         pp_frame.fragment_stack_address[i] = screen->pp_buffer->va +
1646b8e80941Smrg            pp_stack_offset + pp_stack_pp_size * i;
1647b8e80941Smrg
1648b8e80941Smrg      if (ps->bo) {
1649b8e80941Smrg         for (int i = 0; i < screen->num_pp; i++)
1650b8e80941Smrg            pp_frame.plbu_array_address[i] = ps->bo->va + ps->bo_offset + ps->offset[i];
1651b8e80941Smrg      }
1652b8e80941Smrg      else {
1653b8e80941Smrg         pp_frame.use_dlbu = true;
1654b8e80941Smrg
1655b8e80941Smrg         struct lima_context_framebuffer *fb = &ctx->framebuffer;
1656b8e80941Smrg         pp_frame.dlbu_regs[0] = ctx->plb[ctx->plb_index]->va;
1657b8e80941Smrg         pp_frame.dlbu_regs[1] = ((fb->tiled_h - 1) << 16) | (fb->tiled_w - 1);
1658b8e80941Smrg         unsigned s = util_logbase2(LIMA_CTX_PLB_BLK_SIZE) - 7;
1659b8e80941Smrg         pp_frame.dlbu_regs[2] = (s << 28) | (fb->shift_h << 16) | fb->shift_w;
1660b8e80941Smrg         pp_frame.dlbu_regs[3] = ((fb->tiled_h - 1) << 24) | ((fb->tiled_w - 1) << 16);
1661b8e80941Smrg      }
1662b8e80941Smrg
1663b8e80941Smrg      lima_dump_command_stream_print(
1664b8e80941Smrg         &pp_frame, sizeof(pp_frame), false, "add pp frame\n");
1665b8e80941Smrg
1666b8e80941Smrg      if (!lima_submit_start(ctx->pp_submit, &pp_frame, sizeof(pp_frame)))
1667b8e80941Smrg         fprintf(stderr, "pp submit error\n");
1668b8e80941Smrg   }
1669b8e80941Smrg
1670b8e80941Smrg   if (lima_dump_command_stream) {
1671b8e80941Smrg      if (!lima_submit_wait(ctx->pp_submit, PIPE_TIMEOUT_INFINITE)) {
1672b8e80941Smrg         fprintf(stderr, "pp wait error\n");
1673b8e80941Smrg         exit(1);
1674b8e80941Smrg      }
1675b8e80941Smrg   }
1676b8e80941Smrg
1677b8e80941Smrg   ctx->plb_index = (ctx->plb_index + 1) % lima_ctx_num_plb;
1678b8e80941Smrg
1679b8e80941Smrg   if (ctx->framebuffer.base.nr_cbufs) {
1680b8e80941Smrg      /* this surface may need reload when next draw if not end of frame */
1681b8e80941Smrg      struct lima_surface *surf = lima_surface(ctx->framebuffer.base.cbufs[0]);
1682b8e80941Smrg      surf->reload = !end_of_frame;
1683b8e80941Smrg   }
1684b8e80941Smrg}
1685b8e80941Smrg
/*
 * Flush the context if it is dirty.  The flush is issued as a mid-frame
 * flush (end_of_frame = false).
 */
void
lima_flush(struct lima_context *ctx)
{
   if (lima_ctx_dirty(ctx))
      _lima_flush(ctx, false);
}
1694b8e80941Smrg
1695b8e80941Smrgstatic void
1696b8e80941Smrglima_pipe_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence,
1697b8e80941Smrg                unsigned flags)
1698b8e80941Smrg{
1699b8e80941Smrg   struct lima_context *ctx = lima_context(pctx);
1700b8e80941Smrg   if (!lima_ctx_dirty(ctx))
1701b8e80941Smrg      return;
1702b8e80941Smrg
1703b8e80941Smrg   _lima_flush(ctx, flags & PIPE_FLUSH_END_OF_FRAME);
1704b8e80941Smrg
1705b8e80941Smrg   if (fence) {
1706b8e80941Smrg      int fd;
1707b8e80941Smrg      if (lima_submit_get_out_sync(ctx->pp_submit, &fd))
1708b8e80941Smrg         *fence = lima_fence_create(fd);
1709b8e80941Smrg   }
1710b8e80941Smrg}
1711b8e80941Smrg
1712b8e80941Smrgvoid
1713b8e80941Smrglima_draw_init(struct lima_context *ctx)
1714b8e80941Smrg{
1715b8e80941Smrg   ctx->base.clear = lima_clear;
1716b8e80941Smrg   ctx->base.draw_vbo = lima_draw_vbo;
1717b8e80941Smrg   ctx->base.flush = lima_pipe_flush;
1718b8e80941Smrg}
1719