1/*
2 * Copyright 2021 Alyssa Rosenzweig
3 * Copyright (C) 2019-2020 Collabora, Ltd.
4 * Copyright 2010 Red Hat Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * on the rights to use, copy, modify, merge, publish, distribute, sub
10 * license, and/or sell copies of the Software, and to permit persons to whom
11 * the Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
21 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
22 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
23 * USE OR OTHER DEALINGS IN THE SOFTWARE.
24 */
25#include <stdio.h>
26#include <errno.h>
27#include "pipe/p_defines.h"
28#include "pipe/p_state.h"
29#include "pipe/p_context.h"
30#include "pipe/p_screen.h"
31#include "util/u_memory.h"
32#include "util/u_inlines.h"
33#include "util/u_transfer.h"
34#include "gallium/auxiliary/util/u_draw.h"
35#include "gallium/auxiliary/util/u_helpers.h"
36#include "gallium/auxiliary/util/u_viewport.h"
37#include "gallium/auxiliary/util/u_blend.h"
38#include "gallium/auxiliary/util/u_framebuffer.h"
39#include "gallium/auxiliary/tgsi/tgsi_from_mesa.h"
40#include "gallium/auxiliary/nir/tgsi_to_nir.h"
41#include "compiler/nir/nir.h"
42#include "asahi/compiler/agx_compile.h"
43#include "agx_state.h"
44#include "asahi/lib/agx_pack.h"
45#include "asahi/lib/agx_formats.h"
46
47static struct pipe_stream_output_target *
48agx_create_stream_output_target(struct pipe_context *pctx,
49                                struct pipe_resource *prsc,
50                                unsigned buffer_offset,
51                                unsigned buffer_size)
52{
53   struct pipe_stream_output_target *target;
54
55   target = &rzalloc(pctx, struct agx_streamout_target)->base;
56
57   if (!target)
58      return NULL;
59
60   pipe_reference_init(&target->reference, 1);
61   pipe_resource_reference(&target->buffer, prsc);
62
63   target->context = pctx;
64   target->buffer_offset = buffer_offset;
65   target->buffer_size = buffer_size;
66
67   return target;
68}
69
70static void
71agx_stream_output_target_destroy(struct pipe_context *pctx,
72                                 struct pipe_stream_output_target *target)
73{
74   pipe_resource_reference(&target->buffer, NULL);
75   ralloc_free(target);
76}
77
78static void
79agx_set_stream_output_targets(struct pipe_context *pctx,
80                              unsigned num_targets,
81                              struct pipe_stream_output_target **targets,
82                              const unsigned *offsets)
83{
84   struct agx_context *ctx = agx_context(pctx);
85   struct agx_streamout *so = &ctx->streamout;
86
87   assert(num_targets <= ARRAY_SIZE(so->targets));
88
89   for (unsigned i = 0; i < num_targets; i++) {
90      if (offsets[i] != -1)
91         agx_so_target(targets[i])->offset = offsets[i];
92
93      pipe_so_target_reference(&so->targets[i], targets[i]);
94   }
95
96   for (unsigned i = 0; i < so->num_targets; i++)
97      pipe_so_target_reference(&so->targets[i], NULL);
98
99   so->num_targets = num_targets;
100}
101
102static void
103agx_set_blend_color(struct pipe_context *pctx,
104                    const struct pipe_blend_color *state)
105{
106   struct agx_context *ctx = agx_context(pctx);
107
108   if (state)
109      memcpy(&ctx->blend_color, state, sizeof(*state));
110}
111
112static void *
113agx_create_blend_state(struct pipe_context *ctx,
114                       const struct pipe_blend_state *state)
115{
116   struct agx_blend *so = CALLOC_STRUCT(agx_blend);
117
118   assert(!state->alpha_to_coverage);
119   assert(!state->alpha_to_coverage_dither);
120   assert(!state->alpha_to_one);
121   assert(!state->advanced_blend_func);
122
123   if (state->logicop_enable) {
124      so->logicop_enable = true;
125      so->logicop_func = state->logicop_func;
126      return so;
127   }
128
129   for (unsigned i = 0; i < PIPE_MAX_COLOR_BUFS; ++i) {
130      unsigned rti = state->independent_blend_enable ? i : 0;
131      struct pipe_rt_blend_state rt = state->rt[rti];
132
133      if (!rt.blend_enable) {
134         static const nir_lower_blend_channel replace = {
135            .func = BLEND_FUNC_ADD,
136            .src_factor = BLEND_FACTOR_ZERO,
137            .invert_src_factor = true,
138            .dst_factor = BLEND_FACTOR_ZERO,
139            .invert_dst_factor = false,
140         };
141
142         so->rt[i].rgb = replace;
143         so->rt[i].alpha = replace;
144      } else {
145         so->rt[i].rgb.func = util_blend_func_to_shader(rt.rgb_func);
146         so->rt[i].rgb.src_factor = util_blend_factor_to_shader(rt.rgb_src_factor);
147         so->rt[i].rgb.invert_src_factor = util_blend_factor_is_inverted(rt.rgb_src_factor);
148         so->rt[i].rgb.dst_factor = util_blend_factor_to_shader(rt.rgb_dst_factor);
149         so->rt[i].rgb.invert_dst_factor = util_blend_factor_is_inverted(rt.rgb_dst_factor);
150
151         so->rt[i].alpha.func = util_blend_func_to_shader(rt.alpha_func);
152         so->rt[i].alpha.src_factor = util_blend_factor_to_shader(rt.alpha_src_factor);
153         so->rt[i].alpha.invert_src_factor = util_blend_factor_is_inverted(rt.alpha_src_factor);
154         so->rt[i].alpha.dst_factor = util_blend_factor_to_shader(rt.alpha_dst_factor);
155         so->rt[i].alpha.invert_dst_factor = util_blend_factor_is_inverted(rt.alpha_dst_factor);
156
157	 so->blend_enable = true;
158      }
159
160      so->rt[i].colormask = rt.colormask;
161   }
162
163   return so;
164}
165
166static void
167agx_bind_blend_state(struct pipe_context *pctx, void *cso)
168{
169   struct agx_context *ctx = agx_context(pctx);
170   ctx->blend = cso;
171}
172
/* Lookup table translating gallium stencil ops to the hardware enum. The two
 * enums do not line up numerically, so an explicit mapping is needed;
 * gallium's INCR/DECR are the saturating variants in hardware terms. */
static const enum agx_stencil_op agx_stencil_ops[PIPE_STENCIL_OP_INVERT + 1] = {
   [PIPE_STENCIL_OP_KEEP] = AGX_STENCIL_OP_KEEP,
   [PIPE_STENCIL_OP_ZERO] = AGX_STENCIL_OP_ZERO,
   [PIPE_STENCIL_OP_REPLACE] = AGX_STENCIL_OP_REPLACE,
   [PIPE_STENCIL_OP_INCR] = AGX_STENCIL_OP_INCR_SAT,
   [PIPE_STENCIL_OP_DECR] = AGX_STENCIL_OP_DECR_SAT,
   [PIPE_STENCIL_OP_INCR_WRAP] = AGX_STENCIL_OP_INCR_WRAP,
   [PIPE_STENCIL_OP_DECR_WRAP] = AGX_STENCIL_OP_DECR_WRAP,
   [PIPE_STENCIL_OP_INVERT] = AGX_STENCIL_OP_INVERT,
};
183
/* Pack one face (front or back) of the hardware rasterizer descriptor from
 * gallium depth/stencil state. When stencil is disabled, neutral values are
 * packed instead: full read/write masks, KEEP for every op, and an ALWAYS
 * compare, so the stencil unit is effectively a no-op for that face. */
static void
agx_pack_rasterizer_face(struct agx_rasterizer_face_packed *out,
                         struct pipe_stencil_state st,
                         enum agx_zs_func z_func,
                         bool disable_z_write)
{
   agx_pack(out, RASTERIZER_FACE, cfg) {
      cfg.depth_function = z_func;
      cfg.disable_depth_write = disable_z_write;

      if (st.enabled) {
         cfg.stencil_write_mask = st.writemask;
         cfg.stencil_read_mask = st.valuemask;

         /* depth_pass/depth_fail are the stencil ops taken on depth
          * pass/fail, not depth state themselves */
         cfg.depth_pass   = agx_stencil_ops[st.zpass_op];
         cfg.depth_fail   = agx_stencil_ops[st.zfail_op];
         cfg.stencil_fail = agx_stencil_ops[st.fail_op];

         /* pipe and hardware compare-func enums match; see the
          * STATIC_ASSERTs in agx_create_zsa_state */
         cfg.stencil_compare = (enum agx_zs_func) st.func;
      } else {
         cfg.stencil_write_mask = 0xFF;
         cfg.stencil_read_mask = 0xFF;

         cfg.depth_pass = AGX_STENCIL_OP_KEEP;
         cfg.depth_fail = AGX_STENCIL_OP_KEEP;
         cfg.stencil_fail = AGX_STENCIL_OP_KEEP;

         cfg.stencil_compare = AGX_ZS_FUNC_ALWAYS;
      }
   }
}
215
216static void *
217agx_create_zsa_state(struct pipe_context *ctx,
218                     const struct pipe_depth_stencil_alpha_state *state)
219{
220   struct agx_zsa *so = CALLOC_STRUCT(agx_zsa);
221   assert(!state->depth_bounds_test && "todo");
222
223   so->base = *state;
224
225   /* Z func can be used as-is */
226   STATIC_ASSERT((enum agx_zs_func) PIPE_FUNC_NEVER    == AGX_ZS_FUNC_NEVER);
227   STATIC_ASSERT((enum agx_zs_func) PIPE_FUNC_LESS     == AGX_ZS_FUNC_LESS);
228   STATIC_ASSERT((enum agx_zs_func) PIPE_FUNC_EQUAL    == AGX_ZS_FUNC_EQUAL);
229   STATIC_ASSERT((enum agx_zs_func) PIPE_FUNC_LEQUAL   == AGX_ZS_FUNC_LEQUAL);
230   STATIC_ASSERT((enum agx_zs_func) PIPE_FUNC_GREATER  == AGX_ZS_FUNC_GREATER);
231   STATIC_ASSERT((enum agx_zs_func) PIPE_FUNC_NOTEQUAL == AGX_ZS_FUNC_NOT_EQUAL);
232   STATIC_ASSERT((enum agx_zs_func) PIPE_FUNC_GEQUAL   == AGX_ZS_FUNC_GEQUAL);
233   STATIC_ASSERT((enum agx_zs_func) PIPE_FUNC_ALWAYS   == AGX_ZS_FUNC_ALWAYS);
234
235   enum agx_zs_func z_func = state->depth_enabled ?
236                ((enum agx_zs_func) state->depth_func) : AGX_ZS_FUNC_ALWAYS;
237
238   agx_pack_rasterizer_face(&so->front,
239         state->stencil[0], z_func, !state->depth_writemask);
240
241   if (state->stencil[1].enabled) {
242      agx_pack_rasterizer_face(&so->back,
243            state->stencil[1], z_func, !state->depth_writemask);
244   } else {
245      /* One sided stencil */
246      so->back = so->front;
247   }
248
249   return so;
250}
251
252static void
253agx_bind_zsa_state(struct pipe_context *pctx, void *cso)
254{
255   struct agx_context *ctx = agx_context(pctx);
256
257   if (cso)
258      memcpy(&ctx->zs, cso, sizeof(ctx->zs));
259}
260
261static void *
262agx_create_rs_state(struct pipe_context *ctx,
263                    const struct pipe_rasterizer_state *cso)
264{
265   struct agx_rasterizer *so = CALLOC_STRUCT(agx_rasterizer);
266   so->base = *cso;
267
268   /* Line width is packed in a 4:4 fixed point format */
269   unsigned line_width_fixed = ((unsigned) (cso->line_width * 16.0f)) - 1;
270
271   /* Clamp to maximum line width */
272   so->line_width = MIN2(line_width_fixed, 0xFF);
273
274   agx_pack(so->cull, CULL, cfg) {
275      cfg.cull_front = cso->cull_face & PIPE_FACE_FRONT;
276      cfg.cull_back = cso->cull_face & PIPE_FACE_BACK;
277      cfg.front_face_ccw = cso->front_ccw;
278      cfg.depth_clip = cso->depth_clip_near;
279      cfg.depth_clamp = !cso->depth_clip_near;
280   };
281
282   return so;
283}
284
/* Bind a rasterizer CSO, flagging scissor state dirty when the scissor
 * enable bit changed (binding NULL or binding over no previous state also
 * counts as a change). */
static void
agx_bind_rasterizer_state(struct pipe_context *pctx, void *cso)
{
   struct agx_context *ctx = agx_context(pctx);
   struct agx_rasterizer *so = cso;

   /* Check if scissor state has changed, since scissor enable is part of the
    * rasterizer state but everything else needed for scissors is part of
    * viewport/scissor states */
   bool scissor_changed = (cso == NULL) || (ctx->rast == NULL) ||
      (ctx->rast->base.scissor != so->base.scissor);

   ctx->rast = so;

   if (scissor_changed)
      ctx->dirty |= AGX_DIRTY_SCISSOR;
}
302
303static enum agx_wrap
304agx_wrap_from_pipe(enum pipe_tex_wrap in)
305{
306   switch (in) {
307   case PIPE_TEX_WRAP_REPEAT: return AGX_WRAP_REPEAT;
308   case PIPE_TEX_WRAP_CLAMP_TO_EDGE: return AGX_WRAP_CLAMP_TO_EDGE;
309   case PIPE_TEX_WRAP_MIRROR_REPEAT: return AGX_WRAP_MIRRORED_REPEAT;
310   case PIPE_TEX_WRAP_CLAMP_TO_BORDER: return AGX_WRAP_CLAMP_TO_BORDER;
311   default: unreachable("todo: more wrap modes");
312   }
313}
314
315static enum agx_mip_filter
316agx_mip_filter_from_pipe(enum pipe_tex_mipfilter in)
317{
318   switch (in) {
319   case PIPE_TEX_MIPFILTER_NEAREST: return AGX_MIP_FILTER_NEAREST;
320   case PIPE_TEX_MIPFILTER_LINEAR: return AGX_MIP_FILTER_LINEAR;
321   case PIPE_TEX_MIPFILTER_NONE: return AGX_MIP_FILTER_NONE;
322   }
323
324   unreachable("Invalid mip filter");
325}
326
/* Lookup table translating gallium compare functions to the hardware sampler
 * compare-func enum, indexed directly by the pipe value. */
static const enum agx_compare_func agx_compare_funcs[PIPE_FUNC_ALWAYS + 1] = {
   [PIPE_FUNC_NEVER] = AGX_COMPARE_FUNC_NEVER,
   [PIPE_FUNC_LESS] = AGX_COMPARE_FUNC_LESS,
   [PIPE_FUNC_EQUAL] = AGX_COMPARE_FUNC_EQUAL,
   [PIPE_FUNC_LEQUAL] = AGX_COMPARE_FUNC_LEQUAL,
   [PIPE_FUNC_GREATER] = AGX_COMPARE_FUNC_GREATER,
   [PIPE_FUNC_NOTEQUAL] = AGX_COMPARE_FUNC_NOT_EQUAL,
   [PIPE_FUNC_GEQUAL] = AGX_COMPARE_FUNC_GEQUAL,
   [PIPE_FUNC_ALWAYS] = AGX_COMPARE_FUNC_ALWAYS,
};
337
338static void *
339agx_create_sampler_state(struct pipe_context *pctx,
340                         const struct pipe_sampler_state *state)
341{
342   struct agx_device *dev = agx_device(pctx->screen);
343   struct agx_bo *bo = agx_bo_create(dev, AGX_SAMPLER_LENGTH,
344                                     AGX_MEMORY_TYPE_FRAMEBUFFER);
345
346   assert(state->min_lod == 0 && "todo: lod clamps");
347   assert(state->lod_bias == 0 && "todo: lod bias");
348
349   agx_pack(bo->ptr.cpu, SAMPLER, cfg) {
350      cfg.magnify_linear = (state->mag_img_filter == PIPE_TEX_FILTER_LINEAR);
351      cfg.minify_linear = (state->min_img_filter == PIPE_TEX_FILTER_LINEAR);
352      cfg.mip_filter = agx_mip_filter_from_pipe(state->min_mip_filter);
353      cfg.wrap_s = agx_wrap_from_pipe(state->wrap_s);
354      cfg.wrap_t = agx_wrap_from_pipe(state->wrap_t);
355      cfg.wrap_r = agx_wrap_from_pipe(state->wrap_r);
356      cfg.pixel_coordinates = !state->normalized_coords;
357      cfg.compare_func = agx_compare_funcs[state->compare_func];
358   }
359
360   struct agx_sampler_state *so = CALLOC_STRUCT(agx_sampler_state);
361   so->base = *state;
362   so->desc = bo;
363
364   return so;
365}
366
367static void
368agx_delete_sampler_state(struct pipe_context *ctx, void *state)
369{
370   struct agx_bo *bo = state;
371   agx_bo_unreference(bo);
372}
373
374static void
375agx_bind_sampler_states(struct pipe_context *pctx,
376                        enum pipe_shader_type shader,
377                        unsigned start, unsigned count,
378                        void **states)
379{
380   struct agx_context *ctx = agx_context(pctx);
381
382   ctx->stage[shader].sampler_count = states ? count : 0;
383
384   memcpy(&ctx->stage[shader].samplers[start], states,
385          sizeof(struct agx_sampler_state *) * count);
386}
387
388/* Channels agree for RGBA but are weird for force 0/1 */
389
390static enum agx_channel
391agx_channel_from_pipe(enum pipe_swizzle in)
392{
393   STATIC_ASSERT((enum agx_channel) PIPE_SWIZZLE_X == AGX_CHANNEL_R);
394   STATIC_ASSERT((enum agx_channel) PIPE_SWIZZLE_Y == AGX_CHANNEL_G);
395   STATIC_ASSERT((enum agx_channel) PIPE_SWIZZLE_Z == AGX_CHANNEL_B);
396   STATIC_ASSERT((enum agx_channel) PIPE_SWIZZLE_W == AGX_CHANNEL_A);
397   STATIC_ASSERT((enum agx_channel) PIPE_SWIZZLE_0 & 0x4);
398   STATIC_ASSERT((enum agx_channel) PIPE_SWIZZLE_1 & 0x4);
399   STATIC_ASSERT((enum agx_channel) PIPE_SWIZZLE_NONE & 0x4);
400
401   if ((in & 0x4) == 0)
402      return (enum agx_channel) in;
403   else if (in == PIPE_SWIZZLE_1)
404      return AGX_CHANNEL_1;
405   else
406      return AGX_CHANNEL_0;
407}
408
409static enum agx_layout
410agx_translate_layout(uint64_t modifier)
411{
412   switch (modifier) {
413   case DRM_FORMAT_MOD_APPLE_64X64_MORTON_ORDER:
414      return AGX_LAYOUT_TILED_64X64;
415   case DRM_FORMAT_MOD_LINEAR:
416      return AGX_LAYOUT_LINEAR;
417   default:
418      unreachable("Invalid modifier");
419   }
420}
421
422static enum agx_texture_dimension
423agx_translate_texture_dimension(enum pipe_texture_target dim)
424{
425   switch (dim) {
426   case PIPE_TEXTURE_2D: return AGX_TEXTURE_DIMENSION_2D;
427   case PIPE_TEXTURE_CUBE: return AGX_TEXTURE_DIMENSION_CUBE;
428   default: unreachable("Unsupported texture dimension");
429   }
430}
431
432static struct pipe_sampler_view *
433agx_create_sampler_view(struct pipe_context *pctx,
434                        struct pipe_resource *texture,
435                        const struct pipe_sampler_view *state)
436{
437   struct agx_device *dev = agx_device(pctx->screen);
438   struct agx_resource *rsrc = agx_resource(texture);
439   struct agx_sampler_view *so = CALLOC_STRUCT(agx_sampler_view);
440
441   if (!so)
442      return NULL;
443
444   /* We prepare the descriptor at CSO create time */
445   so->desc = agx_bo_create(dev, AGX_TEXTURE_LENGTH,
446                            AGX_MEMORY_TYPE_FRAMEBUFFER);
447
448   const struct util_format_description *desc =
449      util_format_description(state->format);
450
451   /* We only have a single swizzle for the user swizzle and the format fixup,
452    * so compose them now. */
453   uint8_t out_swizzle[4];
454   uint8_t view_swizzle[4] = {
455      state->swizzle_r, state->swizzle_g,
456      state->swizzle_b, state->swizzle_a
457   };
458
459   util_format_compose_swizzles(desc->swizzle, view_swizzle, out_swizzle);
460
461   unsigned level = state->u.tex.first_level;
462   assert(state->u.tex.first_layer == 0);
463
464   /* Pack the descriptor into GPU memory */
465   agx_pack(so->desc->ptr.cpu, TEXTURE, cfg) {
466      cfg.dimension = agx_translate_texture_dimension(state->target);
467      cfg.layout = agx_translate_layout(rsrc->modifier);
468      cfg.format = agx_pixel_format[state->format].hw;
469      cfg.swizzle_r = agx_channel_from_pipe(out_swizzle[0]);
470      cfg.swizzle_g = agx_channel_from_pipe(out_swizzle[1]);
471      cfg.swizzle_b = agx_channel_from_pipe(out_swizzle[2]);
472      cfg.swizzle_a = agx_channel_from_pipe(out_swizzle[3]);
473      cfg.width = u_minify(texture->width0, level);
474      cfg.height = u_minify(texture->height0, level);
475      cfg.levels = state->u.tex.last_level - level + 1;
476      cfg.srgb = (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB);
477      cfg.address = rsrc->bo->ptr.gpu + rsrc->slices[level].offset;
478      cfg.unk_2 = false;
479
480      cfg.stride = (rsrc->modifier == DRM_FORMAT_MOD_LINEAR) ?
481         (rsrc->slices[level].line_stride - 16) :
482         AGX_RT_STRIDE_TILED;
483   }
484
485   /* Initialize base object */
486   so->base = *state;
487   so->base.texture = NULL;
488   pipe_resource_reference(&so->base.texture, texture);
489   pipe_reference_init(&so->base.reference, 1);
490   so->base.context = pctx;
491   return &so->base;
492}
493
/* Bind an array of sampler views to a shader stage. References are taken on
 * the incoming views (or ownership transferred when take_ownership is set),
 * stale references past the new range are dropped, and texture_count is set
 * to the index just past the last non-NULL view. */
static void
agx_set_sampler_views(struct pipe_context *pctx,
                      enum pipe_shader_type shader,
                      unsigned start, unsigned count,
                      unsigned unbind_num_trailing_slots,
                      bool take_ownership,
                      struct pipe_sampler_view **views)
{
   struct agx_context *ctx = agx_context(pctx);
   unsigned new_nr = 0;
   unsigned i;

   assert(start == 0);

   /* A NULL array means unbind everything */
   if (!views)
      count = 0;

   for (i = 0; i < count; ++i) {
      if (views[i])
         new_nr = i + 1;

      if (take_ownership) {
         /* Drop our old reference, then adopt the caller's reference
          * without taking a new one */
         pipe_sampler_view_reference((struct pipe_sampler_view **)
                                     &ctx->stage[shader].textures[i], NULL);
         ctx->stage[shader].textures[i] = (struct agx_sampler_view *)views[i];
      } else {
         pipe_sampler_view_reference((struct pipe_sampler_view **)
                                     &ctx->stage[shader].textures[i], views[i]);
      }
   }

   /* Unbind any slots that were bound before but are beyond the new count */
   for (; i < ctx->stage[shader].texture_count; i++) {
      pipe_sampler_view_reference((struct pipe_sampler_view **)
                                  &ctx->stage[shader].textures[i], NULL);
   }
   ctx->stage[shader].texture_count = new_nr;
}
531
532static void
533agx_sampler_view_destroy(struct pipe_context *ctx,
534                         struct pipe_sampler_view *pview)
535{
536   struct agx_sampler_view *view = (struct agx_sampler_view *) pview;
537   pipe_resource_reference(&view->base.texture, NULL);
538   agx_bo_unreference(view->desc);
539   FREE(view);
540}
541
542static struct pipe_surface *
543agx_create_surface(struct pipe_context *ctx,
544                   struct pipe_resource *texture,
545                   const struct pipe_surface *surf_tmpl)
546{
547   struct pipe_surface *surface = CALLOC_STRUCT(pipe_surface);
548
549   if (!surface)
550      return NULL;
551   pipe_reference_init(&surface->reference, 1);
552   pipe_resource_reference(&surface->texture, texture);
553   surface->context = ctx;
554   surface->format = surf_tmpl->format;
555   surface->width = texture->width0;
556   surface->height = texture->height0;
557   surface->texture = texture;
558   surface->u.tex.first_layer = surf_tmpl->u.tex.first_layer;
559   surface->u.tex.last_layer = surf_tmpl->u.tex.last_layer;
560   surface->u.tex.level = surf_tmpl->u.tex.level;
561
562   return surface;
563}
564
/* Stub: user clip planes are not yet supported, so the state is ignored */
static void
agx_set_clip_state(struct pipe_context *ctx,
                   const struct pipe_clip_state *state)
{
}
570
/* Stub: polygon stipple is not yet implemented, so the state is ignored */
static void
agx_set_polygon_stipple(struct pipe_context *ctx,
                        const struct pipe_poly_stipple *state)
{
}
576
577static void
578agx_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask)
579{
580   struct agx_context *ctx = agx_context(pipe);
581   ctx->sample_mask = sample_mask;
582}
583
584static void
585agx_set_scissor_states(struct pipe_context *pctx,
586                       unsigned start_slot,
587                       unsigned num_scissors,
588                       const struct pipe_scissor_state *scissor)
589{
590   struct agx_context *ctx = agx_context(pctx);
591
592   assert(start_slot == 0 && "no geometry shaders");
593   assert(num_scissors == 1 && "no geometry shaders");
594
595   ctx->scissor = *scissor;
596   ctx->dirty |= AGX_DIRTY_SCISSOR;
597}
598
599static void
600agx_set_stencil_ref(struct pipe_context *pctx,
601                    const struct pipe_stencil_ref state)
602{
603   struct agx_context *ctx = agx_context(pctx);
604   ctx->stencil_ref = state;
605}
606
607static void
608agx_set_viewport_states(struct pipe_context *pctx,
609                        unsigned start_slot,
610                        unsigned num_viewports,
611                        const struct pipe_viewport_state *vp)
612{
613   struct agx_context *ctx = agx_context(pctx);
614
615   assert(start_slot == 0 && "no geometry shaders");
616   assert(num_viewports == 1 && "no geometry shaders");
617
618   ctx->dirty |= AGX_DIRTY_VIEWPORT;
619   ctx->viewport = *vp;
620}
621
/* Result of uploading combined viewport/scissor state: a GPU address of the
 * packed viewport descriptor and an index into the batch's scissor array. */
struct agx_viewport_scissor {
   uint64_t viewport;   /* GPU pointer to the packed VIEWPORT descriptor */
   unsigned scissor;    /* index of the scissor record in batch->scissor */
};
626
/* Upload packed viewport state to the pool and append a scissor record to
 * the batch's scissor array. The viewport extent (optionally intersected
 * with the gallium scissor rectangle) drives both the tile bounds in the
 * viewport descriptor and the pixel bounds in the scissor record. Returns
 * the GPU address of the viewport descriptor and the scissor index. */
static struct agx_viewport_scissor
agx_upload_viewport_scissor(struct agx_pool *pool,
                            struct agx_batch *batch,
                            const struct pipe_viewport_state *vp,
                            const struct pipe_scissor_state *ss)
{
   struct agx_ptr T = agx_pool_alloc_aligned(pool, AGX_VIEWPORT_LENGTH, 64);

   float trans_x = vp->translate[0], trans_y = vp->translate[1];
   float abs_scale_x = fabsf(vp->scale[0]), abs_scale_y = fabsf(vp->scale[1]);

   /* Calculate the extent of the viewport. Note if a particular dimension of
    * the viewport is an odd number of pixels, both the translate and the scale
    * will have a fractional part of 0.5, so adding and subtracting them yields
    * an integer. Therefore we don't need to round explicitly */
   unsigned minx = CLAMP((int) (trans_x - abs_scale_x), 0, batch->width);
   unsigned miny = CLAMP((int) (trans_y - abs_scale_y), 0, batch->height);
   unsigned maxx = CLAMP((int) (trans_x + abs_scale_x), 0, batch->width);
   unsigned maxy = CLAMP((int) (trans_y + abs_scale_y), 0, batch->height);

   /* Intersect with the API scissor rectangle when one is supplied */
   if (ss) {
      minx = MAX2(ss->minx, minx);
      miny = MAX2(ss->miny, miny);
      maxx = MIN2(ss->maxx, maxx);
      maxy = MIN2(ss->maxy, maxy);
   }

   /* NOTE(review): an empty intersection (fully scissored-out viewport)
    * would trip this assert -- confirm callers guarantee non-empty bounds */
   assert(maxx > minx && maxy > miny);

   float minz, maxz;
   util_viewport_zmin_zmax(vp, false, &minz, &maxz);

   agx_pack(T.cpu, VIEWPORT, cfg) {
      /* Tile bounds: 32x32 pixel tiles, min rounded down, max rounded up */
      cfg.min_tile_x = minx / 32;
      cfg.min_tile_y = miny / 32;
      cfg.max_tile_x = DIV_ROUND_UP(maxx, 32);
      cfg.max_tile_y = DIV_ROUND_UP(maxy, 32);
      cfg.clip_tile = true;

      cfg.translate_x = vp->translate[0];
      cfg.translate_y = vp->translate[1];
      cfg.scale_x = vp->scale[0];
      cfg.scale_y = vp->scale[1];

      /* Assumes [0, 1] clip coordinates. If half-z is not in use, lower_half_z
       * is called to ensure this works. */
      cfg.translate_z = minz;
      cfg.scale_z = maxz - minz;
   };

   /* Allocate a new scissor descriptor */
   struct agx_scissor_packed *ptr = batch->scissor.bo->ptr.cpu;
   unsigned index = (batch->scissor.count++);

   agx_pack(ptr + index, SCISSOR, cfg) {
      cfg.min_x = minx;
      cfg.min_y = miny;
      cfg.min_z = minz;
      cfg.max_x = maxx;
      cfg.max_y = maxy;
      cfg.max_z = maxz;
   }

   return (struct agx_viewport_scissor) {
      .viewport = T.gpu,
      .scissor = index
   };
}
695
696/* A framebuffer state can be reused across batches, so it doesn't make sense
697 * to add surfaces to the BO list here. Instead we added them when flushing.
698 */
699
/* Bind a new framebuffer. The current batch is flushed first (see XXX), the
 * framebuffer state is copied into the context and batch, and a hardware
 * render-target descriptor is packed for each colour buffer. */
static void
agx_set_framebuffer_state(struct pipe_context *pctx,
                          const struct pipe_framebuffer_state *state)
{
   struct agx_context *ctx = agx_context(pctx);

   if (!state)
      return;

   /* XXX: eliminate this flush with batch tracking logic */
   pctx->flush(pctx, NULL, 0);

   util_copy_framebuffer_state(&ctx->framebuffer, state);
   ctx->batch->width = state->width;
   ctx->batch->height = state->height;
   ctx->batch->nr_cbufs = state->nr_cbufs;
   /* NOTE(review): only cbufs[0] is copied to the batch even though nr_cbufs
    * may exceed 1 -- confirm whether MRT is expected to work here */
   ctx->batch->cbufs[0] = state->cbufs[0];
   ctx->batch->zsbuf = state->zsbuf;
   ctx->dirty = ~0;

   for (unsigned i = 0; i < state->nr_cbufs; ++i) {
      struct pipe_surface *surf = state->cbufs[i];
      struct agx_resource *tex = agx_resource(surf->texture);
      const struct util_format_description *desc =
         util_format_description(surf->format);

      agx_pack(ctx->render_target[i], RENDER_TARGET, cfg) {
         cfg.layout = agx_translate_layout(tex->modifier);
         cfg.format = agx_pixel_format[surf->format].hw;
         cfg.swizzle_r = agx_channel_from_pipe(desc->swizzle[0]);
         cfg.swizzle_g = agx_channel_from_pipe(desc->swizzle[1]);
         cfg.swizzle_b = agx_channel_from_pipe(desc->swizzle[2]);
         cfg.swizzle_a = agx_channel_from_pipe(desc->swizzle[3]);
         cfg.width = state->width;
         cfg.height = state->height;
         cfg.buffer = tex->bo->ptr.gpu;

         /* Linear stride is encoded with a bias of 4; tiled surfaces use a
          * sentinel value instead */
         cfg.stride = (tex->modifier == DRM_FORMAT_MOD_LINEAR) ?
            (tex->slices[0].line_stride - 4) :
            AGX_RT_STRIDE_TILED;
      };
   }
}
743
744/* Likewise constant buffers, textures, and samplers are handled in a common
745 * per-draw path, with dirty tracking to reduce the costs involved.
746 */
747
748static void
749agx_set_constant_buffer(struct pipe_context *pctx,
750                        enum pipe_shader_type shader, uint index,
751                        bool take_ownership,
752                        const struct pipe_constant_buffer *cb)
753{
754   struct agx_context *ctx = agx_context(pctx);
755   struct agx_stage *s = &ctx->stage[shader];
756
757   util_copy_constant_buffer(&s->cb[index], cb, take_ownership);
758
759   unsigned mask = (1 << index);
760
761   if (cb)
762      s->cb_mask |= mask;
763   else
764      s->cb_mask &= ~mask;
765}
766
767static void
768agx_surface_destroy(struct pipe_context *ctx,
769                    struct pipe_surface *surface)
770{
771   pipe_resource_reference(&surface->texture, NULL);
772   FREE(surface);
773}
774
/* Generic CSO destructor for state objects that own no GPU-side resources */
static void
agx_delete_state(struct pipe_context *ctx, void *state)
{
   FREE(state);
}
780
781/* BOs added to the batch in the uniform upload path */
782
783static void
784agx_set_vertex_buffers(struct pipe_context *pctx,
785                       unsigned start_slot, unsigned count,
786                       unsigned unbind_num_trailing_slots,
787                       bool take_ownership,
788                       const struct pipe_vertex_buffer *buffers)
789{
790   struct agx_context *ctx = agx_context(pctx);
791
792   util_set_vertex_buffers_mask(ctx->vertex_buffers, &ctx->vb_mask, buffers,
793                                start_slot, count, unbind_num_trailing_slots, take_ownership);
794
795   ctx->dirty |= AGX_DIRTY_VERTEX;
796}
797
798static void *
799agx_create_vertex_elements(struct pipe_context *ctx,
800                           unsigned count,
801                           const struct pipe_vertex_element *state)
802{
803   assert(count < AGX_MAX_ATTRIBS);
804
805   struct agx_attribute *attribs = calloc(sizeof(*attribs), AGX_MAX_ATTRIBS);
806   for (unsigned i = 0; i < count; ++i) {
807      const struct pipe_vertex_element ve = state[i];
808
809      const struct util_format_description *desc =
810         util_format_description(ve.src_format);
811
812      unsigned chan_size = desc->channel[0].size / 8;
813
814      assert(chan_size == 1 || chan_size == 2 || chan_size == 4);
815      assert(desc->nr_channels >= 1 && desc->nr_channels <= 4);
816      assert((ve.src_offset & (chan_size - 1)) == 0);
817
818      attribs[i] = (struct agx_attribute) {
819         .buf = ve.vertex_buffer_index,
820         .src_offset = ve.src_offset / chan_size,
821         .nr_comps_minus_1 = desc->nr_channels - 1,
822         .format = agx_vertex_format[ve.src_format],
823         .divisor = ve.instance_divisor
824      };
825   }
826
827   return attribs;
828}
829
830static void
831agx_bind_vertex_elements_state(struct pipe_context *pctx, void *cso)
832{
833   struct agx_context *ctx = agx_context(pctx);
834   ctx->attributes = cso;
835   ctx->dirty |= AGX_DIRTY_VERTEX;
836}
837
838static uint32_t asahi_shader_key_hash(const void *key)
839{
840   return _mesa_hash_data(key, sizeof(struct asahi_shader_key));
841}
842
843static bool asahi_shader_key_equal(const void *a, const void *b)
844{
845   return memcmp(a, b, sizeof(struct asahi_shader_key)) == 0;
846}
847
/* Create an uncompiled shader CSO. TGSI input is converted to NIR up front;
 * actual compilation is deferred to draw time, when the full shader key is
 * known (see agx_update_shader). */
static void *
agx_create_shader_state(struct pipe_context *pctx,
                        const struct pipe_shader_state *cso)
{
   struct agx_uncompiled_shader *so = CALLOC_STRUCT(agx_uncompiled_shader);

   if (!so)
      return NULL;

   so->base = *cso;

   if (cso->type == PIPE_SHADER_IR_NIR) {
      /* Takes ownership of the caller's NIR per gallium convention */
      so->nir = cso->ir.nir;
   } else {
      assert(cso->type == PIPE_SHADER_IR_TGSI);
      so->nir = tgsi_to_nir(cso->tokens, pctx->screen, false);
   }

   /* Variant cache keyed on asahi_shader_key, filled by agx_update_shader.
    * NOTE(review): the table stores whatever key pointer is passed to
    * _mesa_hash_table_insert -- verify the inserted keys outlive the table */
   so->variants = _mesa_hash_table_create(NULL, asahi_shader_key_hash, asahi_shader_key_equal);
   return so;
}
869
870static bool
871agx_update_shader(struct agx_context *ctx, struct agx_compiled_shader **out,
872                  enum pipe_shader_type stage, struct asahi_shader_key *key)
873{
874   struct agx_uncompiled_shader *so = ctx->stage[stage].shader;
875   assert(so != NULL);
876
877   struct hash_entry *he = _mesa_hash_table_search(so->variants, key);
878
879   if (he) {
880      if ((*out) == he->data)
881         return false;
882
883      *out = he->data;
884      return true;
885   }
886
887   struct agx_compiled_shader *compiled = CALLOC_STRUCT(agx_compiled_shader);
888   struct util_dynarray binary;
889   util_dynarray_init(&binary, NULL);
890
891   nir_shader *nir = nir_shader_clone(NULL, so->nir);
892
893   if (key->blend.blend_enable) {
894      nir_lower_blend_options opts = {
895         .format = { key->rt_formats[0] },
896         .scalar_blend_const = true
897      };
898
899      memcpy(opts.rt, key->blend.rt, sizeof(opts.rt));
900      NIR_PASS_V(nir, nir_lower_blend, opts);
901   } else if (key->blend.logicop_enable) {
902      nir_lower_blend_options opts = {
903         .format = { key->rt_formats[0] },
904         .logicop_enable = true,
905         .logicop_func = key->blend.logicop_func,
906      };
907
908      NIR_PASS_V(nir, nir_lower_blend, opts);
909   }
910
911   if (stage == PIPE_SHADER_FRAGMENT)
912      NIR_PASS_V(nir, nir_lower_fragcolor, key->nr_cbufs);
913
914   agx_compile_shader_nir(nir, &key->base, &binary, &compiled->info);
915
916   struct agx_varyings *varyings = &compiled->info.varyings;
917   unsigned packed_varying_sz = (AGX_VARYING_HEADER_LENGTH + varyings->nr_descs * AGX_VARYING_LENGTH);
918   uint8_t *packed_varyings = alloca(packed_varying_sz);
919
920   agx_pack(packed_varyings, VARYING_HEADER, cfg) {
921      cfg.triangle_slots = cfg.point_slots = varyings->nr_slots;
922   }
923
924   memcpy(packed_varyings + AGX_VARYING_HEADER_LENGTH, varyings->packed,
925         varyings->nr_descs * AGX_VARYING_LENGTH);
926
927   if (binary.size) {
928      struct agx_device *dev = agx_device(ctx->base.screen);
929      compiled->bo = agx_bo_create(dev,
930                                   ALIGN_POT(binary.size, 256) + (3 * packed_varying_sz),
931                                   AGX_MEMORY_TYPE_SHADER);
932      memcpy(compiled->bo->ptr.cpu, binary.data, binary.size);
933
934
935      /* TODO: Why is the varying descriptor duplicated 3x? */
936      unsigned offs = ALIGN_POT(binary.size, 256);
937      for (unsigned copy = 0; copy < 3; ++copy) {
938         memcpy(((uint8_t *) compiled->bo->ptr.cpu) + offs, packed_varyings, packed_varying_sz);
939         offs += packed_varying_sz;
940      }
941
942      compiled->varyings = compiled->bo->ptr.gpu + ALIGN_POT(binary.size, 256);
943   }
944
945   ralloc_free(nir);
946   util_dynarray_fini(&binary);
947
948   he = _mesa_hash_table_insert(so->variants, key, compiled);
949   *out = he->data;
950   return true;
951}
952
953static bool
954agx_update_vs(struct agx_context *ctx)
955{
956   struct agx_vs_shader_key key = {
957      .num_vbufs = util_last_bit(ctx->vb_mask),
958      .clip_halfz = ctx->rast->base.clip_halfz,
959   };
960
961   memcpy(key.attributes, ctx->attributes,
962          sizeof(key.attributes[0]) * AGX_MAX_ATTRIBS);
963
964   u_foreach_bit(i, ctx->vb_mask) {
965      key.vbuf_strides[i] = ctx->vertex_buffers[i].stride;
966   }
967
968   struct asahi_shader_key akey = {
969      .base.vs = key
970   };
971
972   return agx_update_shader(ctx, &ctx->vs, PIPE_SHADER_VERTEX, &akey);
973}
974
975static bool
976agx_update_fs(struct agx_context *ctx)
977{
978   struct asahi_shader_key key = {
979      .nr_cbufs = ctx->batch->nr_cbufs,
980   };
981
982   for (unsigned i = 0; i < key.nr_cbufs; ++i) {
983      struct pipe_surface *surf = ctx->batch->cbufs[i];
984
985      if (surf) {
986         enum pipe_format fmt = surf->format;
987         key.rt_formats[i] = fmt;
988         key.base.fs.tib_formats[i] = agx_pixel_format[fmt].internal;
989      } else {
990         key.rt_formats[i] = PIPE_FORMAT_NONE;
991      }
992   }
993
994   memcpy(&key.blend, ctx->blend, sizeof(key.blend));
995
996   return agx_update_shader(ctx, &ctx->fs, PIPE_SHADER_FRAGMENT, &key);
997}
998
999static void
1000agx_bind_shader_state(struct pipe_context *pctx, void *cso)
1001{
1002   if (!cso)
1003      return;
1004
1005   struct agx_context *ctx = agx_context(pctx);
1006   struct agx_uncompiled_shader *so = cso;
1007
1008   enum pipe_shader_type type = pipe_shader_type_from_mesa(so->nir->info.stage);
1009   ctx->stage[type].shader = so;
1010}
1011
1012static void
1013agx_delete_compiled_shader(struct hash_entry *ent)
1014{
1015   struct agx_compiled_shader *so = ent->data;
1016   agx_bo_unreference(so->bo);
1017   FREE(so);
1018}
1019
1020static void
1021agx_delete_shader_state(struct pipe_context *ctx,
1022                        void *cso)
1023{
1024   struct agx_uncompiled_shader *so = cso;
1025   _mesa_hash_table_destroy(so->variants, agx_delete_compiled_shader);
1026   free(so);
1027}
1028
/* Pipeline consists of a sequence of binding commands followed by a set shader command */
static uint32_t
agx_build_pipeline(struct agx_context *ctx, struct agx_compiled_shader *cs, enum pipe_shader_type stage)
{
   /* Pipelines must be 64-byte aligned */
   struct agx_ptr ptr = agx_pool_alloc_aligned(&ctx->batch->pipeline_pool,
                        (16 * AGX_BIND_UNIFORM_LENGTH) + // XXX: correct sizes, break up at compile time
                        (ctx->stage[stage].texture_count * AGX_BIND_TEXTURE_LENGTH) +
                        (PIPE_MAX_SAMPLERS * AGX_BIND_SAMPLER_LENGTH) +
                        AGX_SET_SHADER_EXTENDED_LENGTH + 8,
                        64);

   uint8_t *record = ptr.cpu;

   /* There is a maximum number of half words we may push with a single
    * BIND_UNIFORM record, so split up the range to fit. We only need to call
    * agx_push_location once, however, which reduces the cost. */
   unsigned unif_records = 0;

   for (unsigned i = 0; i < cs->info.push_ranges; ++i) {
      struct agx_push push = cs->info.push[i];
      uint64_t buffer = agx_push_location(ctx, push, stage);
      unsigned halfs_per_record = 14;
      unsigned records = DIV_ROUND_UP(push.length, halfs_per_record);

      /* Ensure we don't overflow (the allocation above assumes at most 16
       * BIND_UNIFORM records) */
      unif_records += records;
      assert(unif_records < 16);

      for (unsigned j = 0; j < records; ++j) {
         agx_pack(record, BIND_UNIFORM, cfg) {
            cfg.start_halfs = push.base + (j * halfs_per_record);
            /* Last record of a range may be partial */
            cfg.size_halfs = MIN2(push.length - (j * halfs_per_record), halfs_per_record);
            cfg.buffer = buffer + (j * halfs_per_record * 2);
         }

         record += AGX_BIND_UNIFORM_LENGTH;
      }
   }

   /* One BIND_TEXTURE record per bound sampler view; keep both the
    * descriptor BO and the backing resource BO alive for the batch */
   for (unsigned i = 0; i < ctx->stage[stage].texture_count; ++i) {
      struct agx_sampler_view *tex = ctx->stage[stage].textures[i];
      agx_batch_add_bo(ctx->batch, tex->desc);
      agx_batch_add_bo(ctx->batch, agx_resource(tex->base.texture)->bo);


      agx_pack(record, BIND_TEXTURE, cfg) {
         cfg.start = i;
         cfg.count = 1;
         cfg.buffer = tex->desc->ptr.gpu;
      }

      record += AGX_BIND_TEXTURE_LENGTH;
   }

   /* One BIND_SAMPLER record per bound sampler state; slots without a
    * sampler are simply skipped */
   for (unsigned i = 0; i < PIPE_MAX_SAMPLERS; ++i) {
      struct agx_sampler_state *sampler = ctx->stage[stage].samplers[i];

      if (!sampler)
         continue;

      struct agx_bo *bo = sampler->desc;
      agx_batch_add_bo(ctx->batch, bo);

      agx_pack(record, BIND_SAMPLER, cfg) {
         cfg.start = i;
         cfg.count = 1;
         cfg.buffer = bo->ptr.gpu;
      }

      record += AGX_BIND_SAMPLER_LENGTH;
   }

   /* Finally, the set-shader record pointing at the compiled binary. The
    * unk_* constants below are trace-derived magic; their meaning is not
    * yet understood. */
   /* TODO: Can we prepack this? */
   if (stage == PIPE_SHADER_FRAGMENT) {
      agx_pack(record, SET_SHADER_EXTENDED, cfg) {
         cfg.code = cs->bo->ptr.gpu;
         cfg.register_quadwords = 0;
         cfg.unk_3 = 0x8d;
         cfg.unk_1 = 0x2010bd;
         cfg.unk_2 = 0x0d;
         cfg.unk_2b = 1;
         cfg.unk_3b = 0x1;
         cfg.unk_4 = 0x800;
         cfg.preshader_unk = 0xc080;
         cfg.spill_size = 0x2;
      }

      record += AGX_SET_SHADER_EXTENDED_LENGTH;
   } else {
      agx_pack(record, SET_SHADER, cfg) {
         cfg.code = cs->bo->ptr.gpu;
         cfg.register_quadwords = 0;
         cfg.unk_2b = cs->info.varyings.nr_slots;
         cfg.unk_2 = 0x0d;
      }

      record += AGX_SET_SHADER_LENGTH;
   }

   /* End pipeline */
   memset(record, 0, 8);
   /* Callers store this as a 32-bit handle, so the pool must be in the low 4 GiB */
   assert(ptr.gpu < (1ull << 32));
   return ptr.gpu;
}
1134
1135/* Internal pipelines (TODO: refactor?) */
/* Build the internal pipeline used to clear render targets: one uniform
 * push sourcing the clear colour from clear_buf, then the clear shader at
 * `code`. The unk_*/frag_unk constants are trace-derived magic. */
uint64_t
agx_build_clear_pipeline(struct agx_context *ctx, uint32_t code, uint64_t clear_buf)
{
   struct agx_ptr ptr = agx_pool_alloc_aligned(&ctx->batch->pipeline_pool,
                        (1 * AGX_BIND_UNIFORM_LENGTH) +
                        AGX_SET_SHADER_EXTENDED_LENGTH + 8,
                        64);

   uint8_t *record = ptr.cpu;

   /* Push 4 halfwords of clear colour starting at uniform halfword 12
    * (presumably where the clear shader expects it — confirm against the
    * internal shader) */
   agx_pack(record, BIND_UNIFORM, cfg) {
      cfg.start_halfs = (6 * 2);
      cfg.size_halfs = 4;
      cfg.buffer = clear_buf;
   }

   record += AGX_BIND_UNIFORM_LENGTH;

   /* TODO: Can we prepack this? */
   agx_pack(record, SET_SHADER_EXTENDED, cfg) {
      cfg.code = code;
      cfg.register_quadwords = 1;
      cfg.unk_3 = 0x8d;
      cfg.unk_2 = 0x0d;
      cfg.unk_2b = 4;
      cfg.frag_unk = 0x880100;
      cfg.preshader_mode = 0; // XXX
   }

   record += AGX_SET_SHADER_EXTENDED_LENGTH;

   /* End pipeline */
   memset(record, 0, 8);
   return ptr.gpu;
}
1171
/* Build the internal pipeline that reloads existing render-target contents
 * into the tilebuffer at the start of a tile, by sampling the target as a
 * texture with the shader at `code`. */
uint64_t
agx_build_reload_pipeline(struct agx_context *ctx, uint32_t code, struct pipe_surface *surf)
{
   struct agx_ptr ptr = agx_pool_alloc_aligned(&ctx->batch->pipeline_pool,
                        (1 * AGX_BIND_TEXTURE_LENGTH) +
                        (1 * AGX_BIND_SAMPLER_LENGTH) +
                        AGX_SET_SHADER_EXTENDED_LENGTH + 8,
                        64);

   uint8_t *record = ptr.cpu;
   /* Transient descriptors for the sampler and texture live in the batch pool */
   struct agx_ptr sampler = agx_pool_alloc_aligned(&ctx->batch->pool, AGX_SAMPLER_LENGTH, 64);
   struct agx_ptr texture = agx_pool_alloc_aligned(&ctx->batch->pool, AGX_TEXTURE_LENGTH, 64);

   /* Simple edge-clamped sampler addressing the target in pixel coordinates */
   agx_pack(sampler.cpu, SAMPLER, cfg) {
      cfg.magnify_linear = true;
      cfg.minify_linear = false;
      cfg.mip_filter = AGX_MIP_FILTER_NONE;
      cfg.wrap_s = AGX_WRAP_CLAMP_TO_EDGE;
      cfg.wrap_t = AGX_WRAP_CLAMP_TO_EDGE;
      cfg.wrap_r = AGX_WRAP_CLAMP_TO_EDGE;
      cfg.pixel_coordinates = true;
      cfg.compare_func = AGX_COMPARE_FUNC_ALWAYS;
      cfg.unk_2 = 0;
      cfg.unk_3 = 0;
   }

   /* Texture descriptor describing the surface being reloaded */
   agx_pack(texture.cpu, TEXTURE, cfg) {
      struct agx_resource *rsrc = agx_resource(surf->texture);
      const struct util_format_description *desc =
         util_format_description(surf->format);

      cfg.layout = agx_translate_layout(rsrc->modifier);
      cfg.format = agx_pixel_format[surf->format].hw;
      cfg.swizzle_r = agx_channel_from_pipe(desc->swizzle[0]);
      cfg.swizzle_g = agx_channel_from_pipe(desc->swizzle[1]);
      cfg.swizzle_b = agx_channel_from_pipe(desc->swizzle[2]);
      cfg.swizzle_a = agx_channel_from_pipe(desc->swizzle[3]);
      cfg.width = surf->width;
      cfg.height = surf->height;
      cfg.levels = 1;
      cfg.srgb = (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB);
      cfg.address = rsrc->bo->ptr.gpu;
      cfg.unk_2 = false;

      /* Linear surfaces carry an explicit line stride (biased by 16 per the
       * hardware encoding — confirm); tiled surfaces use a sentinel */
      cfg.stride = (rsrc->modifier == DRM_FORMAT_MOD_LINEAR) ?
         (rsrc->slices[0].line_stride - 16) :
         AGX_RT_STRIDE_TILED;
   }

   agx_pack(record, BIND_TEXTURE, cfg) {
      cfg.start = 0;
      cfg.count = 1;
      cfg.buffer = texture.gpu;
   }

   record += AGX_BIND_TEXTURE_LENGTH;

   agx_pack(record, BIND_SAMPLER, cfg) {
      cfg.start = 0;
      cfg.count = 1;
      cfg.buffer = sampler.gpu;
   }

   record += AGX_BIND_SAMPLER_LENGTH;

   /* TODO: Can we prepack this? */
   agx_pack(record, SET_SHADER_EXTENDED, cfg) {
      cfg.code = code;
      cfg.register_quadwords = 0;
      cfg.unk_3 = 0x8d;
      cfg.unk_2 = 0x0d;
      cfg.unk_2b = 4;
      cfg.unk_4 = 0;
      cfg.frag_unk = 0x880100;
      cfg.preshader_mode = 0; // XXX
   }

   record += AGX_SET_SHADER_EXTENDED_LENGTH;

   /* End pipeline */
   memset(record, 0, 8);
   return ptr.gpu;
}
1255
/* Build the internal pipeline that stores the tilebuffer out to the render
 * target descriptor at end-of-tile, using the store shader at `code`. */
uint64_t
agx_build_store_pipeline(struct agx_context *ctx, uint32_t code,
                         uint64_t render_target)
{
   struct agx_ptr ptr = agx_pool_alloc_aligned(&ctx->batch->pipeline_pool,
                        (1 * AGX_BIND_TEXTURE_LENGTH) +
                        (1 * AGX_BIND_UNIFORM_LENGTH) +
                        AGX_SET_SHADER_EXTENDED_LENGTH + 8,
                        64);

   uint8_t *record = ptr.cpu;

   /* The render target descriptor is bound through the texture slot */
   agx_pack(record, BIND_TEXTURE, cfg) {
      cfg.start = 0;
      cfg.count = 1;
      cfg.buffer = render_target;
   }

   record += AGX_BIND_TEXTURE_LENGTH;

   /* Meaning of these two words is unknown; values copied from traces */
   uint32_t unk[] = { 0, ~0 };

   agx_pack(record, BIND_UNIFORM, cfg) {
      cfg.start_halfs = 4;
      cfg.size_halfs = 4;
      cfg.buffer = agx_pool_upload_aligned(&ctx->batch->pool, unk, sizeof(unk), 16);
   }

   record += AGX_BIND_UNIFORM_LENGTH;

   /* TODO: Can we prepack this? */
   agx_pack(record, SET_SHADER_EXTENDED, cfg) {
      cfg.code = code;
      cfg.register_quadwords = 1;
      cfg.unk_2 = 0xd;
      cfg.unk_3 = 0x8d;
      cfg.frag_unk = 0x880100;
      cfg.preshader_mode = 0; // XXX
   }

   record += AGX_SET_SHADER_EXTENDED_LENGTH;

   /* End pipeline */
   memset(record, 0, 8);
   return ptr.gpu;
}
1302
/* Upload the record that launches the fragment pipeline, tying together the
 * fragment pipeline pointer, its varyings, and its resource counts. */
static uint64_t
demo_launch_fragment(struct agx_context *ctx, struct agx_pool *pool, uint32_t pipeline, uint32_t varyings, unsigned input_count)
{
   struct agx_ptr t = agx_pool_alloc_aligned(pool, AGX_BIND_PIPELINE_LENGTH, 64);

   agx_pack(t.cpu, BIND_PIPELINE, cfg) {
      cfg.tag = AGX_BIND_PIPELINE_FRAGMENT;
      /* NOTE(review): sampler_count is set from texture_count — possibly
       * deliberate (one sampler per texture) but worth confirming */
      cfg.sampler_count = ctx->stage[PIPE_SHADER_FRAGMENT].texture_count;
      cfg.texture_count = ctx->stage[PIPE_SHADER_FRAGMENT].texture_count;
      cfg.input_count = input_count;
      cfg.pipeline = pipeline;
      cfg.fs_varyings = varyings;
   };

   return t.gpu;
}
1319
1320static uint64_t
1321demo_interpolation(struct agx_compiled_shader *fs, struct agx_pool *pool)
1322{
1323   struct agx_ptr t = agx_pool_alloc_aligned(pool, AGX_INTERPOLATION_LENGTH, 64);
1324
1325   agx_pack(t.cpu, INTERPOLATION, cfg) {
1326      cfg.varying_count = fs->info.varyings.nr_slots;
1327   };
1328
1329   return t.gpu;
1330}
1331
/* Upload the linkage record connecting vertex shader outputs to the
 * rasterizer/fragment stage. */
static uint64_t
demo_linkage(struct agx_compiled_shader *vs, struct agx_pool *pool)
{
   struct agx_ptr t = agx_pool_alloc_aligned(pool, AGX_LINKAGE_LENGTH, 64);

   agx_pack(t.cpu, LINKAGE, cfg) {
      cfg.varying_count = vs->info.varyings.nr_slots;

      // 0x2 for fragcoordz, 0x1 for varyings at all
      /* Bit 0x40000 appears to flag point-size writes — trace-derived */
      cfg.unk_1 = 0x210000 | (vs->info.writes_psiz ? 0x40000 : 0);
   };

   return t.gpu;
}
1346
/* Build and upload the combined rasterizer control words. Words 0-1 are
 * packed here; words 2-5 are OR'd in from the depth-stencil CSO, which was
 * prepacked with the overlapping fields zeroed. */
static uint64_t
demo_rasterizer(struct agx_context *ctx, struct agx_pool *pool, bool is_points)
{
   struct agx_rasterizer *rast = ctx->rast;
   struct agx_rasterizer_packed out;

   agx_pack(&out, RASTERIZER, cfg) {
      /* Back-face stencil reference falls back to the front-face value
       * unless two-sided stenciling is enabled */
      bool back_stencil = ctx->zs.base.stencil[1].enabled;
      cfg.front.stencil_reference = ctx->stencil_ref.ref_value[0];
      cfg.back.stencil_reference = back_stencil ?
         ctx->stencil_ref.ref_value[1] :
         cfg.front.stencil_reference;

      cfg.front.line_width = cfg.back.line_width = rast->line_width;
      cfg.front.polygon_mode = cfg.back.polygon_mode = AGX_POLYGON_MODE_FILL;

      cfg.unk_fill_lines = is_points; /* XXX: what is this? */

      /* Always enable scissoring so we may scissor to the viewport (TODO:
       * optimize this out if the viewport is the default and the app does not
       * use the scissor test) */
      cfg.scissor_enable = true;
   };

   /* Words 2-3: front */
   out.opaque[2] |= ctx->zs.front.opaque[0];
   out.opaque[3] |= ctx->zs.front.opaque[1];

   /* Words 4-5: back */
   out.opaque[4] |= ctx->zs.back.opaque[0];
   out.opaque[5] |= ctx->zs.back.opaque[1];

   return agx_pool_upload_aligned(pool, &out, sizeof(out), 64);
}
1381
/* Upload a block of not-yet-understood state words. The bit names below are
 * provisional, inferred from traces of lines/points/tilebuffer-read cases. */
static uint64_t
demo_unk11(struct agx_pool *pool, bool prim_lines, bool prim_points, bool reads_tib)
{
#define UNK11_FILL_MODE_LINES_1 (1 << 26)

#define UNK11_FILL_MODE_LINES_2 (0x5004 << 16)
#define UNK11_LINES (0x10000000)
#define UNK11_POINTS (0x40000000)

#define UNK11_READS_TIB (0x20000000)

   uint32_t unk[] = {
      0x200004a,
      0x200 | ((prim_lines || prim_points) ? UNK11_FILL_MODE_LINES_1 : 0) | (reads_tib ? UNK11_READS_TIB : 0),
      0x7e00000 | (prim_lines ? UNK11_LINES : 0) | (prim_points ? UNK11_POINTS : 0),
      0x7e00000 | (prim_lines ? UNK11_LINES : 0) | (prim_points ? UNK11_POINTS : 0),

      0x1ffff
   };

   return agx_pool_upload(pool, unk, sizeof(unk));
}
1404
/* Upload another block of unidentified state words (constant values copied
 * from traces). */
static uint64_t
demo_unk12(struct agx_pool *pool)
{
   uint32_t unk[] = {
      0x410000,
      0x1e3ce508,
      0xa0
   };

   return agx_pool_upload(pool, unk, sizeof(unk));
}
1416
1417static uint64_t
1418agx_set_scissor_index(struct agx_pool *pool, unsigned index)
1419{
1420   struct agx_ptr T = agx_pool_alloc_aligned(pool, AGX_SET_SCISSOR_LENGTH, 64);
1421
1422   agx_pack(T.cpu, SET_SCISSOR, cfg) {
1423      cfg.index = index;
1424   };
1425
1426   return T.gpu;
1427}
1428
/* Append one encoder record: a 24-bit word count packed with a 40-bit GPU
 * pointer into a single 64-bit slot, advancing the write cursor. */
static void
agx_push_record(uint8_t **out, unsigned size_words, uint64_t ptr)
{
   assert(ptr < (1ull << 40));
   assert(size_words < (1ull << 24));

   uint64_t packed = ((uint64_t) size_words) | (ptr << 24);
   memcpy(*out, &packed, sizeof(packed));
   *out += sizeof(packed);
}
1439
/* Emit the per-draw state stream: bind the vertex pipeline, then push the
 * records (interpolation, fragment launch, linkage, rasterizer, misc and,
 * when dirty, viewport/scissor) in hardware-expected order. Returns the
 * advanced write pointer into the encoder. */
static uint8_t *
agx_encode_state(struct agx_context *ctx, uint8_t *out,
                 uint32_t pipeline_vertex, uint32_t pipeline_fragment, uint32_t varyings,
                 bool is_lines, bool is_points)
{
   agx_pack(out, BIND_PIPELINE, cfg) {
      cfg.tag = AGX_BIND_PIPELINE_VERTEX;
      cfg.pipeline = pipeline_vertex;
      cfg.vs_output_count_1 = ctx->vs->info.varyings.nr_slots;
      cfg.vs_output_count_2 = ctx->vs->info.varyings.nr_slots;
      cfg.sampler_count = ctx->stage[PIPE_SHADER_VERTEX].texture_count;
      cfg.texture_count = ctx->stage[PIPE_SHADER_VERTEX].texture_count;
   }

   /* yes, it's really 17 bytes */
   out += AGX_BIND_PIPELINE_LENGTH;
   *(out++) = 0x0;

   struct agx_pool *pool = &ctx->batch->pool;
   /* A zeroed 16-byte allocation pushed as the first (empty) record */
   struct agx_ptr zero = agx_pool_alloc_aligned(pool, 16, 256);
   memset(zero.cpu, 0, 16);

   bool reads_tib = ctx->fs->info.reads_tib;

   agx_push_record(&out, 0, zero.gpu);
   agx_push_record(&out, 5, demo_interpolation(ctx->fs, pool));
   agx_push_record(&out, 5, demo_launch_fragment(ctx, pool, pipeline_fragment, varyings, ctx->fs->info.varyings.nr_descs));
   agx_push_record(&out, 4, demo_linkage(ctx->vs, pool));
   agx_push_record(&out, 7, demo_rasterizer(ctx, pool, is_points));
   agx_push_record(&out, 5, demo_unk11(pool, is_lines, is_points, reads_tib));

   /* Viewport/scissor records are only re-emitted when that state changed */
   if (ctx->dirty & (AGX_DIRTY_VIEWPORT | AGX_DIRTY_SCISSOR)) {
      struct agx_viewport_scissor vps = agx_upload_viewport_scissor(pool,
            ctx->batch, &ctx->viewport,
            ctx->rast->base.scissor ? &ctx->scissor : NULL);

      agx_push_record(&out, 10, vps.viewport);
      agx_push_record(&out, 2, agx_set_scissor_index(pool, vps.scissor));
   }

   agx_push_record(&out, 3, demo_unk12(pool));
   agx_push_record(&out, 2, agx_pool_upload(pool, ctx->rast->cull, sizeof(ctx->rast->cull)));

   return (out - 1); // XXX: alignment fixup, or something
}
1485
1486static enum agx_primitive
1487agx_primitive_for_pipe(enum pipe_prim_type mode)
1488{
1489   switch (mode) {
1490   case PIPE_PRIM_POINTS: return AGX_PRIMITIVE_POINTS;
1491   case PIPE_PRIM_LINES: return AGX_PRIMITIVE_LINES;
1492   case PIPE_PRIM_LINE_STRIP: return AGX_PRIMITIVE_LINE_STRIP;
1493   case PIPE_PRIM_LINE_LOOP: return AGX_PRIMITIVE_LINE_LOOP;
1494   case PIPE_PRIM_TRIANGLES: return AGX_PRIMITIVE_TRIANGLES;
1495   case PIPE_PRIM_TRIANGLE_STRIP: return AGX_PRIMITIVE_TRIANGLE_STRIP;
1496   case PIPE_PRIM_TRIANGLE_FAN: return AGX_PRIMITIVE_TRIANGLE_FAN;
1497   case PIPE_PRIM_QUADS: return AGX_PRIMITIVE_QUADS;
1498   case PIPE_PRIM_QUAD_STRIP: return AGX_PRIMITIVE_QUAD_STRIP;
1499   default: unreachable("todo: other primitive types");
1500   }
1501}
1502
1503static uint64_t
1504agx_index_buffer_ptr(struct agx_batch *batch,
1505                     const struct pipe_draw_start_count_bias *draw,
1506                     const struct pipe_draw_info *info)
1507{
1508   off_t offset = draw->start * info->index_size;
1509
1510   if (!info->has_user_indices) {
1511      struct agx_bo *bo = agx_resource(info->index.resource)->bo;
1512      agx_batch_add_bo(batch, bo);
1513
1514      return bo->ptr.gpu + offset;
1515   } else {
1516      return agx_pool_upload_aligned(&batch->pool,
1517                                     ((uint8_t *) info->index.user) + offset,
1518                                     draw->count * info->index_size, 64);
1519   }
1520}
1521
1522static bool
1523agx_scissor_culls_everything(struct agx_context *ctx)
1524{
1525        const struct pipe_scissor_state ss = ctx->scissor;
1526
1527        return ctx->rast->base.scissor &&
1528		((ss.minx == ss.maxx) || (ss.miny == ss.maxy));
1529}
1530
/* pipe_context::draw_vbo hook: encode a single draw (state records plus a
 * DRAW or INDEXED_DRAW command) into the batch's command encoder. */
static void
agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
             unsigned drawid_offset,
             const struct pipe_draw_indirect_info *indirect,
             const struct pipe_draw_start_count_bias *draws,
             unsigned num_draws)
{
   /* Split multi-draws into individual draws via the shared helper */
   if (num_draws > 1) {
      util_draw_multi(pctx, info, drawid_offset, indirect, draws, num_draws);
      return;
   }

   if (info->index_size && draws->index_bias)
      unreachable("todo: index bias");

   struct agx_context *ctx = agx_context(pctx);
   struct agx_batch *batch = ctx->batch;

   if (agx_scissor_culls_everything(ctx))
	   return;

   /* TODO: masks */
   ctx->batch->draw |= ~0;

   /* TODO: Dirty track */
   agx_update_vs(ctx);
   agx_update_fs(ctx);

   /* Keep the shader binaries alive for the lifetime of the batch */
   agx_batch_add_bo(batch, ctx->vs->bo);
   agx_batch_add_bo(batch, ctx->fs->bo);

   bool is_lines =
      (info->mode == PIPE_PRIM_LINES) ||
      (info->mode == PIPE_PRIM_LINE_STRIP) ||
      (info->mode == PIPE_PRIM_LINE_LOOP);

   uint8_t *out = agx_encode_state(ctx, batch->encoder_current,
                                   agx_build_pipeline(ctx, ctx->vs, PIPE_SHADER_VERTEX),
                                   agx_build_pipeline(ctx, ctx->fs, PIPE_SHADER_FRAGMENT),
                                   ctx->fs->varyings, is_lines, info->mode == PIPE_PRIM_POINTS);

   enum agx_primitive prim = agx_primitive_for_pipe(info->mode);
   unsigned idx_size = info->index_size;

   if (idx_size) {
      uint64_t ib = agx_index_buffer_ptr(batch, draws, info);

      /* Index sizes are encoded logarithmically */
      STATIC_ASSERT(__builtin_ctz(1) == AGX_INDEX_SIZE_U8);
      STATIC_ASSERT(__builtin_ctz(2) == AGX_INDEX_SIZE_U16);
      STATIC_ASSERT(__builtin_ctz(4) == AGX_INDEX_SIZE_U32);
      assert((idx_size == 1) || (idx_size == 2) || (idx_size == 4));

      agx_pack(out, INDEXED_DRAW, cfg) {
         cfg.restart_index = info->restart_index;
         /* The 64-bit index buffer address is split across two fields */
         cfg.unk_2a = (ib >> 32);
         cfg.primitive = prim;
         cfg.restart_enable = info->primitive_restart;
         cfg.index_size = __builtin_ctz(idx_size);
         cfg.index_buffer_offset = (ib & BITFIELD_MASK(32));
         cfg.index_buffer_size = ALIGN_POT(draws->count * idx_size, 4);
         cfg.index_count = draws->count;
         cfg.instance_count = info->instance_count;
         /* NOTE(review): index_bias is programmed here even though a nonzero
          * bias is rejected above — so this is always 0 today; confirm when
          * index bias support lands */
         cfg.base_vertex = draws->index_bias;
      };

      out += AGX_INDEXED_DRAW_LENGTH;
   } else {
      agx_pack(out, DRAW, cfg) {
         cfg.primitive = prim;
         cfg.vertex_start = draws->start;
         cfg.vertex_count = draws->count;
         cfg.instance_count = info->instance_count;
      };

      out += AGX_DRAW_LENGTH;
   }

   batch->encoder_current = out;
   ctx->dirty = 0;
}
1612
/* Prototype here to satisfy -Wmissing-prototypes; called at context creation */
void agx_init_state_functions(struct pipe_context *ctx);

/* Wire up every pipe_context state-management entry point implemented in
 * this file. */
void
agx_init_state_functions(struct pipe_context *ctx)
{
   /* CSO creation */
   ctx->create_blend_state = agx_create_blend_state;
   ctx->create_depth_stencil_alpha_state = agx_create_zsa_state;
   ctx->create_fs_state = agx_create_shader_state;
   ctx->create_rasterizer_state = agx_create_rs_state;
   ctx->create_sampler_state = agx_create_sampler_state;
   ctx->create_sampler_view = agx_create_sampler_view;
   ctx->create_surface = agx_create_surface;
   ctx->create_vertex_elements_state = agx_create_vertex_elements;
   ctx->create_vs_state = agx_create_shader_state;
   /* CSO binding */
   ctx->bind_blend_state = agx_bind_blend_state;
   ctx->bind_depth_stencil_alpha_state = agx_bind_zsa_state;
   ctx->bind_sampler_states = agx_bind_sampler_states;
   ctx->bind_fs_state = agx_bind_shader_state;
   ctx->bind_rasterizer_state = agx_bind_rasterizer_state;
   ctx->bind_vertex_elements_state = agx_bind_vertex_elements_state;
   ctx->bind_vs_state = agx_bind_shader_state;
   /* CSO destruction */
   ctx->delete_blend_state = agx_delete_state;
   ctx->delete_depth_stencil_alpha_state = agx_delete_state;
   ctx->delete_fs_state = agx_delete_shader_state;
   ctx->delete_rasterizer_state = agx_delete_state;
   ctx->delete_sampler_state = agx_delete_sampler_state;
   ctx->delete_vertex_elements_state = agx_delete_state;
   ctx->delete_vs_state = agx_delete_state;
   /* Parameter state setters */
   ctx->set_blend_color = agx_set_blend_color;
   ctx->set_clip_state = agx_set_clip_state;
   ctx->set_constant_buffer = agx_set_constant_buffer;
   ctx->set_sampler_views = agx_set_sampler_views;
   ctx->set_framebuffer_state = agx_set_framebuffer_state;
   ctx->set_polygon_stipple = agx_set_polygon_stipple;
   ctx->set_sample_mask = agx_set_sample_mask;
   ctx->set_scissor_states = agx_set_scissor_states;
   ctx->set_stencil_ref = agx_set_stencil_ref;
   ctx->set_vertex_buffers = agx_set_vertex_buffers;
   ctx->set_viewport_states = agx_set_viewport_states;
   ctx->sampler_view_destroy = agx_sampler_view_destroy;
   ctx->surface_destroy = agx_surface_destroy;
   /* Drawing and stream output */
   ctx->draw_vbo = agx_draw_vbo;
   ctx->create_stream_output_target = agx_create_stream_output_target;
   ctx->stream_output_target_destroy = agx_stream_output_target_destroy;
   ctx->set_stream_output_targets = agx_set_stream_output_targets;
}
1659