panvk_vX_cs.c revision 7ec681f3
1/*
2 * Copyright (C) 2021 Collabora Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24#include "genxml/gen_macros.h"
25
26#include "util/macros.h"
27#include "compiler/shader_enums.h"
28
29#include "vk_util.h"
30
31#include "panfrost-quirks.h"
32#include "pan_cs.h"
33#include "pan_encoder.h"
34#include "pan_pool.h"
35#include "pan_shader.h"
36
37#include "panvk_cs.h"
38#include "panvk_private.h"
39#include "panvk_varyings.h"
40
41static enum mali_mipmap_mode
42panvk_translate_sampler_mipmap_mode(VkSamplerMipmapMode mode)
43{
44   switch (mode) {
45   case VK_SAMPLER_MIPMAP_MODE_NEAREST: return MALI_MIPMAP_MODE_NEAREST;
46   case VK_SAMPLER_MIPMAP_MODE_LINEAR: return MALI_MIPMAP_MODE_TRILINEAR;
47   default: unreachable("Invalid mipmap mode");
48   }
49}
50
51static unsigned
52panvk_translate_sampler_address_mode(VkSamplerAddressMode mode)
53{
54   switch (mode) {
55   case VK_SAMPLER_ADDRESS_MODE_REPEAT: return MALI_WRAP_MODE_REPEAT;
56   case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT: return MALI_WRAP_MODE_MIRRORED_REPEAT;
57   case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE: return MALI_WRAP_MODE_CLAMP_TO_EDGE;
58   case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER: return MALI_WRAP_MODE_CLAMP_TO_BORDER;
59   case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE: return MALI_WRAP_MODE_MIRRORED_CLAMP_TO_EDGE;
60   default: unreachable("Invalid wrap");
61   }
62}
63
64static void
65panvk_translate_sampler_border_color(const VkSamplerCreateInfo *pCreateInfo,
66                                     uint32_t border_color[4])
67{
68   const VkSamplerCustomBorderColorCreateInfoEXT *pBorderColor =
69      vk_find_struct_const(pCreateInfo->pNext, SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT);
70
71   switch (pCreateInfo->borderColor) {
72   case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
73   case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
74      border_color[0] = border_color[1] = border_color[2] = fui(0.0);
75      border_color[3] =
76         pCreateInfo->borderColor == VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK ?
77         fui(1.0) : fui(0.0);
78      break;
79   case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
80   case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
81      border_color[0] = border_color[1] = border_color[2] = 0;
82      border_color[3] =
83         pCreateInfo->borderColor == VK_BORDER_COLOR_INT_OPAQUE_BLACK ?
84         UINT_MAX : 0;
85      break;
86   case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
87      border_color[0] = border_color[1] = border_color[2] = border_color[3] = fui(1.0);
88      break;
89   case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
90      border_color[0] = border_color[1] = border_color[2] = border_color[3] = UINT_MAX;
91      break;
92   case VK_BORDER_COLOR_FLOAT_CUSTOM_EXT:
93   case VK_BORDER_COLOR_INT_CUSTOM_EXT:
94      memcpy(border_color, pBorderColor->customBorderColor.int32, sizeof(uint32_t) * 4);
95      break;
96   default:
97      unreachable("Invalid border color");
98   }
99}
100
/*
 * Return the hardware pixel format word for varying slot 'idx' of the
 * given shader stage. Built-in slots get hard-coded formats; everything
 * else is looked up in the device format table. The low bits encode a
 * swizzle on v6-and-earlier and a component order on v7+.
 */
static mali_pixel_format
panvk_varying_hw_format(const struct panvk_device *dev,
                        const struct panvk_varyings_info *varyings,
                        gl_shader_stage stage, unsigned idx)
{
   const struct panfrost_device *pdev = &dev->physical_device->pdev;
   gl_varying_slot loc = varyings->stage[stage].loc[idx];
   bool fs = stage == MESA_SHADER_FRAGMENT;

   switch (loc) {
   case VARYING_SLOT_PNTC:
   case VARYING_SLOT_PSIZ:
      /* Point size/coord: single 16-bit float in the R channel. */
#if PAN_ARCH <= 6
      return (MALI_R16F << 12) | panfrost_get_default_swizzle(1);
#else
      return (MALI_R16F << 12) | MALI_RGB_COMPONENT_ORDER_R000;
#endif
   case VARYING_SLOT_POS:
      /* Position: RGBA32F when consumed by the fragment shader,
       * MALI_SNAP_4 otherwise (presumably the tiler-side fixed-point
       * position encoding — confirm against pan_cs docs). */
#if PAN_ARCH <= 6
      return ((fs ? MALI_RGBA32F : MALI_SNAP_4) << 12) |
             panfrost_get_default_swizzle(4);
#else
      return ((fs ? MALI_RGBA32F : MALI_SNAP_4) << 12) |
             MALI_RGB_COMPONENT_ORDER_RGBA;
#endif
   default:
      assert(!panvk_varying_is_builtin(stage, loc));
      if (varyings->varying[loc].format != PIPE_FORMAT_NONE)
         return pdev->formats[varyings->varying[loc].format].hw;

      /* No format recorded for this slot: emit a constant-zero format
       * so the consumer reads (0, 0, 0, 0). */
#if PAN_ARCH >= 7
      return (MALI_CONSTANT << 12) | MALI_RGB_COMPONENT_ORDER_0000;
#else
      return (MALI_CONSTANT << 12) | PAN_V6_SWIZZLE(0, 0, 0, 0);
#endif
   }
}
137
/*
 * Pack one ATTRIBUTE descriptor describing varying 'idx' of 'stage'.
 * Built-in varyings live in dedicated buffers selected via
 * panvk_varying_buf_id() and carry no explicit offset; regular varyings
 * reference their buffer index and byte offset directly.
 */
static void
panvk_emit_varying(const struct panvk_device *dev,
                   const struct panvk_varyings_info *varyings,
                   gl_shader_stage stage, unsigned idx,
                   void *attrib)
{
   gl_varying_slot loc = varyings->stage[stage].loc[idx];
   bool fs = stage == MESA_SHADER_FRAGMENT;

   pan_pack(attrib, ATTRIBUTE, cfg) {
      if (!panvk_varying_is_builtin(stage, loc)) {
         cfg.buffer_index = varyings->varying[loc].buf;
         cfg.offset = varyings->varying[loc].offset;
      } else {
         cfg.buffer_index =
            panvk_varying_buf_index(varyings,
                                    panvk_varying_buf_id(fs, loc));
      }
      /* NOTE(review): offset_enable is only set on v5 — v6+ apparently
       * doesn't use this bit; confirm against the genxml ATTRIBUTE
       * definition. */
      cfg.offset_enable = PAN_ARCH == 5;
      cfg.format = panvk_varying_hw_format(dev, varyings, stage, idx);
   }
}
160
161void
162panvk_per_arch(emit_varyings)(const struct panvk_device *dev,
163                              const struct panvk_varyings_info *varyings,
164                              gl_shader_stage stage,
165                              void *descs)
166{
167   struct mali_attribute_packed *attrib = descs;
168
169   for (unsigned i = 0; i < varyings->stage[stage].count; i++)
170      panvk_emit_varying(dev, varyings, stage, i, attrib++);
171}
172
/*
 * Pack the ATTRIBUTE_BUFFER descriptor for varying buffer 'id'.
 * The GPU pointer is aligned down to 64 bytes; the misalignment is
 * folded into the size so the full range stays addressable.
 */
static void
panvk_emit_varying_buf(const struct panvk_varyings_info *varyings,
                       enum panvk_varying_buf_id id, void *buf)
{
   unsigned buf_idx = panvk_varying_buf_index(varyings, id);

   pan_pack(buf, ATTRIBUTE_BUFFER, cfg) {
#if PAN_ARCH == 5
      /* v5 has "special" attribute buffers identified by an ID instead
       * of a pointer. The 'continue' skips the rest of the packing body
       * (pan_pack expands to a loop construct). */
      enum mali_attribute_special special_id = panvk_varying_special_buf_id(id);
      if (special_id) {
         cfg.type = 0;
         cfg.special = special_id;
         continue;
      }
#endif
      unsigned offset = varyings->buf[buf_idx].address & 63;

      cfg.stride = varyings->buf[buf_idx].stride;
      cfg.size = varyings->buf[buf_idx].size + offset;
      cfg.pointer = varyings->buf[buf_idx].address & ~63ULL;
   }
}
195
196void
197panvk_per_arch(emit_varying_bufs)(const struct panvk_varyings_info *varyings,
198                                  void *descs)
199{
200   struct mali_attribute_buffer_packed *buf = descs;
201
202   for (unsigned i = 0; i < PANVK_VARY_BUF_MAX; i++) {
203      if (varyings->buf_mask & (1 << i))
204         panvk_emit_varying_buf(varyings, i, buf++);
205   }
206
207   /* We need an empty entry to stop prefetching on Bifrost */
208#if PAN_ARCH >= 6
209   memset(buf, 0, sizeof(*buf));
210#endif
211}
212
/*
 * Pack the ATTRIBUTE_BUFFER descriptor for vertex attribute buffer
 * 'idx'. On v5, "special" buffers (vertex/instance ID) are encoded via
 * dedicated helpers instead of a memory buffer. The buffer pointer is
 * aligned down to 64 bytes with the misalignment folded into the size.
 */
static void
panvk_emit_attrib_buf(const struct panvk_attribs_info *info,
                      const struct panvk_draw_info *draw,
                      const struct panvk_attrib_buf *bufs,
                      unsigned buf_count,
                      unsigned idx, void *desc)
{
   const struct panvk_attrib_buf_info *buf_info = &info->buf[idx];

#if PAN_ARCH == 5
   if (buf_info->special) {
      switch (buf_info->special_id) {
      case PAN_VERTEX_ID:
         panfrost_vertex_id(draw->padded_vertex_count, desc,
                            draw->instance_count > 1);
         return;
      case PAN_INSTANCE_ID:
         panfrost_instance_id(draw->padded_vertex_count, desc,
                              draw->instance_count > 1);
         return;
      default:
         unreachable("Invalid attribute ID");
      }
   }
#endif

   assert(idx < buf_count);
   const struct panvk_attrib_buf *buf = &bufs[idx];
   /* Per-instance attributes advance every padded_vertex_count
    * invocations; per-vertex attributes use divisor 0. */
   unsigned divisor = buf_info->per_instance ?
                      draw->padded_vertex_count : 0;
   /* With a single instance, a per-instance attribute never advances,
    * so a zero stride is sufficient. */
   unsigned stride = divisor && draw->instance_count == 1 ?
                     0 : buf_info->stride;
   mali_ptr addr = buf->address & ~63ULL;
   unsigned size = buf->size + (buf->address & 63);

   /* TODO: support instanced arrays */
   pan_pack(desc, ATTRIBUTE_BUFFER, cfg) {
      if (draw->instance_count > 1 && divisor) {
         cfg.type = MALI_ATTRIBUTE_TYPE_1D_MODULUS;
         cfg.divisor = divisor;
      }

      cfg.pointer = addr;
      cfg.stride = stride;
      cfg.size = size;
   }
}
260
261void
262panvk_per_arch(emit_attrib_bufs)(const struct panvk_attribs_info *info,
263                                 const struct panvk_attrib_buf *bufs,
264                                 unsigned buf_count,
265                                 const struct panvk_draw_info *draw,
266                                 void *descs)
267{
268   struct mali_attribute_buffer_packed *buf = descs;
269
270   for (unsigned i = 0; i < info->buf_count; i++)
271      panvk_emit_attrib_buf(info, draw, bufs, buf_count, i, buf++);
272
273   /* A NULL entry is needed to stop prefecting on Bifrost */
274#if PAN_ARCH >= 6
275   memset(buf, 0, sizeof(*buf));
276#endif
277}
278
/*
 * Pack a SAMPLER descriptor from VkSamplerCreateInfo: filtering, LOD
 * range/bias, wrap modes, compare function and border color.
 */
void
panvk_per_arch(emit_sampler)(const VkSamplerCreateInfo *pCreateInfo,
                             void *desc)
{
   uint32_t border_color[4];

   panvk_translate_sampler_border_color(pCreateInfo, border_color);

   pan_pack(desc, SAMPLER, cfg) {
      cfg.magnify_nearest = pCreateInfo->magFilter == VK_FILTER_NEAREST;
      cfg.minify_nearest = pCreateInfo->minFilter == VK_FILTER_NEAREST;
      cfg.mipmap_mode = panvk_translate_sampler_mipmap_mode(pCreateInfo->mipmapMode);
      cfg.normalized_coordinates = !pCreateInfo->unnormalizedCoordinates;

      /* LOD values are converted to the hardware's fixed-point format;
       * only the bias may be negative. */
      cfg.lod_bias = FIXED_16(pCreateInfo->mipLodBias, true);
      cfg.minimum_lod = FIXED_16(pCreateInfo->minLod, false);
      cfg.maximum_lod = FIXED_16(pCreateInfo->maxLod, false);
      cfg.wrap_mode_s = panvk_translate_sampler_address_mode(pCreateInfo->addressModeU);
      cfg.wrap_mode_t = panvk_translate_sampler_address_mode(pCreateInfo->addressModeV);
      cfg.wrap_mode_r = panvk_translate_sampler_address_mode(pCreateInfo->addressModeW);
      cfg.compare_function = panvk_per_arch(translate_sampler_compare_func)(pCreateInfo);
      cfg.border_color_r = border_color[0];
      cfg.border_color_g = border_color[1];
      cfg.border_color_b = border_color[2];
      cfg.border_color_a = border_color[3];
   }
}
306
/*
 * Pack one vertex ATTRIBUTE descriptor. Attribute buffers are emitted
 * with their base pointer aligned down to 64 bytes (see
 * panvk_emit_attrib_buf), so the dropped misalignment is added back
 * into the attribute offset here.
 */
static void
panvk_emit_attrib(const struct panvk_device *dev,
                  const struct panvk_attribs_info *attribs,
                  const struct panvk_attrib_buf *bufs,
                  unsigned buf_count,
                  unsigned idx, void *attrib)
{
   const struct panfrost_device *pdev = &dev->physical_device->pdev;

   pan_pack(attrib, ATTRIBUTE, cfg) {
      cfg.buffer_index = attribs->attrib[idx].buf;
      cfg.offset = attribs->attrib[idx].offset +
                   (bufs[cfg.buffer_index].address & 63);
      cfg.format = pdev->formats[attribs->attrib[idx].format].hw;
   }
}
323
324void
325panvk_per_arch(emit_attribs)(const struct panvk_device *dev,
326                             const struct panvk_attribs_info *attribs,
327                             const struct panvk_attrib_buf *bufs,
328                             unsigned buf_count,
329                             void *descs)
330{
331   struct mali_attribute_packed *attrib = descs;
332
333   for (unsigned i = 0; i < attribs->attrib_count; i++)
334      panvk_emit_attrib(dev, attribs, bufs, buf_count, i, attrib++);
335}
336
337void
338panvk_per_arch(emit_ubo)(mali_ptr address, size_t size,  void *desc)
339{
340   pan_pack(desc, UNIFORM_BUFFER, cfg) {
341      cfg.pointer = address;
342      cfg.entries = DIV_ROUND_UP(size, 16);
343   }
344}
345
/*
 * Emit the UNIFORM_BUFFER descriptor table for a pipeline + descriptor
 * state: per-set UBOs (plus one extra UBO holding dynamic offsets when
 * the set layout declares any), followed by one UBO per sysval group
 * used by the pipeline.
 */
void
panvk_per_arch(emit_ubos)(const struct panvk_pipeline *pipeline,
                          const struct panvk_descriptor_state *state,
                          void *descs)
{
   struct mali_uniform_buffer_packed *ubos = descs;

   for (unsigned i = 0; i < ARRAY_SIZE(state->sets); i++) {
      const struct panvk_descriptor_set_layout *set_layout =
         pipeline->layout->sets[i].layout;
      const struct panvk_descriptor_set *set = state->sets[i].set;
      /* Start of this set's slice in the flat UBO table. */
      unsigned offset = pipeline->layout->sets[i].ubo_offset;

      if (!set_layout)
         continue;

      if (!set) {
         /* Set declared by the layout but not bound: zero its slots
          * (including the dynamic-offset UBO slot, if any). */
         unsigned num_ubos = (set_layout->num_dynoffsets != 0) + set_layout->num_ubos;
         memset(&ubos[offset], 0, num_ubos * sizeof(*ubos));
      } else {
         memcpy(&ubos[offset], set->ubos, set_layout->num_ubos * sizeof(*ubos));
         if (set_layout->num_dynoffsets) {
            /* Dynamic offsets are passed to the shader through an extra
             * UBO placed right after the set's regular UBOs. */
            panvk_per_arch(emit_ubo)(state->sets[i].dynoffsets.gpu,
                                     set->layout->num_dynoffsets * sizeof(uint32_t),
                                     &ubos[offset + set_layout->num_ubos]);
         }
      }
   }

   for (unsigned i = 0; i < ARRAY_SIZE(pipeline->sysvals); i++) {
      if (!pipeline->sysvals[i].ids.sysval_count)
         continue;

      /* Use the pipeline's baked sysval UBO when available, otherwise
       * fall back to the dynamically-uploaded one from the cmd state.
       * Each sysval occupies 16 bytes. */
      panvk_per_arch(emit_ubo)(pipeline->sysvals[i].ubo ? : state->sysvals[i],
                               pipeline->sysvals[i].ids.sysval_count * 16,
                               &ubos[pipeline->sysvals[i].ubo_idx]);
   }
}
384
385void
386panvk_per_arch(emit_vertex_job)(const struct panvk_pipeline *pipeline,
387                                const struct panvk_draw_info *draw,
388                                void *job)
389{
390   void *section = pan_section_ptr(job, COMPUTE_JOB, INVOCATION);
391
392   memcpy(section, &draw->invocation, pan_size(INVOCATION));
393
394   pan_section_pack(job, COMPUTE_JOB, PARAMETERS, cfg) {
395      cfg.job_task_split = 5;
396   }
397
398   pan_section_pack(job, COMPUTE_JOB, DRAW, cfg) {
399      cfg.draw_descriptor_is_64b = true;
400      cfg.state = pipeline->rsds[MESA_SHADER_VERTEX];
401      cfg.attributes = draw->stages[MESA_SHADER_VERTEX].attributes;
402      cfg.attribute_buffers = draw->attribute_bufs;
403      cfg.varyings = draw->stages[MESA_SHADER_VERTEX].varyings;
404      cfg.varying_buffers = draw->varying_bufs;
405      cfg.thread_storage = draw->tls;
406      cfg.offset_start = draw->offset_start;
407      cfg.instance_size = draw->instance_count > 1 ?
408                          draw->padded_vertex_count : 1;
409      cfg.uniform_buffers = draw->ubos;
410      cfg.push_uniforms = draw->stages[PIPE_SHADER_VERTEX].push_constants;
411      cfg.textures = draw->textures;
412      cfg.samplers = draw->samplers;
413   }
414}
415
/*
 * Pack the PRIMITIVE section of a tiler job: draw mode, provoking
 * vertex, primitive restart and vertex count.
 */
static void
panvk_emit_tiler_primitive(const struct panvk_pipeline *pipeline,
                           const struct panvk_draw_info *draw,
                           void *prim)
{
   pan_pack(prim, PRIMITIVE, cfg) {
      cfg.draw_mode = pipeline->ia.topology;
      if (pipeline->ia.writes_point_size)
         cfg.point_size_array_format = MALI_POINT_SIZE_ARRAY_FORMAT_FP16;

      cfg.first_provoking_vertex = true;
      if (pipeline->ia.primitive_restart)
         cfg.primitive_restart = MALI_PRIMITIVE_RESTART_IMPLICIT;
      cfg.job_task_split = 6;
      /* TODO: indexed draws */
      cfg.index_count = draw->vertex_count;
   }
}
434
/*
 * Pack the PRIMITIVE_SIZE section: a per-vertex point-size array when
 * the pipeline writes gl_PointSize, otherwise a constant size taken
 * from the dynamic line width.
 */
static void
panvk_emit_tiler_primitive_size(const struct panvk_pipeline *pipeline,
                                const struct panvk_draw_info *draw,
                                void *primsz)
{
   pan_pack(primsz, PRIMITIVE_SIZE, cfg) {
      if (pipeline->ia.writes_point_size) {
         cfg.size_array = draw->psiz;
      } else {
         cfg.constant = draw->line_width;
      }
   }
}
448
449static void
450panvk_emit_tiler_dcd(const struct panvk_pipeline *pipeline,
451                     const struct panvk_draw_info *draw,
452                     void *dcd)
453{
454   pan_pack(dcd, DRAW, cfg) {
455      cfg.four_components_per_vertex = true;
456      cfg.draw_descriptor_is_64b = true;
457      cfg.front_face_ccw = pipeline->rast.front_ccw;
458      cfg.cull_front_face = pipeline->rast.cull_front_face;
459      cfg.cull_back_face = pipeline->rast.cull_back_face;
460      cfg.position = draw->position;
461      cfg.state = draw->fs_rsd;
462      cfg.attributes = draw->stages[MESA_SHADER_FRAGMENT].attributes;
463      cfg.attribute_buffers = draw->attribute_bufs;
464      cfg.viewport = draw->viewport;
465      cfg.varyings = draw->stages[MESA_SHADER_FRAGMENT].varyings;
466      cfg.varying_buffers = cfg.varyings ? draw->varying_bufs : 0;
467#if PAN_ARCH == 5
468      cfg.fbd = draw->fb;
469#else
470      cfg.thread_storage = draw->tls;
471#endif
472
473      /* For all primitives but lines DRAW.flat_shading_vertex must
474       * be set to 0 and the provoking vertex is selected with the
475       * PRIMITIVE.first_provoking_vertex field.
476       */
477      if (pipeline->ia.topology == MALI_DRAW_MODE_LINES ||
478          pipeline->ia.topology == MALI_DRAW_MODE_LINE_STRIP ||
479          pipeline->ia.topology == MALI_DRAW_MODE_LINE_LOOP) {
480         /* The logic is inverted on bifrost. */
481#if PAN_ARCH == 5
482         cfg.flat_shading_vertex = false;
483#else
484         cfg.flat_shading_vertex = true;
485#endif
486      }
487
488      cfg.offset_start = draw->offset_start;
489      cfg.instance_size = draw->instance_count > 1 ?
490                         draw->padded_vertex_count : 1;
491      cfg.uniform_buffers = draw->ubos;
492      cfg.push_uniforms = draw->stages[PIPE_SHADER_FRAGMENT].push_constants;
493      cfg.textures = draw->textures;
494      cfg.samplers = draw->samplers;
495
496      /* TODO: occlusion queries */
497   }
498}
499
/*
 * Emit a complete TILER_JOB for a draw by filling each section in turn:
 * invocation (copied from the draw info), primitive, primitive size and
 * the DCD. On v6+ (Bifrost) the job additionally references the tiler
 * context and carries a padding section.
 */
void
panvk_per_arch(emit_tiler_job)(const struct panvk_pipeline *pipeline,
                               const struct panvk_draw_info *draw,
                               void *job)
{
   void *section;

   section = pan_section_ptr(job, TILER_JOB, INVOCATION);
   memcpy(section, &draw->invocation, pan_size(INVOCATION));

   section = pan_section_ptr(job, TILER_JOB, PRIMITIVE);
   panvk_emit_tiler_primitive(pipeline, draw, section);

   section = pan_section_ptr(job, TILER_JOB, PRIMITIVE_SIZE);
   panvk_emit_tiler_primitive_size(pipeline, draw, section);

   section = pan_section_ptr(job, TILER_JOB, DRAW);
   panvk_emit_tiler_dcd(pipeline, draw, section);

#if PAN_ARCH >= 6
   pan_section_pack(job, TILER_JOB, TILER, cfg) {
      cfg.address = draw->tiler_ctx->bifrost;
   }
   pan_section_pack(job, TILER_JOB, PADDING, padding);
#endif
}
526
527void
528panvk_per_arch(emit_viewport)(const VkViewport *viewport,
529                              const VkRect2D *scissor,
530                              void *vpd)
531{
532   /* The spec says "width must be greater than 0.0" */
533   assert(viewport->x >= 0);
534   int minx = (int)viewport->x;
535   int maxx = (int)(viewport->x + viewport->width);
536
537   /* Viewport height can be negative */
538   int miny = MIN2((int)viewport->y, (int)(viewport->y + viewport->height));
539   int maxy = MAX2((int)viewport->y, (int)(viewport->y + viewport->height));
540
541   assert(scissor->offset.x >= 0 && scissor->offset.y >= 0);
542   miny = MAX2(scissor->offset.x, minx);
543   miny = MAX2(scissor->offset.y, miny);
544   maxx = MIN2(scissor->offset.x + scissor->extent.width, maxx);
545   maxy = MIN2(scissor->offset.y + scissor->extent.height, maxy);
546
547   /* Make sure we don't end up with a max < min when width/height is 0 */
548   maxx = maxx > minx ? maxx - 1 : maxx;
549   maxy = maxy > miny ? maxy - 1 : maxy;
550
551   assert(viewport->minDepth >= 0.0f && viewport->minDepth <= 1.0f);
552   assert(viewport->maxDepth >= 0.0f && viewport->maxDepth <= 1.0f);
553
554   pan_pack(vpd, VIEWPORT, cfg) {
555      cfg.scissor_minimum_x = minx;
556      cfg.scissor_minimum_y = miny;
557      cfg.scissor_maximum_x = maxx;
558      cfg.scissor_maximum_y = maxy;
559      cfg.minimum_z = MIN2(viewport->minDepth, viewport->maxDepth);
560      cfg.maximum_z = MAX2(viewport->minDepth, viewport->maxDepth);
561   }
562}
563
#if PAN_ARCH >= 6
/*
 * Map the NIR alu type of a blend input to the Bifrost register-file
 * format used by the fixed-function blend unit's conversion descriptor.
 * A zero nir_type means the render target is unused.
 */
static enum mali_register_file_format
bifrost_blend_type_from_nir(nir_alu_type nir_type)
{
   switch(nir_type) {
   case 0: /* Render target not in use */
      return 0;
   case nir_type_float16:
      return MALI_REGISTER_FILE_FORMAT_F16;
   case nir_type_float32:
      return MALI_REGISTER_FILE_FORMAT_F32;
   case nir_type_int32:
      return MALI_REGISTER_FILE_FORMAT_I32;
   case nir_type_uint32:
      return MALI_REGISTER_FILE_FORMAT_U32;
   case nir_type_int16:
      return MALI_REGISTER_FILE_FORMAT_I16;
   case nir_type_uint16:
      return MALI_REGISTER_FILE_FORMAT_U16;
   default:
      unreachable("Unsupported blend shader type for NIR alu type");
   }
}
#endif
588
/*
 * Pack the BLEND descriptor for render target 'rt' from the pipeline's
 * blend state. Midgard (v5) stores the blend constant as a float;
 * Bifrost (v6+) converts it to fixed point scaled by the widest channel
 * size and selects an internal blend mode (OFF/OPAQUE/FIXED_FUNCTION).
 */
void
panvk_per_arch(emit_blend)(const struct panvk_device *dev,
                           const struct panvk_pipeline *pipeline,
                           unsigned rt, void *bd)
{
   const struct pan_blend_state *blend = &pipeline->blend.state;
   const struct pan_blend_rt_state *rts = &blend->rts[rt];
   /* Dithering is currently never enabled here. */
   bool dithered = false;

   pan_pack(bd, BLEND, cfg) {
      if (!blend->rt_count || !rts->equation.color_mask) {
         /* RT not written at all: disable blending entirely. */
         cfg.enable = false;
#if PAN_ARCH >= 6
         cfg.internal.mode = MALI_BLEND_MODE_OFF;
#endif
         continue;
      }

      cfg.srgb = util_format_is_srgb(rts->format);
      cfg.load_destination = pan_blend_reads_dest(blend->rts[rt].equation);
      cfg.round_to_fb_precision = !dithered;

#if PAN_ARCH <= 5
      cfg.blend_shader = false;
      pan_blend_to_fixed_function_equation(blend->rts[rt].equation,
                                           &cfg.equation);
      cfg.constant =
         pan_blend_get_constant(pan_blend_constant_mask(blend->rts[rt].equation),
                                blend->constants);
#else
      const struct panfrost_device *pdev = &dev->physical_device->pdev;
      const struct util_format_description *format_desc =
         util_format_description(rts->format);
      /* Widest channel of the RT format determines the fixed-point
       * precision of the blend constant. */
      unsigned chan_size = 0;
      for (unsigned i = 0; i < format_desc->nr_channels; i++)
         chan_size = MAX2(format_desc->channel[i].size, chan_size);

      pan_blend_to_fixed_function_equation(blend->rts[rt].equation,
                                           &cfg.equation);

      /* Fixed point constant */
      float fconst =
         pan_blend_get_constant(pan_blend_constant_mask(blend->rts[rt].equation),
                                blend->constants);
      u16 constant = fconst * ((1 << chan_size) - 1);
      /* Left-align the value in the 16-bit constant field. */
      constant <<= 16 - chan_size;
      cfg.constant = constant;

      if (pan_blend_is_opaque(blend->rts[rt].equation))
         cfg.internal.mode = MALI_BLEND_MODE_OPAQUE;
      else
         cfg.internal.mode = MALI_BLEND_MODE_FIXED_FUNCTION;

      /* If we want the conversion to work properly,
       * num_comps must be set to 4
       */
      cfg.internal.fixed_function.num_comps = 4;
      cfg.internal.fixed_function.conversion.memory_format =
         panfrost_format_to_bifrost_blend(pdev, rts->format, dithered);
      cfg.internal.fixed_function.conversion.register_format =
         bifrost_blend_type_from_nir(pipeline->fs.info.bifrost.blend[rt].type);
      cfg.internal.fixed_function.rt = rt;
#endif
   }
}
654
/*
 * Re-emit a BLEND descriptor carrying only the (dynamic) blend
 * constant for render target 'rt'. On Bifrost the float constant is
 * pre-scaled by the factor computed at pipeline-creation time.
 * NOTE(review): enable is forced to false here — presumably this
 * partial descriptor is merged with the pipeline's baked one; confirm
 * against the caller.
 */
void
panvk_per_arch(emit_blend_constant)(const struct panvk_device *dev,
                                    const struct panvk_pipeline *pipeline,
                                    unsigned rt, const float *constants,
                                    void *bd)
{
   float constant = constants[pipeline->blend.constant[rt].index];

   pan_pack(bd, BLEND, cfg) {
      cfg.enable = false;
#if PAN_ARCH == 5
      cfg.constant = constant;
#else
      cfg.constant = constant * pipeline->blend.constant[rt].bifrost_factor;
#endif
   }
}
672
/*
 * Pack a partial RENDERER_STATE descriptor containing only the fields
 * controlled by dynamic state (depth bias, stencil compare/write masks,
 * stencil references). Only the fields whose corresponding dynamic
 * state is enabled on the pipeline are written; the rest stay zero,
 * presumably to be merged with the pipeline's base RSD by the caller.
 */
void
panvk_per_arch(emit_dyn_fs_rsd)(const struct panvk_pipeline *pipeline,
                                const struct panvk_cmd_state *state,
                                void *rsd)
{
   pan_pack(rsd, RENDERER_STATE, cfg) {
      if (pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS)) {
         cfg.depth_units = state->rast.depth_bias.constant_factor * 2.0f;
         cfg.depth_factor = state->rast.depth_bias.slope_factor;
         cfg.depth_bias_clamp = state->rast.depth_bias.clamp;
      }

      if (pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) {
         cfg.stencil_front.mask = state->zs.s_front.compare_mask;
         cfg.stencil_back.mask = state->zs.s_back.compare_mask;
      }

      if (pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) {
         cfg.stencil_mask_misc.stencil_mask_front = state->zs.s_front.write_mask;
         cfg.stencil_mask_misc.stencil_mask_back = state->zs.s_back.write_mask;
      }

      if (pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)) {
         cfg.stencil_front.reference_value = state->zs.s_front.ref;
         cfg.stencil_back.reference_value = state->zs.s_back.ref;
      }
   }
}
701
/*
 * Pack the fragment-shader RENDERER_STATE descriptor from the pipeline:
 * shader properties, multisampling, depth/stencil state and (when not
 * dynamic) depth bias / stencil masks / references. Handles both the
 * "has fragment shader" and "no fragment shader" (e.g. depth-only)
 * cases, with separate paths for v5 (Midgard) and v6+ (Bifrost).
 */
void
panvk_per_arch(emit_base_fs_rsd)(const struct panvk_device *dev,
                                 const struct panvk_pipeline *pipeline,
                                 void *rsd)
{
   const struct pan_shader_info *info = &pipeline->fs.info;

   pan_pack(rsd, RENDERER_STATE, cfg) {
      if (pipeline->fs.required) {
         pan_shader_prepare_rsd(info, pipeline->fs.address, &cfg);

#if PAN_ARCH == 5
         /* If either depth or stencil is enabled, discard matters */
         bool zs_enabled =
            (pipeline->zs.z_test && pipeline->zs.z_compare_func != MALI_FUNC_ALWAYS) ||
            pipeline->zs.s_test;

         cfg.properties.work_register_count = info->work_reg_count;
         cfg.properties.force_early_z =
            info->fs.can_early_z && !pipeline->ms.alpha_to_coverage &&
            pipeline->zs.z_compare_func == MALI_FUNC_ALWAYS;


         /* Workaround a hardware errata where early-z cannot be enabled
          * when discarding even when the depth buffer is read-only, by
          * lying to the hardware about the discard and setting the
          * reads tilebuffer? flag to compensate */
         cfg.properties.shader_reads_tilebuffer =
            info->fs.outputs_read ||
            (!zs_enabled && info->fs.can_discard);
         cfg.properties.shader_contains_discard =
            zs_enabled && info->fs.can_discard;
#else
         /* Forward-pixel-kill is only safe when the FS writes every RT
          * the pipeline uses, doesn't read the destination, and no
          * coverage modification is in play. */
         uint8_t rt_written = pipeline->fs.info.outputs_written >> FRAG_RESULT_DATA0;
         uint8_t rt_mask = pipeline->fs.rt_mask;
         cfg.properties.allow_forward_pixel_to_kill =
                 pipeline->fs.info.fs.can_fpk &&
                 !(rt_mask & ~rt_written) &&
                 !pipeline->ms.alpha_to_coverage &&
                 !pipeline->blend.reads_dest;
#endif
      } else {
         /* No fragment shader: emit a trivial RSD that lets the
          * fixed-function path (depth-only rendering) run early-Z. */
#if PAN_ARCH == 5
         cfg.shader.shader = 0x1;
         cfg.properties.work_register_count = 1;
         cfg.properties.depth_source = MALI_DEPTH_SOURCE_FIXED_FUNCTION;
         cfg.properties.force_early_z = true;
#else
         cfg.properties.shader_modifies_coverage = true;
         cfg.properties.allow_forward_pixel_to_kill = true;
         cfg.properties.allow_forward_pixel_to_be_killed = true;
         cfg.properties.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY;
#endif
      }

      bool msaa = pipeline->ms.rast_samples > 1;
      cfg.multisample_misc.multisample_enable = msaa;
      cfg.multisample_misc.sample_mask =
         msaa ? pipeline->ms.sample_mask : UINT16_MAX;

      cfg.multisample_misc.depth_function =
         pipeline->zs.z_test ? pipeline->zs.z_compare_func : MALI_FUNC_ALWAYS;

      cfg.multisample_misc.depth_write_mask = pipeline->zs.z_write;
      /* Depth clamping disables near/far plane discard. */
      cfg.multisample_misc.fixed_function_near_discard = !pipeline->rast.clamp_depth;
      cfg.multisample_misc.fixed_function_far_discard = !pipeline->rast.clamp_depth;
      cfg.multisample_misc.shader_depth_range_fixed = true;

      cfg.stencil_mask_misc.stencil_enable = pipeline->zs.s_test;
      cfg.stencil_mask_misc.alpha_to_coverage = pipeline->ms.alpha_to_coverage;
      cfg.stencil_mask_misc.alpha_test_compare_function = MALI_FUNC_ALWAYS;
      cfg.stencil_mask_misc.depth_range_1 = pipeline->rast.depth_bias.enable;
      cfg.stencil_mask_misc.depth_range_2 = pipeline->rast.depth_bias.enable;
      cfg.stencil_mask_misc.single_sampled_lines = pipeline->ms.rast_samples <= 1;

      /* The blocks below bake static values for any state that is NOT
       * flagged dynamic; dynamic values are patched in separately via
       * emit_dyn_fs_rsd(). */
      if (!(pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS))) {
         cfg.depth_units = pipeline->rast.depth_bias.constant_factor * 2.0f;
         cfg.depth_factor = pipeline->rast.depth_bias.slope_factor;
         cfg.depth_bias_clamp = pipeline->rast.depth_bias.clamp;
      }

      if (!(pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK))) {
         cfg.stencil_front.mask = pipeline->zs.s_front.compare_mask;
         cfg.stencil_back.mask = pipeline->zs.s_back.compare_mask;
      }

      if (!(pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK))) {
         cfg.stencil_mask_misc.stencil_mask_front = pipeline->zs.s_front.write_mask;
         cfg.stencil_mask_misc.stencil_mask_back = pipeline->zs.s_back.write_mask;
      }

      if (!(pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE))) {
         cfg.stencil_front.reference_value = pipeline->zs.s_front.ref;
         cfg.stencil_back.reference_value = pipeline->zs.s_back.ref;
      }

      cfg.stencil_front.compare_function = pipeline->zs.s_front.compare_func;
      cfg.stencil_front.stencil_fail = pipeline->zs.s_front.fail_op;
      cfg.stencil_front.depth_fail = pipeline->zs.s_front.z_fail_op;
      cfg.stencil_front.depth_pass = pipeline->zs.s_front.pass_op;
      cfg.stencil_back.compare_function = pipeline->zs.s_back.compare_func;
      cfg.stencil_back.stencil_fail = pipeline->zs.s_back.fail_op;
      cfg.stencil_back.depth_fail = pipeline->zs.s_back.z_fail_op;
      cfg.stencil_back.depth_pass = pipeline->zs.s_back.pass_op;
   }
}
808
/*
 * Pack a RENDERER_STATE descriptor for a non-fragment stage. Everything
 * is derived from the compiled shader info via the shared
 * pan_shader_prepare_rsd() helper; no extra fields are set here.
 */
void
panvk_per_arch(emit_non_fs_rsd)(const struct panvk_device *dev,
                                const struct pan_shader_info *shader_info,
                                mali_ptr shader_ptr,
                                void *rsd)
{
   assert(shader_info->stage != MESA_SHADER_FRAGMENT);

   pan_pack(rsd, RENDERER_STATE, cfg) {
      pan_shader_prepare_rsd(shader_info, shader_ptr, &cfg);
   }
}
821
/*
 * Emit a Bifrost tiler context + heap descriptor pair for a framebuffer
 * of the given dimensions. The TILER_HEAP descriptor is placed
 * immediately after the TILER_CONTEXT in the same allocation, and the
 * context points at it via descs->gpu. Not available on v5.
 */
void
panvk_per_arch(emit_tiler_context)(const struct panvk_device *dev,
                                   unsigned width, unsigned height,
                                   const struct panfrost_ptr *descs)
{
#if PAN_ARCH == 5
   unreachable("Not supported on v5");
#else
   const struct panfrost_device *pdev = &dev->physical_device->pdev;

   pan_pack(descs->cpu + pan_size(TILER_CONTEXT), TILER_HEAP, cfg) {
      /* The heap spans the device-wide tiler heap BO; 'bottom' starts
       * at the base and 'top' is the exclusive end. */
      cfg.size = pdev->tiler_heap->size;
      cfg.base = pdev->tiler_heap->ptr.gpu;
      cfg.bottom = pdev->tiler_heap->ptr.gpu;
      cfg.top = pdev->tiler_heap->ptr.gpu + pdev->tiler_heap->size;
   }

   pan_pack(descs->cpu, TILER_CONTEXT, cfg) {
      /* NOTE(review): 0x28 matches the hierarchy mask used by the GL
       * driver — confirm the rationale in pan_cs before changing. */
      cfg.hierarchy_mask = 0x28;
      cfg.fb_width = width;
      cfg.fb_height = height;
      cfg.heap = descs->gpu + pan_size(TILER_CONTEXT);
   }
#endif
}
847