panvk_vX_pipeline.c revision 7ec681f3
1/*
2 * Copyright © 2021 Collabora Ltd.
3 *
4 * Derived from tu_pipeline.c which is:
5 * Copyright © 2016 Red Hat.
6 * Copyright © 2016 Bas Nieuwenhuizen
7 * Copyright © 2015 Intel Corporation
8 *
9 * Permission is hereby granted, free of charge, to any person obtaining a
10 * copy of this software and associated documentation files (the "Software"),
11 * to deal in the Software without restriction, including without limitation
12 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
13 * and/or sell copies of the Software, and to permit persons to whom the
14 * Software is furnished to do so, subject to the following conditions:
15 *
16 * The above copyright notice and this permission notice (including the next
17 * paragraph) shall be included in all copies or substantial portions of the
18 * Software.
19 *
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
23 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
26 * DEALINGS IN THE SOFTWARE.
27 */
28
29#include "panvk_cs.h"
30#include "panvk_private.h"
31
32#include "pan_bo.h"
33
34#include "nir/nir.h"
35#include "nir/nir_builder.h"
36#include "spirv/nir_spirv.h"
37#include "util/debug.h"
38#include "util/mesa-sha1.h"
39#include "util/u_atomic.h"
40#include "vk_format.h"
41#include "vk_util.h"
42
43#include "panfrost/util/pan_lower_framebuffer.h"
44
45#include "panfrost-quirks.h"
46
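/* Transient state used while turning a VkGraphicsPipelineCreateInfo into a
 * panvk_pipeline: compiled per-stage shaders, their offsets in the shader and
 * static-state BOs, and the bits of fixed-function state that depend on
 * rasterizer_discard.
 */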
47struct panvk_pipeline_builder
48{
49   struct panvk_device *device;
50   struct panvk_pipeline_cache *cache;
51   const VkAllocationCallbacks *alloc;
52   const VkGraphicsPipelineCreateInfo *create_info;
53   const struct panvk_pipeline_layout *layout;
54
55   struct panvk_shader *shaders[MESA_SHADER_STAGES];
56   struct {
57      uint32_t shader_offset;
58      uint32_t rsd_offset;
59      uint32_t sysvals_offset;
60   } stages[MESA_SHADER_STAGES];
61   uint32_t blend_shader_offsets[MAX_RTS];
62   uint32_t shader_total_size;
63   uint32_t static_state_size;
64   uint32_t vpd_offset;
65
66   bool rasterizer_discard;
67   /* these states are affected by rasterizer_discard */
68   VkSampleCountFlagBits samples;
69   bool use_depth_stencil_attachment;
70   uint8_t active_color_attachments;
71   enum pipe_format color_attachment_formats[MAX_RTS];
72};
73
74static VkResult
75panvk_pipeline_builder_create_pipeline(struct panvk_pipeline_builder *builder,
76                                       struct panvk_pipeline **out_pipeline)
77{
78   struct panvk_device *dev = builder->device;
79
80   struct panvk_pipeline *pipeline =
81      vk_object_zalloc(&dev->vk, builder->alloc,
82                       sizeof(*pipeline), VK_OBJECT_TYPE_PIPELINE);
83   if (!pipeline)
84      return VK_ERROR_OUT_OF_HOST_MEMORY;
85
86   pipeline->layout = builder->layout;
87   *out_pipeline = pipeline;
88   return VK_SUCCESS;
89}
90
91static void
92panvk_pipeline_builder_finish(struct panvk_pipeline_builder *builder)
93{
94   for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++) {
95      if (!builder->shaders[i])
96         continue;
97      panvk_shader_destroy(builder->device, builder->shaders[i], builder->alloc);
98   }
99}
100
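/* A piece of state is static (baked at pipeline creation time) unless the
 * corresponding VK_DYNAMIC_STATE_* entry was recorded in dynamic_state_mask
 * by panvk_pipeline_builder_parse_dynamic().
 */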
101static bool
102panvk_pipeline_static_state(struct panvk_pipeline *pipeline, uint32_t id)
103{
104   return !(pipeline->dynamic_state_mask & (1 << id));
105}
106
107static VkResult
108panvk_pipeline_builder_compile_shaders(struct panvk_pipeline_builder *builder,
109                                       struct panvk_pipeline *pipeline)
110{
111   const VkPipelineShaderStageCreateInfo *stage_infos[MESA_SHADER_STAGES] = {
112      NULL
113   };
114   for (uint32_t i = 0; i < builder->create_info->stageCount; i++) {
115      gl_shader_stage stage = vk_to_mesa_shader_stage(builder->create_info->pStages[i].stage);
116      stage_infos[stage] = &builder->create_info->pStages[i];
117   }
118
119   /* compile shaders in reverse order */
120   unsigned sysval_ubo = builder->layout->num_ubos;
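   /* Each stage that uses sysvals gets a driver-internal UBO holding them,
    * placed right after the UBOs declared in the pipeline layout.
    */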
121
122   for (gl_shader_stage stage = MESA_SHADER_STAGES - 1;
123        stage > MESA_SHADER_NONE; stage--) {
124      const VkPipelineShaderStageCreateInfo *stage_info = stage_infos[stage];
125      if (!stage_info)
126         continue;
127
128      struct panvk_shader *shader;
129
130      shader = panvk_per_arch(shader_create)(builder->device, stage, stage_info,
131                                             builder->layout, sysval_ubo,
132                                             &pipeline->blend.state,
133                                             panvk_pipeline_static_state(pipeline,
134                                                                         VK_DYNAMIC_STATE_BLEND_CONSTANTS),
135                                             builder->alloc);
136      if (!shader)
137         return VK_ERROR_OUT_OF_HOST_MEMORY;
138
139      if (shader->info.sysvals.sysval_count)
140         sysval_ubo++;
141
142      builder->shaders[stage] = shader;
143      builder->shader_total_size = ALIGN_POT(builder->shader_total_size, 128);
144      builder->stages[stage].shader_offset = builder->shader_total_size;
145      builder->shader_total_size +=
146         util_dynarray_num_elements(&shader->binary, uint8_t);
147   }
148
149   return VK_SUCCESS;
150}
151
152static VkResult
153panvk_pipeline_builder_upload_shaders(struct panvk_pipeline_builder *builder,
154                                      struct panvk_pipeline *pipeline)
155{
156   struct panfrost_bo *bin_bo =
157      panfrost_bo_create(&builder->device->physical_device->pdev,
158                         builder->shader_total_size, PAN_BO_EXECUTE,
159                         "Shader");
160
161   pipeline->binary_bo = bin_bo;
162   panfrost_bo_mmap(bin_bo);
163
164   for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++) {
165      const struct panvk_shader *shader = builder->shaders[i];
166      if (!shader)
167         continue;
168
169      memcpy(pipeline->binary_bo->ptr.cpu + builder->stages[i].shader_offset,
170             util_dynarray_element(&shader->binary, uint8_t, 0),
171             util_dynarray_num_elements(&shader->binary, uint8_t));
172   }
173
174   return VK_SUCCESS;
175}
176
177static bool
178panvk_pipeline_static_sysval(struct panvk_pipeline *pipeline,
179                             unsigned id)
180{
181   switch (id) {
182   case PAN_SYSVAL_VIEWPORT_SCALE:
183   case PAN_SYSVAL_VIEWPORT_OFFSET:
184      return panvk_pipeline_static_state(pipeline, VK_DYNAMIC_STATE_VIEWPORT);
185   default:
186      return false;
187   }
188}
189
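/* Size and allocate the BO holding the pipeline's static descriptors:
 * per-stage renderer state descriptors (plus blend descriptors for the
 * fragment stage), the viewport descriptor when viewport/scissor are static,
 * and pre-baked sysval data for stages whose sysvals are all static.
 */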
190static void
191panvk_pipeline_builder_alloc_static_state_bo(struct panvk_pipeline_builder *builder,
192                                             struct panvk_pipeline *pipeline)
193{
194   struct panfrost_device *pdev =
195      &builder->device->physical_device->pdev;
196   unsigned bo_size = 0;
197
198   for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++) {
199      const struct panvk_shader *shader = builder->shaders[i];
200      if (!shader)
201         continue;
202
203      if (pipeline->fs.dynamic_rsd && i == MESA_SHADER_FRAGMENT)
204         continue;
205
206      bo_size = ALIGN_POT(bo_size, pan_alignment(RENDERER_STATE));
207      builder->stages[i].rsd_offset = bo_size;
208      bo_size += pan_size(RENDERER_STATE);
209      if (i == MESA_SHADER_FRAGMENT)
210         bo_size += pan_size(BLEND) * MAX2(pipeline->blend.state.rt_count, 1);
211   }
212
213   if (panvk_pipeline_static_state(pipeline, VK_DYNAMIC_STATE_VIEWPORT) &&
214       panvk_pipeline_static_state(pipeline, VK_DYNAMIC_STATE_SCISSOR)) {
215      bo_size = ALIGN_POT(bo_size, pan_alignment(VIEWPORT));
216      builder->vpd_offset = bo_size;
217      bo_size += pan_size(VIEWPORT);
218   }
219
220   for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++) {
221      const struct panvk_shader *shader = builder->shaders[i];
222      if (!shader || !shader->info.sysvals.sysval_count)
223         continue;
224
225      bool static_sysvals = true;
226      for (unsigned s = 0; s < shader->info.sysvals.sysval_count; s++) {
227         unsigned id = shader->info.sysvals.sysvals[s];
228         static_sysvals &= panvk_pipeline_static_sysval(pipeline, id);
229         switch (PAN_SYSVAL_TYPE(id)) {
230         case PAN_SYSVAL_VIEWPORT_SCALE:
231         case PAN_SYSVAL_VIEWPORT_OFFSET:
232            pipeline->sysvals[i].dirty_mask |= PANVK_DYNAMIC_VIEWPORT;
233            break;
234         default:
235            break;
236         }
237      }
238
239      if (!static_sysvals) {
240         builder->stages[i].sysvals_offset = ~0;
241         continue;
242      }
243
244      bo_size = ALIGN_POT(bo_size, 16);
245      builder->stages[i].sysvals_offset = bo_size;
246      bo_size += shader->info.sysvals.sysval_count * 16;
247   }
248
249   if (bo_size) {
250      pipeline->state_bo =
251         panfrost_bo_create(pdev, bo_size, 0, "Pipeline descriptors");
252      panfrost_bo_mmap(pipeline->state_bo);
253   }
254}
255
256static void
257panvk_pipeline_builder_upload_sysval(struct panvk_pipeline_builder *builder,
258                                     struct panvk_pipeline *pipeline,
259                                     unsigned id, union panvk_sysval_data *data)
260{
261   switch (PAN_SYSVAL_TYPE(id)) {
262   case PAN_SYSVAL_VIEWPORT_SCALE:
263      panvk_sysval_upload_viewport_scale(builder->create_info->pViewportState->pViewports,
264                                         data);
265      break;
266   case PAN_SYSVAL_VIEWPORT_OFFSET:
267      panvk_sysval_upload_viewport_offset(builder->create_info->pViewportState->pViewports,
268                                          data);
269      break;
270   default:
271      unreachable("Invalid static sysval");
272   }
273}
274
275static void
276panvk_pipeline_builder_init_sysvals(struct panvk_pipeline_builder *builder,
277                                    struct panvk_pipeline *pipeline,
278                                    gl_shader_stage stage)
279{
280   const struct panvk_shader *shader = builder->shaders[stage];
281
282   pipeline->sysvals[stage].ids = shader->info.sysvals;
283   pipeline->sysvals[stage].ubo_idx = shader->sysval_ubo;
284
285   if (!shader->info.sysvals.sysval_count ||
286       builder->stages[stage].sysvals_offset == ~0)
287      return;
288
289   union panvk_sysval_data *static_data =
290      pipeline->state_bo->ptr.cpu + builder->stages[stage].sysvals_offset;
291
292   pipeline->sysvals[stage].ubo =
293      pipeline->state_bo->ptr.gpu + builder->stages[stage].sysvals_offset;
294
295   for (unsigned i = 0; i < shader->info.sysvals.sysval_count; i++) {
296      unsigned id = shader->info.sysvals.sysvals[i];
297
298      panvk_pipeline_builder_upload_sysval(builder,
299                                           pipeline,
300                                           id, &static_data[i]);
301   }
302}
303
304static void
305panvk_pipeline_builder_init_shaders(struct panvk_pipeline_builder *builder,
306                                    struct panvk_pipeline *pipeline)
307{
308   for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++) {
309      const struct panvk_shader *shader = builder->shaders[i];
310      if (!shader)
311         continue;
312
313      pipeline->tls_size = MAX2(pipeline->tls_size, shader->info.tls_size);
314      pipeline->wls_size = MAX2(pipeline->wls_size, shader->info.wls_size);
315
316      if (i == MESA_SHADER_VERTEX && shader->info.vs.writes_point_size)
317         pipeline->ia.writes_point_size = true;
318
319      mali_ptr shader_ptr = pipeline->binary_bo->ptr.gpu +
320                            builder->stages[i].shader_offset;
321
322      void *rsd = pipeline->state_bo->ptr.cpu + builder->stages[i].rsd_offset;
323      mali_ptr gpu_rsd = pipeline->state_bo->ptr.gpu + builder->stages[i].rsd_offset;
324
325      if (i != MESA_SHADER_FRAGMENT) {
326         panvk_per_arch(emit_non_fs_rsd)(builder->device, &shader->info, shader_ptr, rsd);
327      } else if (!pipeline->fs.dynamic_rsd) {
328         void *bd = rsd + pan_size(RENDERER_STATE);
329
330         panvk_per_arch(emit_base_fs_rsd)(builder->device, pipeline, rsd);
331         for (unsigned rt = 0; rt < MAX2(pipeline->blend.state.rt_count, 1); rt++) {
332            panvk_per_arch(emit_blend)(builder->device, pipeline, rt, bd);
333            bd += pan_size(BLEND);
334         }
335      } else {
336         gpu_rsd = 0;
337         panvk_per_arch(emit_base_fs_rsd)(builder->device, pipeline, &pipeline->fs.rsd_template);
338         for (unsigned rt = 0; rt < MAX2(pipeline->blend.state.rt_count, 1); rt++) {
339            panvk_per_arch(emit_blend)(builder->device, pipeline, rt,
340                                       &pipeline->blend.bd_template[rt]);
341         }
342      }
343
344      pipeline->rsds[i] = gpu_rsd;
345      panvk_pipeline_builder_init_sysvals(builder, pipeline, i);
346   }
347
348   pipeline->num_ubos = builder->layout->num_ubos;
349   for (unsigned i = 0; i < ARRAY_SIZE(pipeline->sysvals); i++) {
350      if (pipeline->sysvals[i].ids.sysval_count)
351         pipeline->num_ubos = MAX2(pipeline->num_ubos, pipeline->sysvals[i].ubo_idx + 1);
352   }
353
354   pipeline->num_sysvals = 0;
355   for (unsigned i = 0; i < ARRAY_SIZE(pipeline->sysvals); i++)
356      pipeline->num_sysvals += pipeline->sysvals[i].ids.sysval_count;
357}
358
359
360static void
361panvk_pipeline_builder_parse_viewport(struct panvk_pipeline_builder *builder,
362                                      struct panvk_pipeline *pipeline)
363{
364   /* The spec says:
365    *
366    *    pViewportState is a pointer to an instance of the
367    *    VkPipelineViewportStateCreateInfo structure, and is ignored if the
368    *    pipeline has rasterization disabled.
369    */
370   if (!builder->rasterizer_discard &&
371       panvk_pipeline_static_state(pipeline, VK_DYNAMIC_STATE_VIEWPORT) &&
372       panvk_pipeline_static_state(pipeline, VK_DYNAMIC_STATE_SCISSOR)) {
373      void *vpd = pipeline->state_bo->ptr.cpu + builder->vpd_offset;
374      panvk_per_arch(emit_viewport)(builder->create_info->pViewportState->pViewports,
375                                    builder->create_info->pViewportState->pScissors,
376                                    vpd);
377      pipeline->vpd = pipeline->state_bo->ptr.gpu +
378                      builder->vpd_offset;
379   }
380   if (!builder->rasterizer_discard && panvk_pipeline_static_state(pipeline, VK_DYNAMIC_STATE_VIEWPORT))
381      pipeline->viewport = builder->create_info->pViewportState->pViewports[0];
382
383   if (!builder->rasterizer_discard && panvk_pipeline_static_state(pipeline, VK_DYNAMIC_STATE_SCISSOR))
384      pipeline->scissor = builder->create_info->pViewportState->pScissors[0];
385}
386
387static void
388panvk_pipeline_builder_parse_dynamic(struct panvk_pipeline_builder *builder,
389                                     struct panvk_pipeline *pipeline)
390{
391   const VkPipelineDynamicStateCreateInfo *dynamic_info =
392      builder->create_info->pDynamicState;
393
394   if (!dynamic_info)
395      return;
396
397   for (uint32_t i = 0; i < dynamic_info->dynamicStateCount; i++) {
398      VkDynamicState state = dynamic_info->pDynamicStates[i];
399      switch (state) {
400      case VK_DYNAMIC_STATE_VIEWPORT ... VK_DYNAMIC_STATE_STENCIL_REFERENCE:
401         pipeline->dynamic_state_mask |= 1 << state;
402         break;
403      default:
404         unreachable("unsupported dynamic state");
405      }
406   }
407
408}
409
410static enum mali_draw_mode
411translate_prim_topology(VkPrimitiveTopology in)
412{
413   switch (in) {
414   case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
415      return MALI_DRAW_MODE_POINTS;
416   case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
417      return MALI_DRAW_MODE_LINES;
418   case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
419      return MALI_DRAW_MODE_LINE_STRIP;
420   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
421      return MALI_DRAW_MODE_TRIANGLES;
422   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
423      return MALI_DRAW_MODE_TRIANGLE_STRIP;
424   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
425      return MALI_DRAW_MODE_TRIANGLE_FAN;
426   case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
427   case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
428   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
429   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
430   case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
431   default:
432      unreachable("Invalid primitive type");
433   }
434}
435
436static void
437panvk_pipeline_builder_parse_input_assembly(struct panvk_pipeline_builder *builder,
438                                            struct panvk_pipeline *pipeline)
439{
440   pipeline->ia.primitive_restart =
441      builder->create_info->pInputAssemblyState->primitiveRestartEnable;
442   pipeline->ia.topology =
443      translate_prim_topology(builder->create_info->pInputAssemblyState->topology);
444}
445
446static enum pipe_logicop
447translate_logicop(VkLogicOp in)
448{
449   switch (in) {
450   case VK_LOGIC_OP_CLEAR: return PIPE_LOGICOP_CLEAR;
451   case VK_LOGIC_OP_AND: return PIPE_LOGICOP_AND;
452   case VK_LOGIC_OP_AND_REVERSE: return PIPE_LOGICOP_AND_REVERSE;
453   case VK_LOGIC_OP_COPY: return PIPE_LOGICOP_COPY;
454   case VK_LOGIC_OP_AND_INVERTED: return PIPE_LOGICOP_AND_INVERTED;
455   case VK_LOGIC_OP_NO_OP: return PIPE_LOGICOP_NOOP;
456   case VK_LOGIC_OP_XOR: return PIPE_LOGICOP_XOR;
457   case VK_LOGIC_OP_OR: return PIPE_LOGICOP_OR;
458   case VK_LOGIC_OP_NOR: return PIPE_LOGICOP_NOR;
459   case VK_LOGIC_OP_EQUIVALENT: return PIPE_LOGICOP_EQUIV;
460   case VK_LOGIC_OP_INVERT: return PIPE_LOGICOP_INVERT;
461   case VK_LOGIC_OP_OR_REVERSE: return PIPE_LOGICOP_OR_REVERSE;
462   case VK_LOGIC_OP_COPY_INVERTED: return PIPE_LOGICOP_COPY_INVERTED;
463   case VK_LOGIC_OP_OR_INVERTED: return PIPE_LOGICOP_OR_INVERTED;
464   case VK_LOGIC_OP_NAND: return PIPE_LOGICOP_NAND;
465   case VK_LOGIC_OP_SET: return PIPE_LOGICOP_SET;
466   default: unreachable("Invalid logicop");
467   }
468}
469
470static enum blend_func
471translate_blend_op(VkBlendOp in)
472{
473   switch (in) {
474   case VK_BLEND_OP_ADD: return BLEND_FUNC_ADD;
475   case VK_BLEND_OP_SUBTRACT: return BLEND_FUNC_SUBTRACT;
476   case VK_BLEND_OP_REVERSE_SUBTRACT: return BLEND_FUNC_REVERSE_SUBTRACT;
477   case VK_BLEND_OP_MIN: return BLEND_FUNC_MIN;
478   case VK_BLEND_OP_MAX: return BLEND_FUNC_MAX;
479   default: unreachable("Invalid blend op");
480   }
481}
482
483static enum blend_factor
484translate_blend_factor(VkBlendFactor in, bool dest_has_alpha)
485{
486   switch (in) {
487   case VK_BLEND_FACTOR_ZERO:
488   case VK_BLEND_FACTOR_ONE:
489      return BLEND_FACTOR_ZERO;
490   case VK_BLEND_FACTOR_SRC_COLOR:
491   case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
492      return BLEND_FACTOR_SRC_COLOR;
493   case VK_BLEND_FACTOR_DST_COLOR:
494   case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
495      return BLEND_FACTOR_DST_COLOR;
496   case VK_BLEND_FACTOR_SRC_ALPHA:
497   case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
498      return BLEND_FACTOR_SRC_ALPHA;
499   case VK_BLEND_FACTOR_DST_ALPHA:
500   case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
501      return dest_has_alpha ? BLEND_FACTOR_DST_ALPHA : BLEND_FACTOR_ZERO;
502   case VK_BLEND_FACTOR_CONSTANT_COLOR:
503   case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
504      return BLEND_FACTOR_CONSTANT_COLOR;
505   case VK_BLEND_FACTOR_CONSTANT_ALPHA:
506   case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
507      return BLEND_FACTOR_CONSTANT_ALPHA;
508   case VK_BLEND_FACTOR_SRC1_COLOR:
509   case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR:
510      return BLEND_FACTOR_SRC1_COLOR;
511   case VK_BLEND_FACTOR_SRC1_ALPHA:
512   case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA:
513      return BLEND_FACTOR_SRC1_ALPHA;
514   case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
515      return BLEND_FACTOR_SRC_ALPHA_SATURATE;
516   default: unreachable("Invalid blend factor");
517   }
518}
519
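/* pan_blend equations encode each factor together with an invert flag
 * instead of having separate ONE_MINUS_* factors, which is why
 * translate_blend_factor() maps e.g. ONE and ZERO to the same value and the
 * "1 - x" part is reported separately here.
 */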
520static bool
521inverted_blend_factor(VkBlendFactor in, bool dest_has_alpha)
522{
523   switch (in) {
524   case VK_BLEND_FACTOR_ONE:
525   case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
526   case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
527   case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
528   case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
529   case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
530   case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR:
531   case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA:
532      return true;
533   case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
534      return dest_has_alpha;
535   case VK_BLEND_FACTOR_DST_ALPHA:
536      return !dest_has_alpha;
537   default:
538      return false;
539   }
540}
541
542bool
543panvk_per_arch(blend_needs_lowering)(const struct panfrost_device *dev,
544                                     const struct pan_blend_state *state,
545                                     unsigned rt)
546{
547   /* LogicOp requires a blend shader */
548   if (state->logicop_enable)
549      return true;
550
551   /* Not all formats can be blended by fixed-function hardware */
552   if (!panfrost_blendable_formats_v7[state->rts[rt].format].internal)
553      return true;
554
555   unsigned constant_mask = pan_blend_constant_mask(state->rts[rt].equation);
556
557   /* v6 doesn't support blend constants in fixed-function blend equations.
558    * v7 only uses the constant from RT 0 (TODO: what if the other RTs use
559    * the same constant, or share one?)
560    */
561   if (constant_mask && (PAN_ARCH == 6 || (PAN_ARCH == 7 && rt > 0)))
562      return true;
563
564   if (!pan_blend_is_homogenous_constant(constant_mask, state->constants))
565      return true;
566
567   bool supports_2src = pan_blend_supports_2src(dev->arch);
568   return !pan_blend_can_fixed_function(state->rts[rt].equation, supports_2src);
569}
570
571static void
572panvk_pipeline_builder_parse_color_blend(struct panvk_pipeline_builder *builder,
573                                         struct panvk_pipeline *pipeline)
574{
575   struct panfrost_device *pdev = &builder->device->physical_device->pdev;
576   pipeline->blend.state.logicop_enable =
577      builder->create_info->pColorBlendState->logicOpEnable;
578   pipeline->blend.state.logicop_func =
579      translate_logicop(builder->create_info->pColorBlendState->logicOp);
580   pipeline->blend.state.rt_count = util_last_bit(builder->active_color_attachments);
581   memcpy(pipeline->blend.state.constants,
582          builder->create_info->pColorBlendState->blendConstants,
583          sizeof(pipeline->blend.state.constants));
584
585   for (unsigned i = 0; i < pipeline->blend.state.rt_count; i++) {
586      const VkPipelineColorBlendAttachmentState *in =
587         &builder->create_info->pColorBlendState->pAttachments[i];
588      struct pan_blend_rt_state *out = &pipeline->blend.state.rts[i];
589
590      out->format = builder->color_attachment_formats[i];
591
592      bool dest_has_alpha = util_format_has_alpha(out->format);
593
594      out->nr_samples = builder->create_info->pMultisampleState->rasterizationSamples;
595      out->equation.blend_enable = in->blendEnable;
596      out->equation.color_mask = in->colorWriteMask;
597      out->equation.rgb_func = translate_blend_op(in->colorBlendOp);
598      out->equation.rgb_src_factor = translate_blend_factor(in->srcColorBlendFactor, dest_has_alpha);
599      out->equation.rgb_invert_src_factor = inverted_blend_factor(in->srcColorBlendFactor, dest_has_alpha);
600      out->equation.rgb_dst_factor = translate_blend_factor(in->dstColorBlendFactor, dest_has_alpha);
601      out->equation.rgb_invert_dst_factor = inverted_blend_factor(in->dstColorBlendFactor, dest_has_alpha);
602      out->equation.alpha_func = translate_blend_op(in->alphaBlendOp);
603      out->equation.alpha_src_factor = translate_blend_factor(in->srcAlphaBlendFactor, dest_has_alpha);
604      out->equation.alpha_invert_src_factor = inverted_blend_factor(in->srcAlphaBlendFactor, dest_has_alpha);
605      out->equation.alpha_dst_factor = translate_blend_factor(in->dstAlphaBlendFactor, dest_has_alpha);
606      out->equation.alpha_invert_dst_factor = inverted_blend_factor(in->dstAlphaBlendFactor, dest_has_alpha);
607
608      pipeline->blend.reads_dest |= pan_blend_reads_dest(out->equation);
609
610      unsigned constant_mask =
611         panvk_per_arch(blend_needs_lowering)(pdev, &pipeline->blend.state, i) ?
612         0 : pan_blend_constant_mask(out->equation);
613      pipeline->blend.constant[i].index = ffs(constant_mask) - 1;
614      if (constant_mask && PAN_ARCH >= 6) {
615         /* On Bifrost, the blend constant is expressed with a UNORM of the
616          * size of the target format. The value is then shifted such that
617          * used bits are in the MSB. Here we calculate the factor at pipeline
618          * creation time so we only have to do a
619          *   hw_constant = float_constant * factor;
620          * at descriptor emission time.
621          */
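         /* Illustration: for an 8-bit UNORM channel, factor is
          * 0xFF << 8 = 0xFF00, so a blend constant of 0.5 becomes
          * hw_constant = 0.5 * 0xFF00 = 0x7F80.
          */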
622         const struct util_format_description *format_desc =
623            util_format_description(out->format);
624         unsigned chan_size = 0;
625         for (unsigned c = 0; c < format_desc->nr_channels; c++)
626            chan_size = MAX2(format_desc->channel[c].size, chan_size);
627         pipeline->blend.constant[i].bifrost_factor =
628            ((1 << chan_size) - 1) << (16 - chan_size);
629      }
630   }
631}
632
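/* Note: per the Vulkan spec, sample shading requires shading at least
 * ceil(minSampleShading * rasterizationSamples) samples per fragment;
 * min_samples below is derived from that product, clamped to at least 1.
 */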
633static void
634panvk_pipeline_builder_parse_multisample(struct panvk_pipeline_builder *builder,
635                                         struct panvk_pipeline *pipeline)
636{
637   unsigned nr_samples =
638      MAX2(builder->create_info->pMultisampleState->rasterizationSamples, 1);
639
640   pipeline->ms.rast_samples =
641      builder->create_info->pMultisampleState->rasterizationSamples;
642   pipeline->ms.sample_mask =
643      builder->create_info->pMultisampleState->pSampleMask ?
644      builder->create_info->pMultisampleState->pSampleMask[0] : UINT16_MAX;
645   pipeline->ms.min_samples =
646      MAX2(builder->create_info->pMultisampleState->minSampleShading * nr_samples, 1);
647}
648
649static enum mali_stencil_op
650translate_stencil_op(VkStencilOp in)
651{
652   switch (in) {
653   case VK_STENCIL_OP_KEEP: return MALI_STENCIL_OP_KEEP;
654   case VK_STENCIL_OP_ZERO: return MALI_STENCIL_OP_ZERO;
655   case VK_STENCIL_OP_REPLACE: return MALI_STENCIL_OP_REPLACE;
656   case VK_STENCIL_OP_INCREMENT_AND_CLAMP: return MALI_STENCIL_OP_INCR_SAT;
657   case VK_STENCIL_OP_DECREMENT_AND_CLAMP: return MALI_STENCIL_OP_DECR_SAT;
658   case VK_STENCIL_OP_INCREMENT_AND_WRAP: return MALI_STENCIL_OP_INCR_WRAP;
659   case VK_STENCIL_OP_DECREMENT_AND_WRAP: return MALI_STENCIL_OP_DECR_WRAP;
660   case VK_STENCIL_OP_INVERT: return MALI_STENCIL_OP_INVERT;
661   default: unreachable("Invalid stencil op");
662   }
663}
664
665static void
666panvk_pipeline_builder_parse_zs(struct panvk_pipeline_builder *builder,
667                                struct panvk_pipeline *pipeline)
668{
669   pipeline->zs.z_test = builder->create_info->pDepthStencilState->depthTestEnable;
670   pipeline->zs.z_write = builder->create_info->pDepthStencilState->depthWriteEnable;
671   pipeline->zs.z_compare_func =
672      panvk_per_arch(translate_compare_func)(builder->create_info->pDepthStencilState->depthCompareOp);
673   pipeline->zs.s_test = builder->create_info->pDepthStencilState->stencilTestEnable;
674   pipeline->zs.s_front.fail_op =
675      translate_stencil_op(builder->create_info->pDepthStencilState->front.failOp);
676   pipeline->zs.s_front.pass_op =
677      translate_stencil_op(builder->create_info->pDepthStencilState->front.passOp);
678   pipeline->zs.s_front.z_fail_op =
679      translate_stencil_op(builder->create_info->pDepthStencilState->front.depthFailOp);
680   pipeline->zs.s_front.compare_func =
681      panvk_per_arch(translate_compare_func)(builder->create_info->pDepthStencilState->front.compareOp);
682   pipeline->zs.s_front.compare_mask =
683      builder->create_info->pDepthStencilState->front.compareMask;
684   pipeline->zs.s_front.write_mask =
685      builder->create_info->pDepthStencilState->front.writeMask;
686   pipeline->zs.s_front.ref =
687      builder->create_info->pDepthStencilState->front.reference;
688   pipeline->zs.s_back.fail_op =
689      translate_stencil_op(builder->create_info->pDepthStencilState->back.failOp);
690   pipeline->zs.s_back.pass_op =
691      translate_stencil_op(builder->create_info->pDepthStencilState->back.passOp);
692   pipeline->zs.s_back.z_fail_op =
693      translate_stencil_op(builder->create_info->pDepthStencilState->back.depthFailOp);
694   pipeline->zs.s_back.compare_func =
695      panvk_per_arch(translate_compare_func)(builder->create_info->pDepthStencilState->back.compareOp);
696   pipeline->zs.s_back.compare_mask =
697      builder->create_info->pDepthStencilState->back.compareMask;
698   pipeline->zs.s_back.write_mask =
699      builder->create_info->pDepthStencilState->back.writeMask;
700   pipeline->zs.s_back.ref =
701      builder->create_info->pDepthStencilState->back.reference;
702}
703
704static void
705panvk_pipeline_builder_parse_rast(struct panvk_pipeline_builder *builder,
706                                  struct panvk_pipeline *pipeline)
707{
708   pipeline->rast.clamp_depth = builder->create_info->pRasterizationState->depthClampEnable;
709   pipeline->rast.depth_bias.enable = builder->create_info->pRasterizationState->depthBiasEnable;
710   pipeline->rast.depth_bias.constant_factor =
711      builder->create_info->pRasterizationState->depthBiasConstantFactor;
712   pipeline->rast.depth_bias.clamp = builder->create_info->pRasterizationState->depthBiasClamp;
713   pipeline->rast.depth_bias.slope_factor = builder->create_info->pRasterizationState->depthBiasSlopeFactor;
714   pipeline->rast.front_ccw = builder->create_info->pRasterizationState->frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE;
715   pipeline->rast.cull_front_face = builder->create_info->pRasterizationState->cullMode & VK_CULL_MODE_FRONT_BIT;
716   pipeline->rast.cull_back_face = builder->create_info->pRasterizationState->cullMode & VK_CULL_MODE_BACK_BIT;
717}
718
719static bool
720panvk_fs_required(struct panvk_pipeline *pipeline)
721{
722   const struct pan_shader_info *info = &pipeline->fs.info;
723
724   /* If we generally have side effects */
725   if (info->fs.sidefx)
726      return true;
727
728   /* If colour is written we need to execute */
729   const struct pan_blend_state *blend = &pipeline->blend.state;
730   for (unsigned i = 0; i < blend->rt_count; ++i) {
731      if (blend->rts[i].equation.color_mask)
732         return true;
733   }
734
735   /* If depth is written and not implied we need to execute.
736    * TODO: Predicate on Z/S writes being enabled */
737   return (info->fs.writes_depth || info->fs.writes_stencil);
738}
739
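/* Dynamic states that get baked into the fragment renderer state / blend
 * descriptors. If any of them is dynamic, the RSD can't be emitted into the
 * static state BO; panvk_pipeline_builder_init_shaders() fills
 * fs.rsd_template and blend.bd_template instead, so the final descriptors can
 * be built later from those templates.
 */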
740#define PANVK_DYNAMIC_FS_RSD_MASK \
741        ((1 << VK_DYNAMIC_STATE_DEPTH_BIAS) | \
742         (1 << VK_DYNAMIC_STATE_BLEND_CONSTANTS) | \
743         (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK) | \
744         (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK) | \
745         (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE))
746
747static void
748panvk_pipeline_builder_init_fs_state(struct panvk_pipeline_builder *builder,
749                                     struct panvk_pipeline *pipeline)
750{
751   if (!builder->shaders[MESA_SHADER_FRAGMENT])
752      return;
753
754   pipeline->fs.dynamic_rsd =
755      pipeline->dynamic_state_mask & PANVK_DYNAMIC_FS_RSD_MASK;
756   pipeline->fs.address = pipeline->binary_bo->ptr.gpu +
757                          builder->stages[MESA_SHADER_FRAGMENT].shader_offset;
758   pipeline->fs.info = builder->shaders[MESA_SHADER_FRAGMENT]->info;
759   pipeline->fs.rt_mask = builder->active_color_attachments;
760   pipeline->fs.required = panvk_fs_required(pipeline);
761}
762
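/* Merge one varying (an input or output of some stage) into the
 * pipeline-wide varying table: record its location for the stage, mark the
 * buffer it lives in, and keep the widest format seen so far for that
 * location.
 */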
763static void
764panvk_pipeline_update_varying_slot(struct panvk_varyings_info *varyings,
765                                   gl_shader_stage stage,
766                                   const struct pan_shader_varying *varying,
767                                   bool input)
768{
769   bool fs = stage == MESA_SHADER_FRAGMENT;
770   gl_varying_slot loc = varying->location;
771   enum panvk_varying_buf_id buf_id =
772      panvk_varying_buf_id(fs, loc);
773
774   varyings->stage[stage].loc[varyings->stage[stage].count++] = loc;
775
776   if (panvk_varying_is_builtin(stage, loc)) {
777      varyings->buf_mask |= 1 << buf_id;
778      return;
779   }
780
781   assert(loc < ARRAY_SIZE(varyings->varying));
782
783   enum pipe_format new_fmt = varying->format;
784   enum pipe_format old_fmt = varyings->varying[loc].format;
785
786   BITSET_SET(varyings->active, loc);
787
788   /* We expect inputs to either be set by a previous stage or be
789    * built-in. Skip the entry if that's not the case; we'll emit a
790    * constant varying returning zero for those entries.
791    */
792   if (input && old_fmt == PIPE_FORMAT_NONE)
793      return;
794
795   unsigned new_size = util_format_get_blocksize(new_fmt);
796   unsigned old_size = util_format_get_blocksize(old_fmt);
797
798   if (old_size < new_size)
799      varyings->varying[loc].format = new_fmt;
800
801   varyings->buf_mask |= 1 << buf_id;
802}
803
804static void
805panvk_pipeline_builder_collect_varyings(struct panvk_pipeline_builder *builder,
806                                        struct panvk_pipeline *pipeline)
807{
808   for (uint32_t s = 0; s < MESA_SHADER_STAGES; s++) {
809      if (!builder->shaders[s])
810         continue;
811
812      const struct pan_shader_info *info = &builder->shaders[s]->info;
813
814      for (unsigned i = 0; i < info->varyings.input_count; i++) {
815         panvk_pipeline_update_varying_slot(&pipeline->varyings, s,
816                                            &info->varyings.input[i],
817                                            true);
818      }
819
820      for (unsigned i = 0; i < info->varyings.output_count; i++) {
821         panvk_pipeline_update_varying_slot(&pipeline->varyings, s,
822                                            &info->varyings.output[i],
823                                            false);
824      }
825   }
826
827   /* TODO: Xfb */
828   gl_varying_slot loc;
829   BITSET_FOREACH_SET(loc, pipeline->varyings.active, VARYING_SLOT_MAX) {
830      if (pipeline->varyings.varying[loc].format == PIPE_FORMAT_NONE)
831         continue;
832
833      enum panvk_varying_buf_id buf_id =
834         panvk_varying_buf_id(false, loc);
835      unsigned buf_idx = panvk_varying_buf_index(&pipeline->varyings, buf_id);
836      unsigned varying_sz = panvk_varying_size(&pipeline->varyings, loc);
837
838      pipeline->varyings.varying[loc].buf = buf_idx;
839      pipeline->varyings.varying[loc].offset =
840         pipeline->varyings.buf[buf_idx].stride;
841      pipeline->varyings.buf[buf_idx].stride += varying_sz;
842   }
843}
844
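/* Translate the vertex input state into panvk_attribs_info. Vertex ID and
 * instance ID get dedicated "special" attribute buffers appended after the
 * user-provided bindings when the vertex shader's attribute range reaches
 * the PAN_VERTEX_ID/PAN_INSTANCE_ID slots.
 */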
845static void
846panvk_pipeline_builder_parse_vertex_input(struct panvk_pipeline_builder *builder,
847                                          struct panvk_pipeline *pipeline)
848{
849   struct panvk_attribs_info *attribs = &pipeline->attribs;
850   const VkPipelineVertexInputStateCreateInfo *info =
851      builder->create_info->pVertexInputState;
852
853   for (unsigned i = 0; i < info->vertexBindingDescriptionCount; i++) {
854      const VkVertexInputBindingDescription *desc =
855         &info->pVertexBindingDescriptions[i];
856      attribs->buf_count = MAX2(desc->binding + 1, attribs->buf_count);
857      attribs->buf[desc->binding].stride = desc->stride;
858      attribs->buf[desc->binding].special = false;
859   }
860
861   for (unsigned i = 0; i < info->vertexAttributeDescriptionCount; i++) {
862      const VkVertexInputAttributeDescription *desc =
863         &info->pVertexAttributeDescriptions[i];
864      attribs->attrib[desc->location].buf = desc->binding;
865      attribs->attrib[desc->location].format =
866         vk_format_to_pipe_format(desc->format);
867      attribs->attrib[desc->location].offset = desc->offset;
868   }
869
870   const struct pan_shader_info *vs =
871      &builder->shaders[MESA_SHADER_VERTEX]->info;
872
873   if (vs->attribute_count >= PAN_VERTEX_ID) {
874      attribs->buf[attribs->buf_count].special = true;
875      attribs->buf[attribs->buf_count].special_id = PAN_VERTEX_ID;
876      attribs->attrib[PAN_VERTEX_ID].buf = attribs->buf_count++;
877      attribs->attrib[PAN_VERTEX_ID].format = PIPE_FORMAT_R32_UINT;
878   }
879
880   if (vs->attribute_count >= PAN_INSTANCE_ID) {
881      attribs->buf[attribs->buf_count].special = true;
882      attribs->buf[attribs->buf_count].special_id = PAN_INSTANCE_ID;
883      attribs->attrib[PAN_INSTANCE_ID].buf = attribs->buf_count++;
884      attribs->attrib[PAN_INSTANCE_ID].format = PIPE_FORMAT_R32_UINT;
885   }
886
887   attribs->attrib_count = MAX2(attribs->attrib_count, vs->attribute_count);
888}
889
890static VkResult
891panvk_pipeline_builder_build(struct panvk_pipeline_builder *builder,
892                             struct panvk_pipeline **pipeline)
893{
894   VkResult result = panvk_pipeline_builder_create_pipeline(builder, pipeline);
895   if (result != VK_SUCCESS)
896      return result;
897
898   /* TODO: make those functions return a result and handle errors */
899   panvk_pipeline_builder_parse_dynamic(builder, *pipeline);
900   panvk_pipeline_builder_parse_color_blend(builder, *pipeline);
901   panvk_pipeline_builder_compile_shaders(builder, *pipeline);
902   panvk_pipeline_builder_collect_varyings(builder, *pipeline);
903   panvk_pipeline_builder_parse_input_assembly(builder, *pipeline);
904   panvk_pipeline_builder_parse_multisample(builder, *pipeline);
905   panvk_pipeline_builder_parse_zs(builder, *pipeline);
906   panvk_pipeline_builder_parse_rast(builder, *pipeline);
907   panvk_pipeline_builder_parse_vertex_input(builder, *pipeline);
908
909
910   panvk_pipeline_builder_upload_shaders(builder, *pipeline);
911   panvk_pipeline_builder_init_fs_state(builder, *pipeline);
912   panvk_pipeline_builder_alloc_static_state_bo(builder, *pipeline);
913   panvk_pipeline_builder_init_shaders(builder, *pipeline);
914   panvk_pipeline_builder_parse_viewport(builder, *pipeline);
915
916   return VK_SUCCESS;
917}
918
919static void
920panvk_pipeline_builder_init_graphics(struct panvk_pipeline_builder *builder,
921                                     struct panvk_device *dev,
922                                     struct panvk_pipeline_cache *cache,
923                                     const VkGraphicsPipelineCreateInfo *create_info,
924                                     const VkAllocationCallbacks *alloc)
925{
926   VK_FROM_HANDLE(panvk_pipeline_layout, layout, create_info->layout);
927   assert(layout);
928   *builder = (struct panvk_pipeline_builder) {
929      .device = dev,
930      .cache = cache,
931      .layout = layout,
932      .create_info = create_info,
933      .alloc = alloc,
934   };
935
936   builder->rasterizer_discard =
937      create_info->pRasterizationState->rasterizerDiscardEnable;
938
939   if (builder->rasterizer_discard) {
940      builder->samples = VK_SAMPLE_COUNT_1_BIT;
941   } else {
942      builder->samples = create_info->pMultisampleState->rasterizationSamples;
943
944      const struct panvk_render_pass *pass = panvk_render_pass_from_handle(create_info->renderPass);
945      const struct panvk_subpass *subpass = &pass->subpasses[create_info->subpass];
946
947      builder->use_depth_stencil_attachment =
948         subpass->zs_attachment.idx != VK_ATTACHMENT_UNUSED;
949
950      assert(subpass->color_count <= create_info->pColorBlendState->attachmentCount);
951      builder->active_color_attachments = 0;
952      for (uint32_t i = 0; i < subpass->color_count; i++) {
953         uint32_t idx = subpass->color_attachments[i].idx;
954         if (idx == VK_ATTACHMENT_UNUSED)
955            continue;
956
957         builder->active_color_attachments |= 1 << i;
958         builder->color_attachment_formats[i] = pass->attachments[idx].format;
959      }
960   }
961}
962
963VkResult
964panvk_per_arch(CreateGraphicsPipelines)(VkDevice device,
965                                        VkPipelineCache pipelineCache,
966                                        uint32_t count,
967                                        const VkGraphicsPipelineCreateInfo *pCreateInfos,
968                                        const VkAllocationCallbacks *pAllocator,
969                                        VkPipeline *pPipelines)
970{
971   VK_FROM_HANDLE(panvk_device, dev, device);
972   VK_FROM_HANDLE(panvk_pipeline_cache, cache, pipelineCache);
973
974   for (uint32_t i = 0; i < count; i++) {
975      struct panvk_pipeline_builder builder;
976      panvk_pipeline_builder_init_graphics(&builder, dev, cache,
977                                           &pCreateInfos[i], pAllocator);
978
979      struct panvk_pipeline *pipeline;
980      VkResult result = panvk_pipeline_builder_build(&builder, &pipeline);
981      panvk_pipeline_builder_finish(&builder);
982
983      if (result != VK_SUCCESS) {
984         for (uint32_t j = 0; j < i; j++) {
985            panvk_DestroyPipeline(device, pPipelines[j], pAllocator);
986            pPipelines[j] = VK_NULL_HANDLE;
987         }
988
989         return result;
990      }
991
992      pPipelines[i] = panvk_pipeline_to_handle(pipeline);
993   }
994
995   return VK_SUCCESS;
996}
997