17ec681f3Smrg/*
27ec681f3Smrg * Copyright (C) 2021 Collabora, Ltd.
37ec681f3Smrg *
47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a
57ec681f3Smrg * copy of this software and associated documentation files (the "Software"),
67ec681f3Smrg * to deal in the Software without restriction, including without limitation
77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the
97ec681f3Smrg * Software is furnished to do so, subject to the following conditions:
107ec681f3Smrg *
117ec681f3Smrg * The above copyright notice and this permission notice (including the next
127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the
137ec681f3Smrg * Software.
147ec681f3Smrg *
157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
207ec681f3Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
217ec681f3Smrg * SOFTWARE.
227ec681f3Smrg *
237ec681f3Smrg */
247ec681f3Smrg
257ec681f3Smrg#ifndef __PAN_SHADER_H__
267ec681f3Smrg#define __PAN_SHADER_H__
277ec681f3Smrg
287ec681f3Smrg#include "compiler/nir/nir.h"
297ec681f3Smrg#include "panfrost/util/pan_ir.h"
307ec681f3Smrg
317ec681f3Smrg#include "pan_device.h"
327ec681f3Smrg#include "genxml/gen_macros.h"
337ec681f3Smrg
347ec681f3Smrgstruct panfrost_device;
357ec681f3Smrg
367ec681f3Smrg#ifdef PAN_ARCH
/* Per-architecture accessor for the NIR compiler options. Takes no arguments
 * and returns a pointer to a read-only nir_shader_compiler_options.
 *
 * NOTE(review): the name is wrapped in GENX(), which presumably mangles it
 * per PAN_ARCH (see genxml/gen_macros.h) -- confirm there. Ownership/lifetime
 * of the returned object is not visible from this header. */
377ec681f3Smrgconst nir_shader_compiler_options *
387ec681f3SmrgGENX(pan_shader_get_compiler_options)(void);
397ec681f3Smrg
/* Per-architecture shader compile entry point; returns nothing.
 *
 * NOTE(review): the definition is not in this header. Presumably `nir` is
 * compiled according to `inputs`, the resulting code is written to `binary`
 * and `info` is filled in for later use by pan_shader_prepare_rsd() --
 * confirm against the implementation. */
407ec681f3Smrgvoid
417ec681f3SmrgGENX(pan_shader_compile)(nir_shader *nir,
427ec681f3Smrg                         struct panfrost_compile_inputs *inputs,
437ec681f3Smrg                         struct util_dynarray *binary,
447ec681f3Smrg                         struct pan_shader_info *info);
457ec681f3Smrg
467ec681f3Smrg#if PAN_ARCH <= 5
477ec681f3Smrgstatic inline void
487ec681f3Smrgpan_shader_prepare_midgard_rsd(const struct pan_shader_info *info,
497ec681f3Smrg                               struct MALI_RENDERER_STATE *rsd)
507ec681f3Smrg{
517ec681f3Smrg        assert((info->push.count & 3) == 0);
527ec681f3Smrg
537ec681f3Smrg        rsd->properties.uniform_count = info->push.count / 4;
547ec681f3Smrg        rsd->properties.shader_has_side_effects = info->writes_global;
557ec681f3Smrg        rsd->properties.fp_mode = MALI_FP_MODE_GL_INF_NAN_ALLOWED;
567ec681f3Smrg
577ec681f3Smrg        /* For fragment shaders, work register count, early-z, reads at draw-time */
587ec681f3Smrg
597ec681f3Smrg        if (info->stage != MESA_SHADER_FRAGMENT) {
607ec681f3Smrg                rsd->properties.work_register_count = info->work_reg_count;
617ec681f3Smrg        } else {
627ec681f3Smrg                rsd->properties.shader_reads_tilebuffer =
637ec681f3Smrg                        info->fs.outputs_read;
647ec681f3Smrg
657ec681f3Smrg                /* However, forcing early-z in the shader overrides draw-time */
667ec681f3Smrg                rsd->properties.force_early_z =
677ec681f3Smrg                        info->fs.early_fragment_tests;
687ec681f3Smrg        }
697ec681f3Smrg}
707ec681f3Smrg
717ec681f3Smrg#else
727ec681f3Smrg
737ec681f3Smrg/* Classify a shader into the following pixel kill categories:
747ec681f3Smrg *
757ec681f3Smrg * (force early, strong early): no side effects/depth/stencil/coverage writes (force)
767ec681f3Smrg * (weak early, weak early): no side effects/depth/stencil/coverage writes
777ec681f3Smrg * (weak early, force late): no side effects/depth/stencil writes
787ec681f3Smrg * (force late, weak early): side effects but no depth/stencil/coverage writes
797ec681f3Smrg * (force late, force early): only run for side effects
807ec681f3Smrg * (force late, force late): depth/stencil writes
817ec681f3Smrg *
827ec681f3Smrg * Note that discard is considered a coverage write. TODO: what about
837ec681f3Smrg * alpha-to-coverage?
847ec681f3Smrg * */
857ec681f3Smrg
867ec681f3Smrg#define SET_PIXEL_KILL(kill, update) do { \
877ec681f3Smrg        rsd->properties.pixel_kill_operation = MALI_PIXEL_KILL_## kill; \
887ec681f3Smrg        rsd->properties.zs_update_operation = MALI_PIXEL_KILL_## update; \
897ec681f3Smrg} while(0)
907ec681f3Smrg
917ec681f3Smrgstatic inline void
927ec681f3Smrgpan_shader_classify_pixel_kill_coverage(const struct pan_shader_info *info,
937ec681f3Smrg                struct MALI_RENDERER_STATE *rsd)
947ec681f3Smrg{
957ec681f3Smrg        bool force_early = info->fs.early_fragment_tests;
967ec681f3Smrg        bool sidefx = info->writes_global;
977ec681f3Smrg        bool coverage = info->fs.writes_coverage || info->fs.can_discard;
987ec681f3Smrg        bool depth = info->fs.writes_depth;
997ec681f3Smrg        bool stencil = info->fs.writes_stencil;
1007ec681f3Smrg
1017ec681f3Smrg        rsd->properties.shader_modifies_coverage = coverage;
1027ec681f3Smrg
1037ec681f3Smrg        if (force_early)
1047ec681f3Smrg                SET_PIXEL_KILL(FORCE_EARLY, STRONG_EARLY);
1057ec681f3Smrg        else if (depth || stencil || (sidefx && coverage))
1067ec681f3Smrg                SET_PIXEL_KILL(FORCE_LATE, FORCE_LATE);
1077ec681f3Smrg        else if (sidefx)
1087ec681f3Smrg                SET_PIXEL_KILL(FORCE_LATE, WEAK_EARLY);
1097ec681f3Smrg        else if (coverage)
1107ec681f3Smrg                SET_PIXEL_KILL(WEAK_EARLY, FORCE_LATE);
1117ec681f3Smrg        else
1127ec681f3Smrg                SET_PIXEL_KILL(WEAK_EARLY, WEAK_EARLY);
1137ec681f3Smrg}
1147ec681f3Smrg
1157ec681f3Smrg#undef SET_PIXEL_KILL
1167ec681f3Smrg
1177ec681f3Smrgstatic inline void
1187ec681f3Smrgpan_shader_prepare_bifrost_rsd(const struct pan_shader_info *info,
1197ec681f3Smrg                               struct MALI_RENDERER_STATE *rsd)
1207ec681f3Smrg{
1217ec681f3Smrg        unsigned fau_count = DIV_ROUND_UP(info->push.count, 2);
1227ec681f3Smrg        rsd->preload.uniform_count = fau_count;
1237ec681f3Smrg
1247ec681f3Smrg#if PAN_ARCH >= 7
1257ec681f3Smrg        rsd->properties.shader_register_allocation =
1267ec681f3Smrg                (info->work_reg_count <= 32) ?
1277ec681f3Smrg                MALI_SHADER_REGISTER_ALLOCATION_32_PER_THREAD :
1287ec681f3Smrg                MALI_SHADER_REGISTER_ALLOCATION_64_PER_THREAD;
1297ec681f3Smrg#endif
1307ec681f3Smrg
1317ec681f3Smrg        switch (info->stage) {
1327ec681f3Smrg        case MESA_SHADER_VERTEX:
1337ec681f3Smrg                rsd->preload.vertex.vertex_id = true;
1347ec681f3Smrg                rsd->preload.vertex.instance_id = true;
1357ec681f3Smrg                break;
1367ec681f3Smrg
1377ec681f3Smrg        case MESA_SHADER_FRAGMENT:
1387ec681f3Smrg                pan_shader_classify_pixel_kill_coverage(info, rsd);
1397ec681f3Smrg
1407ec681f3Smrg#if PAN_ARCH >= 7
1417ec681f3Smrg                rsd->properties.shader_wait_dependency_6 = info->bifrost.wait_6;
1427ec681f3Smrg                rsd->properties.shader_wait_dependency_7 = info->bifrost.wait_7;
1437ec681f3Smrg#endif
1447ec681f3Smrg
1457ec681f3Smrg                /* Match the mesa/st convention. If this needs to be flipped,
1467ec681f3Smrg                 * nir_lower_pntc_ytransform will do so. */
1477ec681f3Smrg                rsd->properties.point_sprite_coord_origin_max_y = true;
1487ec681f3Smrg
1497ec681f3Smrg                rsd->properties.allow_forward_pixel_to_be_killed =
1507ec681f3Smrg                        !info->fs.sidefx;
1517ec681f3Smrg
1527ec681f3Smrg                rsd->preload.fragment.fragment_position = info->fs.reads_frag_coord;
1537ec681f3Smrg                rsd->preload.fragment.coverage = true;
1547ec681f3Smrg                rsd->preload.fragment.primitive_flags = info->fs.reads_face;
1557ec681f3Smrg
1567ec681f3Smrg                /* Contains sample ID and sample mask. Sample position and
1577ec681f3Smrg                 * helper invocation are expressed in terms of the above, so
1587ec681f3Smrg                 * preload for those too */
1597ec681f3Smrg                rsd->preload.fragment.sample_mask_id =
1607ec681f3Smrg                        info->fs.reads_sample_id |
1617ec681f3Smrg                        info->fs.reads_sample_pos |
1627ec681f3Smrg                        info->fs.reads_sample_mask_in |
1637ec681f3Smrg                        info->fs.reads_helper_invocation |
1647ec681f3Smrg                        info->fs.sample_shading;
1657ec681f3Smrg
1667ec681f3Smrg#if PAN_ARCH >= 7
1677ec681f3Smrg                rsd->message_preload_1 = info->bifrost.messages[0];
1687ec681f3Smrg                rsd->message_preload_2 = info->bifrost.messages[1];
1697ec681f3Smrg#endif
1707ec681f3Smrg                break;
1717ec681f3Smrg
1727ec681f3Smrg        case MESA_SHADER_COMPUTE:
1737ec681f3Smrg                rsd->preload.compute.local_invocation_xy = true;
1747ec681f3Smrg                rsd->preload.compute.local_invocation_z = true;
1757ec681f3Smrg                rsd->preload.compute.work_group_x = true;
1767ec681f3Smrg                rsd->preload.compute.work_group_y = true;
1777ec681f3Smrg                rsd->preload.compute.work_group_z = true;
1787ec681f3Smrg                rsd->preload.compute.global_invocation_x = true;
1797ec681f3Smrg                rsd->preload.compute.global_invocation_y = true;
1807ec681f3Smrg                rsd->preload.compute.global_invocation_z = true;
1817ec681f3Smrg                break;
1827ec681f3Smrg
1837ec681f3Smrg        default:
1847ec681f3Smrg                unreachable("TODO");
1857ec681f3Smrg        }
1867ec681f3Smrg}
1877ec681f3Smrg
1887ec681f3Smrg#endif
1897ec681f3Smrg
1907ec681f3Smrgstatic inline void
1917ec681f3Smrgpan_shader_prepare_rsd(const struct pan_shader_info *shader_info,
1927ec681f3Smrg                       mali_ptr shader_ptr,
1937ec681f3Smrg                       struct MALI_RENDERER_STATE *rsd)
1947ec681f3Smrg{
1957ec681f3Smrg#if PAN_ARCH <= 5
1967ec681f3Smrg        shader_ptr |= shader_info->midgard.first_tag;
1977ec681f3Smrg#endif
1987ec681f3Smrg
1997ec681f3Smrg        rsd->shader.shader = shader_ptr;
2007ec681f3Smrg        rsd->shader.attribute_count = shader_info->attribute_count;
2017ec681f3Smrg        rsd->shader.varying_count = shader_info->varyings.input_count +
2027ec681f3Smrg                                   shader_info->varyings.output_count;
2037ec681f3Smrg        rsd->shader.texture_count = shader_info->texture_count;
2047ec681f3Smrg        rsd->shader.sampler_count = shader_info->sampler_count;
2057ec681f3Smrg        rsd->properties.shader_contains_barrier = shader_info->contains_barrier;
2067ec681f3Smrg        rsd->properties.uniform_buffer_count = shader_info->ubo_count;
2077ec681f3Smrg
2087ec681f3Smrg        if (shader_info->stage == MESA_SHADER_FRAGMENT) {
2097ec681f3Smrg                rsd->properties.shader_contains_barrier |=
2107ec681f3Smrg                        shader_info->fs.helper_invocations;
2117ec681f3Smrg                rsd->properties.stencil_from_shader =
2127ec681f3Smrg                        shader_info->fs.writes_stencil;
2137ec681f3Smrg                rsd->properties.depth_source =
2147ec681f3Smrg                        shader_info->fs.writes_depth ?
2157ec681f3Smrg                        MALI_DEPTH_SOURCE_SHADER :
2167ec681f3Smrg                        MALI_DEPTH_SOURCE_FIXED_FUNCTION;
2177ec681f3Smrg
2187ec681f3Smrg                /* This also needs to be set if the API forces per-sample
2197ec681f3Smrg                 * shading, but that'll just got ORed in */
2207ec681f3Smrg                rsd->multisample_misc.evaluate_per_sample =
2217ec681f3Smrg                        shader_info->fs.sample_shading;
2227ec681f3Smrg        }
2237ec681f3Smrg
2247ec681f3Smrg#if PAN_ARCH >= 6
2257ec681f3Smrg        pan_shader_prepare_bifrost_rsd(shader_info, rsd);
2267ec681f3Smrg#else
2277ec681f3Smrg        pan_shader_prepare_midgard_rsd(shader_info, rsd);
2287ec681f3Smrg#endif
2297ec681f3Smrg}
2307ec681f3Smrg#endif /* PAN_ARCH */
2317ec681f3Smrg
2327ec681f3Smrg#endif
233