/*
 * Copyright (C) 2019-2020 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors (Collabora):
 *   Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
 */

#include "compiler.h"

/* Derivatives in Midgard are implemented on the texture pipe, rather than the
 * ALU pipe as suggested by NIR. The rationale is that normal texture
 * instructions require (implicit) derivatives to be calculated anyway, so it
 * makes sense to reuse the derivative logic. Thus, in addition to the usual
 * texturing ops that calculate derivatives, there are two explicit texture ops
 * dFdx/dFdy that perform differencing across helper invocations in either
 * horizontal or vertical directions.
 *
 * One major caveat is that derivatives can only be calculated on up to a vec2
 * at a time. This restriction presumably is to save some silicon, as 99% of
 * derivatives will be vec2 (autocalculating mip levels of 2D texture
 * coordinates). Admittedly it's not clear how 3D textures get their mip
 * levels calculated automatically under this restriction, but we press on.
 *
 * This caveat is handled in two steps. During the first pass (code
 * generation), we generate texture ops 1:1 to the incoming NIR derivatives.
 * This works for float/vec2 but not for vec3/vec4. A later lowering pass will
 * scan for vec3/vec4 derivatives and lower (split) to multiple instructions.
 * This pass is separated as we'll have to rewrite the destination into a
 * register (rather than SSA) and we'd rather do this after we have the whole
 * IR in front of us to do it at once.
 */
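
/* As a rough illustration (hypothetical NIR, not taken from a real shader):
 * a vec4 derivative such as
 *
 *    vec4 32 ssa_2 = fddx ssa_1
 *
 * is first emitted as a single DFDX texture op writing all of xyzw. The
 * lowering pass below then notices that both halves of the write mask are
 * set and splits it into two ops against a shared register destination: the
 * original keeps the .xy half and a duplicate takes the .zw half, so each op
 * only ever differences a vec2 at a time.
 */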

static unsigned
mir_derivative_mode(nir_op op)
{
        switch (op) {
        case nir_op_fddx:
        case nir_op_fddx_fine:
        case nir_op_fddx_coarse:
                return TEXTURE_DFDX;

        case nir_op_fddy:
        case nir_op_fddy_fine:
        case nir_op_fddy_coarse:
                return TEXTURE_DFDY;

        default:
                unreachable("Invalid derivative op");
        }
}

/* Returns true if a texturing op computes derivatives either explicitly or
 * implicitly */

bool
mir_op_computes_derivatives(gl_shader_stage stage, unsigned op)
{
        /* Only fragment shaders may compute derivatives, but the sense of
         * "normal" changes in vertex shaders on certain GPUs */

        if (op == midgard_tex_op_normal && stage != MESA_SHADER_FRAGMENT)
                return false;

        switch (op) {
        case midgard_tex_op_normal:
        case midgard_tex_op_derivative:
                assert(stage == MESA_SHADER_FRAGMENT);
                return true;
        default:
                return false;
        }
}
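
/* As an illustration of how this predicate might be consumed (hypothetical
 * caller, not necessarily how it is used elsewhere in the compiler):
 *
 *    if (ins->type == TAG_TEXTURE_4 &&
 *        mir_op_computes_derivatives(ctx->stage, ins->op)) {
 *            // this op reads neighbouring lanes of the 2x2 quad, so
 *            // helper invocations must remain live at this point
 *    }
 *
 * Both implicit-LOD texturing (midgard_tex_op_normal in a fragment shader)
 * and the explicit derivative op difference values across the quad.
 */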

void
midgard_emit_derivatives(compiler_context *ctx, nir_alu_instr *instr)
{
        /* Create texture instructions */

        unsigned nr_components = nir_dest_num_components(instr->dest.dest);

        midgard_instruction ins = {
                .type = TAG_TEXTURE_4,
                .mask = mask_of(nr_components),
                .dest = nir_dest_index(&instr->dest.dest),
                .dest_type = nir_type_float32,
                .src = { ~0, nir_src_index(ctx, &instr->src[0].src), ~0, ~0 },
                .swizzle = SWIZZLE_IDENTITY_4,
                .src_types = { nir_type_float32, nir_type_float32 },
                .op = midgard_tex_op_derivative,
                .texture = {
                        .mode = mir_derivative_mode(instr->op),
                        .format = 2,
                        .in_reg_full = 1,
                        .out_full = 1,
                        .sampler_type = MALI_SAMPLER_FLOAT,
                }
        };

        if (!instr->dest.dest.is_ssa)
                ins.mask &= instr->dest.write_mask;

        emit_mir_instruction(ctx, ins);
}

void
midgard_lower_derivatives(compiler_context *ctx, midgard_block *block)
{
        mir_foreach_instr_in_block_safe(block, ins) {
                if (ins->type != TAG_TEXTURE_4) continue;
                if (ins->op != midgard_tex_op_derivative) continue;

                /* Check if we need to split */

                bool upper = ins->mask & 0b1100;
                bool lower = ins->mask & 0b0011;

                if (!(upper && lower)) continue;

                /* Duplicate for dedicated upper instruction */

                midgard_instruction dup;
                memcpy(&dup, ins, sizeof(dup));

                /* Fixup masks. Make original just lower and dupe just upper */

                ins->mask &= 0b0011;
                dup.mask &= 0b1100;

                /* Fixup swizzles */
                dup.swizzle[0][0] = dup.swizzle[0][1] = dup.swizzle[0][2] = COMPONENT_X;
                dup.swizzle[0][3] = COMPONENT_Y;

                dup.swizzle[1][0] = COMPONENT_Z;
                dup.swizzle[1][1] = dup.swizzle[1][2] = dup.swizzle[1][3] = COMPONENT_W;

                /* Insert the new instruction */
                mir_insert_instruction_before(ctx, mir_next_op(ins), dup);

                /* We'll need both instructions to write to the same index, so
                 * rewrite to use a register */

                unsigned new = make_compiler_temp_reg(ctx);
                mir_rewrite_index(ctx, ins->dest, new);
        }
}