/*
 * Copyright (C) 2019 Collabora, Ltd.
 * Copyright (C) 2019-2020 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors (Collabora):
 *   Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
 */

#include "compiler.h"

/* Derivatives in Midgard are implemented on the texture pipe, rather than the
 * ALU pipe as suggested by NIR. The rationale is that normal texture
 * instructions require (implicit) derivatives to be calculated anyway, so it
 * makes sense to reuse the derivative logic. Thus, in addition to the usual
 * texturing ops that calculate derivatives, there are two explicit texture ops
 * dFdx/dFdy that perform differencing across helper invocations in either
 * horizontal or vertical directions.
 *
 * One major caveat is that derivatives can only be calculated on up to a vec2
 * at a time. This restriction presumably is to save some silicon, as 99% of
 * derivatives will be vec2 (autocalculating mip levels of 2D texture
 * coordinates). Admittedly I'm not sure why 3D textures can have their levels
 * calculated automatically, umm... Pressing on.
 *
 * This caveat is handled in two steps. During the first pass (code
 * generation), we generate texture ops 1:1 to the incoming NIR derivatives.
 * This works for float/vec2 but not for vec3/vec4. A later lowering pass will
 * scan for vec3/vec4 derivatives and lower (split) to multiple instructions.
 * This pass is separated as we'll have to rewrite the destination into a
 * register (rather than SSA) and we'd rather do this after we have the whole
 * IR in front of us to do it at once.
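 *
 * For example, an incoming vec4 derivative, roughly
 *
 *      vec4 ssa_2 = fddx ssa_1
 *
 * is first emitted as a single (too-wide) vec4 derivative texture op; the
 * lowering pass then splits it into one op writing the xy half and a
 * duplicate writing the zw half, with both rewritten to target a common
 * register. (The NIR above is illustrative syntax, not an exact dump.)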
 */

static unsigned
mir_derivative_mode(nir_op op)
{
        switch (op) {
        case nir_op_fddx:
        case nir_op_fddx_fine:
        case nir_op_fddx_coarse:
                return TEXTURE_DFDX;

        case nir_op_fddy:
        case nir_op_fddy_fine:
        case nir_op_fddy_coarse:
                return TEXTURE_DFDY;

        default:
                unreachable("Invalid derivative op");
        }
}

/* Returns true if a texturing op computes derivatives either explicitly or
 * implicitly */

bool
mir_op_computes_derivatives(gl_shader_stage stage, unsigned op)
{
        /* Only fragment shaders may compute derivatives, but the sense of
         * "normal" changes in vertex shaders on certain GPUs */

        if (op == midgard_tex_op_normal && stage != MESA_SHADER_FRAGMENT)
                return false;

        switch (op) {
        case midgard_tex_op_normal:
        case midgard_tex_op_derivative:
                assert(stage == MESA_SHADER_FRAGMENT);
                return true;
        default:
                return false;
        }
}

void
midgard_emit_derivatives(compiler_context *ctx, nir_alu_instr *instr)
{
        /* Create texture instructions */

        unsigned nr_components = nir_dest_num_components(instr->dest.dest);

        midgard_instruction ins = {
                .type = TAG_TEXTURE_4,
                .mask = mask_of(nr_components),
                .dest = nir_dest_index(&instr->dest.dest),
                .dest_type = nir_type_float32,
                .src = { ~0, nir_src_index(ctx, &instr->src[0].src), ~0, ~0 },
                .swizzle = SWIZZLE_IDENTITY_4,
                .src_types = { nir_type_float32, nir_type_float32 },
                .op = midgard_tex_op_derivative,
                .texture = {
                        .mode = mir_derivative_mode(instr->op),
                        .format = 2,
                        .in_reg_full = 1,
                        .out_full = 1,
                        .sampler_type = MALI_SAMPLER_FLOAT,
                }
        };

        if (!instr->dest.dest.is_ssa)
                ins.mask &= instr->dest.write_mask;

        emit_mir_instruction(ctx, ins);
}

void
midgard_lower_derivatives(compiler_context *ctx, midgard_block *block)
{
        mir_foreach_instr_in_block_safe(block, ins) {
                if (ins->type != TAG_TEXTURE_4) continue;
                if (ins->op != midgard_tex_op_derivative) continue;

                /* Check if we need to split */

                bool upper = ins->mask & 0b1100;
                bool lower = ins->mask & 0b0011;

                if (!(upper && lower)) continue;

                /* Duplicate for dedicated upper instruction */

                midgard_instruction dup;
                memcpy(&dup, ins, sizeof(dup));
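
                /* Note: the mask here is a per-component writemask (bit 0 = x
                 * through bit 3 = w), so 0b0011 selects the xy half and 0b1100
                 * the zw half of the write. */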

                /* Fixup masks. Make original just lower and dupe just upper */

                ins->mask &= 0b0011;
                dup.mask &= 0b1100;

                /* Fixup swizzles */
                dup.swizzle[0][0] = dup.swizzle[0][1] = dup.swizzle[0][2] = COMPONENT_X;
                dup.swizzle[0][3] = COMPONENT_Y;

                dup.swizzle[1][0] = COMPONENT_Z;
                dup.swizzle[1][1] = dup.swizzle[1][2] = dup.swizzle[1][3] = COMPONENT_W;

                /* Insert the new instruction */
                mir_insert_instruction_before(ctx, mir_next_op(ins), dup);

                /* We'll need both instructions to write to the same index, so
                 * rewrite to use a register */

                unsigned new = make_compiler_temp_reg(ctx);
                mir_rewrite_index(ctx, ins->dest, new);
        }
}