/*
 * Copyright (C) 2019 Collabora, Ltd.
 * Copyright (C) 2019-2020 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors (Collabora):
 *    Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
 */

#include "compiler.h"

/* Derivatives in Midgard are implemented on the texture pipe, rather than the
 * ALU pipe as suggested by NIR. The rationale is that normal texture
 * instructions require (implicit) derivatives to be calculated anyway, so it
 * makes sense to reuse the derivative logic. Thus, in addition to the usual
 * texturing ops that calculate derivatives, there are two explicit texture ops
 * dFdx/dFdy that perform differencing across helper invocations in either
 * horizontal or vertical directions.
 *
 * One major caveat is that derivatives can only be calculated on up to a vec2
 * at a time. This restriction presumably is to save some silicon, as 99% of
 * derivatives will be vec2 (autocalculating mip levels of 2D texture
 * coordinates). Admittedly I'm not sure why 3D textures can have their levels
 * calculated automatically, umm... Pressing on.
 *
 * This caveat is handled in two steps. During the first pass (code
 * generation), we generate texture ops 1:1 to the incoming NIR derivatives.
 * This works for float/vec2 but not for vec3/vec4. A later lowering pass will
 * scan for vec3/vec4 derivatives and lower (split) to multiple instructions.
 * This pass is separated as we'll have to rewrite the destination into a
 * register (rather than SSA) and we'd rather do this after we have the whole
 * IR in front of us to do it at once.
 */
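
/* As a concrete sketch of the two steps above (illustrative pseudo-MIR, not
 * actual disassembly): a NIR-level "vec4 ssa_2 = fddx ssa_1" is first emitted
 * 1:1 as a single texture-pipe op writing all four components,
 *
 *      dfdx.xyzw ssa_2, ssa_1
 *
 * and the lowering pass then splits it into two vec2 halves writing a shared
 * compiler temporary register r, with the second op's swizzles fixed up to
 * select the upper source components:
 *
 *      dfdx.xy r, ssa_1
 *      dfdx.zw r, ssa_1 (upper-half swizzle)
 *
 * See midgard_lower_derivatives() below for the exact mask and swizzle
 * fixups.
 */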

/* Maps a NIR derivative opcode to the corresponding Midgard texture-pipe
 * derivative mode (horizontal dFdx vs. vertical dFdy) */

static unsigned
mir_derivative_mode(nir_op op)
{
        switch (op) {
        case nir_op_fddx:
        case nir_op_fddx_fine:
        case nir_op_fddx_coarse:
                return TEXTURE_DFDX;

        case nir_op_fddy:
        case nir_op_fddy_fine:
        case nir_op_fddy_coarse:
                return TEXTURE_DFDY;

        default:
                unreachable("Invalid derivative op");
        }
}

/* Returns true if a texturing op computes derivatives either explicitly or
 * implicitly */

bool
mir_op_computes_derivatives(gl_shader_stage stage, unsigned op)
{
        /* Only fragment shaders may compute derivatives, but the sense of
         * "normal" changes in vertex shaders on certain GPUs */

        if (op == midgard_tex_op_normal && stage != MESA_SHADER_FRAGMENT)
                return false;

        switch (op) {
        case midgard_tex_op_normal:
        case midgard_tex_op_derivative:
                assert(stage == MESA_SHADER_FRAGMENT);
                return true;
        default:
                return false;
        }
}

void
midgard_emit_derivatives(compiler_context *ctx, nir_alu_instr *instr)
{
        /* Create texture instructions */

        unsigned nr_components = nir_dest_num_components(instr->dest.dest);

        midgard_instruction ins = {
                .type = TAG_TEXTURE_4,
                .mask = mask_of(nr_components),
                .dest = nir_dest_index(&instr->dest.dest),
                .dest_type = nir_type_float32,
                .src = { ~0, nir_src_index(ctx, &instr->src[0].src), ~0, ~0 },
                .swizzle = SWIZZLE_IDENTITY_4,
                .src_types = { nir_type_float32, nir_type_float32 },
                .op = midgard_tex_op_derivative,
                .texture = {
                        .mode = mir_derivative_mode(instr->op),
                        .format = 2,
                        .in_reg_full = 1,
                        .out_full = 1,
                        .sampler_type = MALI_SAMPLER_FLOAT,
                }
        };

        if (!instr->dest.dest.is_ssa)
                ins.mask &= instr->dest.write_mask;

        emit_mir_instruction(ctx, ins);
}

void
midgard_lower_derivatives(compiler_context *ctx, midgard_block *block)
{
        mir_foreach_instr_in_block_safe(block, ins) {
                if (ins->type != TAG_TEXTURE_4) continue;
                if (ins->op != midgard_tex_op_derivative) continue;

                /* Check if we need to split: a vec3/vec4 derivative (e.g.
                 * mask 0b1111) touches both halves, while a float/vec2
                 * (mask 0b0011) fits in one op and is left alone */

                bool upper = ins->mask & 0b1100;
                bool lower = ins->mask & 0b0011;

                if (!(upper && lower)) continue;

                /* Duplicate for dedicated upper instruction */

                midgard_instruction dup;
                memcpy(&dup, ins, sizeof(dup));

                /* Fixup masks. Make original just lower and dupe just upper */

                ins->mask &= 0b0011;
                dup.mask &= 0b1100;

                /* Fixup swizzles */
                dup.swizzle[0][0] = dup.swizzle[0][1] = dup.swizzle[0][2] = COMPONENT_X;
                dup.swizzle[0][3] = COMPONENT_Y;

                dup.swizzle[1][0] = COMPONENT_Z;
                dup.swizzle[1][1] = dup.swizzle[1][2] = dup.swizzle[1][3] = COMPONENT_W;

                /* Insert the new instruction */
                mir_insert_instruction_before(ctx, mir_next_op(ins), dup);

                /* We'll need both instructions to write to the same index, so
                 * rewrite to use a register */

                unsigned new = make_compiler_temp_reg(ctx);
                mir_rewrite_index(ctx, ins->dest, new);
        }
}
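
/* A sketch of how these entry points are driven (the call sites live
 * elsewhere in the compiler, so the loop below is a plausible shape rather
 * than a verbatim copy): midgard_emit_derivatives() is invoked per NIR ALU
 * derivative during code generation, and once the full IR is built the
 * lowering runs once per block, along the lines of:
 *
 *      mir_foreach_block(ctx, block) {
 *              midgard_lower_derivatives(ctx, block);
 *      }
 */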