1/* 2 * Copyright (C) 2019 Collabora, Ltd. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 */ 23 24/* Midgard has some accelerated support for perspective projection on the 25 * load/store pipes. So the first perspective projection pass looks for 26 * lowered/open-coded perspective projection of the form "fmul (A.xyz, 27 * frcp(A.w))" or "fmul (A.xy, frcp(A.z))" and rewrite with a native 28 * perspective division opcode (on the load/store pipe). Caveats apply: the 29 * frcp should be used only once to make this optimization worthwhile. And the 30 * source of the frcp ought to be a varying to make it worthwhile... 31 * 32 * The second pass in this file is a step #2 of sorts: fusing that load/store 33 * projection into a varying load instruction (they can be done together 34 * implicitly). This depends on the combination pass. Again caveat: the vary 35 * should only be used once to make this worthwhile. 36 */ 37 38#include "compiler.h" 39 40static bool 41is_swizzle_0(unsigned *swizzle) 42{ 43 for (unsigned c = 0; c < MIR_VEC_COMPONENTS; ++c) 44 if (swizzle[c]) 45 return false; 46 47 return true; 48} 49 50bool 51midgard_opt_combine_projection(compiler_context *ctx, midgard_block *block) 52{ 53 bool progress = false; 54 55 mir_foreach_instr_in_block_safe(block, ins) { 56 /* First search for fmul */ 57 if (ins->type != TAG_ALU_4) continue; 58 if (ins->op != midgard_alu_op_fmul) continue; 59 60 /* TODO: Flip */ 61 62 /* Check the swizzles */ 63 64 if (!mir_is_simple_swizzle(ins->swizzle[0], ins->mask)) continue; 65 if (!is_swizzle_0(ins->swizzle[1])) continue; 66 67 /* Awesome, we're the right form. Now check where src2 is from */ 68 unsigned frcp = ins->src[1]; 69 unsigned to = ins->dest; 70 71 if (frcp & PAN_IS_REG) continue; 72 if (to & PAN_IS_REG) continue; 73 74 bool frcp_found = false; 75 unsigned frcp_component = 0; 76 unsigned frcp_from = 0; 77 78 mir_foreach_instr_in_block_safe(block, sub) { 79 if (sub->dest != frcp) continue; 80 81 frcp_component = sub->swizzle[0][0]; 82 frcp_from = sub->src[0]; 83 84 frcp_found = 85 (sub->type == TAG_ALU_4) && 86 (sub->op == midgard_alu_op_frcp); 87 break; 88 } 89 90 if (!frcp_found) continue; 91 if (frcp_from != ins->src[0]) continue; 92 if (frcp_component != COMPONENT_W && frcp_component != COMPONENT_Z) continue; 93 if (!mir_single_use(ctx, frcp)) continue; 94 95 /* Heuristic: check if the frcp is from a single-use varying */ 96 97 bool ok = false; 98 99 /* One for frcp and one for fmul */ 100 if (mir_use_count(ctx, frcp_from) > 2) continue; 101 102 mir_foreach_instr_in_block_safe(block, v) { 103 if (v->dest != frcp_from) continue; 104 if (v->type != TAG_LOAD_STORE_4) break; 105 if (!OP_IS_LOAD_VARY_F(v->op)) break; 106 107 ok = true; 108 break; 109 } 110 111 if (!ok) 112 continue; 113 114 /* Nice, we got the form spot on. Let's convert! */ 115 116 midgard_instruction accel = { 117 .type = TAG_LOAD_STORE_4, 118 .mask = ins->mask, 119 .dest = to, 120 .dest_type = nir_type_float32, 121 .src = { frcp_from, ~0, ~0, ~0 }, 122 .src_types = { nir_type_float32 }, 123 .swizzle = SWIZZLE_IDENTITY_4, 124 .op = frcp_component == COMPONENT_W ? 125 midgard_op_ldst_perspective_div_w : 126 midgard_op_ldst_perspective_div_z, 127 .load_store = { 128 .bitsize_toggle = true, 129 } 130 }; 131 132 mir_insert_instruction_before(ctx, ins, accel); 133 mir_remove_instruction(ins); 134 135 progress |= true; 136 } 137 138 return progress; 139} 140 141bool 142midgard_opt_varying_projection(compiler_context *ctx, midgard_block *block) 143{ 144 bool progress = false; 145 146 mir_foreach_instr_in_block_safe(block, ins) { 147 /* Search for a projection */ 148 if (ins->type != TAG_LOAD_STORE_4) continue; 149 if (!OP_IS_PROJECTION(ins->op)) continue; 150 151 unsigned vary = ins->src[0]; 152 unsigned to = ins->dest; 153 154 if (vary & PAN_IS_REG) continue; 155 if (to & PAN_IS_REG) continue; 156 if (!mir_single_use(ctx, vary)) continue; 157 158 /* Check for a varying source. If we find it, we rewrite */ 159 160 bool rewritten = false; 161 162 mir_foreach_instr_in_block_safe(block, v) { 163 if (v->dest != vary) continue; 164 if (v->type != TAG_LOAD_STORE_4) break; 165 if (!OP_IS_LOAD_VARY_F(v->op)) break; 166 167 /* We found it, so rewrite it to project. Grab the 168 * modifier */ 169 170 midgard_varying_params p = 171 midgard_unpack_varying_params(v->load_store); 172 173 if (p.modifier != midgard_varying_mod_none) 174 break; 175 176 bool projects_w = 177 ins->op == midgard_op_ldst_perspective_div_w; 178 179 p.modifier = projects_w ? 180 midgard_varying_mod_perspective_w : 181 midgard_varying_mod_perspective_z; 182 183 midgard_pack_varying_params(&v->load_store, p); 184 185 /* Use the new destination */ 186 v->dest = to; 187 188 rewritten = true; 189 break; 190 } 191 192 if (rewritten) 193 mir_remove_instruction(ins); 194 195 progress |= rewritten; 196 } 197 198 return progress; 199} 200