17ec681f3Smrg/* 27ec681f3Smrg * Copyright (C) 2019 Collabora, Ltd. 37ec681f3Smrg * 47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a 57ec681f3Smrg * copy of this software and associated documentation files (the "Software"), 67ec681f3Smrg * to deal in the Software without restriction, including without limitation 77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the 97ec681f3Smrg * Software is furnished to do so, subject to the following conditions: 107ec681f3Smrg * 117ec681f3Smrg * The above copyright notice and this permission notice (including the next 127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the 137ec681f3Smrg * Software. 147ec681f3Smrg * 157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 207ec681f3Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 217ec681f3Smrg * SOFTWARE. 227ec681f3Smrg */ 237ec681f3Smrg 247ec681f3Smrg/* Midgard has some accelerated support for perspective projection on the 257ec681f3Smrg * load/store pipes. So the first perspective projection pass looks for 267ec681f3Smrg * lowered/open-coded perspective projection of the form "fmul (A.xyz, 277ec681f3Smrg * frcp(A.w))" or "fmul (A.xy, frcp(A.z))" and rewrite with a native 287ec681f3Smrg * perspective division opcode (on the load/store pipe). Caveats apply: the 297ec681f3Smrg * frcp should be used only once to make this optimization worthwhile. And the 307ec681f3Smrg * source of the frcp ought to be a varying to make it worthwhile... 317ec681f3Smrg * 327ec681f3Smrg * The second pass in this file is a step #2 of sorts: fusing that load/store 337ec681f3Smrg * projection into a varying load instruction (they can be done together 347ec681f3Smrg * implicitly). This depends on the combination pass. Again caveat: the vary 357ec681f3Smrg * should only be used once to make this worthwhile. 367ec681f3Smrg */ 377ec681f3Smrg 387ec681f3Smrg#include "compiler.h" 397ec681f3Smrg 407ec681f3Smrgstatic bool 417ec681f3Smrgis_swizzle_0(unsigned *swizzle) 427ec681f3Smrg{ 437ec681f3Smrg for (unsigned c = 0; c < MIR_VEC_COMPONENTS; ++c) 447ec681f3Smrg if (swizzle[c]) 457ec681f3Smrg return false; 467ec681f3Smrg 477ec681f3Smrg return true; 487ec681f3Smrg} 497ec681f3Smrg 507ec681f3Smrgbool 517ec681f3Smrgmidgard_opt_combine_projection(compiler_context *ctx, midgard_block *block) 527ec681f3Smrg{ 537ec681f3Smrg bool progress = false; 547ec681f3Smrg 557ec681f3Smrg mir_foreach_instr_in_block_safe(block, ins) { 567ec681f3Smrg /* First search for fmul */ 577ec681f3Smrg if (ins->type != TAG_ALU_4) continue; 587ec681f3Smrg if (ins->op != midgard_alu_op_fmul) continue; 597ec681f3Smrg 607ec681f3Smrg /* TODO: Flip */ 617ec681f3Smrg 627ec681f3Smrg /* Check the swizzles */ 637ec681f3Smrg 647ec681f3Smrg if (!mir_is_simple_swizzle(ins->swizzle[0], ins->mask)) continue; 657ec681f3Smrg if (!is_swizzle_0(ins->swizzle[1])) continue; 667ec681f3Smrg 677ec681f3Smrg /* Awesome, we're the right form. Now check where src2 is from */ 687ec681f3Smrg unsigned frcp = ins->src[1]; 697ec681f3Smrg unsigned to = ins->dest; 707ec681f3Smrg 717ec681f3Smrg if (frcp & PAN_IS_REG) continue; 727ec681f3Smrg if (to & PAN_IS_REG) continue; 737ec681f3Smrg 747ec681f3Smrg bool frcp_found = false; 757ec681f3Smrg unsigned frcp_component = 0; 767ec681f3Smrg unsigned frcp_from = 0; 777ec681f3Smrg 787ec681f3Smrg mir_foreach_instr_in_block_safe(block, sub) { 797ec681f3Smrg if (sub->dest != frcp) continue; 807ec681f3Smrg 817ec681f3Smrg frcp_component = sub->swizzle[0][0]; 827ec681f3Smrg frcp_from = sub->src[0]; 837ec681f3Smrg 847ec681f3Smrg frcp_found = 857ec681f3Smrg (sub->type == TAG_ALU_4) && 867ec681f3Smrg (sub->op == midgard_alu_op_frcp); 877ec681f3Smrg break; 887ec681f3Smrg } 897ec681f3Smrg 907ec681f3Smrg if (!frcp_found) continue; 917ec681f3Smrg if (frcp_from != ins->src[0]) continue; 927ec681f3Smrg if (frcp_component != COMPONENT_W && frcp_component != COMPONENT_Z) continue; 937ec681f3Smrg if (!mir_single_use(ctx, frcp)) continue; 947ec681f3Smrg 957ec681f3Smrg /* Heuristic: check if the frcp is from a single-use varying */ 967ec681f3Smrg 977ec681f3Smrg bool ok = false; 987ec681f3Smrg 997ec681f3Smrg /* One for frcp and one for fmul */ 1007ec681f3Smrg if (mir_use_count(ctx, frcp_from) > 2) continue; 1017ec681f3Smrg 1027ec681f3Smrg mir_foreach_instr_in_block_safe(block, v) { 1037ec681f3Smrg if (v->dest != frcp_from) continue; 1047ec681f3Smrg if (v->type != TAG_LOAD_STORE_4) break; 1057ec681f3Smrg if (!OP_IS_LOAD_VARY_F(v->op)) break; 1067ec681f3Smrg 1077ec681f3Smrg ok = true; 1087ec681f3Smrg break; 1097ec681f3Smrg } 1107ec681f3Smrg 1117ec681f3Smrg if (!ok) 1127ec681f3Smrg continue; 1137ec681f3Smrg 1147ec681f3Smrg /* Nice, we got the form spot on. Let's convert! */ 1157ec681f3Smrg 1167ec681f3Smrg midgard_instruction accel = { 1177ec681f3Smrg .type = TAG_LOAD_STORE_4, 1187ec681f3Smrg .mask = ins->mask, 1197ec681f3Smrg .dest = to, 1207ec681f3Smrg .dest_type = nir_type_float32, 1217ec681f3Smrg .src = { frcp_from, ~0, ~0, ~0 }, 1227ec681f3Smrg .src_types = { nir_type_float32 }, 1237ec681f3Smrg .swizzle = SWIZZLE_IDENTITY_4, 1247ec681f3Smrg .op = frcp_component == COMPONENT_W ? 1257ec681f3Smrg midgard_op_ldst_perspective_div_w : 1267ec681f3Smrg midgard_op_ldst_perspective_div_z, 1277ec681f3Smrg .load_store = { 1287ec681f3Smrg .bitsize_toggle = true, 1297ec681f3Smrg } 1307ec681f3Smrg }; 1317ec681f3Smrg 1327ec681f3Smrg mir_insert_instruction_before(ctx, ins, accel); 1337ec681f3Smrg mir_remove_instruction(ins); 1347ec681f3Smrg 1357ec681f3Smrg progress |= true; 1367ec681f3Smrg } 1377ec681f3Smrg 1387ec681f3Smrg return progress; 1397ec681f3Smrg} 1407ec681f3Smrg 1417ec681f3Smrgbool 1427ec681f3Smrgmidgard_opt_varying_projection(compiler_context *ctx, midgard_block *block) 1437ec681f3Smrg{ 1447ec681f3Smrg bool progress = false; 1457ec681f3Smrg 1467ec681f3Smrg mir_foreach_instr_in_block_safe(block, ins) { 1477ec681f3Smrg /* Search for a projection */ 1487ec681f3Smrg if (ins->type != TAG_LOAD_STORE_4) continue; 1497ec681f3Smrg if (!OP_IS_PROJECTION(ins->op)) continue; 1507ec681f3Smrg 1517ec681f3Smrg unsigned vary = ins->src[0]; 1527ec681f3Smrg unsigned to = ins->dest; 1537ec681f3Smrg 1547ec681f3Smrg if (vary & PAN_IS_REG) continue; 1557ec681f3Smrg if (to & PAN_IS_REG) continue; 1567ec681f3Smrg if (!mir_single_use(ctx, vary)) continue; 1577ec681f3Smrg 1587ec681f3Smrg /* Check for a varying source. If we find it, we rewrite */ 1597ec681f3Smrg 1607ec681f3Smrg bool rewritten = false; 1617ec681f3Smrg 1627ec681f3Smrg mir_foreach_instr_in_block_safe(block, v) { 1637ec681f3Smrg if (v->dest != vary) continue; 1647ec681f3Smrg if (v->type != TAG_LOAD_STORE_4) break; 1657ec681f3Smrg if (!OP_IS_LOAD_VARY_F(v->op)) break; 1667ec681f3Smrg 1677ec681f3Smrg /* We found it, so rewrite it to project. Grab the 1687ec681f3Smrg * modifier */ 1697ec681f3Smrg 1707ec681f3Smrg midgard_varying_params p = 1717ec681f3Smrg midgard_unpack_varying_params(v->load_store); 1727ec681f3Smrg 1737ec681f3Smrg if (p.modifier != midgard_varying_mod_none) 1747ec681f3Smrg break; 1757ec681f3Smrg 1767ec681f3Smrg bool projects_w = 1777ec681f3Smrg ins->op == midgard_op_ldst_perspective_div_w; 1787ec681f3Smrg 1797ec681f3Smrg p.modifier = projects_w ? 1807ec681f3Smrg midgard_varying_mod_perspective_w : 1817ec681f3Smrg midgard_varying_mod_perspective_z; 1827ec681f3Smrg 1837ec681f3Smrg midgard_pack_varying_params(&v->load_store, p); 1847ec681f3Smrg 1857ec681f3Smrg /* Use the new destination */ 1867ec681f3Smrg v->dest = to; 1877ec681f3Smrg 1887ec681f3Smrg rewritten = true; 1897ec681f3Smrg break; 1907ec681f3Smrg } 1917ec681f3Smrg 1927ec681f3Smrg if (rewritten) 1937ec681f3Smrg mir_remove_instruction(ins); 1947ec681f3Smrg 1957ec681f3Smrg progress |= rewritten; 1967ec681f3Smrg } 1977ec681f3Smrg 1987ec681f3Smrg return progress; 1997ec681f3Smrg} 200