17ec681f3Smrg/* 27ec681f3Smrg * Copyright (C) 2020 Collabora, Ltd. 37ec681f3Smrg * Copyright (C) 2014 Intel Corporation 47ec681f3Smrg * 57ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a 67ec681f3Smrg * copy of this software and associated documentation files (the "Software"), 77ec681f3Smrg * to deal in the Software without restriction, including without limitation 87ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 97ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the 107ec681f3Smrg * Software is furnished to do so, subject to the following conditions: 117ec681f3Smrg * 127ec681f3Smrg * The above copyright notice and this permission notice (including the next 137ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the 147ec681f3Smrg * Software. 157ec681f3Smrg * 167ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 177ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 187ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 197ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 207ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 217ec681f3Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 227ec681f3Smrg * IN THE SOFTWARE. 237ec681f3Smrg * 247ec681f3Smrg * Authors: 257ec681f3Smrg * Alyssa Rosenzweig <alyssa@collabora.com> 267ec681f3Smrg * Jason Ekstrand (jason@jlekstrand.net) 277ec681f3Smrg * 287ec681f3Smrg */ 297ec681f3Smrg 307ec681f3Smrg#include "nir.h" 317ec681f3Smrg#include "pan_ir.h" 327ec681f3Smrg 337ec681f3Smrg/* Check if a given ALU source is the result of a particular componentwise 1-op 347ec681f3Smrg * ALU source (principally fneg or fabs). If so, return true and rewrite the 357ec681f3Smrg * source to be the argument, respecting swizzles as needed. If not (or it 367ec681f3Smrg * cannot be proven), return false and leave the source untouched. 377ec681f3Smrg*/ 387ec681f3Smrg 397ec681f3Smrgbool 407ec681f3Smrgpan_has_source_mod(nir_alu_src *src, nir_op op) 417ec681f3Smrg{ 427ec681f3Smrg if (!src->src.is_ssa || src->src.ssa->parent_instr->type != nir_instr_type_alu) 437ec681f3Smrg return false; 447ec681f3Smrg 457ec681f3Smrg nir_alu_instr *alu = nir_instr_as_alu(src->src.ssa->parent_instr); 467ec681f3Smrg 477ec681f3Smrg if (alu->op != op) 487ec681f3Smrg return false; 497ec681f3Smrg 507ec681f3Smrg /* This only works for unary ops */ 517ec681f3Smrg assert(nir_op_infos[op].num_inputs == 1); 527ec681f3Smrg 537ec681f3Smrg /* If the copied source is not SSA, moving it might not be valid */ 547ec681f3Smrg if (!alu->src[0].src.is_ssa) 557ec681f3Smrg return false; 567ec681f3Smrg 577ec681f3Smrg /* Okay - we've found the modifier we wanted. Let's construct the new ALU 587ec681f3Smrg * src. In a scalar world, this is just psrc, but for vector archs we need 597ec681f3Smrg * to respect the swizzle, so we compose. 607ec681f3Smrg */ 617ec681f3Smrg 627ec681f3Smrg nir_alu_src nsrc = { 637ec681f3Smrg .src = alu->src[0].src, 647ec681f3Smrg }; 657ec681f3Smrg 667ec681f3Smrg for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; ++i) { 677ec681f3Smrg /* (a o b)(i) = a(b(i)) ... swizzle composition is intense. */ 687ec681f3Smrg nsrc.swizzle[i] = alu->src[0].swizzle[src->swizzle[i]]; 697ec681f3Smrg } 707ec681f3Smrg 717ec681f3Smrg *src = nsrc; 727ec681f3Smrg return true; 737ec681f3Smrg} 747ec681f3Smrg 757ec681f3Smrg/* Check if a given instruction's result will be fed into a 767ec681f3Smrg * componentwise 1-op ALU instruction (principally fsat without 777ec681f3Smrg * swizzles). If so, return true and rewrite the destination. The 787ec681f3Smrg * backend will need to track the new destinations to avoid 797ec681f3Smrg * incorrect double-emits. */ 807ec681f3Smrg 817ec681f3Smrgbool 827ec681f3Smrgpan_has_dest_mod(nir_dest **odest, nir_op op) 837ec681f3Smrg{ 847ec681f3Smrg /* This only works for unary ops */ 857ec681f3Smrg assert(nir_op_infos[op].num_inputs == 1); 867ec681f3Smrg 877ec681f3Smrg /* If not SSA, this might not be legal */ 887ec681f3Smrg nir_dest *dest = *odest; 897ec681f3Smrg if (!dest->is_ssa) 907ec681f3Smrg return false; 917ec681f3Smrg 927ec681f3Smrg /* Check the uses. We want a single use, with the op `op` */ 937ec681f3Smrg if (!list_is_empty(&dest->ssa.if_uses)) 947ec681f3Smrg return false; 957ec681f3Smrg 967ec681f3Smrg if (!list_is_singular(&dest->ssa.uses)) 977ec681f3Smrg return false; 987ec681f3Smrg 997ec681f3Smrg nir_src *use = list_first_entry(&dest->ssa.uses, nir_src, use_link); 1007ec681f3Smrg nir_instr *parent = use->parent_instr; 1017ec681f3Smrg 1027ec681f3Smrg /* Check if the op is `op` */ 1037ec681f3Smrg if (parent->type != nir_instr_type_alu) 1047ec681f3Smrg return false; 1057ec681f3Smrg 1067ec681f3Smrg nir_alu_instr *alu = nir_instr_as_alu(parent); 1077ec681f3Smrg if (alu->op != op) 1087ec681f3Smrg return false; 1097ec681f3Smrg 1107ec681f3Smrg /* We can't do expansions without a move in the middle */ 1117ec681f3Smrg unsigned nr_components = nir_dest_num_components(alu->dest.dest); 1127ec681f3Smrg 1137ec681f3Smrg if (nir_dest_num_components(*dest) != nr_components) 1147ec681f3Smrg return false; 1157ec681f3Smrg 1167ec681f3Smrg /* We don't handle swizzles here, so check for the identity */ 1177ec681f3Smrg for (unsigned i = 0; i < nr_components; ++i) { 1187ec681f3Smrg if (alu->src[0].swizzle[i] != i) 1197ec681f3Smrg return false; 1207ec681f3Smrg } 1217ec681f3Smrg 1227ec681f3Smrg if (!alu->dest.dest.is_ssa) 1237ec681f3Smrg return false; 1247ec681f3Smrg 1257ec681f3Smrg /* Otherwise, we're good */ 1267ec681f3Smrg *odest = &alu->dest.dest; 1277ec681f3Smrg return true; 1287ec681f3Smrg} 129