17ec681f3Smrg/*
27ec681f3Smrg * Copyright (C) 2020 Collabora, Ltd.
37ec681f3Smrg * Copyright (C) 2014 Intel Corporation
47ec681f3Smrg *
57ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a
67ec681f3Smrg * copy of this software and associated documentation files (the "Software"),
77ec681f3Smrg * to deal in the Software without restriction, including without limitation
87ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
97ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the
107ec681f3Smrg * Software is furnished to do so, subject to the following conditions:
117ec681f3Smrg *
127ec681f3Smrg * The above copyright notice and this permission notice (including the next
137ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the
147ec681f3Smrg * Software.
157ec681f3Smrg *
167ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
177ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
187ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
197ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
207ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
217ec681f3Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
227ec681f3Smrg * IN THE SOFTWARE.
237ec681f3Smrg *
247ec681f3Smrg * Authors:
257ec681f3Smrg *    Alyssa Rosenzweig <alyssa@collabora.com>
267ec681f3Smrg *    Jason Ekstrand (jason@jlekstrand.net)
277ec681f3Smrg *
287ec681f3Smrg */
297ec681f3Smrg
307ec681f3Smrg#include "nir.h"
317ec681f3Smrg#include "pan_ir.h"
327ec681f3Smrg
337ec681f3Smrg/* Check if a given ALU source is the result of a particular componentwise 1-op
347ec681f3Smrg * ALU source (principally fneg or fabs). If so, return true and rewrite the
357ec681f3Smrg * source to be the argument, respecting swizzles as needed. If not (or it
367ec681f3Smrg * cannot be proven), return false and leave the source untouched.
377ec681f3Smrg*/
387ec681f3Smrg
397ec681f3Smrgbool
407ec681f3Smrgpan_has_source_mod(nir_alu_src *src, nir_op op)
417ec681f3Smrg{
427ec681f3Smrg   if (!src->src.is_ssa || src->src.ssa->parent_instr->type != nir_instr_type_alu)
437ec681f3Smrg      return false;
447ec681f3Smrg
457ec681f3Smrg   nir_alu_instr *alu = nir_instr_as_alu(src->src.ssa->parent_instr);
467ec681f3Smrg
477ec681f3Smrg   if (alu->op != op)
487ec681f3Smrg      return false;
497ec681f3Smrg
507ec681f3Smrg   /* This only works for unary ops */
517ec681f3Smrg   assert(nir_op_infos[op].num_inputs == 1);
527ec681f3Smrg
537ec681f3Smrg   /* If the copied source is not SSA, moving it might not be valid */
547ec681f3Smrg   if (!alu->src[0].src.is_ssa)
557ec681f3Smrg      return false;
567ec681f3Smrg
577ec681f3Smrg   /* Okay - we've found the modifier we wanted. Let's construct the new ALU
587ec681f3Smrg    * src. In a scalar world, this is just psrc, but for vector archs we need
597ec681f3Smrg    * to respect the swizzle, so we compose.
607ec681f3Smrg    */
617ec681f3Smrg
627ec681f3Smrg   nir_alu_src nsrc = {
637ec681f3Smrg      .src = alu->src[0].src,
647ec681f3Smrg   };
657ec681f3Smrg
667ec681f3Smrg   for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; ++i) {
677ec681f3Smrg      /* (a o b)(i) = a(b(i)) ... swizzle composition is intense. */
687ec681f3Smrg      nsrc.swizzle[i] = alu->src[0].swizzle[src->swizzle[i]];
697ec681f3Smrg   }
707ec681f3Smrg
717ec681f3Smrg   *src = nsrc;
727ec681f3Smrg   return true;
737ec681f3Smrg}
747ec681f3Smrg
757ec681f3Smrg/* Check if a given instruction's result will be fed into a
767ec681f3Smrg * componentwise 1-op ALU instruction (principally fsat without
777ec681f3Smrg * swizzles). If so, return true and rewrite the destination. The
787ec681f3Smrg * backend will need to track the new destinations to avoid
797ec681f3Smrg * incorrect double-emits. */
807ec681f3Smrg
817ec681f3Smrgbool
827ec681f3Smrgpan_has_dest_mod(nir_dest **odest, nir_op op)
837ec681f3Smrg{
847ec681f3Smrg   /* This only works for unary ops */
857ec681f3Smrg   assert(nir_op_infos[op].num_inputs == 1);
867ec681f3Smrg
877ec681f3Smrg   /* If not SSA, this might not be legal */
887ec681f3Smrg   nir_dest *dest = *odest;
897ec681f3Smrg   if (!dest->is_ssa)
907ec681f3Smrg      return false;
917ec681f3Smrg
927ec681f3Smrg   /* Check the uses. We want a single use, with the op `op` */
937ec681f3Smrg   if (!list_is_empty(&dest->ssa.if_uses))
947ec681f3Smrg      return false;
957ec681f3Smrg
967ec681f3Smrg   if (!list_is_singular(&dest->ssa.uses))
977ec681f3Smrg      return false;
987ec681f3Smrg
997ec681f3Smrg   nir_src *use = list_first_entry(&dest->ssa.uses, nir_src, use_link);
1007ec681f3Smrg   nir_instr *parent = use->parent_instr;
1017ec681f3Smrg
1027ec681f3Smrg   /* Check if the op is `op` */
1037ec681f3Smrg   if (parent->type != nir_instr_type_alu)
1047ec681f3Smrg      return false;
1057ec681f3Smrg
1067ec681f3Smrg   nir_alu_instr *alu = nir_instr_as_alu(parent);
1077ec681f3Smrg   if (alu->op != op)
1087ec681f3Smrg      return false;
1097ec681f3Smrg
1107ec681f3Smrg   /* We can't do expansions without a move in the middle */
1117ec681f3Smrg   unsigned nr_components = nir_dest_num_components(alu->dest.dest);
1127ec681f3Smrg
1137ec681f3Smrg   if (nir_dest_num_components(*dest) != nr_components)
1147ec681f3Smrg      return false;
1157ec681f3Smrg
1167ec681f3Smrg   /* We don't handle swizzles here, so check for the identity */
1177ec681f3Smrg   for (unsigned i = 0; i < nr_components; ++i) {
1187ec681f3Smrg      if (alu->src[0].swizzle[i] != i)
1197ec681f3Smrg         return false;
1207ec681f3Smrg   }
1217ec681f3Smrg
1227ec681f3Smrg   if (!alu->dest.dest.is_ssa)
1237ec681f3Smrg      return false;
1247ec681f3Smrg
1257ec681f3Smrg   /* Otherwise, we're good */
1267ec681f3Smrg   *odest = &alu->dest.dest;
1277ec681f3Smrg   return true;
1287ec681f3Smrg}
129