17ec681f3Smrg/*
27ec681f3Smrg * Copyright (C) 2021 Alyssa Rosenzweig <alyssa@rosenzweig.io>
37ec681f3Smrg *
47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a
57ec681f3Smrg * copy of this software and associated documentation files (the "Software"),
67ec681f3Smrg * to deal in the Software without restriction, including without limitation
77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the
97ec681f3Smrg * Software is furnished to do so, subject to the following conditions:
107ec681f3Smrg *
117ec681f3Smrg * The above copyright notice and this permission notice (including the next
127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the
137ec681f3Smrg * Software.
147ec681f3Smrg *
157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
207ec681f3Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
217ec681f3Smrg * SOFTWARE.
227ec681f3Smrg */
237ec681f3Smrg
247ec681f3Smrg#include "agx_compiler.h"
257ec681f3Smrg#include "agx_minifloat.h"
267ec681f3Smrg
277ec681f3Smrg/* AGX peephole optimizer responsible for instruction combining. It operates in
287ec681f3Smrg * a forward direction and a backward direction, in each case traversing in
297ec681f3Smrg * source order. SSA means the forward pass satisfies the invariant:
307ec681f3Smrg *
317ec681f3Smrg *    Every def is visited before any of its uses.
327ec681f3Smrg *
 * Dually, the backward pass satisfies the invariant:
347ec681f3Smrg *
357ec681f3Smrg *    Every use of a def is visited before the def.
367ec681f3Smrg *
377ec681f3Smrg * This means the forward pass can propagate modifiers forward, whereas the
387ec681f3Smrg * backwards pass propagates modifiers backward. Consider an example:
397ec681f3Smrg *
407ec681f3Smrg *    1 = fabs 0
417ec681f3Smrg *    2 = fround 1
427ec681f3Smrg *    3 = fsat 1
437ec681f3Smrg *
447ec681f3Smrg * The forwards pass would propagate the fabs to the fround (since we can
457ec681f3Smrg * lookup the fabs from the fround source and do the replacement). By contrast
467ec681f3Smrg * the backwards pass would propagate the fsat back to the fround (since when
 * we see the fround we know it has only a single user, fsat).  Propagatable
 * instructions have natural directions (like pushforwards and pullbacks).
497ec681f3Smrg *
507ec681f3Smrg * We are careful to update the tracked state whenever we modify an instruction
517ec681f3Smrg * to ensure the passes are linear-time and converge in a single iteration.
527ec681f3Smrg *
537ec681f3Smrg * Size conversions are worth special discussion. Consider the snippet:
547ec681f3Smrg *
557ec681f3Smrg *    2 = fadd 0, 1
567ec681f3Smrg *    3 = f2f16 2
577ec681f3Smrg *    4 = fround 3
587ec681f3Smrg *
597ec681f3Smrg * A priori, we can move the f2f16 in either direction. But it's not equal --
607ec681f3Smrg * if we move it up to the fadd, we get FP16 for two instructions, whereas if
617ec681f3Smrg * we push it into the fround, we effectively get FP32 for two instructions. So
627ec681f3Smrg * f2f16 is backwards. Likewise, consider
637ec681f3Smrg *
 *    2 = fadd 0, 1
 *    3 = f2f32 2
 *    4 = fround 3
 *
 * This time if we move f2f32 up to the fadd, we get FP32 for two, but if we
 * move it down to the fround, we get FP16 for two. So f2f32 is forwards.
707ec681f3Smrg */
717ec681f3Smrg
727ec681f3Smrgstatic bool
737ec681f3Smrgagx_is_fmov(agx_instr *def)
747ec681f3Smrg{
757ec681f3Smrg   return (def->op == AGX_OPCODE_FADD)
767ec681f3Smrg      && agx_is_equiv(def->src[1], agx_negzero());
777ec681f3Smrg}
787ec681f3Smrg
797ec681f3Smrg/* Compose floating-point modifiers with floating-point sources */
807ec681f3Smrg
817ec681f3Smrgstatic agx_index
827ec681f3Smrgagx_compose_float_src(agx_index to, agx_index from)
837ec681f3Smrg{
847ec681f3Smrg   if (to.abs)
857ec681f3Smrg      from.neg = false;
867ec681f3Smrg
877ec681f3Smrg   from.abs |= to.abs;
887ec681f3Smrg   from.neg |= to.neg;
897ec681f3Smrg
907ec681f3Smrg   return from;
917ec681f3Smrg}
927ec681f3Smrg
937ec681f3Smrgstatic void
947ec681f3Smrgagx_optimizer_fmov(agx_instr **defs, agx_instr *ins, unsigned srcs)
957ec681f3Smrg{
967ec681f3Smrg   for (unsigned s = 0; s < srcs; ++s) {
977ec681f3Smrg      agx_index src = ins->src[s];
987ec681f3Smrg      if (src.type != AGX_INDEX_NORMAL) continue;
997ec681f3Smrg
1007ec681f3Smrg      agx_instr *def = defs[src.value];
1017ec681f3Smrg      if (!agx_is_fmov(def)) continue;
1027ec681f3Smrg      if (def->saturate) continue;
1037ec681f3Smrg
1047ec681f3Smrg      ins->src[s] = agx_compose_float_src(src, def->src[0]);
1057ec681f3Smrg   }
1067ec681f3Smrg}
1077ec681f3Smrg
/* Fold immediate sources into instructions that can encode them inline.
 * Immediates encode in 8 bits: integers must round-trip through uint8_t,
 * floats must be exactly representable as an AGX 8-bit minifloat.
 * Sources that cannot be encoded are left untouched. */
static void
agx_optimizer_inline_imm(agx_instr **defs, agx_instr *I,
      unsigned srcs, bool is_float)
{
   for (unsigned s = 0; s < srcs; ++s) {
      agx_index src = I->src[s];
      if (src.type != AGX_INDEX_NORMAL) continue;

      /* Only fold sources defined by an immediate move */
      agx_instr *def = defs[src.value];
      if (def->op != AGX_OPCODE_MOV_IMM) continue;

      /* Deliberate truncation to 8 bits; checked for round-trip below */
      uint8_t value = def->imm;
      bool float_src = is_float;

      /* cmpselsrc takes integer immediates only */
      if (s >= 2 && I->op == AGX_OPCODE_FCMPSEL) float_src = false;

      if (float_src) {
         bool fp16 = (def->dest[0].size == AGX_SIZE_16);
         assert(fp16 || (def->dest[0].size == AGX_SIZE_32));

         /* Decode the stored bits to a float, then re-encode as a
          * minifloat only if that encoding is exact */
         float f = fp16 ? _mesa_half_to_float(def->imm) : uif(def->imm);
         if (!agx_minifloat_exact(f)) continue;

         value = agx_minifloat_encode(f);
      } else if (value != def->imm) {
         /* Integer immediate does not fit in 8 bits; cannot inline */
         continue;
      }

      I->src[s].type = AGX_INDEX_IMMEDIATE;
      I->src[s].value = value;
   }
}
1417ec681f3Smrg
1427ec681f3Smrgstatic bool
1437ec681f3Smrgagx_optimizer_fmov_rev(agx_instr *I, agx_instr *use)
1447ec681f3Smrg{
1457ec681f3Smrg   if (!agx_is_fmov(use)) return false;
1467ec681f3Smrg   if (use->src[0].neg || use->src[0].abs) return false;
1477ec681f3Smrg
1487ec681f3Smrg   /* saturate(saturate(x)) = saturate(x) */
1497ec681f3Smrg   I->saturate |= use->saturate;
1507ec681f3Smrg   I->dest[0] = use->dest[0];
1517ec681f3Smrg   return true;
1527ec681f3Smrg}
1537ec681f3Smrg
1547ec681f3Smrgstatic void
1557ec681f3Smrgagx_optimizer_forward(agx_context *ctx)
1567ec681f3Smrg{
1577ec681f3Smrg   agx_instr **defs = calloc(ctx->alloc, sizeof(*defs));
1587ec681f3Smrg
1597ec681f3Smrg   agx_foreach_instr_global(ctx, I) {
1607ec681f3Smrg      struct agx_opcode_info info = agx_opcodes_info[I->op];
1617ec681f3Smrg
1627ec681f3Smrg      for (unsigned d = 0; d < info.nr_dests; ++d) {
1637ec681f3Smrg         if (I->dest[d].type == AGX_INDEX_NORMAL)
1647ec681f3Smrg            defs[I->dest[d].value] = I;
1657ec681f3Smrg      }
1667ec681f3Smrg
1677ec681f3Smrg      /* Propagate fmov down */
1687ec681f3Smrg      if (info.is_float)
1697ec681f3Smrg         agx_optimizer_fmov(defs, I, info.nr_srcs);
1707ec681f3Smrg
1717ec681f3Smrg      /* Inline immediates if we can. TODO: systematic */
1727ec681f3Smrg      if (I->op != AGX_OPCODE_ST_VARY && I->op != AGX_OPCODE_ST_TILE && I->op != AGX_OPCODE_P_EXTRACT && I->op != AGX_OPCODE_P_COMBINE)
1737ec681f3Smrg         agx_optimizer_inline_imm(defs, I, info.nr_srcs, info.is_float);
1747ec681f3Smrg   }
1757ec681f3Smrg
1767ec681f3Smrg   free(defs);
1777ec681f3Smrg}
1787ec681f3Smrg
/* Backward pass: walk instructions in reverse, tracking for each SSA value
 * its (unique) use, so that single-use defs can absorb a following fmov. */
static void
agx_optimizer_backward(agx_context *ctx)
{
   /* uses[v] = first use of value v seen in the reverse walk; the
    * `multiple` bitset marks values with more than one use, for which
    * uses[v] is not meaningful */
   agx_instr **uses = calloc(ctx->alloc, sizeof(*uses));
   BITSET_WORD *multiple = calloc(BITSET_WORDS(ctx->alloc), sizeof(*multiple));

   agx_foreach_instr_global_rev(ctx, I) {
      struct agx_opcode_info info = agx_opcodes_info[I->op];

      /* Record this instruction as a use of each of its normal sources */
      for (unsigned s = 0; s < info.nr_srcs; ++s) {
         if (I->src[s].type == AGX_INDEX_NORMAL) {
            unsigned v = I->src[s].value;

            if (uses[v])
               BITSET_SET(multiple, v);
            else
               uses[v] = I;
         }
      }

      /* Only single-destination instructions are candidates */
      if (info.nr_dests != 1)
         continue;

      if (I->dest[0].type != AGX_INDEX_NORMAL)
         continue;

      /* Reverse order guarantees all uses of I's def were already seen */
      agx_instr *use = uses[I->dest[0].value];

      if (!use || BITSET_TEST(multiple, I->dest[0].value))
         continue;

      /* Destination has a single use, try to propagate */
      if (info.is_float && agx_optimizer_fmov_rev(I, use)) {
         agx_remove_instruction(use);
         continue;
      }
   }

   free(uses);
   free(multiple);
}
2207ec681f3Smrg
/* Run the peephole optimizer: the backward pass first (pulling fsat/f2f16
 * style modifiers back into defs), then the forward pass (pushing fmov
 * modifiers and immediates into uses). Each pass converges in one
 * linear-time traversal. */
void
agx_optimizer(agx_context *ctx)
{
   agx_optimizer_backward(ctx);
   agx_optimizer_forward(ctx);
}
227