17ec681f3Smrg/*
27ec681f3Smrg * Copyright (C) 2021 Valve Corporation
37ec681f3Smrg *
47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a
57ec681f3Smrg * copy of this software and associated documentation files (the "Software"),
67ec681f3Smrg * to deal in the Software without restriction, including without limitation
77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the
97ec681f3Smrg * Software is furnished to do so, subject to the following conditions:
107ec681f3Smrg *
117ec681f3Smrg * The above copyright notice and this permission notice (including the next
127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the
137ec681f3Smrg * Software.
147ec681f3Smrg *
157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
207ec681f3Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
217ec681f3Smrg * SOFTWARE.
227ec681f3Smrg */
237ec681f3Smrg
247ec681f3Smrg#include "util/ralloc.h"
257ec681f3Smrg#include "ir3_ra.h"
267ec681f3Smrg#include "ir3_shader.h"
277ec681f3Smrg
287ec681f3Smrg/* This file implements a validation pass for register allocation. We check
297ec681f3Smrg * that the assignment of SSA values to registers is "valid", in the sense
307ec681f3Smrg * that each original definition reaches all of its uses without being
317ec681f3Smrg * clobbered by something else.
327ec681f3Smrg *
337ec681f3Smrg * The validation is a forward dataflow analysis. The state at each point
347ec681f3Smrg * consists of, for each physical register, the SSA value occupying it, or a
357ec681f3Smrg * few special values:
367ec681f3Smrg *
377ec681f3Smrg * - "unknown" is set initially, before the dataflow analysis assigns it a
387ec681f3Smrg *   value. This is the lattice bottom.
397ec681f3Smrg * - Values at the start get "undef", which acts like a special SSA value that
407ec681f3Smrg *   indicates it is never written.
417ec681f3Smrg * - "overdefined" registers are set to more than one value, depending on
427ec681f3Smrg *   which path you take to get to the spot. This is the lattice top.
437ec681f3Smrg *
447ec681f3Smrg * Overdefined is necessary to distinguish because in some programs, like this
457ec681f3Smrg * simple example, it's perfectly normal and allowed:
467ec681f3Smrg *
477ec681f3Smrg * if (...) {
487ec681f3Smrg *    mov.u32u32 ssa_1(r1.x), ...
497ec681f3Smrg *    ...
507ec681f3Smrg * } else {
517ec681f3Smrg *    mov.u32u32 ssa_2(r1.x), ...
527ec681f3Smrg *    ...
537ec681f3Smrg * }
547ec681f3Smrg * // r1.x is overdefined here!
557ec681f3Smrg *
567ec681f3Smrg * However, if an ssa value after the if is accidentally assigned to r1.x, we
577ec681f3Smrg * need to remember that it's invalid to catch the mistake. Overdef has to be
587ec681f3Smrg * distinguished from undef so that the state forms a valid lattice to
597ec681f3Smrg * guarantee that the analysis always terminates. We could avoid relying on
607ec681f3Smrg * overdef by using liveness analysis, but not relying on liveness has the
617ec681f3Smrg * benefit that we can catch bugs in liveness analysis too.
627ec681f3Smrg *
637ec681f3Smrg * One tricky thing we have to handle is the coalescing of splits/collects,
647ec681f3Smrg * which means that multiple SSA values can occupy a register at the same
 * time. While we could use the same merge set indices that RA uses, that
 * would again rely on the merge set calculation being correct, which we want
 * to avoid. Instead we treat splits/collects as transfer instructions, similar
687ec681f3Smrg * to the parallelcopy instructions inserted by RA, and have them copy their
697ec681f3Smrg * sources to their destinations. This means that each physreg must carry the
707ec681f3Smrg * SSA def assigned to it plus an offset into that definition, and when
717ec681f3Smrg * validating sources we must look through splits/collects to find the
727ec681f3Smrg * "original" source for each subregister.
737ec681f3Smrg */
747ec681f3Smrg
/* Sentinel pointers stored in the def field of struct reg_state in place of a
 * real SSA definition:
 *
 * - UNKNOWN: the dataflow analysis has not assigned this register a value
 *   yet (lattice bottom). It is NULL, i.e. what a zero-filled reg_state holds.
 * - UNDEF: the register is never written on the way to this point.
 * - OVERDEF: different values occupy the register depending on the path
 *   taken to this point (lattice top).
 */
#define UNKNOWN ((struct ir3_register *)NULL)
#define UNDEF   ((struct ir3_register *)(uintptr_t)1)
#define OVERDEF ((struct ir3_register *)(uintptr_t)2)
787ec681f3Smrg
/* What occupies a single physical register slot. */
struct reg_state {
   /* SSA definition held in this slot, or one of the UNKNOWN/UNDEF/OVERDEF
    * sentinels.
    */
   struct ir3_register *def;
   /* Index of this slot within def, counted from def's first slot. */
   unsigned offset;
};
837ec681f3Smrg
/* State of every slot of one register file, indexed by physical register. */
struct file_state {
   struct reg_state regs[RA_MAX_FILE_SIZE];
};
877ec681f3Smrg
887ec681f3Smrgstruct reaching_state {
897ec681f3Smrg   struct file_state half, full, shared;
907ec681f3Smrg};
917ec681f3Smrg
927ec681f3Smrgstruct ra_val_ctx {
937ec681f3Smrg   struct ir3_instruction *current_instr;
947ec681f3Smrg
957ec681f3Smrg   struct reaching_state reaching;
967ec681f3Smrg   struct reaching_state *block_reaching;
977ec681f3Smrg   unsigned block_count;
987ec681f3Smrg
997ec681f3Smrg   unsigned full_size, half_size;
1007ec681f3Smrg
1017ec681f3Smrg   bool merged_regs;
1027ec681f3Smrg
1037ec681f3Smrg   bool failed;
1047ec681f3Smrg};
1057ec681f3Smrg
1067ec681f3Smrgstatic void
1077ec681f3Smrgvalidate_error(struct ra_val_ctx *ctx, const char *condstr)
1087ec681f3Smrg{
1097ec681f3Smrg   fprintf(stderr, "ra validation fail: %s\n", condstr);
1107ec681f3Smrg   fprintf(stderr, "  -> for instruction: ");
1117ec681f3Smrg   ir3_print_instr(ctx->current_instr);
1127ec681f3Smrg   abort();
1137ec681f3Smrg}
1147ec681f3Smrg
/* Check cond once; when it does not hold, pass its source text to
 * validate_error(). The do/while (0) wrapper makes the expansion a single
 * statement, and the break only ever leaves that wrapper.
 */
#define validate_assert(ctx, cond)                                             \
   do {                                                                        \
      if (cond)                                                                \
         break;                                                                \
      validate_error(ctx, #cond);                                              \
   } while (0)
1217ec681f3Smrg
1227ec681f3Smrgstatic unsigned
1237ec681f3Smrgget_file_size(struct ra_val_ctx *ctx, struct ir3_register *reg)
1247ec681f3Smrg{
1257ec681f3Smrg   if (reg->flags & IR3_REG_SHARED)
1267ec681f3Smrg      return RA_SHARED_SIZE;
1277ec681f3Smrg   else if (ctx->merged_regs || !(reg->flags & IR3_REG_HALF))
1287ec681f3Smrg      return ctx->full_size;
1297ec681f3Smrg   else
1307ec681f3Smrg      return ctx->half_size;
1317ec681f3Smrg}
1327ec681f3Smrg
1337ec681f3Smrg/* Validate simple things, like the registers being in-bounds. This way we
1347ec681f3Smrg * don't have to worry about out-of-bounds accesses later.
1357ec681f3Smrg */
1367ec681f3Smrg
1377ec681f3Smrgstatic void
1387ec681f3Smrgvalidate_simple(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
1397ec681f3Smrg{
1407ec681f3Smrg   ctx->current_instr = instr;
1417ec681f3Smrg   ra_foreach_dst (dst, instr) {
1427ec681f3Smrg      unsigned dst_max = ra_reg_get_physreg(dst) + reg_size(dst);
1437ec681f3Smrg      validate_assert(ctx, dst_max <= get_file_size(ctx, dst));
1447ec681f3Smrg      if (dst->tied)
1457ec681f3Smrg         validate_assert(ctx, ra_reg_get_num(dst) == ra_reg_get_num(dst->tied));
1467ec681f3Smrg   }
1477ec681f3Smrg
1487ec681f3Smrg   ra_foreach_src (src, instr) {
1497ec681f3Smrg      unsigned src_max = ra_reg_get_physreg(src) + reg_size(src);
1507ec681f3Smrg      validate_assert(ctx, src_max <= get_file_size(ctx, src));
1517ec681f3Smrg   }
1527ec681f3Smrg}
1537ec681f3Smrg
1547ec681f3Smrg/* This is the lattice operator. */
1557ec681f3Smrgstatic bool
1567ec681f3Smrgmerge_reg(struct reg_state *dst, const struct reg_state *src)
1577ec681f3Smrg{
1587ec681f3Smrg   if (dst->def == UNKNOWN) {
1597ec681f3Smrg      *dst = *src;
1607ec681f3Smrg      return src->def != UNKNOWN;
1617ec681f3Smrg   } else if (dst->def == OVERDEF) {
1627ec681f3Smrg      return false;
1637ec681f3Smrg   } else {
1647ec681f3Smrg      if (src->def == UNKNOWN)
1657ec681f3Smrg         return false;
1667ec681f3Smrg      else if (src->def == OVERDEF) {
1677ec681f3Smrg         *dst = *src;
1687ec681f3Smrg         return true;
1697ec681f3Smrg      } else {
1707ec681f3Smrg         if (dst->def != src->def || dst->offset != src->offset) {
1717ec681f3Smrg            dst->def = OVERDEF;
1727ec681f3Smrg            dst->offset = 0;
1737ec681f3Smrg            return true;
1747ec681f3Smrg         } else {
1757ec681f3Smrg            return false;
1767ec681f3Smrg         }
1777ec681f3Smrg      }
1787ec681f3Smrg   }
1797ec681f3Smrg}
1807ec681f3Smrg
1817ec681f3Smrgstatic bool
1827ec681f3Smrgmerge_file(struct file_state *dst, const struct file_state *src, unsigned size)
1837ec681f3Smrg{
1847ec681f3Smrg   bool progress = false;
1857ec681f3Smrg   for (unsigned i = 0; i < size; i++)
1867ec681f3Smrg      progress |= merge_reg(&dst->regs[i], &src->regs[i]);
1877ec681f3Smrg   return progress;
1887ec681f3Smrg}
1897ec681f3Smrg
1907ec681f3Smrgstatic bool
1917ec681f3Smrgmerge_state(struct ra_val_ctx *ctx, struct reaching_state *dst,
1927ec681f3Smrg            const struct reaching_state *src)
1937ec681f3Smrg{
1947ec681f3Smrg   bool progress = false;
1957ec681f3Smrg   progress |= merge_file(&dst->full, &src->full, ctx->full_size);
1967ec681f3Smrg   progress |= merge_file(&dst->half, &src->half, ctx->half_size);
1977ec681f3Smrg   return progress;
1987ec681f3Smrg}
1997ec681f3Smrg
2007ec681f3Smrgstatic bool
2017ec681f3Smrgmerge_state_physical(struct ra_val_ctx *ctx, struct reaching_state *dst,
2027ec681f3Smrg                     const struct reaching_state *src)
2037ec681f3Smrg{
2047ec681f3Smrg   return merge_file(&dst->shared, &src->shared, RA_SHARED_SIZE);
2057ec681f3Smrg}
2067ec681f3Smrg
2077ec681f3Smrgstatic struct file_state *
2087ec681f3Smrgra_val_get_file(struct ra_val_ctx *ctx, struct ir3_register *reg)
2097ec681f3Smrg{
2107ec681f3Smrg   if (reg->flags & IR3_REG_SHARED)
2117ec681f3Smrg      return &ctx->reaching.shared;
2127ec681f3Smrg   else if (ctx->merged_regs || !(reg->flags & IR3_REG_HALF))
2137ec681f3Smrg      return &ctx->reaching.full;
2147ec681f3Smrg   else
2157ec681f3Smrg      return &ctx->reaching.half;
2167ec681f3Smrg}
2177ec681f3Smrg
2187ec681f3Smrgstatic void
2197ec681f3Smrgpropagate_normal_instr(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
2207ec681f3Smrg{
2217ec681f3Smrg   ra_foreach_dst (dst, instr) {
2227ec681f3Smrg      struct file_state *file = ra_val_get_file(ctx, dst);
2237ec681f3Smrg      physreg_t physreg = ra_reg_get_physreg(dst);
2247ec681f3Smrg      for (unsigned i = 0; i < reg_size(dst); i++) {
2257ec681f3Smrg         file->regs[physreg + i] = (struct reg_state){
2267ec681f3Smrg            .def = dst,
2277ec681f3Smrg            .offset = i,
2287ec681f3Smrg         };
2297ec681f3Smrg      }
2307ec681f3Smrg   }
2317ec681f3Smrg}
2327ec681f3Smrg
2337ec681f3Smrgstatic void
2347ec681f3Smrgpropagate_split(struct ra_val_ctx *ctx, struct ir3_instruction *split)
2357ec681f3Smrg{
2367ec681f3Smrg   struct ir3_register *dst = split->dsts[0];
2377ec681f3Smrg   struct ir3_register *src = split->srcs[0];
2387ec681f3Smrg   physreg_t dst_physreg = ra_reg_get_physreg(dst);
2397ec681f3Smrg   physreg_t src_physreg = ra_reg_get_physreg(src);
2407ec681f3Smrg   struct file_state *file = ra_val_get_file(ctx, dst);
2417ec681f3Smrg
2427ec681f3Smrg   unsigned offset = split->split.off * reg_elem_size(src);
2437ec681f3Smrg   for (unsigned i = 0; i < reg_elem_size(src); i++) {
2447ec681f3Smrg      file->regs[dst_physreg + i] = file->regs[src_physreg + offset + i];
2457ec681f3Smrg   }
2467ec681f3Smrg}
2477ec681f3Smrg
2487ec681f3Smrgstatic void
2497ec681f3Smrgpropagate_collect(struct ra_val_ctx *ctx, struct ir3_instruction *collect)
2507ec681f3Smrg{
2517ec681f3Smrg   struct ir3_register *dst = collect->dsts[0];
2527ec681f3Smrg   physreg_t dst_physreg = ra_reg_get_physreg(dst);
2537ec681f3Smrg   struct file_state *file = ra_val_get_file(ctx, dst);
2547ec681f3Smrg
2557ec681f3Smrg   unsigned size = reg_size(dst);
2567ec681f3Smrg   struct reg_state srcs[size];
2577ec681f3Smrg
2587ec681f3Smrg   for (unsigned i = 0; i < collect->srcs_count; i++) {
2597ec681f3Smrg      struct ir3_register *src = collect->srcs[i];
2607ec681f3Smrg      unsigned dst_offset = i * reg_elem_size(dst);
2617ec681f3Smrg      for (unsigned j = 0; j < reg_elem_size(dst); j++) {
2627ec681f3Smrg         if (!ra_reg_is_src(src)) {
2637ec681f3Smrg            srcs[dst_offset + j] = (struct reg_state){
2647ec681f3Smrg               .def = dst,
2657ec681f3Smrg               .offset = dst_offset + j,
2667ec681f3Smrg            };
2677ec681f3Smrg         } else {
2687ec681f3Smrg            physreg_t src_physreg = ra_reg_get_physreg(src);
2697ec681f3Smrg            srcs[dst_offset + j] = file->regs[src_physreg + j];
2707ec681f3Smrg         }
2717ec681f3Smrg      }
2727ec681f3Smrg   }
2737ec681f3Smrg
2747ec681f3Smrg   for (unsigned i = 0; i < size; i++)
2757ec681f3Smrg      file->regs[dst_physreg + i] = srcs[i];
2767ec681f3Smrg}
2777ec681f3Smrg
2787ec681f3Smrgstatic void
2797ec681f3Smrgpropagate_parallelcopy(struct ra_val_ctx *ctx, struct ir3_instruction *pcopy)
2807ec681f3Smrg{
2817ec681f3Smrg   unsigned size = 0;
2827ec681f3Smrg   for (unsigned i = 0; i < pcopy->dsts_count; i++) {
2837ec681f3Smrg      size += reg_size(pcopy->srcs[i]);
2847ec681f3Smrg   }
2857ec681f3Smrg
2867ec681f3Smrg   struct reg_state srcs[size];
2877ec681f3Smrg
2887ec681f3Smrg   unsigned offset = 0;
2897ec681f3Smrg   for (unsigned i = 0; i < pcopy->srcs_count; i++) {
2907ec681f3Smrg      struct ir3_register *dst = pcopy->dsts[i];
2917ec681f3Smrg      struct ir3_register *src = pcopy->srcs[i];
2927ec681f3Smrg      struct file_state *file = ra_val_get_file(ctx, dst);
2937ec681f3Smrg
2947ec681f3Smrg      for (unsigned j = 0; j < reg_size(dst); j++) {
2957ec681f3Smrg         if (src->flags & (IR3_REG_IMMED | IR3_REG_CONST)) {
2967ec681f3Smrg            srcs[offset + j] = (struct reg_state){
2977ec681f3Smrg               .def = dst,
2987ec681f3Smrg               .offset = j,
2997ec681f3Smrg            };
3007ec681f3Smrg         } else {
3017ec681f3Smrg            physreg_t src_physreg = ra_reg_get_physreg(src);
3027ec681f3Smrg            srcs[offset + j] = file->regs[src_physreg + j];
3037ec681f3Smrg         }
3047ec681f3Smrg      }
3057ec681f3Smrg
3067ec681f3Smrg      offset += reg_size(dst);
3077ec681f3Smrg   }
3087ec681f3Smrg   assert(offset == size);
3097ec681f3Smrg
3107ec681f3Smrg   offset = 0;
3117ec681f3Smrg   for (unsigned i = 0; i < pcopy->dsts_count; i++) {
3127ec681f3Smrg      struct ir3_register *dst = pcopy->dsts[i];
3137ec681f3Smrg      physreg_t dst_physreg = ra_reg_get_physreg(dst);
3147ec681f3Smrg      struct file_state *file = ra_val_get_file(ctx, dst);
3157ec681f3Smrg
3167ec681f3Smrg      for (unsigned j = 0; j < reg_size(dst); j++)
3177ec681f3Smrg         file->regs[dst_physreg + j] = srcs[offset + j];
3187ec681f3Smrg
3197ec681f3Smrg      offset += reg_size(dst);
3207ec681f3Smrg   }
3217ec681f3Smrg   assert(offset == size);
3227ec681f3Smrg}
3237ec681f3Smrg
3247ec681f3Smrgstatic void
3257ec681f3Smrgpropagate_instr(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
3267ec681f3Smrg{
3277ec681f3Smrg   if (instr->opc == OPC_META_SPLIT)
3287ec681f3Smrg      propagate_split(ctx, instr);
3297ec681f3Smrg   else if (instr->opc == OPC_META_COLLECT)
3307ec681f3Smrg      propagate_collect(ctx, instr);
3317ec681f3Smrg   else if (instr->opc == OPC_META_PARALLEL_COPY)
3327ec681f3Smrg      propagate_parallelcopy(ctx, instr);
3337ec681f3Smrg   else
3347ec681f3Smrg      propagate_normal_instr(ctx, instr);
3357ec681f3Smrg}
3367ec681f3Smrg
3377ec681f3Smrgstatic bool
3387ec681f3Smrgpropagate_block(struct ra_val_ctx *ctx, struct ir3_block *block)
3397ec681f3Smrg{
3407ec681f3Smrg   ctx->reaching = ctx->block_reaching[block->index];
3417ec681f3Smrg
3427ec681f3Smrg   foreach_instr (instr, &block->instr_list) {
3437ec681f3Smrg      propagate_instr(ctx, instr);
3447ec681f3Smrg   }
3457ec681f3Smrg
3467ec681f3Smrg   bool progress = false;
3477ec681f3Smrg   for (unsigned i = 0; i < 2; i++) {
3487ec681f3Smrg      struct ir3_block *succ = block->successors[i];
3497ec681f3Smrg      if (!succ)
3507ec681f3Smrg         continue;
3517ec681f3Smrg      progress |=
3527ec681f3Smrg         merge_state(ctx, &ctx->block_reaching[succ->index], &ctx->reaching);
3537ec681f3Smrg   }
3547ec681f3Smrg   for (unsigned i = 0; i < 2; i++) {
3557ec681f3Smrg      struct ir3_block *succ = block->physical_successors[i];
3567ec681f3Smrg      if (!succ)
3577ec681f3Smrg         continue;
3587ec681f3Smrg      progress |= merge_state_physical(ctx, &ctx->block_reaching[succ->index],
3597ec681f3Smrg                                       &ctx->reaching);
3607ec681f3Smrg   }
3617ec681f3Smrg   return progress;
3627ec681f3Smrg}
3637ec681f3Smrg
3647ec681f3Smrgstatic void
3657ec681f3Smrgchase_definition(struct reg_state *state)
3667ec681f3Smrg{
3677ec681f3Smrg   while (true) {
3687ec681f3Smrg      struct ir3_instruction *instr = state->def->instr;
3697ec681f3Smrg      switch (instr->opc) {
3707ec681f3Smrg      case OPC_META_SPLIT: {
3717ec681f3Smrg         struct ir3_register *new_def = instr->srcs[0]->def;
3727ec681f3Smrg         unsigned offset = instr->split.off * reg_elem_size(new_def);
3737ec681f3Smrg         *state = (struct reg_state){
3747ec681f3Smrg            .def = new_def,
3757ec681f3Smrg            .offset = state->offset + offset,
3767ec681f3Smrg         };
3777ec681f3Smrg         break;
3787ec681f3Smrg      }
3797ec681f3Smrg      case OPC_META_COLLECT: {
3807ec681f3Smrg         unsigned src_idx = state->offset / reg_elem_size(state->def);
3817ec681f3Smrg         unsigned src_offset = state->offset % reg_elem_size(state->def);
3827ec681f3Smrg         struct ir3_register *new_def = instr->srcs[src_idx]->def;
3837ec681f3Smrg         if (new_def) {
3847ec681f3Smrg            *state = (struct reg_state){
3857ec681f3Smrg               .def = new_def,
3867ec681f3Smrg               .offset = src_offset,
3877ec681f3Smrg            };
3887ec681f3Smrg         } else {
3897ec681f3Smrg            /* Bail on immed/const */
3907ec681f3Smrg            return;
3917ec681f3Smrg         }
3927ec681f3Smrg         break;
3937ec681f3Smrg      }
3947ec681f3Smrg      case OPC_META_PARALLEL_COPY: {
3957ec681f3Smrg         unsigned dst_idx = ~0;
3967ec681f3Smrg         for (unsigned i = 0; i < instr->dsts_count; i++) {
3977ec681f3Smrg            if (instr->dsts[i] == state->def) {
3987ec681f3Smrg               dst_idx = i;
3997ec681f3Smrg               break;
4007ec681f3Smrg            }
4017ec681f3Smrg         }
4027ec681f3Smrg         assert(dst_idx != ~0);
4037ec681f3Smrg
4047ec681f3Smrg         struct ir3_register *new_def = instr->srcs[dst_idx]->def;
4057ec681f3Smrg         if (new_def) {
4067ec681f3Smrg            state->def = new_def;
4077ec681f3Smrg         } else {
4087ec681f3Smrg            /* Bail on immed/const */
4097ec681f3Smrg            return;
4107ec681f3Smrg         }
4117ec681f3Smrg         break;
4127ec681f3Smrg      }
4137ec681f3Smrg      default:
4147ec681f3Smrg         return;
4157ec681f3Smrg      }
4167ec681f3Smrg   }
4177ec681f3Smrg}
4187ec681f3Smrg
4197ec681f3Smrgstatic void
4207ec681f3Smrgdump_reg_state(struct reg_state *state)
4217ec681f3Smrg{
4227ec681f3Smrg   if (state->def == UNDEF) {
4237ec681f3Smrg      fprintf(stderr, "no reaching definition");
4247ec681f3Smrg   } else if (state->def == OVERDEF) {
4257ec681f3Smrg      fprintf(stderr,
4267ec681f3Smrg              "more than one reaching definition or partial definition");
4277ec681f3Smrg   } else {
4287ec681f3Smrg      /* The analysis should always remove UNKNOWN eventually. */
4297ec681f3Smrg      assert(state->def != UNKNOWN);
4307ec681f3Smrg
4317ec681f3Smrg      fprintf(stderr, "ssa_%u:%u(%sr%u.%c) + %u", state->def->instr->serialno,
4327ec681f3Smrg              state->def->name, (state->def->flags & IR3_REG_HALF) ? "h" : "",
4337ec681f3Smrg              state->def->num / 4, "xyzw"[state->def->num % 4],
4347ec681f3Smrg              state -> offset);
4357ec681f3Smrg   }
4367ec681f3Smrg}
4377ec681f3Smrg
4387ec681f3Smrgstatic void
4397ec681f3Smrgcheck_reaching_src(struct ra_val_ctx *ctx, struct ir3_instruction *instr,
4407ec681f3Smrg                   struct ir3_register *src)
4417ec681f3Smrg{
4427ec681f3Smrg   struct file_state *file = ra_val_get_file(ctx, src);
4437ec681f3Smrg   physreg_t physreg = ra_reg_get_physreg(src);
4447ec681f3Smrg   for (unsigned i = 0; i < reg_size(src); i++) {
4457ec681f3Smrg      struct reg_state expected = (struct reg_state){
4467ec681f3Smrg         .def = src->def,
4477ec681f3Smrg         .offset = i,
4487ec681f3Smrg      };
4497ec681f3Smrg      chase_definition(&expected);
4507ec681f3Smrg
4517ec681f3Smrg      struct reg_state actual = file->regs[physreg + i];
4527ec681f3Smrg
4537ec681f3Smrg      if (expected.def != actual.def || expected.offset != actual.offset) {
4547ec681f3Smrg         fprintf(
4557ec681f3Smrg            stderr,
4567ec681f3Smrg            "ra validation fail: wrong definition reaches source ssa_%u:%u + %u\n",
4577ec681f3Smrg            src->def->instr->serialno, src->def->name, i);
4587ec681f3Smrg         fprintf(stderr, "expected: ");
4597ec681f3Smrg         dump_reg_state(&expected);
4607ec681f3Smrg         fprintf(stderr, "\n");
4617ec681f3Smrg         fprintf(stderr, "actual: ");
4627ec681f3Smrg         dump_reg_state(&actual);
4637ec681f3Smrg         fprintf(stderr, "\n");
4647ec681f3Smrg         fprintf(stderr, "-> for instruction: ");
4657ec681f3Smrg         ir3_print_instr(instr);
4667ec681f3Smrg         ctx->failed = true;
4677ec681f3Smrg      }
4687ec681f3Smrg   }
4697ec681f3Smrg}
4707ec681f3Smrg
4717ec681f3Smrgstatic void
4727ec681f3Smrgcheck_reaching_instr(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
4737ec681f3Smrg{
4747ec681f3Smrg   if (instr->opc == OPC_META_SPLIT || instr->opc == OPC_META_COLLECT ||
4757ec681f3Smrg       instr->opc == OPC_META_PARALLEL_COPY || instr->opc == OPC_META_PHI) {
4767ec681f3Smrg      return;
4777ec681f3Smrg   }
4787ec681f3Smrg
4797ec681f3Smrg   ra_foreach_src (src, instr) {
4807ec681f3Smrg      check_reaching_src(ctx, instr, src);
4817ec681f3Smrg   }
4827ec681f3Smrg}
4837ec681f3Smrg
4847ec681f3Smrgstatic void
4857ec681f3Smrgcheck_reaching_block(struct ra_val_ctx *ctx, struct ir3_block *block)
4867ec681f3Smrg{
4877ec681f3Smrg   ctx->reaching = ctx->block_reaching[block->index];
4887ec681f3Smrg
4897ec681f3Smrg   foreach_instr (instr, &block->instr_list) {
4907ec681f3Smrg      check_reaching_instr(ctx, instr);
4917ec681f3Smrg      propagate_instr(ctx, instr);
4927ec681f3Smrg   }
4937ec681f3Smrg
4947ec681f3Smrg   for (unsigned i = 0; i < 2; i++) {
4957ec681f3Smrg      struct ir3_block *succ = block->successors[i];
4967ec681f3Smrg      if (!succ)
4977ec681f3Smrg         continue;
4987ec681f3Smrg
4997ec681f3Smrg      unsigned pred_idx = ir3_block_get_pred_index(succ, block);
5007ec681f3Smrg      foreach_instr (instr, &succ->instr_list) {
5017ec681f3Smrg         if (instr->opc != OPC_META_PHI)
5027ec681f3Smrg            break;
5037ec681f3Smrg         if (instr->srcs[pred_idx]->def)
5047ec681f3Smrg            check_reaching_src(ctx, instr, instr->srcs[pred_idx]);
5057ec681f3Smrg      }
5067ec681f3Smrg   }
5077ec681f3Smrg}
5087ec681f3Smrg
5097ec681f3Smrgstatic void
5107ec681f3Smrgcheck_reaching_defs(struct ra_val_ctx *ctx, struct ir3 *ir)
5117ec681f3Smrg{
5127ec681f3Smrg   ctx->block_reaching =
5137ec681f3Smrg      rzalloc_array(ctx, struct reaching_state, ctx->block_count);
5147ec681f3Smrg
5157ec681f3Smrg   struct reaching_state *start = &ctx->block_reaching[0];
5167ec681f3Smrg   for (unsigned i = 0; i < ctx->full_size; i++)
5177ec681f3Smrg      start->full.regs[i].def = UNDEF;
5187ec681f3Smrg   for (unsigned i = 0; i < ctx->half_size; i++)
5197ec681f3Smrg      start->half.regs[i].def = UNDEF;
5207ec681f3Smrg   for (unsigned i = 0; i < RA_SHARED_SIZE; i++)
5217ec681f3Smrg      start->shared.regs[i].def = UNDEF;
5227ec681f3Smrg
5237ec681f3Smrg   bool progress;
5247ec681f3Smrg   do {
5257ec681f3Smrg      progress = false;
5267ec681f3Smrg      foreach_block (block, &ir->block_list) {
5277ec681f3Smrg         progress |= propagate_block(ctx, block);
5287ec681f3Smrg      }
5297ec681f3Smrg   } while (progress);
5307ec681f3Smrg
5317ec681f3Smrg   foreach_block (block, &ir->block_list) {
5327ec681f3Smrg      check_reaching_block(ctx, block);
5337ec681f3Smrg   }
5347ec681f3Smrg
5357ec681f3Smrg   if (ctx->failed) {
5367ec681f3Smrg      fprintf(stderr, "failing shader:\n");
5377ec681f3Smrg      ir3_print(ir);
5387ec681f3Smrg      abort();
5397ec681f3Smrg   }
5407ec681f3Smrg}
5417ec681f3Smrg
5427ec681f3Smrgvoid
5437ec681f3Smrgir3_ra_validate(struct ir3_shader_variant *v, unsigned full_size,
5447ec681f3Smrg                unsigned half_size, unsigned block_count)
5457ec681f3Smrg{
5467ec681f3Smrg#ifdef NDEBUG
5477ec681f3Smrg#define VALIDATE 0
5487ec681f3Smrg#else
5497ec681f3Smrg#define VALIDATE 1
5507ec681f3Smrg#endif
5517ec681f3Smrg
5527ec681f3Smrg   if (!VALIDATE)
5537ec681f3Smrg      return;
5547ec681f3Smrg
5557ec681f3Smrg   struct ra_val_ctx *ctx = rzalloc(NULL, struct ra_val_ctx);
5567ec681f3Smrg   ctx->merged_regs = v->mergedregs;
5577ec681f3Smrg   ctx->full_size = full_size;
5587ec681f3Smrg   ctx->half_size = half_size;
5597ec681f3Smrg   ctx->block_count = block_count;
5607ec681f3Smrg
5617ec681f3Smrg   foreach_block (block, &v->ir->block_list) {
5627ec681f3Smrg      foreach_instr (instr, &block->instr_list) {
5637ec681f3Smrg         validate_simple(ctx, instr);
5647ec681f3Smrg      }
5657ec681f3Smrg   }
5667ec681f3Smrg
5677ec681f3Smrg   check_reaching_defs(ctx, v->ir);
5687ec681f3Smrg
5697ec681f3Smrg   ralloc_free(ctx);
5707ec681f3Smrg}
571