17ec681f3Smrg/* 27ec681f3Smrg * Copyright (C) 2021 Valve Corporation 37ec681f3Smrg * 47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a 57ec681f3Smrg * copy of this software and associated documentation files (the "Software"), 67ec681f3Smrg * to deal in the Software without restriction, including without limitation 77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the 97ec681f3Smrg * Software is furnished to do so, subject to the following conditions: 107ec681f3Smrg * 117ec681f3Smrg * The above copyright notice and this permission notice (including the next 127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the 137ec681f3Smrg * Software. 147ec681f3Smrg * 157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 207ec681f3Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 217ec681f3Smrg * SOFTWARE. 227ec681f3Smrg */ 237ec681f3Smrg 247ec681f3Smrg#include "util/ralloc.h" 257ec681f3Smrg#include "ir3_ra.h" 267ec681f3Smrg#include "ir3_shader.h" 277ec681f3Smrg 287ec681f3Smrg/* This file implements a validation pass for register allocation. We check 297ec681f3Smrg * that the assignment of SSA values to registers is "valid", in the sense 307ec681f3Smrg * that each original definition reaches all of its uses without being 317ec681f3Smrg * clobbered by something else. 327ec681f3Smrg * 337ec681f3Smrg * The validation is a forward dataflow analysis. The state at each point 347ec681f3Smrg * consists of, for each physical register, the SSA value occupying it, or a 357ec681f3Smrg * few special values: 367ec681f3Smrg * 377ec681f3Smrg * - "unknown" is set initially, before the dataflow analysis assigns it a 387ec681f3Smrg * value. This is the lattice bottom. 397ec681f3Smrg * - Values at the start get "undef", which acts like a special SSA value that 407ec681f3Smrg * indicates it is never written. 417ec681f3Smrg * - "overdefined" registers are set to more than one value, depending on 427ec681f3Smrg * which path you take to get to the spot. This is the lattice top. 437ec681f3Smrg * 447ec681f3Smrg * Overdefined is necessary to distinguish because in some programs, like this 457ec681f3Smrg * simple example, it's perfectly normal and allowed: 467ec681f3Smrg * 477ec681f3Smrg * if (...) { 487ec681f3Smrg * mov.u32u32 ssa_1(r1.x), ... 497ec681f3Smrg * ... 507ec681f3Smrg * } else { 517ec681f3Smrg * mov.u32u32 ssa_2(r1.x), ... 527ec681f3Smrg * ... 537ec681f3Smrg * } 547ec681f3Smrg * // r1.x is overdefined here! 557ec681f3Smrg * 567ec681f3Smrg * However, if an ssa value after the if is accidentally assigned to r1.x, we 577ec681f3Smrg * need to remember that it's invalid to catch the mistake. Overdef has to be 587ec681f3Smrg * distinguished from undef so that the state forms a valid lattice to 597ec681f3Smrg * guarantee that the analysis always terminates. We could avoid relying on 607ec681f3Smrg * overdef by using liveness analysis, but not relying on liveness has the 617ec681f3Smrg * benefit that we can catch bugs in liveness analysis too. 627ec681f3Smrg * 637ec681f3Smrg * One tricky thing we have to handle is the coalescing of splits/collects, 647ec681f3Smrg * which means that multiple SSA values can occupy a register at the same 657ec681f3Smrg * time. While we could use the same merge set indices that RA uses, again 667ec681f3Smrg * that would rely on the merge set calculation being correct which we don't 677ec681f3Smrg * want to. Instead we treat splits/collects as transfer instructions, similar 687ec681f3Smrg * to the parallelcopy instructions inserted by RA, and have them copy their 697ec681f3Smrg * sources to their destinations. This means that each physreg must carry the 707ec681f3Smrg * SSA def assigned to it plus an offset into that definition, and when 717ec681f3Smrg * validating sources we must look through splits/collects to find the 727ec681f3Smrg * "original" source for each subregister. 737ec681f3Smrg */ 747ec681f3Smrg 757ec681f3Smrg#define UNKNOWN ((struct ir3_register *)NULL) 767ec681f3Smrg#define UNDEF ((struct ir3_register *)(uintptr_t)1) 777ec681f3Smrg#define OVERDEF ((struct ir3_register *)(uintptr_t)2) 787ec681f3Smrg 797ec681f3Smrgstruct reg_state { 807ec681f3Smrg struct ir3_register *def; 817ec681f3Smrg unsigned offset; 827ec681f3Smrg}; 837ec681f3Smrg 847ec681f3Smrgstruct file_state { 857ec681f3Smrg struct reg_state regs[RA_MAX_FILE_SIZE]; 867ec681f3Smrg}; 877ec681f3Smrg 887ec681f3Smrgstruct reaching_state { 897ec681f3Smrg struct file_state half, full, shared; 907ec681f3Smrg}; 917ec681f3Smrg 927ec681f3Smrgstruct ra_val_ctx { 937ec681f3Smrg struct ir3_instruction *current_instr; 947ec681f3Smrg 957ec681f3Smrg struct reaching_state reaching; 967ec681f3Smrg struct reaching_state *block_reaching; 977ec681f3Smrg unsigned block_count; 987ec681f3Smrg 997ec681f3Smrg unsigned full_size, half_size; 1007ec681f3Smrg 1017ec681f3Smrg bool merged_regs; 1027ec681f3Smrg 1037ec681f3Smrg bool failed; 1047ec681f3Smrg}; 1057ec681f3Smrg 1067ec681f3Smrgstatic void 1077ec681f3Smrgvalidate_error(struct ra_val_ctx *ctx, const char *condstr) 1087ec681f3Smrg{ 1097ec681f3Smrg fprintf(stderr, "ra validation fail: %s\n", condstr); 1107ec681f3Smrg fprintf(stderr, " -> for instruction: "); 1117ec681f3Smrg ir3_print_instr(ctx->current_instr); 1127ec681f3Smrg abort(); 1137ec681f3Smrg} 1147ec681f3Smrg 1157ec681f3Smrg#define validate_assert(ctx, cond) \ 1167ec681f3Smrg do { \ 1177ec681f3Smrg if (!(cond)) { \ 1187ec681f3Smrg validate_error(ctx, #cond); \ 1197ec681f3Smrg } \ 1207ec681f3Smrg } while (0) 1217ec681f3Smrg 1227ec681f3Smrgstatic unsigned 1237ec681f3Smrgget_file_size(struct ra_val_ctx *ctx, struct ir3_register *reg) 1247ec681f3Smrg{ 1257ec681f3Smrg if (reg->flags & IR3_REG_SHARED) 1267ec681f3Smrg return RA_SHARED_SIZE; 1277ec681f3Smrg else if (ctx->merged_regs || !(reg->flags & IR3_REG_HALF)) 1287ec681f3Smrg return ctx->full_size; 1297ec681f3Smrg else 1307ec681f3Smrg return ctx->half_size; 1317ec681f3Smrg} 1327ec681f3Smrg 1337ec681f3Smrg/* Validate simple things, like the registers being in-bounds. This way we 1347ec681f3Smrg * don't have to worry about out-of-bounds accesses later. 1357ec681f3Smrg */ 1367ec681f3Smrg 1377ec681f3Smrgstatic void 1387ec681f3Smrgvalidate_simple(struct ra_val_ctx *ctx, struct ir3_instruction *instr) 1397ec681f3Smrg{ 1407ec681f3Smrg ctx->current_instr = instr; 1417ec681f3Smrg ra_foreach_dst (dst, instr) { 1427ec681f3Smrg unsigned dst_max = ra_reg_get_physreg(dst) + reg_size(dst); 1437ec681f3Smrg validate_assert(ctx, dst_max <= get_file_size(ctx, dst)); 1447ec681f3Smrg if (dst->tied) 1457ec681f3Smrg validate_assert(ctx, ra_reg_get_num(dst) == ra_reg_get_num(dst->tied)); 1467ec681f3Smrg } 1477ec681f3Smrg 1487ec681f3Smrg ra_foreach_src (src, instr) { 1497ec681f3Smrg unsigned src_max = ra_reg_get_physreg(src) + reg_size(src); 1507ec681f3Smrg validate_assert(ctx, src_max <= get_file_size(ctx, src)); 1517ec681f3Smrg } 1527ec681f3Smrg} 1537ec681f3Smrg 1547ec681f3Smrg/* This is the lattice operator. */ 1557ec681f3Smrgstatic bool 1567ec681f3Smrgmerge_reg(struct reg_state *dst, const struct reg_state *src) 1577ec681f3Smrg{ 1587ec681f3Smrg if (dst->def == UNKNOWN) { 1597ec681f3Smrg *dst = *src; 1607ec681f3Smrg return src->def != UNKNOWN; 1617ec681f3Smrg } else if (dst->def == OVERDEF) { 1627ec681f3Smrg return false; 1637ec681f3Smrg } else { 1647ec681f3Smrg if (src->def == UNKNOWN) 1657ec681f3Smrg return false; 1667ec681f3Smrg else if (src->def == OVERDEF) { 1677ec681f3Smrg *dst = *src; 1687ec681f3Smrg return true; 1697ec681f3Smrg } else { 1707ec681f3Smrg if (dst->def != src->def || dst->offset != src->offset) { 1717ec681f3Smrg dst->def = OVERDEF; 1727ec681f3Smrg dst->offset = 0; 1737ec681f3Smrg return true; 1747ec681f3Smrg } else { 1757ec681f3Smrg return false; 1767ec681f3Smrg } 1777ec681f3Smrg } 1787ec681f3Smrg } 1797ec681f3Smrg} 1807ec681f3Smrg 1817ec681f3Smrgstatic bool 1827ec681f3Smrgmerge_file(struct file_state *dst, const struct file_state *src, unsigned size) 1837ec681f3Smrg{ 1847ec681f3Smrg bool progress = false; 1857ec681f3Smrg for (unsigned i = 0; i < size; i++) 1867ec681f3Smrg progress |= merge_reg(&dst->regs[i], &src->regs[i]); 1877ec681f3Smrg return progress; 1887ec681f3Smrg} 1897ec681f3Smrg 1907ec681f3Smrgstatic bool 1917ec681f3Smrgmerge_state(struct ra_val_ctx *ctx, struct reaching_state *dst, 1927ec681f3Smrg const struct reaching_state *src) 1937ec681f3Smrg{ 1947ec681f3Smrg bool progress = false; 1957ec681f3Smrg progress |= merge_file(&dst->full, &src->full, ctx->full_size); 1967ec681f3Smrg progress |= merge_file(&dst->half, &src->half, ctx->half_size); 1977ec681f3Smrg return progress; 1987ec681f3Smrg} 1997ec681f3Smrg 2007ec681f3Smrgstatic bool 2017ec681f3Smrgmerge_state_physical(struct ra_val_ctx *ctx, struct reaching_state *dst, 2027ec681f3Smrg const struct reaching_state *src) 2037ec681f3Smrg{ 2047ec681f3Smrg return merge_file(&dst->shared, &src->shared, RA_SHARED_SIZE); 2057ec681f3Smrg} 2067ec681f3Smrg 2077ec681f3Smrgstatic struct file_state * 2087ec681f3Smrgra_val_get_file(struct ra_val_ctx *ctx, struct ir3_register *reg) 2097ec681f3Smrg{ 2107ec681f3Smrg if (reg->flags & IR3_REG_SHARED) 2117ec681f3Smrg return &ctx->reaching.shared; 2127ec681f3Smrg else if (ctx->merged_regs || !(reg->flags & IR3_REG_HALF)) 2137ec681f3Smrg return &ctx->reaching.full; 2147ec681f3Smrg else 2157ec681f3Smrg return &ctx->reaching.half; 2167ec681f3Smrg} 2177ec681f3Smrg 2187ec681f3Smrgstatic void 2197ec681f3Smrgpropagate_normal_instr(struct ra_val_ctx *ctx, struct ir3_instruction *instr) 2207ec681f3Smrg{ 2217ec681f3Smrg ra_foreach_dst (dst, instr) { 2227ec681f3Smrg struct file_state *file = ra_val_get_file(ctx, dst); 2237ec681f3Smrg physreg_t physreg = ra_reg_get_physreg(dst); 2247ec681f3Smrg for (unsigned i = 0; i < reg_size(dst); i++) { 2257ec681f3Smrg file->regs[physreg + i] = (struct reg_state){ 2267ec681f3Smrg .def = dst, 2277ec681f3Smrg .offset = i, 2287ec681f3Smrg }; 2297ec681f3Smrg } 2307ec681f3Smrg } 2317ec681f3Smrg} 2327ec681f3Smrg 2337ec681f3Smrgstatic void 2347ec681f3Smrgpropagate_split(struct ra_val_ctx *ctx, struct ir3_instruction *split) 2357ec681f3Smrg{ 2367ec681f3Smrg struct ir3_register *dst = split->dsts[0]; 2377ec681f3Smrg struct ir3_register *src = split->srcs[0]; 2387ec681f3Smrg physreg_t dst_physreg = ra_reg_get_physreg(dst); 2397ec681f3Smrg physreg_t src_physreg = ra_reg_get_physreg(src); 2407ec681f3Smrg struct file_state *file = ra_val_get_file(ctx, dst); 2417ec681f3Smrg 2427ec681f3Smrg unsigned offset = split->split.off * reg_elem_size(src); 2437ec681f3Smrg for (unsigned i = 0; i < reg_elem_size(src); i++) { 2447ec681f3Smrg file->regs[dst_physreg + i] = file->regs[src_physreg + offset + i]; 2457ec681f3Smrg } 2467ec681f3Smrg} 2477ec681f3Smrg 2487ec681f3Smrgstatic void 2497ec681f3Smrgpropagate_collect(struct ra_val_ctx *ctx, struct ir3_instruction *collect) 2507ec681f3Smrg{ 2517ec681f3Smrg struct ir3_register *dst = collect->dsts[0]; 2527ec681f3Smrg physreg_t dst_physreg = ra_reg_get_physreg(dst); 2537ec681f3Smrg struct file_state *file = ra_val_get_file(ctx, dst); 2547ec681f3Smrg 2557ec681f3Smrg unsigned size = reg_size(dst); 2567ec681f3Smrg struct reg_state srcs[size]; 2577ec681f3Smrg 2587ec681f3Smrg for (unsigned i = 0; i < collect->srcs_count; i++) { 2597ec681f3Smrg struct ir3_register *src = collect->srcs[i]; 2607ec681f3Smrg unsigned dst_offset = i * reg_elem_size(dst); 2617ec681f3Smrg for (unsigned j = 0; j < reg_elem_size(dst); j++) { 2627ec681f3Smrg if (!ra_reg_is_src(src)) { 2637ec681f3Smrg srcs[dst_offset + j] = (struct reg_state){ 2647ec681f3Smrg .def = dst, 2657ec681f3Smrg .offset = dst_offset + j, 2667ec681f3Smrg }; 2677ec681f3Smrg } else { 2687ec681f3Smrg physreg_t src_physreg = ra_reg_get_physreg(src); 2697ec681f3Smrg srcs[dst_offset + j] = file->regs[src_physreg + j]; 2707ec681f3Smrg } 2717ec681f3Smrg } 2727ec681f3Smrg } 2737ec681f3Smrg 2747ec681f3Smrg for (unsigned i = 0; i < size; i++) 2757ec681f3Smrg file->regs[dst_physreg + i] = srcs[i]; 2767ec681f3Smrg} 2777ec681f3Smrg 2787ec681f3Smrgstatic void 2797ec681f3Smrgpropagate_parallelcopy(struct ra_val_ctx *ctx, struct ir3_instruction *pcopy) 2807ec681f3Smrg{ 2817ec681f3Smrg unsigned size = 0; 2827ec681f3Smrg for (unsigned i = 0; i < pcopy->dsts_count; i++) { 2837ec681f3Smrg size += reg_size(pcopy->srcs[i]); 2847ec681f3Smrg } 2857ec681f3Smrg 2867ec681f3Smrg struct reg_state srcs[size]; 2877ec681f3Smrg 2887ec681f3Smrg unsigned offset = 0; 2897ec681f3Smrg for (unsigned i = 0; i < pcopy->srcs_count; i++) { 2907ec681f3Smrg struct ir3_register *dst = pcopy->dsts[i]; 2917ec681f3Smrg struct ir3_register *src = pcopy->srcs[i]; 2927ec681f3Smrg struct file_state *file = ra_val_get_file(ctx, dst); 2937ec681f3Smrg 2947ec681f3Smrg for (unsigned j = 0; j < reg_size(dst); j++) { 2957ec681f3Smrg if (src->flags & (IR3_REG_IMMED | IR3_REG_CONST)) { 2967ec681f3Smrg srcs[offset + j] = (struct reg_state){ 2977ec681f3Smrg .def = dst, 2987ec681f3Smrg .offset = j, 2997ec681f3Smrg }; 3007ec681f3Smrg } else { 3017ec681f3Smrg physreg_t src_physreg = ra_reg_get_physreg(src); 3027ec681f3Smrg srcs[offset + j] = file->regs[src_physreg + j]; 3037ec681f3Smrg } 3047ec681f3Smrg } 3057ec681f3Smrg 3067ec681f3Smrg offset += reg_size(dst); 3077ec681f3Smrg } 3087ec681f3Smrg assert(offset == size); 3097ec681f3Smrg 3107ec681f3Smrg offset = 0; 3117ec681f3Smrg for (unsigned i = 0; i < pcopy->dsts_count; i++) { 3127ec681f3Smrg struct ir3_register *dst = pcopy->dsts[i]; 3137ec681f3Smrg physreg_t dst_physreg = ra_reg_get_physreg(dst); 3147ec681f3Smrg struct file_state *file = ra_val_get_file(ctx, dst); 3157ec681f3Smrg 3167ec681f3Smrg for (unsigned j = 0; j < reg_size(dst); j++) 3177ec681f3Smrg file->regs[dst_physreg + j] = srcs[offset + j]; 3187ec681f3Smrg 3197ec681f3Smrg offset += reg_size(dst); 3207ec681f3Smrg } 3217ec681f3Smrg assert(offset == size); 3227ec681f3Smrg} 3237ec681f3Smrg 3247ec681f3Smrgstatic void 3257ec681f3Smrgpropagate_instr(struct ra_val_ctx *ctx, struct ir3_instruction *instr) 3267ec681f3Smrg{ 3277ec681f3Smrg if (instr->opc == OPC_META_SPLIT) 3287ec681f3Smrg propagate_split(ctx, instr); 3297ec681f3Smrg else if (instr->opc == OPC_META_COLLECT) 3307ec681f3Smrg propagate_collect(ctx, instr); 3317ec681f3Smrg else if (instr->opc == OPC_META_PARALLEL_COPY) 3327ec681f3Smrg propagate_parallelcopy(ctx, instr); 3337ec681f3Smrg else 3347ec681f3Smrg propagate_normal_instr(ctx, instr); 3357ec681f3Smrg} 3367ec681f3Smrg 3377ec681f3Smrgstatic bool 3387ec681f3Smrgpropagate_block(struct ra_val_ctx *ctx, struct ir3_block *block) 3397ec681f3Smrg{ 3407ec681f3Smrg ctx->reaching = ctx->block_reaching[block->index]; 3417ec681f3Smrg 3427ec681f3Smrg foreach_instr (instr, &block->instr_list) { 3437ec681f3Smrg propagate_instr(ctx, instr); 3447ec681f3Smrg } 3457ec681f3Smrg 3467ec681f3Smrg bool progress = false; 3477ec681f3Smrg for (unsigned i = 0; i < 2; i++) { 3487ec681f3Smrg struct ir3_block *succ = block->successors[i]; 3497ec681f3Smrg if (!succ) 3507ec681f3Smrg continue; 3517ec681f3Smrg progress |= 3527ec681f3Smrg merge_state(ctx, &ctx->block_reaching[succ->index], &ctx->reaching); 3537ec681f3Smrg } 3547ec681f3Smrg for (unsigned i = 0; i < 2; i++) { 3557ec681f3Smrg struct ir3_block *succ = block->physical_successors[i]; 3567ec681f3Smrg if (!succ) 3577ec681f3Smrg continue; 3587ec681f3Smrg progress |= merge_state_physical(ctx, &ctx->block_reaching[succ->index], 3597ec681f3Smrg &ctx->reaching); 3607ec681f3Smrg } 3617ec681f3Smrg return progress; 3627ec681f3Smrg} 3637ec681f3Smrg 3647ec681f3Smrgstatic void 3657ec681f3Smrgchase_definition(struct reg_state *state) 3667ec681f3Smrg{ 3677ec681f3Smrg while (true) { 3687ec681f3Smrg struct ir3_instruction *instr = state->def->instr; 3697ec681f3Smrg switch (instr->opc) { 3707ec681f3Smrg case OPC_META_SPLIT: { 3717ec681f3Smrg struct ir3_register *new_def = instr->srcs[0]->def; 3727ec681f3Smrg unsigned offset = instr->split.off * reg_elem_size(new_def); 3737ec681f3Smrg *state = (struct reg_state){ 3747ec681f3Smrg .def = new_def, 3757ec681f3Smrg .offset = state->offset + offset, 3767ec681f3Smrg }; 3777ec681f3Smrg break; 3787ec681f3Smrg } 3797ec681f3Smrg case OPC_META_COLLECT: { 3807ec681f3Smrg unsigned src_idx = state->offset / reg_elem_size(state->def); 3817ec681f3Smrg unsigned src_offset = state->offset % reg_elem_size(state->def); 3827ec681f3Smrg struct ir3_register *new_def = instr->srcs[src_idx]->def; 3837ec681f3Smrg if (new_def) { 3847ec681f3Smrg *state = (struct reg_state){ 3857ec681f3Smrg .def = new_def, 3867ec681f3Smrg .offset = src_offset, 3877ec681f3Smrg }; 3887ec681f3Smrg } else { 3897ec681f3Smrg /* Bail on immed/const */ 3907ec681f3Smrg return; 3917ec681f3Smrg } 3927ec681f3Smrg break; 3937ec681f3Smrg } 3947ec681f3Smrg case OPC_META_PARALLEL_COPY: { 3957ec681f3Smrg unsigned dst_idx = ~0; 3967ec681f3Smrg for (unsigned i = 0; i < instr->dsts_count; i++) { 3977ec681f3Smrg if (instr->dsts[i] == state->def) { 3987ec681f3Smrg dst_idx = i; 3997ec681f3Smrg break; 4007ec681f3Smrg } 4017ec681f3Smrg } 4027ec681f3Smrg assert(dst_idx != ~0); 4037ec681f3Smrg 4047ec681f3Smrg struct ir3_register *new_def = instr->srcs[dst_idx]->def; 4057ec681f3Smrg if (new_def) { 4067ec681f3Smrg state->def = new_def; 4077ec681f3Smrg } else { 4087ec681f3Smrg /* Bail on immed/const */ 4097ec681f3Smrg return; 4107ec681f3Smrg } 4117ec681f3Smrg break; 4127ec681f3Smrg } 4137ec681f3Smrg default: 4147ec681f3Smrg return; 4157ec681f3Smrg } 4167ec681f3Smrg } 4177ec681f3Smrg} 4187ec681f3Smrg 4197ec681f3Smrgstatic void 4207ec681f3Smrgdump_reg_state(struct reg_state *state) 4217ec681f3Smrg{ 4227ec681f3Smrg if (state->def == UNDEF) { 4237ec681f3Smrg fprintf(stderr, "no reaching definition"); 4247ec681f3Smrg } else if (state->def == OVERDEF) { 4257ec681f3Smrg fprintf(stderr, 4267ec681f3Smrg "more than one reaching definition or partial definition"); 4277ec681f3Smrg } else { 4287ec681f3Smrg /* The analysis should always remove UNKNOWN eventually. */ 4297ec681f3Smrg assert(state->def != UNKNOWN); 4307ec681f3Smrg 4317ec681f3Smrg fprintf(stderr, "ssa_%u:%u(%sr%u.%c) + %u", state->def->instr->serialno, 4327ec681f3Smrg state->def->name, (state->def->flags & IR3_REG_HALF) ? "h" : "", 4337ec681f3Smrg state->def->num / 4, "xyzw"[state->def->num % 4], 4347ec681f3Smrg state -> offset); 4357ec681f3Smrg } 4367ec681f3Smrg} 4377ec681f3Smrg 4387ec681f3Smrgstatic void 4397ec681f3Smrgcheck_reaching_src(struct ra_val_ctx *ctx, struct ir3_instruction *instr, 4407ec681f3Smrg struct ir3_register *src) 4417ec681f3Smrg{ 4427ec681f3Smrg struct file_state *file = ra_val_get_file(ctx, src); 4437ec681f3Smrg physreg_t physreg = ra_reg_get_physreg(src); 4447ec681f3Smrg for (unsigned i = 0; i < reg_size(src); i++) { 4457ec681f3Smrg struct reg_state expected = (struct reg_state){ 4467ec681f3Smrg .def = src->def, 4477ec681f3Smrg .offset = i, 4487ec681f3Smrg }; 4497ec681f3Smrg chase_definition(&expected); 4507ec681f3Smrg 4517ec681f3Smrg struct reg_state actual = file->regs[physreg + i]; 4527ec681f3Smrg 4537ec681f3Smrg if (expected.def != actual.def || expected.offset != actual.offset) { 4547ec681f3Smrg fprintf( 4557ec681f3Smrg stderr, 4567ec681f3Smrg "ra validation fail: wrong definition reaches source ssa_%u:%u + %u\n", 4577ec681f3Smrg src->def->instr->serialno, src->def->name, i); 4587ec681f3Smrg fprintf(stderr, "expected: "); 4597ec681f3Smrg dump_reg_state(&expected); 4607ec681f3Smrg fprintf(stderr, "\n"); 4617ec681f3Smrg fprintf(stderr, "actual: "); 4627ec681f3Smrg dump_reg_state(&actual); 4637ec681f3Smrg fprintf(stderr, "\n"); 4647ec681f3Smrg fprintf(stderr, "-> for instruction: "); 4657ec681f3Smrg ir3_print_instr(instr); 4667ec681f3Smrg ctx->failed = true; 4677ec681f3Smrg } 4687ec681f3Smrg } 4697ec681f3Smrg} 4707ec681f3Smrg 4717ec681f3Smrgstatic void 4727ec681f3Smrgcheck_reaching_instr(struct ra_val_ctx *ctx, struct ir3_instruction *instr) 4737ec681f3Smrg{ 4747ec681f3Smrg if (instr->opc == OPC_META_SPLIT || instr->opc == OPC_META_COLLECT || 4757ec681f3Smrg instr->opc == OPC_META_PARALLEL_COPY || instr->opc == OPC_META_PHI) { 4767ec681f3Smrg return; 4777ec681f3Smrg } 4787ec681f3Smrg 4797ec681f3Smrg ra_foreach_src (src, instr) { 4807ec681f3Smrg check_reaching_src(ctx, instr, src); 4817ec681f3Smrg } 4827ec681f3Smrg} 4837ec681f3Smrg 4847ec681f3Smrgstatic void 4857ec681f3Smrgcheck_reaching_block(struct ra_val_ctx *ctx, struct ir3_block *block) 4867ec681f3Smrg{ 4877ec681f3Smrg ctx->reaching = ctx->block_reaching[block->index]; 4887ec681f3Smrg 4897ec681f3Smrg foreach_instr (instr, &block->instr_list) { 4907ec681f3Smrg check_reaching_instr(ctx, instr); 4917ec681f3Smrg propagate_instr(ctx, instr); 4927ec681f3Smrg } 4937ec681f3Smrg 4947ec681f3Smrg for (unsigned i = 0; i < 2; i++) { 4957ec681f3Smrg struct ir3_block *succ = block->successors[i]; 4967ec681f3Smrg if (!succ) 4977ec681f3Smrg continue; 4987ec681f3Smrg 4997ec681f3Smrg unsigned pred_idx = ir3_block_get_pred_index(succ, block); 5007ec681f3Smrg foreach_instr (instr, &succ->instr_list) { 5017ec681f3Smrg if (instr->opc != OPC_META_PHI) 5027ec681f3Smrg break; 5037ec681f3Smrg if (instr->srcs[pred_idx]->def) 5047ec681f3Smrg check_reaching_src(ctx, instr, instr->srcs[pred_idx]); 5057ec681f3Smrg } 5067ec681f3Smrg } 5077ec681f3Smrg} 5087ec681f3Smrg 5097ec681f3Smrgstatic void 5107ec681f3Smrgcheck_reaching_defs(struct ra_val_ctx *ctx, struct ir3 *ir) 5117ec681f3Smrg{ 5127ec681f3Smrg ctx->block_reaching = 5137ec681f3Smrg rzalloc_array(ctx, struct reaching_state, ctx->block_count); 5147ec681f3Smrg 5157ec681f3Smrg struct reaching_state *start = &ctx->block_reaching[0]; 5167ec681f3Smrg for (unsigned i = 0; i < ctx->full_size; i++) 5177ec681f3Smrg start->full.regs[i].def = UNDEF; 5187ec681f3Smrg for (unsigned i = 0; i < ctx->half_size; i++) 5197ec681f3Smrg start->half.regs[i].def = UNDEF; 5207ec681f3Smrg for (unsigned i = 0; i < RA_SHARED_SIZE; i++) 5217ec681f3Smrg start->shared.regs[i].def = UNDEF; 5227ec681f3Smrg 5237ec681f3Smrg bool progress; 5247ec681f3Smrg do { 5257ec681f3Smrg progress = false; 5267ec681f3Smrg foreach_block (block, &ir->block_list) { 5277ec681f3Smrg progress |= propagate_block(ctx, block); 5287ec681f3Smrg } 5297ec681f3Smrg } while (progress); 5307ec681f3Smrg 5317ec681f3Smrg foreach_block (block, &ir->block_list) { 5327ec681f3Smrg check_reaching_block(ctx, block); 5337ec681f3Smrg } 5347ec681f3Smrg 5357ec681f3Smrg if (ctx->failed) { 5367ec681f3Smrg fprintf(stderr, "failing shader:\n"); 5377ec681f3Smrg ir3_print(ir); 5387ec681f3Smrg abort(); 5397ec681f3Smrg } 5407ec681f3Smrg} 5417ec681f3Smrg 5427ec681f3Smrgvoid 5437ec681f3Smrgir3_ra_validate(struct ir3_shader_variant *v, unsigned full_size, 5447ec681f3Smrg unsigned half_size, unsigned block_count) 5457ec681f3Smrg{ 5467ec681f3Smrg#ifdef NDEBUG 5477ec681f3Smrg#define VALIDATE 0 5487ec681f3Smrg#else 5497ec681f3Smrg#define VALIDATE 1 5507ec681f3Smrg#endif 5517ec681f3Smrg 5527ec681f3Smrg if (!VALIDATE) 5537ec681f3Smrg return; 5547ec681f3Smrg 5557ec681f3Smrg struct ra_val_ctx *ctx = rzalloc(NULL, struct ra_val_ctx); 5567ec681f3Smrg ctx->merged_regs = v->mergedregs; 5577ec681f3Smrg ctx->full_size = full_size; 5587ec681f3Smrg ctx->half_size = half_size; 5597ec681f3Smrg ctx->block_count = block_count; 5607ec681f3Smrg 5617ec681f3Smrg foreach_block (block, &v->ir->block_list) { 5627ec681f3Smrg foreach_instr (instr, &block->instr_list) { 5637ec681f3Smrg validate_simple(ctx, instr); 5647ec681f3Smrg } 5657ec681f3Smrg } 5667ec681f3Smrg 5677ec681f3Smrg check_reaching_defs(ctx, v->ir); 5687ec681f3Smrg 5697ec681f3Smrg ralloc_free(ctx); 5707ec681f3Smrg} 571