1361fc4cbSmaya/* 2361fc4cbSmaya * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org> 3361fc4cbSmaya * 4361fc4cbSmaya * Permission is hereby granted, free of charge, to any person obtaining a 5361fc4cbSmaya * copy of this software and associated documentation files (the "Software"), 6361fc4cbSmaya * to deal in the Software without restriction, including without limitation 7361fc4cbSmaya * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8361fc4cbSmaya * and/or sell copies of the Software, and to permit persons to whom the 9361fc4cbSmaya * Software is furnished to do so, subject to the following conditions: 10361fc4cbSmaya * 11361fc4cbSmaya * The above copyright notice and this permission notice (including the next 12361fc4cbSmaya * paragraph) shall be included in all copies or substantial portions of the 13361fc4cbSmaya * Software. 14361fc4cbSmaya * 15361fc4cbSmaya * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16361fc4cbSmaya * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17361fc4cbSmaya * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18361fc4cbSmaya * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19361fc4cbSmaya * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20361fc4cbSmaya * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21361fc4cbSmaya * SOFTWARE. 
22361fc4cbSmaya * 23361fc4cbSmaya * Authors: 24361fc4cbSmaya * Rob Clark <robclark@freedesktop.org> 25361fc4cbSmaya */ 26361fc4cbSmaya 277ec681f3Smrg#include "util/dag.h" 28361fc4cbSmaya#include "util/u_math.h" 29361fc4cbSmaya 30361fc4cbSmaya#include "ir3.h" 317ec681f3Smrg#include "ir3_compiler.h" 327ec681f3Smrg 337ec681f3Smrg#ifdef DEBUG 347ec681f3Smrg#define SCHED_DEBUG (ir3_shader_debug & IR3_DBG_SCHEDMSGS) 357ec681f3Smrg#else 367ec681f3Smrg#define SCHED_DEBUG 0 377ec681f3Smrg#endif 387ec681f3Smrg#define d(fmt, ...) \ 397ec681f3Smrg do { \ 407ec681f3Smrg if (SCHED_DEBUG) { \ 417ec681f3Smrg mesa_logi("SCHED: " fmt, ##__VA_ARGS__); \ 427ec681f3Smrg } \ 437ec681f3Smrg } while (0) 447ec681f3Smrg 457ec681f3Smrg#define di(instr, fmt, ...) \ 467ec681f3Smrg do { \ 477ec681f3Smrg if (SCHED_DEBUG) { \ 487ec681f3Smrg struct log_stream *stream = mesa_log_streami(); \ 497ec681f3Smrg mesa_log_stream_printf(stream, "SCHED: " fmt ": ", ##__VA_ARGS__); \ 507ec681f3Smrg ir3_print_instr_stream(stream, instr); \ 517ec681f3Smrg mesa_log_stream_destroy(stream); \ 527ec681f3Smrg } \ 537ec681f3Smrg } while (0) 54361fc4cbSmaya 55361fc4cbSmaya/* 56361fc4cbSmaya * Instruction Scheduling: 57361fc4cbSmaya * 587ec681f3Smrg * A block-level pre-RA scheduler, which works by creating a DAG of 597ec681f3Smrg * instruction dependencies, and heuristically picking a DAG head 607ec681f3Smrg * (instruction with no unscheduled dependencies). 617ec681f3Smrg * 627ec681f3Smrg * Where possible, it tries to pick instructions that avoid nop delay 637ec681f3Smrg * slots, but it will prefer to pick instructions that reduce (or do 647ec681f3Smrg * not increase) the number of live values. 657ec681f3Smrg * 667ec681f3Smrg * If the only possible choices are instructions that increase the 677ec681f3Smrg * number of live values, it will try to pick the one with the earliest 687ec681f3Smrg * consumer (based on pre-sched program order). 
69361fc4cbSmaya * 70361fc4cbSmaya * There are a few special cases that need to be handled, since sched 71361fc4cbSmaya * is currently independent of register allocation. Usages of address 72361fc4cbSmaya * register (a0.x) or predicate register (p0.x) must be serialized. Ie. 73361fc4cbSmaya * if you have two pairs of instructions that write the same special 74361fc4cbSmaya * register and then read it, then those pairs cannot be interleaved. 75361fc4cbSmaya * To solve this, when we are in such a scheduling "critical section", 76361fc4cbSmaya * and we encounter a conflicting write to a special register, we try 77361fc4cbSmaya * to schedule any remaining instructions that use that value first. 787ec681f3Smrg * 797ec681f3Smrg * TODO we can detect too-large live_values here.. would be a good place 807ec681f3Smrg * to "spill" cheap things, like move from uniform/immed. (Constructing 817ec681f3Smrg * list of ssa def consumers before sched pass would make this easier. 827ec681f3Smrg * Also, in general it is general it might be best not to re-use load_immed 837ec681f3Smrg * across blocks. 847ec681f3Smrg * 857ec681f3Smrg * TODO we can use (abs)/(neg) src modifiers in a lot of cases to reduce 867ec681f3Smrg * the # of immediates in play (or at least that would help with 877ec681f3Smrg * dEQP-GLES31.functional.ubo.random.all_per_block_buffers.*).. probably 887ec681f3Smrg * do this in a nir pass that inserts fneg/etc? The cp pass should fold 897ec681f3Smrg * these into src modifiers.. 
90361fc4cbSmaya */ 91361fc4cbSmaya 92361fc4cbSmayastruct ir3_sched_ctx { 937ec681f3Smrg struct ir3_block *block; /* the current block */ 947ec681f3Smrg struct dag *dag; 957ec681f3Smrg 967ec681f3Smrg struct list_head unscheduled_list; /* unscheduled instructions */ 977ec681f3Smrg struct ir3_instruction *scheduled; /* last scheduled instr */ 987ec681f3Smrg struct ir3_instruction *addr0; /* current a0.x user, if any */ 997ec681f3Smrg struct ir3_instruction *addr1; /* current a1.x user, if any */ 1007ec681f3Smrg struct ir3_instruction *pred; /* current p0.x user, if any */ 1017ec681f3Smrg 1027ec681f3Smrg struct ir3_instruction *split; /* most-recently-split a0/a1/p0 producer */ 1037ec681f3Smrg 1047ec681f3Smrg int remaining_kills; 1057ec681f3Smrg int remaining_tex; 1067ec681f3Smrg 1077ec681f3Smrg bool error; 1087ec681f3Smrg 1097ec681f3Smrg int sfu_delay; 1107ec681f3Smrg int tex_delay; 1117ec681f3Smrg 1127ec681f3Smrg /* We order the scheduled tex/SFU instructions, and keep track of the 1137ec681f3Smrg * index of the last waited on instruction, so we can know which 1147ec681f3Smrg * instructions are still outstanding (and therefore would require us to 1157ec681f3Smrg * wait for all outstanding instructions before scheduling a use). 1167ec681f3Smrg */ 1177ec681f3Smrg int tex_index, first_outstanding_tex_index; 1187ec681f3Smrg int sfu_index, first_outstanding_sfu_index; 119361fc4cbSmaya}; 120361fc4cbSmaya 1217ec681f3Smrgstruct ir3_sched_node { 1227ec681f3Smrg struct dag_node dag; /* must be first for util_dynarray_foreach */ 1237ec681f3Smrg struct ir3_instruction *instr; 1247ec681f3Smrg 1257ec681f3Smrg unsigned delay; 1267ec681f3Smrg unsigned max_delay; 1277ec681f3Smrg 1287ec681f3Smrg unsigned tex_index; 1297ec681f3Smrg unsigned sfu_index; 1307ec681f3Smrg 1317ec681f3Smrg /* For instructions that are a meta:collect src, once we schedule 1327ec681f3Smrg * the first src of the collect, the entire vecN is live (at least 1337ec681f3Smrg * from the PoV of the first RA pass.. 
the 2nd scalar pass can fill 1347ec681f3Smrg * in some of the gaps, but often not all). So we want to help out 1357ec681f3Smrg * RA, and realize that as soon as we schedule the first collect 1367ec681f3Smrg * src, there is no penalty to schedule the remainder (ie. they 1377ec681f3Smrg * don't make additional values live). In fact we'd prefer to 1387ec681f3Smrg * schedule the rest ASAP to minimize the live range of the vecN. 1397ec681f3Smrg * 1407ec681f3Smrg * For instructions that are the src of a collect, we track the 1417ec681f3Smrg * corresponding collect, and mark them as partially live as soon 1427ec681f3Smrg * as any one of the src's is scheduled. 1437ec681f3Smrg */ 1447ec681f3Smrg struct ir3_instruction *collect; 1457ec681f3Smrg bool partially_live; 1467ec681f3Smrg 1477ec681f3Smrg /* Is this instruction a direct or indirect dependency for a kill? 1487ec681f3Smrg * If so, we should prioritize it when possible 1497ec681f3Smrg */ 1507ec681f3Smrg bool kill_path; 1517ec681f3Smrg 1527ec681f3Smrg /* This node represents a shader output. A semi-common pattern in 1537ec681f3Smrg * shaders is something along the lines of: 1547ec681f3Smrg * 1557ec681f3Smrg * fragcolor.w = 1.0 1567ec681f3Smrg * 1577ec681f3Smrg * Which we'd prefer to schedule as late as possible, since it 1587ec681f3Smrg * produces a live value that is never killed/consumed. 
So detect 1597ec681f3Smrg * outputs up-front, and avoid scheduling them unless the reduce 1607ec681f3Smrg * register pressure (or at least are neutral) 1617ec681f3Smrg */ 1627ec681f3Smrg bool output; 1637ec681f3Smrg}; 164361fc4cbSmaya 1657ec681f3Smrg#define foreach_sched_node(__n, __list) \ 1667ec681f3Smrg list_for_each_entry (struct ir3_sched_node, __n, __list, dag.link) 167361fc4cbSmaya 1687ec681f3Smrgstatic void sched_node_init(struct ir3_sched_ctx *ctx, 1697ec681f3Smrg struct ir3_instruction *instr); 1707ec681f3Smrgstatic void sched_node_add_dep(struct ir3_instruction *instr, 1717ec681f3Smrg struct ir3_instruction *src, int i); 172361fc4cbSmaya 1737ec681f3Smrgstatic bool 1747ec681f3Smrgis_scheduled(struct ir3_instruction *instr) 1757ec681f3Smrg{ 1767ec681f3Smrg return !!(instr->flags & IR3_INSTR_MARK); 177361fc4cbSmaya} 178361fc4cbSmaya 1797ec681f3Smrg/* check_src_cond() passing a ir3_sched_ctx. */ 1807ec681f3Smrgstatic bool 1817ec681f3Smrgsched_check_src_cond(struct ir3_instruction *instr, 1827ec681f3Smrg bool (*cond)(struct ir3_instruction *, 1837ec681f3Smrg struct ir3_sched_ctx *), 1847ec681f3Smrg struct ir3_sched_ctx *ctx) 185361fc4cbSmaya{ 1867ec681f3Smrg foreach_ssa_src (src, instr) { 1877ec681f3Smrg /* meta:split/collect aren't real instructions, the thing that 1887ec681f3Smrg * we actually care about is *their* srcs 1897ec681f3Smrg */ 1907ec681f3Smrg if ((src->opc == OPC_META_SPLIT) || (src->opc == OPC_META_COLLECT)) { 1917ec681f3Smrg if (sched_check_src_cond(src, cond, ctx)) 1927ec681f3Smrg return true; 1937ec681f3Smrg } else { 1947ec681f3Smrg if (cond(src, ctx)) 1957ec681f3Smrg return true; 1967ec681f3Smrg } 1977ec681f3Smrg } 1987ec681f3Smrg 1997ec681f3Smrg return false; 200361fc4cbSmaya} 201361fc4cbSmaya 2027ec681f3Smrg/* Is this a prefetch or tex that hasn't been waited on yet? 
*/ 2037ec681f3Smrg 2047ec681f3Smrgstatic bool 2057ec681f3Smrgis_outstanding_tex_or_prefetch(struct ir3_instruction *instr, 2067ec681f3Smrg struct ir3_sched_ctx *ctx) 207361fc4cbSmaya{ 2087ec681f3Smrg if (!is_tex_or_prefetch(instr)) 2097ec681f3Smrg return false; 210361fc4cbSmaya 2117ec681f3Smrg /* The sched node is only valid within the same block, we cannot 2127ec681f3Smrg * really say anything about src's from other blocks 2137ec681f3Smrg */ 2147ec681f3Smrg if (instr->block != ctx->block) 2157ec681f3Smrg return true; 2167ec681f3Smrg 2177ec681f3Smrg struct ir3_sched_node *n = instr->data; 2187ec681f3Smrg return n->tex_index >= ctx->first_outstanding_tex_index; 219361fc4cbSmaya} 220361fc4cbSmaya 2217ec681f3Smrgstatic bool 2227ec681f3Smrgis_outstanding_sfu(struct ir3_instruction *instr, struct ir3_sched_ctx *ctx) 223361fc4cbSmaya{ 2247ec681f3Smrg if (!is_sfu(instr)) 2257ec681f3Smrg return false; 226361fc4cbSmaya 2277ec681f3Smrg /* The sched node is only valid within the same block, we cannot 2287ec681f3Smrg * really say anything about src's from other blocks 2297ec681f3Smrg */ 2307ec681f3Smrg if (instr->block != ctx->block) 2317ec681f3Smrg return true; 232361fc4cbSmaya 2337ec681f3Smrg struct ir3_sched_node *n = instr->data; 2347ec681f3Smrg return n->sfu_index >= ctx->first_outstanding_sfu_index; 235361fc4cbSmaya} 236361fc4cbSmaya 2377ec681f3Smrgstatic unsigned 2387ec681f3Smrgcycle_count(struct ir3_instruction *instr) 239361fc4cbSmaya{ 2407ec681f3Smrg if (instr->opc == OPC_META_COLLECT) { 2417ec681f3Smrg /* Assume that only immed/const sources produce moves */ 2427ec681f3Smrg unsigned n = 0; 2437ec681f3Smrg foreach_src (src, instr) { 2447ec681f3Smrg if (src->flags & (IR3_REG_IMMED | IR3_REG_CONST)) 2457ec681f3Smrg n++; 2467ec681f3Smrg } 2477ec681f3Smrg return n; 2487ec681f3Smrg } else if (is_meta(instr)) { 2497ec681f3Smrg return 0; 2507ec681f3Smrg } else { 2517ec681f3Smrg return 1; 2527ec681f3Smrg } 253361fc4cbSmaya} 254361fc4cbSmaya 255361fc4cbSmayastatic void 
256361fc4cbSmayaschedule(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr) 257361fc4cbSmaya{ 2587ec681f3Smrg debug_assert(ctx->block == instr->block); 2597ec681f3Smrg 2607ec681f3Smrg /* remove from depth list: 2617ec681f3Smrg */ 2627ec681f3Smrg list_delinit(&instr->node); 2637ec681f3Smrg 2647ec681f3Smrg if (writes_addr0(instr)) { 2657ec681f3Smrg debug_assert(ctx->addr0 == NULL); 2667ec681f3Smrg ctx->addr0 = instr; 2677ec681f3Smrg } 2687ec681f3Smrg 2697ec681f3Smrg if (writes_addr1(instr)) { 2707ec681f3Smrg debug_assert(ctx->addr1 == NULL); 2717ec681f3Smrg ctx->addr1 = instr; 2727ec681f3Smrg } 2737ec681f3Smrg 2747ec681f3Smrg if (writes_pred(instr)) { 2757ec681f3Smrg debug_assert(ctx->pred == NULL); 2767ec681f3Smrg ctx->pred = instr; 2777ec681f3Smrg } 2787ec681f3Smrg 2797ec681f3Smrg instr->flags |= IR3_INSTR_MARK; 2807ec681f3Smrg 2817ec681f3Smrg di(instr, "schedule"); 2827ec681f3Smrg 2837ec681f3Smrg list_addtail(&instr->node, &instr->block->instr_list); 2847ec681f3Smrg ctx->scheduled = instr; 2857ec681f3Smrg 2867ec681f3Smrg if (is_kill_or_demote(instr)) { 2877ec681f3Smrg assert(ctx->remaining_kills > 0); 2887ec681f3Smrg ctx->remaining_kills--; 2897ec681f3Smrg } 2907ec681f3Smrg 2917ec681f3Smrg struct ir3_sched_node *n = instr->data; 2927ec681f3Smrg 2937ec681f3Smrg /* If this instruction is a meta:collect src, mark the remaining 2947ec681f3Smrg * collect srcs as partially live. 
2957ec681f3Smrg */ 2967ec681f3Smrg if (n->collect) { 2977ec681f3Smrg foreach_ssa_src (src, n->collect) { 2987ec681f3Smrg if (src->block != instr->block) 2997ec681f3Smrg continue; 3007ec681f3Smrg struct ir3_sched_node *sn = src->data; 3017ec681f3Smrg sn->partially_live = true; 3027ec681f3Smrg } 3037ec681f3Smrg } 3047ec681f3Smrg 3057ec681f3Smrg dag_prune_head(ctx->dag, &n->dag); 3067ec681f3Smrg 3077ec681f3Smrg unsigned cycles = cycle_count(instr); 3087ec681f3Smrg 3097ec681f3Smrg if (is_sfu(instr)) { 3107ec681f3Smrg ctx->sfu_delay = 8; 3117ec681f3Smrg n->sfu_index = ctx->sfu_index++; 3127ec681f3Smrg } else if (!is_meta(instr) && 3137ec681f3Smrg sched_check_src_cond(instr, is_outstanding_sfu, ctx)) { 3147ec681f3Smrg ctx->sfu_delay = 0; 3157ec681f3Smrg ctx->first_outstanding_sfu_index = ctx->sfu_index; 3167ec681f3Smrg } else if (ctx->sfu_delay > 0) { 3177ec681f3Smrg ctx->sfu_delay -= MIN2(cycles, ctx->sfu_delay); 3187ec681f3Smrg } 3197ec681f3Smrg 3207ec681f3Smrg if (is_tex_or_prefetch(instr)) { 3217ec681f3Smrg /* NOTE that this isn't an attempt to hide texture fetch latency, 3227ec681f3Smrg * but an attempt to hide the cost of switching to another warp. 3237ec681f3Smrg * If we can, we'd like to try to schedule another texture fetch 3247ec681f3Smrg * before scheduling something that would sync. 
3257ec681f3Smrg */ 3267ec681f3Smrg ctx->tex_delay = 10; 3277ec681f3Smrg assert(ctx->remaining_tex > 0); 3287ec681f3Smrg ctx->remaining_tex--; 3297ec681f3Smrg n->tex_index = ctx->tex_index++; 3307ec681f3Smrg } else if (!is_meta(instr) && 3317ec681f3Smrg sched_check_src_cond(instr, is_outstanding_tex_or_prefetch, 3327ec681f3Smrg ctx)) { 3337ec681f3Smrg ctx->tex_delay = 0; 3347ec681f3Smrg ctx->first_outstanding_tex_index = ctx->tex_index; 3357ec681f3Smrg } else if (ctx->tex_delay > 0) { 3367ec681f3Smrg ctx->tex_delay -= MIN2(cycles, ctx->tex_delay); 3377ec681f3Smrg } 3387ec681f3Smrg} 339361fc4cbSmaya 3407ec681f3Smrgstruct ir3_sched_notes { 3417ec681f3Smrg /* there is at least one kill which could be scheduled, except 3427ec681f3Smrg * for unscheduled bary.f's: 3437ec681f3Smrg */ 3447ec681f3Smrg bool blocked_kill; 3457ec681f3Smrg /* there is at least one instruction that could be scheduled, 3467ec681f3Smrg * except for conflicting address/predicate register usage: 3477ec681f3Smrg */ 3487ec681f3Smrg bool addr0_conflict, addr1_conflict, pred_conflict; 3497ec681f3Smrg}; 350361fc4cbSmaya 3517ec681f3Smrg/* could an instruction be scheduled if specified ssa src was scheduled? */ 3527ec681f3Smrgstatic bool 3537ec681f3Smrgcould_sched(struct ir3_instruction *instr, struct ir3_instruction *src) 3547ec681f3Smrg{ 3557ec681f3Smrg foreach_ssa_src (other_src, instr) { 3567ec681f3Smrg /* if dependency not scheduled, we aren't ready yet: */ 3577ec681f3Smrg if ((src != other_src) && !is_scheduled(other_src)) { 3587ec681f3Smrg return false; 3597ec681f3Smrg } 3607ec681f3Smrg } 3617ec681f3Smrg return true; 3627ec681f3Smrg} 363361fc4cbSmaya 3647ec681f3Smrg/* Check if instruction is ok to schedule. Make sure it is not blocked 3657ec681f3Smrg * by use of addr/predicate register, etc. 
3667ec681f3Smrg */ 3677ec681f3Smrgstatic bool 3687ec681f3Smrgcheck_instr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes, 3697ec681f3Smrg struct ir3_instruction *instr) 3707ec681f3Smrg{ 3717ec681f3Smrg debug_assert(!is_scheduled(instr)); 3727ec681f3Smrg 3737ec681f3Smrg if (instr == ctx->split) { 3747ec681f3Smrg /* Don't schedule instructions created by splitting a a0.x/a1.x/p0.x 3757ec681f3Smrg * write until another "normal" instruction has been scheduled. 3767ec681f3Smrg */ 3777ec681f3Smrg return false; 3787ec681f3Smrg } 3797ec681f3Smrg 3807ec681f3Smrg if (ctx->remaining_kills && (is_tex(instr) || is_mem(instr))) { 3817ec681f3Smrg /* avoid texture/memory access if we have unscheduled kills 3827ec681f3Smrg * that could make the expensive operation unnecessary. By 3837ec681f3Smrg * definition, if there are remaining kills, and this instr 3847ec681f3Smrg * is not a dependency of a kill, there are other instructions 3857ec681f3Smrg * that we can choose from. 3867ec681f3Smrg */ 3877ec681f3Smrg struct ir3_sched_node *n = instr->data; 3887ec681f3Smrg if (!n->kill_path) 3897ec681f3Smrg return false; 3907ec681f3Smrg } 3917ec681f3Smrg 3927ec681f3Smrg /* For instructions that write address register we need to 3937ec681f3Smrg * make sure there is at least one instruction that uses the 3947ec681f3Smrg * addr value which is otherwise ready. 3957ec681f3Smrg * 3967ec681f3Smrg * NOTE if any instructions use pred register and have other 3977ec681f3Smrg * src args, we would need to do the same for writes_pred().. 
3987ec681f3Smrg */ 3997ec681f3Smrg if (writes_addr0(instr)) { 4007ec681f3Smrg struct ir3 *ir = instr->block->shader; 4017ec681f3Smrg bool ready = false; 4027ec681f3Smrg for (unsigned i = 0; (i < ir->a0_users_count) && !ready; i++) { 4037ec681f3Smrg struct ir3_instruction *indirect = ir->a0_users[i]; 4047ec681f3Smrg if (!indirect) 4057ec681f3Smrg continue; 4067ec681f3Smrg if (indirect->address->def != instr->dsts[0]) 4077ec681f3Smrg continue; 4087ec681f3Smrg ready = could_sched(indirect, instr); 4097ec681f3Smrg } 4107ec681f3Smrg 4117ec681f3Smrg /* nothing could be scheduled, so keep looking: */ 4127ec681f3Smrg if (!ready) 4137ec681f3Smrg return false; 4147ec681f3Smrg } 4157ec681f3Smrg 4167ec681f3Smrg if (writes_addr1(instr)) { 4177ec681f3Smrg struct ir3 *ir = instr->block->shader; 4187ec681f3Smrg bool ready = false; 4197ec681f3Smrg for (unsigned i = 0; (i < ir->a1_users_count) && !ready; i++) { 4207ec681f3Smrg struct ir3_instruction *indirect = ir->a1_users[i]; 4217ec681f3Smrg if (!indirect) 4227ec681f3Smrg continue; 4237ec681f3Smrg if (indirect->address->def != instr->dsts[0]) 4247ec681f3Smrg continue; 4257ec681f3Smrg ready = could_sched(indirect, instr); 4267ec681f3Smrg } 4277ec681f3Smrg 4287ec681f3Smrg /* nothing could be scheduled, so keep looking: */ 4297ec681f3Smrg if (!ready) 4307ec681f3Smrg return false; 4317ec681f3Smrg } 4327ec681f3Smrg 4337ec681f3Smrg /* if this is a write to address/predicate register, and that 4347ec681f3Smrg * register is currently in use, we need to defer until it is 4357ec681f3Smrg * free: 4367ec681f3Smrg */ 4377ec681f3Smrg if (writes_addr0(instr) && ctx->addr0) { 4387ec681f3Smrg debug_assert(ctx->addr0 != instr); 4397ec681f3Smrg notes->addr0_conflict = true; 4407ec681f3Smrg return false; 4417ec681f3Smrg } 4427ec681f3Smrg 4437ec681f3Smrg if (writes_addr1(instr) && ctx->addr1) { 4447ec681f3Smrg debug_assert(ctx->addr1 != instr); 4457ec681f3Smrg notes->addr1_conflict = true; 4467ec681f3Smrg return false; 4477ec681f3Smrg } 
4487ec681f3Smrg 4497ec681f3Smrg if (writes_pred(instr) && ctx->pred) { 4507ec681f3Smrg debug_assert(ctx->pred != instr); 4517ec681f3Smrg notes->pred_conflict = true; 4527ec681f3Smrg return false; 4537ec681f3Smrg } 4547ec681f3Smrg 4557ec681f3Smrg /* if the instruction is a kill, we need to ensure *every* 4567ec681f3Smrg * bary.f is scheduled. The hw seems unhappy if the thread 4577ec681f3Smrg * gets killed before the end-input (ei) flag is hit. 4587ec681f3Smrg * 4597ec681f3Smrg * We could do this by adding each bary.f instruction as 4607ec681f3Smrg * virtual ssa src for the kill instruction. But we have 4617ec681f3Smrg * fixed length instr->srcs[]. 4627ec681f3Smrg * 4637ec681f3Smrg * TODO we could handle this by false-deps now, probably. 4647ec681f3Smrg */ 4657ec681f3Smrg if (is_kill_or_demote(instr)) { 4667ec681f3Smrg struct ir3 *ir = instr->block->shader; 4677ec681f3Smrg 4687ec681f3Smrg for (unsigned i = 0; i < ir->baryfs_count; i++) { 4697ec681f3Smrg struct ir3_instruction *baryf = ir->baryfs[i]; 4707ec681f3Smrg if (baryf->flags & IR3_INSTR_UNUSED) 4717ec681f3Smrg continue; 4727ec681f3Smrg if (!is_scheduled(baryf)) { 4737ec681f3Smrg notes->blocked_kill = true; 4747ec681f3Smrg return false; 4757ec681f3Smrg } 4767ec681f3Smrg } 4777ec681f3Smrg } 4787ec681f3Smrg 4797ec681f3Smrg return true; 4807ec681f3Smrg} 481361fc4cbSmaya 4827ec681f3Smrg/* Find the instr->ip of the closest use of an instruction, in 4837ec681f3Smrg * pre-sched order. This isn't going to be the same as post-sched 4847ec681f3Smrg * order, but it is a reasonable approximation to limit scheduling 4857ec681f3Smrg * instructions *too* early. This is mostly to prevent bad behavior 4867ec681f3Smrg * in cases where we have a large number of possible instructions 4877ec681f3Smrg * to choose, to avoid creating too much parallelism (ie. 
blowing 4887ec681f3Smrg * up register pressure) 4897ec681f3Smrg * 4907ec681f3Smrg * See 4917ec681f3Smrg * dEQP-GLES31.functional.atomic_counter.layout.reverse_offset.inc_dec.8_counters_5_calls_1_thread 4927ec681f3Smrg */ 4937ec681f3Smrgstatic int 4947ec681f3Smrgnearest_use(struct ir3_instruction *instr) 4957ec681f3Smrg{ 4967ec681f3Smrg unsigned nearest = ~0; 4977ec681f3Smrg foreach_ssa_use (use, instr) 4987ec681f3Smrg if (!is_scheduled(use)) 4997ec681f3Smrg nearest = MIN2(nearest, use->ip); 5007ec681f3Smrg 5017ec681f3Smrg /* slight hack.. this heuristic tends to push bary.f's to later 5027ec681f3Smrg * in the shader, closer to their uses. But we actually would 5037ec681f3Smrg * prefer to get these scheduled earlier, to unlock varying 5047ec681f3Smrg * storage for more VS jobs: 5057ec681f3Smrg */ 5067ec681f3Smrg if (is_input(instr)) 5077ec681f3Smrg nearest /= 2; 5087ec681f3Smrg 5097ec681f3Smrg return nearest; 5107ec681f3Smrg} 511361fc4cbSmaya 5127ec681f3Smrgstatic bool 5137ec681f3Smrgis_only_nonscheduled_use(struct ir3_instruction *instr, 5147ec681f3Smrg struct ir3_instruction *use) 5157ec681f3Smrg{ 5167ec681f3Smrg foreach_ssa_use (other_use, instr) { 5177ec681f3Smrg if (other_use != use && !is_scheduled(other_use)) 5187ec681f3Smrg return false; 5197ec681f3Smrg } 520361fc4cbSmaya 5217ec681f3Smrg return true; 522361fc4cbSmaya} 523361fc4cbSmaya 5247ec681f3Smrg/* find net change to live values if instruction were scheduled: */ 5257ec681f3Smrgstatic int 5267ec681f3Smrglive_effect(struct ir3_instruction *instr) 527361fc4cbSmaya{ 5287ec681f3Smrg struct ir3_sched_node *n = instr->data; 5297ec681f3Smrg int new_live = 5307ec681f3Smrg (n->partially_live || !instr->uses || instr->uses->entries == 0) 5317ec681f3Smrg ? 
0 5327ec681f3Smrg : dest_regs(instr); 5337ec681f3Smrg int freed_live = 0; 5347ec681f3Smrg 5357ec681f3Smrg /* if we schedule something that causes a vecN to be live, 5367ec681f3Smrg * then count all it's other components too: 5377ec681f3Smrg */ 5387ec681f3Smrg if (n->collect) 5397ec681f3Smrg new_live *= n->collect->srcs_count; 5407ec681f3Smrg 5417ec681f3Smrg foreach_ssa_src_n (src, n, instr) { 5427ec681f3Smrg if (__is_false_dep(instr, n)) 5437ec681f3Smrg continue; 5447ec681f3Smrg 5457ec681f3Smrg if (instr->block != src->block) 5467ec681f3Smrg continue; 5477ec681f3Smrg 5487ec681f3Smrg if (is_only_nonscheduled_use(src, instr)) 5497ec681f3Smrg freed_live += dest_regs(src); 5507ec681f3Smrg } 5517ec681f3Smrg 5527ec681f3Smrg return new_live - freed_live; 5537ec681f3Smrg} 554361fc4cbSmaya 5557ec681f3Smrg/* Determine if this is an instruction that we'd prefer not to schedule 5567ec681f3Smrg * yet, in order to avoid an (ss)/(sy) sync. This is limited by the 5577ec681f3Smrg * sfu_delay/tex_delay counters, ie. the more cycles it has been since 5587ec681f3Smrg * the last SFU/tex, the less costly a sync would be, and the number of 5597ec681f3Smrg * outstanding SFU/tex instructions to prevent a blowup in register pressure. 
5607ec681f3Smrg */ 5617ec681f3Smrgstatic bool 5627ec681f3Smrgshould_defer(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr) 5637ec681f3Smrg{ 5647ec681f3Smrg if (ctx->sfu_delay) { 5657ec681f3Smrg if (sched_check_src_cond(instr, is_outstanding_sfu, ctx)) 5667ec681f3Smrg return true; 5677ec681f3Smrg } 5687ec681f3Smrg 5697ec681f3Smrg /* We mostly just want to try to schedule another texture fetch 5707ec681f3Smrg * before scheduling something that would (sy) sync, so we can 5717ec681f3Smrg * limit this rule to cases where there are remaining texture 5727ec681f3Smrg * fetches 5737ec681f3Smrg */ 5747ec681f3Smrg if (ctx->tex_delay && ctx->remaining_tex) { 5757ec681f3Smrg if (sched_check_src_cond(instr, is_outstanding_tex_or_prefetch, ctx)) 5767ec681f3Smrg return true; 5777ec681f3Smrg } 5787ec681f3Smrg 5797ec681f3Smrg /* Avoid scheduling too many outstanding texture or sfu instructions at 5807ec681f3Smrg * once by deferring further tex/SFU instructions. This both prevents 5817ec681f3Smrg * stalls when the queue of texture/sfu instructions becomes too large, 5827ec681f3Smrg * and prevents unacceptably large increases in register pressure from too 5837ec681f3Smrg * many outstanding texture instructions. 
5847ec681f3Smrg */ 5857ec681f3Smrg if (ctx->tex_index - ctx->first_outstanding_tex_index >= 8 && is_tex(instr)) 5867ec681f3Smrg return true; 5877ec681f3Smrg 5887ec681f3Smrg if (ctx->sfu_index - ctx->first_outstanding_sfu_index >= 8 && is_sfu(instr)) 5897ec681f3Smrg return true; 5907ec681f3Smrg 5917ec681f3Smrg return false; 5927ec681f3Smrg} 593361fc4cbSmaya 5947ec681f3Smrgstatic struct ir3_sched_node *choose_instr_inc(struct ir3_sched_ctx *ctx, 5957ec681f3Smrg struct ir3_sched_notes *notes, 5967ec681f3Smrg bool defer, bool avoid_output); 597361fc4cbSmaya 5987ec681f3Smrgenum choose_instr_dec_rank { 5997ec681f3Smrg DEC_NEUTRAL, 6007ec681f3Smrg DEC_NEUTRAL_READY, 6017ec681f3Smrg DEC_FREED, 6027ec681f3Smrg DEC_FREED_READY, 6037ec681f3Smrg}; 604361fc4cbSmaya 6057ec681f3Smrgstatic const char * 6067ec681f3Smrgdec_rank_name(enum choose_instr_dec_rank rank) 6077ec681f3Smrg{ 6087ec681f3Smrg switch (rank) { 6097ec681f3Smrg case DEC_NEUTRAL: 6107ec681f3Smrg return "neutral"; 6117ec681f3Smrg case DEC_NEUTRAL_READY: 6127ec681f3Smrg return "neutral+ready"; 6137ec681f3Smrg case DEC_FREED: 6147ec681f3Smrg return "freed"; 6157ec681f3Smrg case DEC_FREED_READY: 6167ec681f3Smrg return "freed+ready"; 6177ec681f3Smrg default: 6187ec681f3Smrg return NULL; 6197ec681f3Smrg } 620361fc4cbSmaya} 621361fc4cbSmaya 622361fc4cbSmaya/** 6237ec681f3Smrg * Chooses an instruction to schedule using the Goodman/Hsu (1988) CSR (Code 6247ec681f3Smrg * Scheduling for Register pressure) heuristic. 6257ec681f3Smrg * 6267ec681f3Smrg * Only handles the case of choosing instructions that reduce register pressure 6277ec681f3Smrg * or are even. 628361fc4cbSmaya */ 6297ec681f3Smrgstatic struct ir3_sched_node * 6307ec681f3Smrgchoose_instr_dec(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes, 6317ec681f3Smrg bool defer) 632361fc4cbSmaya{ 6337ec681f3Smrg const char *mode = defer ? 
"-d" : ""; 6347ec681f3Smrg struct ir3_sched_node *chosen = NULL; 6357ec681f3Smrg enum choose_instr_dec_rank chosen_rank = DEC_NEUTRAL; 6367ec681f3Smrg 6377ec681f3Smrg foreach_sched_node (n, &ctx->dag->heads) { 6387ec681f3Smrg if (defer && should_defer(ctx, n->instr)) 6397ec681f3Smrg continue; 6407ec681f3Smrg 6417ec681f3Smrg /* Note: mergedregs is only used post-RA, just set it to false */ 6427ec681f3Smrg unsigned d = ir3_delay_calc_prera(ctx->block, n->instr); 6437ec681f3Smrg 6447ec681f3Smrg int live = live_effect(n->instr); 6457ec681f3Smrg if (live > 0) 6467ec681f3Smrg continue; 6477ec681f3Smrg 6487ec681f3Smrg if (!check_instr(ctx, notes, n->instr)) 6497ec681f3Smrg continue; 6507ec681f3Smrg 6517ec681f3Smrg enum choose_instr_dec_rank rank; 6527ec681f3Smrg if (live < 0) { 6537ec681f3Smrg /* Prioritize instrs which free up regs and can be scheduled with no 6547ec681f3Smrg * delay. 6557ec681f3Smrg */ 6567ec681f3Smrg if (d == 0) 6577ec681f3Smrg rank = DEC_FREED_READY; 6587ec681f3Smrg else 6597ec681f3Smrg rank = DEC_FREED; 6607ec681f3Smrg } else { 6617ec681f3Smrg /* Contra the paper, pick a leader with no effect on used regs. This 6627ec681f3Smrg * may open up new opportunities, as otherwise a single-operand instr 6637ec681f3Smrg * consuming a value will tend to block finding freeing that value. 6647ec681f3Smrg * This had a massive effect on reducing spilling on V3D. 6657ec681f3Smrg * 6667ec681f3Smrg * XXX: Should this prioritize ready? 6677ec681f3Smrg */ 6687ec681f3Smrg if (d == 0) 6697ec681f3Smrg rank = DEC_NEUTRAL_READY; 6707ec681f3Smrg else 6717ec681f3Smrg rank = DEC_NEUTRAL; 6727ec681f3Smrg } 6737ec681f3Smrg 6747ec681f3Smrg /* Prefer higher-ranked instructions, or in the case of a rank tie, the 6757ec681f3Smrg * highest latency-to-end-of-program instruction. 
 */
      if (!chosen || rank > chosen_rank ||
          (rank == chosen_rank && chosen->max_delay < n->max_delay)) {
         chosen = n;
         chosen_rank = rank;
      }
   }

   if (chosen) {
      di(chosen->instr, "dec%s: chose (%s)", mode, dec_rank_name(chosen_rank));
      return chosen;
   }

   /* Nothing reduces (or holds) register pressure; fall back to the
    * least-bad pressure-increasing choice, avoiding outputs:
    */
   return choose_instr_inc(ctx, notes, defer, true);
}

/* Ranks for choose_instr_inc(), in increasing order of preference: */
enum choose_instr_inc_rank {
   INC_DISTANCE,       /* candidate still has outstanding delay slots */
   INC_DISTANCE_READY, /* candidate is ready now (zero delay) */
};

static const char *
inc_rank_name(enum choose_instr_inc_rank rank)
{
   switch (rank) {
   case INC_DISTANCE:
      return "distance";
   case INC_DISTANCE_READY:
      return "distance+ready";
   default:
      return NULL;
   }
}

/**
 * When we can't choose an instruction that reduces register pressure or
 * is neutral, we end up here to try and pick the least bad option.
 */
static struct ir3_sched_node *
choose_instr_inc(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
                 bool defer, bool avoid_output)
{
   const char *mode = defer ? "-d" : "";
   struct ir3_sched_node *chosen = NULL;
   enum choose_instr_inc_rank chosen_rank = INC_DISTANCE;

   /*
    * From here on out, we are picking something that increases
    * register pressure.  So try to pick something which will
    * be consumed soon:
    */
   unsigned chosen_distance = 0;

   /* Pick the max delay of the remaining ready set. */
   foreach_sched_node (n, &ctx->dag->heads) {
      if (avoid_output && n->output)
         continue;

      if (defer && should_defer(ctx, n->instr))
         continue;

      if (!check_instr(ctx, notes, n->instr))
         continue;

      unsigned d = ir3_delay_calc_prera(ctx->block, n->instr);

      /* Prefer candidates with no remaining delay slots: */
      enum choose_instr_inc_rank rank;
      if (d == 0)
         rank = INC_DISTANCE_READY;
      else
         rank = INC_DISTANCE;

      unsigned distance = nearest_use(n->instr);

      /* Within a rank, prefer the candidate whose result is consumed
       * soonest (smallest distance to nearest use):
       */
      if (!chosen || rank > chosen_rank ||
          (rank == chosen_rank && distance < chosen_distance)) {
         chosen = n;
         chosen_distance = distance;
         chosen_rank = rank;
      }
   }

   if (chosen) {
      di(chosen->instr, "inc%s: chose (%s)", mode, inc_rank_name(chosen_rank));
      return chosen;
   }

   return NULL;
}

/* Handles instruction selections for instructions we want to prioritize
 * even if csp/csr would not pick them.
 */
static struct ir3_sched_node *
choose_instr_prio(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes)
{
   struct ir3_sched_node *chosen = NULL;

   foreach_sched_node (n, &ctx->dag->heads) {
      /*
       * - phi nodes and inputs must be scheduled first
       * - split should be scheduled first, so that the vector value is
       *   killed as soon as possible. RA cannot split up the vector and
       *   reuse components that have been killed until it's been killed.
       * - collect, on the other hand, should be treated as a "normal"
       *   instruction, and may add to register pressure if its sources are
       *   part of another vector or immediates.
       */
      if (!is_meta(n->instr) || n->instr->opc == OPC_META_COLLECT)
         continue;

      /* Among prioritized meta instructions, pick the one on the
       * longest (max_delay) path:
       */
      if (!chosen || (chosen->max_delay < n->max_delay))
         chosen = n;
   }

   if (chosen) {
      di(chosen->instr, "prio: chose (meta)");
      return chosen;
   }

   return NULL;
}

/* Debug-dump the current ready set (DAG heads) and their children: */
static void
dump_state(struct ir3_sched_ctx *ctx)
{
   if (!SCHED_DEBUG)
      return;

   foreach_sched_node (n, &ctx->dag->heads) {
      di(n->instr, "maxdel=%3d le=%d del=%u ", n->max_delay,
         live_effect(n->instr), ir3_delay_calc_prera(ctx->block, n->instr));

      util_dynarray_foreach (&n->dag.edges, struct dag_edge, edge) {
         struct ir3_sched_node *child = (struct ir3_sched_node *)edge->child;

         di(child->instr, " -> (%d parents) ", child->dag.parent_count);
      }
   }
}

/* find instruction to schedule: tries each heuristic in decreasing order
 * of preference -- prioritized meta instructions first, then choices that
 * decrease (deferred, then non-deferred) register pressure, and finally
 * the least-bad pressure-increasing choice:
 */
static struct ir3_instruction *
choose_instr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes)
{
   struct ir3_sched_node *chosen;

   dump_state(ctx);

   chosen = choose_instr_prio(ctx, notes);
   if (chosen)
      return chosen->instr;

   chosen = choose_instr_dec(ctx, notes, true);
   if (chosen)
      return chosen->instr;

   chosen = choose_instr_dec(ctx, notes, false);
   if (chosen)
      return chosen->instr;

   chosen = choose_instr_inc(ctx, notes, false, false);
   if (chosen)
      return chosen->instr;

   return NULL;
}

/* Clone an instruction and register the clone with the scheduler, so the
 * clone can be scheduled independently (used by split_addr()/split_pred()
 * to break address/predicate register deadlocks):
 */
static struct ir3_instruction *
split_instr(struct ir3_sched_ctx *ctx, struct ir3_instruction *orig_instr)
{
   struct ir3_instruction *new_instr = ir3_instr_clone(orig_instr);
   di(new_instr, "split instruction");
   sched_node_init(ctx, new_instr);
   return new_instr;
}

/* "spill" the address registers by remapping any unscheduled
 * instructions which depend on the current address register
 * to a clone of the instruction which wrote the address reg.
 */
static struct ir3_instruction *
split_addr(struct ir3_sched_ctx *ctx, struct ir3_instruction **addr,
           struct ir3_instruction **users, unsigned users_count)
{
   struct ir3_instruction *new_addr = NULL;
   unsigned i;

   debug_assert(*addr);

   for (i = 0; i < users_count; i++) {
      struct ir3_instruction *indirect = users[i];

      if (!indirect)
         continue;

      /* skip instructions already scheduled: */
      if (is_scheduled(indirect))
         continue;

      /* remap remaining instructions using current addr
       * to new addr:
       */
      if (indirect->address->def == (*addr)->dsts[0]) {
         /* lazily create the clone, only once we find an actual user: */
         if (!new_addr) {
            new_addr = split_instr(ctx, *addr);
            /* original addr is scheduled, but new one isn't: */
            new_addr->flags &= ~IR3_INSTR_MARK;
         }
         indirect->address->def = new_addr->dsts[0];
         /* don't need to remove old dag edge since old addr is
          * already scheduled:
          */
         sched_node_add_dep(indirect, new_addr, 0);
         di(indirect, "new address");
      }
   }

   /* all remaining indirects remapped to new addr: */
   *addr = NULL;

   return new_addr;
}

/* "spill" the predicate register by remapping any unscheduled
 * instructions which depend on the current predicate register
 * to a clone of the instruction which wrote the predicate reg.
 */
static struct ir3_instruction *
split_pred(struct ir3_sched_ctx *ctx)
{
   struct ir3 *ir;
   struct ir3_instruction *new_pred = NULL;
   unsigned i;

   debug_assert(ctx->pred);

   ir = ctx->pred->block->shader;

   for (i = 0; i < ir->predicates_count; i++) {
      struct ir3_instruction *predicated = ir->predicates[i];

      if (!predicated)
         continue;

      /* skip instructions already scheduled: */
      if (is_scheduled(predicated))
         continue;

      /* remap remaining instructions using current pred
       * to new pred:
       *
       * TODO is there ever a case when pred isn't first
       * (and only) src?
       */
      if (ssa(predicated->srcs[0]) == ctx->pred) {
         /* lazily create the clone, only once we find an actual user: */
         if (!new_pred) {
            new_pred = split_instr(ctx, ctx->pred);
            /* original pred is scheduled, but new one isn't: */
            new_pred->flags &= ~IR3_INSTR_MARK;
         }
         predicated->srcs[0]->instr = new_pred;
         /* don't need to remove old dag edge since old pred is
          * already scheduled:
          */
         sched_node_add_dep(predicated, new_pred, 0);
         di(predicated, "new predicate");
      }
   }

   /* the block's branch condition may also consume the predicate: */
   if (ctx->block->condition == ctx->pred) {
      if (!new_pred) {
         new_pred = split_instr(ctx, ctx->pred);
         /* original pred is scheduled, but new one isn't: */
         new_pred->flags &= ~IR3_INSTR_MARK;
      }
      ctx->block->condition = new_pred;
      d("new branch condition");
   }

   /* all remaining predicated remapped to new pred: */
   ctx->pred = NULL;

   return new_pred;
}

/* Allocate a scheduler node for 'instr' and link the two together: */
static void
sched_node_init(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr)
{
   struct ir3_sched_node *n = rzalloc(ctx->dag, struct ir3_sched_node);

   dag_init_node(ctx->dag, &n->dag);

   n->instr = instr;
   instr->data = n;
}

/* Record in the DAG that 'instr' depends on its i'th ssa src 'src'
 * (edge from src's node to instr's node), and accumulate the worst-case
 * delay-slot requirement into instr's node:
 */
static void
sched_node_add_dep(struct ir3_instruction *instr, struct ir3_instruction *src,
                   int i)
{
   /* don't consider dependencies in other blocks: */
   if (src->block != instr->block)
      return;

   /* we could have false-dep's that end up unused: */
   if (src->flags & IR3_INSTR_UNUSED) {
      debug_assert(__is_false_dep(instr, i));
      return;
   }

   struct ir3_sched_node *n = instr->data;
   struct ir3_sched_node *sn = src->data;

   /* If src is consumed by a collect, track that to realize that once
    * any of the collect srcs are live, we should hurry up and schedule
    * the rest.
    */
   if (instr->opc == OPC_META_COLLECT)
      sn->collect = instr;

   dag_add_edge(&sn->dag, &n->dag, NULL);

   unsigned d = ir3_delayslots(src, instr, i, true);

   n->delay = MAX2(n->delay, d);
}

/* Recursively mark 'instr' and its same-block ssa srcs as being on a
 * path leading to a kill (or input), so they get prioritized:
 */
static void
mark_kill_path(struct ir3_instruction *instr)
{
   struct ir3_sched_node *n = instr->data;

   /* already visited -- avoids revisiting shared subtrees: */
   if (n->kill_path) {
      return;
   }

   n->kill_path = true;

   foreach_ssa_src (src, instr) {
      if (src->block != instr->block)
         continue;
      mark_kill_path(src);
   }
}

/* Is it an output? ie. a collect whose uses are all END/CHMASK: */
static bool
is_output_collect(struct ir3_instruction *instr)
{
   if (instr->opc != OPC_META_COLLECT)
      return false;

   foreach_ssa_use (use, instr) {
      if (use->opc != OPC_END && use->opc != OPC_CHMASK)
         return false;
   }

   return true;
}

/* Is its only use as an output? */
static bool
is_output_only(struct ir3_instruction *instr)
{
   if (!writes_gpr(instr))
      return false;

   if (!(instr->dsts[0]->flags & IR3_REG_SSA))
      return false;

   foreach_ssa_use (use, instr)
      if (!is_output_collect(use))
         return false;

   return true;
}

/* Populate the DAG edges and node flags for a single instruction: */
static void
sched_node_add_deps(struct ir3_instruction *instr)
{
   /* There's nothing to do for phi nodes, since they always go first. And
    * phi nodes can reference sources later in the same block, so handling
    * sources is not only unnecessary but could cause problems.
    */
   if (instr->opc == OPC_META_PHI)
      return;

   /* Since foreach_ssa_src() already handles false-dep's we can construct
    * the DAG easily in a single pass.
    */
   foreach_ssa_src_n (src, i, instr) {
      sched_node_add_dep(instr, src, i);
   }

   /* NOTE that all inputs must be scheduled before a kill, so
    * mark these to be prioritized as well:
    */
   if (is_kill_or_demote(instr) || is_input(instr)) {
      mark_kill_path(instr);
   }

   if (is_output_only(instr)) {
      struct ir3_sched_node *n = instr->data;
      n->output = true;
   }
}

/* Bottom-up DAG traversal callback: max_delay accumulates the node's
 * own delay plus the largest max_delay among its children:
 */
static void
sched_dag_max_delay_cb(struct dag_node *node, void *state)
{
   struct ir3_sched_node *n = (struct ir3_sched_node *)node;
   uint32_t max_delay = 0;

   util_dynarray_foreach (&n->dag.edges, struct dag_edge, edge) {
      struct ir3_sched_node *child = (struct ir3_sched_node *)edge->child;
      max_delay = MAX2(child->max_delay, max_delay);
   }

   n->max_delay = MAX2(n->max_delay, max_delay + n->delay);
}

/* Build the scheduling DAG for the current block's unscheduled list: */
static void
sched_dag_init(struct ir3_sched_ctx *ctx)
{
   ctx->dag = dag_create(ctx);

   foreach_instr (instr, &ctx->unscheduled_list) {
      sched_node_init(ctx, instr);
      sched_node_add_deps(instr);
   }

   dag_traverse_bottom_up(ctx->dag, sched_dag_max_delay_cb, NULL);
}

/* Tear down the DAG (freeing all nodes, which are ralloc'd off it): */
static void
sched_dag_destroy(struct ir3_sched_ctx *ctx)
{
   ralloc_free(ctx->dag);
   ctx->dag = NULL;
}
static void
sched_block(struct ir3_sched_ctx *ctx, struct ir3_block *block)
{
   ctx->block = block;

   /* addr/pred writes are per-block: */
   ctx->addr0 = NULL;
   ctx->addr1 = NULL;
   ctx->pred = NULL;
   ctx->tex_delay = 0;
   ctx->sfu_delay = 0;
   ctx->tex_index = ctx->first_outstanding_tex_index = 0;
   ctx->sfu_index = ctx->first_outstanding_sfu_index = 0;

   /* move all instructions to the unscheduled list, and
    * empty the block's instruction list (to which we will
    * be inserting).
    */
   list_replace(&block->instr_list, &ctx->unscheduled_list);
   list_inithead(&block->instr_list);

   sched_dag_init(ctx);

   /* count kills and tex/prefetch instrs up front, used by the
    * scheduling heuristics:
    */
   ctx->remaining_kills = 0;
   ctx->remaining_tex = 0;
   foreach_instr_safe (instr, &ctx->unscheduled_list) {
      if (is_kill_or_demote(instr))
         ctx->remaining_kills++;
      if (is_tex_or_prefetch(instr))
         ctx->remaining_tex++;
   }

   /* First schedule all meta:input and meta:phi instructions, followed by
    * tex-prefetch.  We want all of the instructions that load values into
    * registers before the shader starts to go before any other instructions.
    * But in particular we want inputs to come before prefetches.  This is
    * because a FS's bary_ij input may not actually be live in the shader,
    * but it should not be scheduled on top of any other input (but can be
    * overwritten by a tex prefetch)
    *
    * Note: Because the first block cannot have predecessors, meta:input and
    * meta:phi cannot exist in the same block.
    */
   foreach_instr_safe (instr, &ctx->unscheduled_list)
      if (instr->opc == OPC_META_INPUT || instr->opc == OPC_META_PHI)
         schedule(ctx, instr);

   foreach_instr_safe (instr, &ctx->unscheduled_list)
      if (instr->opc == OPC_META_TEX_PREFETCH)
         schedule(ctx, instr);

   /* main loop: pick an instruction, pad with nop's as needed, repeat
    * until nothing remains unscheduled:
    */
   while (!list_is_empty(&ctx->unscheduled_list)) {
      struct ir3_sched_notes notes = {0};
      struct ir3_instruction *instr;

      instr = choose_instr(ctx, &notes);
      if (instr) {
         unsigned delay = ir3_delay_calc_prera(ctx->block, instr);
         d("delay=%u", delay);

         /* and if we run out of instructions that can be scheduled,
          * then it is time for nop's:
          */
         debug_assert(delay <= 6);
         while (delay > 0) {
            ir3_NOP(block);
            delay--;
         }

         schedule(ctx, instr);

         /* Since we've scheduled a "real" instruction, we can now
          * schedule any split instruction created by the scheduler again.
          */
         ctx->split = NULL;
      } else {
         struct ir3_instruction *new_instr = NULL;
         struct ir3 *ir = block->shader;

         /* nothing available to schedule.. if we are blocked on
          * address/predicate register conflict, then break the
          * deadlock by cloning the instruction that wrote that
          * reg:
          */
         if (notes.addr0_conflict) {
            new_instr =
               split_addr(ctx, &ctx->addr0, ir->a0_users, ir->a0_users_count);
         } else if (notes.addr1_conflict) {
            new_instr =
               split_addr(ctx, &ctx->addr1, ir->a1_users, ir->a1_users_count);
         } else if (notes.pred_conflict) {
            new_instr = split_pred(ctx);
         } else {
            /* no known conflict to break -- this is a scheduler bug: */
            d("unscheduled_list:");
            foreach_instr (instr, &ctx->unscheduled_list)
               di(instr, "unscheduled: ");
            debug_assert(0);
            ctx->error = true;
            return;
         }

         if (new_instr) {
            list_delinit(&new_instr->node);
            list_addtail(&new_instr->node, &ctx->unscheduled_list);
         }

         /* If we produced a new instruction, do not schedule it next to
          * guarantee progress.
          */
         ctx->split = new_instr;
      }
   }

   sched_dag_destroy(ctx);
}

/* Pre-RA scheduling entry point.  Returns 0 on success, negative on
 * scheduling failure:
 */
int
ir3_sched(struct ir3 *ir)
{
   struct ir3_sched_ctx *ctx = rzalloc(NULL, struct ir3_sched_ctx);

   /* clear stale per-instruction scheduler state: */
   foreach_block (block, &ir->block_list) {
      foreach_instr (instr, &block->instr_list) {
         instr->data = NULL;
      }
   }

   ir3_count_instructions(ir);
   ir3_clear_mark(ir);
   ir3_find_ssa_uses(ir, ctx, false);

   foreach_block (block, &ir->block_list) {
      sched_block(ctx, block);
   }

   int ret = ctx->error ? -1 : 0;

   ralloc_free(ctx);

   return ret;
}

/* Return the array id of the (single) array src or dst of 'instr': */
static unsigned
get_array_id(struct ir3_instruction *instr)
{
   /* The expectation is that there is only a single array
    * src or dst, ir3_cp should enforce this.
    */

   foreach_dst (dst, instr)
      if (dst->flags & IR3_REG_ARRAY)
         return dst->array.id;
   foreach_src (src, instr)
      if (src->flags & IR3_REG_ARRAY)
         return src->array.id;

   unreachable("this was unexpected");
}

/* does instruction 'prior' need to be scheduled before 'instr'?
*/ 12787ec681f3Smrgstatic bool 12797ec681f3Smrgdepends_on(struct ir3_instruction *instr, struct ir3_instruction *prior) 12807ec681f3Smrg{ 12817ec681f3Smrg /* TODO for dependencies that are related to a specific object, ie 12827ec681f3Smrg * a specific SSBO/image/array, we could relax this constraint to 12837ec681f3Smrg * make accesses to unrelated objects not depend on each other (at 12847ec681f3Smrg * least as long as not declared coherent) 12857ec681f3Smrg */ 12867ec681f3Smrg if (((instr->barrier_class & IR3_BARRIER_EVERYTHING) && 12877ec681f3Smrg prior->barrier_class) || 12887ec681f3Smrg ((prior->barrier_class & IR3_BARRIER_EVERYTHING) && 12897ec681f3Smrg instr->barrier_class)) 12907ec681f3Smrg return true; 12917ec681f3Smrg 12927ec681f3Smrg if (instr->barrier_class & prior->barrier_conflict) { 12937ec681f3Smrg if (!(instr->barrier_class & 12947ec681f3Smrg ~(IR3_BARRIER_ARRAY_R | IR3_BARRIER_ARRAY_W))) { 12957ec681f3Smrg /* if only array barrier, then we can further limit false-deps 12967ec681f3Smrg * by considering the array-id, ie reads/writes to different 12977ec681f3Smrg * arrays do not depend on each other (no aliasing) 12987ec681f3Smrg */ 12997ec681f3Smrg if (get_array_id(instr) != get_array_id(prior)) { 13007ec681f3Smrg return false; 13017ec681f3Smrg } 13027ec681f3Smrg } 13037ec681f3Smrg 13047ec681f3Smrg return true; 13057ec681f3Smrg } 13067ec681f3Smrg 13077ec681f3Smrg return false; 13087ec681f3Smrg} 1309361fc4cbSmaya 13107ec681f3Smrgstatic void 13117ec681f3Smrgadd_barrier_deps(struct ir3_block *block, struct ir3_instruction *instr) 13127ec681f3Smrg{ 13137ec681f3Smrg struct list_head *prev = instr->node.prev; 13147ec681f3Smrg struct list_head *next = instr->node.next; 13157ec681f3Smrg 13167ec681f3Smrg /* add dependencies on previous instructions that must be scheduled 13177ec681f3Smrg * prior to the current instruction 13187ec681f3Smrg */ 13197ec681f3Smrg while (prev != &block->instr_list) { 13207ec681f3Smrg struct ir3_instruction *pi = 13217ec681f3Smrg 
LIST_ENTRY(struct ir3_instruction, prev, node); 13227ec681f3Smrg 13237ec681f3Smrg prev = prev->prev; 13247ec681f3Smrg 13257ec681f3Smrg if (is_meta(pi)) 13267ec681f3Smrg continue; 13277ec681f3Smrg 13287ec681f3Smrg if (instr->barrier_class == pi->barrier_class) { 13297ec681f3Smrg ir3_instr_add_dep(instr, pi); 13307ec681f3Smrg break; 13317ec681f3Smrg } 13327ec681f3Smrg 13337ec681f3Smrg if (depends_on(instr, pi)) 13347ec681f3Smrg ir3_instr_add_dep(instr, pi); 13357ec681f3Smrg } 13367ec681f3Smrg 13377ec681f3Smrg /* add dependencies on this instruction to following instructions 13387ec681f3Smrg * that must be scheduled after the current instruction: 13397ec681f3Smrg */ 13407ec681f3Smrg while (next != &block->instr_list) { 13417ec681f3Smrg struct ir3_instruction *ni = 13427ec681f3Smrg LIST_ENTRY(struct ir3_instruction, next, node); 13437ec681f3Smrg 13447ec681f3Smrg next = next->next; 13457ec681f3Smrg 13467ec681f3Smrg if (is_meta(ni)) 13477ec681f3Smrg continue; 13487ec681f3Smrg 13497ec681f3Smrg if (instr->barrier_class == ni->barrier_class) { 13507ec681f3Smrg ir3_instr_add_dep(ni, instr); 13517ec681f3Smrg break; 13527ec681f3Smrg } 13537ec681f3Smrg 13547ec681f3Smrg if (depends_on(ni, instr)) 13557ec681f3Smrg ir3_instr_add_dep(ni, instr); 13567ec681f3Smrg } 1357361fc4cbSmaya} 1358361fc4cbSmaya 1359361fc4cbSmaya/* before scheduling a block, we need to add any necessary false-dependencies 1360361fc4cbSmaya * to ensure that: 1361361fc4cbSmaya * 1362361fc4cbSmaya * (1) barriers are scheduled in the right order wrt instructions related 1363361fc4cbSmaya * to the barrier 1364361fc4cbSmaya * 1365361fc4cbSmaya * (2) reads that come before a write actually get scheduled before the 1366361fc4cbSmaya * write 1367361fc4cbSmaya */ 13687ec681f3Smrgbool 1369361fc4cbSmayair3_sched_add_deps(struct ir3 *ir) 1370361fc4cbSmaya{ 13717ec681f3Smrg bool progress = false; 13727ec681f3Smrg 13737ec681f3Smrg foreach_block (block, &ir->block_list) { 13747ec681f3Smrg foreach_instr (instr, 
&block->instr_list) { 13757ec681f3Smrg if (instr->barrier_class) { 13767ec681f3Smrg add_barrier_deps(block, instr); 13777ec681f3Smrg progress = true; 13787ec681f3Smrg } 13797ec681f3Smrg } 13807ec681f3Smrg } 13817ec681f3Smrg 13827ec681f3Smrg return progress; 1383361fc4cbSmaya} 1384