19f464c52Smaya/* 29f464c52Smaya * Copyright (C) 2018 Jonathan Marek <jonathan@marek.ca> 39f464c52Smaya * 49f464c52Smaya * Permission is hereby granted, free of charge, to any person obtaining a 59f464c52Smaya * copy of this software and associated documentation files (the "Software"), 69f464c52Smaya * to deal in the Software without restriction, including without limitation 79f464c52Smaya * the rights to use, copy, modify, merge, publish, distribute, sublicense, 89f464c52Smaya * and/or sell copies of the Software, and to permit persons to whom the 99f464c52Smaya * Software is furnished to do so, subject to the following conditions: 109f464c52Smaya * 119f464c52Smaya * The above copyright notice and this permission notice (including the next 129f464c52Smaya * paragraph) shall be included in all copies or substantial portions of the 139f464c52Smaya * Software. 149f464c52Smaya * 159f464c52Smaya * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 169f464c52Smaya * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 179f464c52Smaya * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 189f464c52Smaya * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 199f464c52Smaya * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 209f464c52Smaya * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 219f464c52Smaya * SOFTWARE. 229f464c52Smaya * 239f464c52Smaya * Authors: 249f464c52Smaya * Jonathan Marek <jonathan@marek.ca> 259f464c52Smaya */ 269f464c52Smaya 279f464c52Smaya#include "ir2_private.h" 289f464c52Smaya 297ec681f3Smrgstatic bool 307ec681f3Smrgscalar_possible(struct ir2_instr *instr) 319f464c52Smaya{ 327ec681f3Smrg if (instr->alu.scalar_opc == SCALAR_NONE) 337ec681f3Smrg return false; 349f464c52Smaya 357ec681f3Smrg return src_ncomp(instr) == 1; 369f464c52Smaya} 379f464c52Smaya 387ec681f3Smrgstatic bool 397ec681f3Smrgis_alu_compatible(struct ir2_instr *a, struct ir2_instr *b) 409f464c52Smaya{ 417ec681f3Smrg if (!a) 427ec681f3Smrg return true; 439f464c52Smaya 447ec681f3Smrg /* dont use same instruction twice */ 457ec681f3Smrg if (a == b) 467ec681f3Smrg return false; 479f464c52Smaya 487ec681f3Smrg /* PRED_SET must be alone */ 497ec681f3Smrg if (b->alu.scalar_opc >= PRED_SETEs && 507ec681f3Smrg b->alu.scalar_opc <= PRED_SET_RESTOREs) 517ec681f3Smrg return false; 529f464c52Smaya 537ec681f3Smrg /* must write to same export (issues otherwise?) */ 547ec681f3Smrg return a->alu.export == b->alu.export; 559f464c52Smaya} 569f464c52Smaya 579f464c52Smaya/* priority of vector instruction for scheduling (lower=higher prio) */ 587ec681f3Smrgstatic unsigned 597ec681f3Smrgalu_vector_prio(struct ir2_instr *instr) 609f464c52Smaya{ 617ec681f3Smrg if (instr->alu.vector_opc == VECTOR_NONE) 627ec681f3Smrg return ~0u; 639f464c52Smaya 647ec681f3Smrg if (is_export(instr)) 657ec681f3Smrg return 4; 669f464c52Smaya 677ec681f3Smrg /* TODO check src type and ncomps */ 687ec681f3Smrg if (instr->src_count == 3) 697ec681f3Smrg return 0; 709f464c52Smaya 717ec681f3Smrg if (!scalar_possible(instr)) 727ec681f3Smrg return 1; 739f464c52Smaya 747ec681f3Smrg return instr->src_count == 2 ? 2 : 3; 759f464c52Smaya} 769f464c52Smaya 779f464c52Smaya/* priority of scalar instruction for scheduling (lower=higher prio) */ 787ec681f3Smrgstatic unsigned 797ec681f3Smrgalu_scalar_prio(struct ir2_instr *instr) 809f464c52Smaya{ 817ec681f3Smrg if (!scalar_possible(instr)) 827ec681f3Smrg return ~0u; 839f464c52Smaya 847ec681f3Smrg /* this case is dealt with later */ 857ec681f3Smrg if (instr->src_count > 1) 867ec681f3Smrg return ~0u; 879f464c52Smaya 887ec681f3Smrg if (is_export(instr)) 897ec681f3Smrg return 4; 909f464c52Smaya 917ec681f3Smrg /* PRED to end of block */ 927ec681f3Smrg if (instr->alu.scalar_opc >= PRED_SETEs && 937ec681f3Smrg instr->alu.scalar_opc <= PRED_SET_RESTOREs) 947ec681f3Smrg return 5; 959f464c52Smaya 967ec681f3Smrg /* scalar only have highest priority */ 977ec681f3Smrg return instr->alu.vector_opc == VECTOR_NONE ? 0 : 3; 989f464c52Smaya} 999f464c52Smaya 1009f464c52Smaya/* this is a bit messy: 1019f464c52Smaya * we want to find a slot where we can insert a scalar MOV with 1029f464c52Smaya * a vector instruction that was already scheduled 1039f464c52Smaya */ 1047ec681f3Smrgstatic struct ir2_sched_instr * 1059f464c52Smayainsert(struct ir2_context *ctx, unsigned block_idx, unsigned reg_idx, 1067ec681f3Smrg struct ir2_src src1, unsigned *comp) 1079f464c52Smaya{ 1087ec681f3Smrg struct ir2_sched_instr *sched = NULL, *s; 1097ec681f3Smrg unsigned i, mask = 0xf; 1107ec681f3Smrg 1117ec681f3Smrg /* go first earliest point where the mov can be inserted */ 1127ec681f3Smrg for (i = ctx->instr_sched_count - 1; i > 0; i--) { 1137ec681f3Smrg s = &ctx->instr_sched[i - 1]; 1147ec681f3Smrg 1157ec681f3Smrg if (s->instr && s->instr->block_idx != block_idx) 1167ec681f3Smrg break; 1177ec681f3Smrg if (s->instr_s && s->instr_s->block_idx != block_idx) 1187ec681f3Smrg break; 1197ec681f3Smrg 1207ec681f3Smrg if (src1.type == IR2_SRC_SSA) { 1217ec681f3Smrg if ((s->instr && s->instr->idx == src1.num) || 1227ec681f3Smrg (s->instr_s && s->instr_s->idx == src1.num)) 1237ec681f3Smrg break; 1247ec681f3Smrg } 1257ec681f3Smrg 1267ec681f3Smrg unsigned mr = ~(s->reg_state[reg_idx / 8] >> reg_idx % 8 * 4 & 0xf); 1277ec681f3Smrg if ((mask & mr) == 0) 1287ec681f3Smrg break; 1297ec681f3Smrg 1307ec681f3Smrg mask &= mr; 1317ec681f3Smrg if (s->instr_s || s->instr->src_count == 3) 1327ec681f3Smrg continue; 1337ec681f3Smrg 1347ec681f3Smrg if (s->instr->type != IR2_ALU || s->instr->alu.export >= 0) 1357ec681f3Smrg continue; 1367ec681f3Smrg 1377ec681f3Smrg sched = s; 1387ec681f3Smrg } 1397ec681f3Smrg *comp = ffs(mask) - 1; 1407ec681f3Smrg 1417ec681f3Smrg if (sched) { 1427ec681f3Smrg for (s = sched; s != &ctx->instr_sched[ctx->instr_sched_count]; s++) 1437ec681f3Smrg s->reg_state[reg_idx / 8] |= 1 << (*comp + reg_idx % 8 * 4); 1447ec681f3Smrg } 1457ec681f3Smrg 1467ec681f3Smrg return sched; 1479f464c52Smaya} 1489f464c52Smaya 1499f464c52Smaya/* case1: 1509f464c52Smaya * in this case, insert a mov to place the 2nd src into to same reg 1519f464c52Smaya * (scalar sources come from the same register) 1529f464c52Smaya * 1539f464c52Smaya * this is a common case which works when one of the srcs is input/const 1549f464c52Smaya * but for instrs which have 2 ssa/reg srcs, then its not ideal 1559f464c52Smaya */ 1569f464c52Smayastatic bool 1579f464c52Smayascalarize_case1(struct ir2_context *ctx, struct ir2_instr *instr, bool order) 1589f464c52Smaya{ 1597ec681f3Smrg struct ir2_src src0 = instr->src[order]; 1607ec681f3Smrg struct ir2_src src1 = instr->src[!order]; 1617ec681f3Smrg struct ir2_sched_instr *sched; 1627ec681f3Smrg struct ir2_instr *ins; 1637ec681f3Smrg struct ir2_reg *reg; 1647ec681f3Smrg unsigned idx, comp; 1657ec681f3Smrg 1667ec681f3Smrg switch (src0.type) { 1677ec681f3Smrg case IR2_SRC_CONST: 1687ec681f3Smrg case IR2_SRC_INPUT: 1697ec681f3Smrg return false; 1707ec681f3Smrg default: 1717ec681f3Smrg break; 1727ec681f3Smrg } 1737ec681f3Smrg 1747ec681f3Smrg /* TODO, insert needs logic for this */ 1757ec681f3Smrg if (src1.type == IR2_SRC_REG) 1767ec681f3Smrg return false; 1777ec681f3Smrg 1787ec681f3Smrg /* we could do something if they match src1.. */ 1797ec681f3Smrg if (src0.negate || src0.abs) 1807ec681f3Smrg return false; 1817ec681f3Smrg 1827ec681f3Smrg reg = get_reg_src(ctx, &src0); 1837ec681f3Smrg 1847ec681f3Smrg /* result not used more since we will overwrite */ 1857ec681f3Smrg for (int i = 0; i < 4; i++) 1867ec681f3Smrg if (reg->comp[i].ref_count != !!(instr->alu.write_mask & 1 << i)) 1877ec681f3Smrg return false; 1887ec681f3Smrg 1897ec681f3Smrg /* find a place to insert the mov */ 1907ec681f3Smrg sched = insert(ctx, instr->block_idx, reg->idx, src1, &comp); 1917ec681f3Smrg if (!sched) 1927ec681f3Smrg return false; 1937ec681f3Smrg 1947ec681f3Smrg ins = &ctx->instr[idx = ctx->instr_count++]; 1957ec681f3Smrg ins->idx = idx; 1967ec681f3Smrg ins->type = IR2_ALU; 1977ec681f3Smrg ins->src[0] = src1; 1987ec681f3Smrg ins->src_count = 1; 1997ec681f3Smrg ins->is_ssa = true; 2007ec681f3Smrg ins->ssa.idx = reg->idx; 2017ec681f3Smrg ins->ssa.ncomp = 1; 2027ec681f3Smrg ins->ssa.comp[0].c = comp; 2037ec681f3Smrg ins->alu.scalar_opc = MAXs; 2047ec681f3Smrg ins->alu.export = -1; 2057ec681f3Smrg ins->alu.write_mask = 1; 2067ec681f3Smrg ins->pred = instr->pred; 2077ec681f3Smrg ins->block_idx = instr->block_idx; 2087ec681f3Smrg 2097ec681f3Smrg instr->src[0] = src0; 2107ec681f3Smrg instr->alu.src1_swizzle = comp; 2117ec681f3Smrg 2127ec681f3Smrg sched->instr_s = ins; 2137ec681f3Smrg return true; 2149f464c52Smaya} 2159f464c52Smaya 2169f464c52Smaya/* fill sched with next fetch or (vector and/or scalar) alu instruction */ 2177ec681f3Smrgstatic int 2187ec681f3Smrgsched_next(struct ir2_context *ctx, struct ir2_sched_instr *sched) 2199f464c52Smaya{ 2207ec681f3Smrg struct ir2_instr *avail[0x100], *instr_v = NULL, *instr_s = NULL; 2217ec681f3Smrg unsigned avail_count = 0; 2227ec681f3Smrg 2237ec681f3Smrg instr_alloc_type_t export = ~0u; 2247ec681f3Smrg int block_idx = -1; 2257ec681f3Smrg 2267ec681f3Smrg /* XXX merge this loop with the other one somehow? */ 2277ec681f3Smrg ir2_foreach_instr (instr, ctx) { 2287ec681f3Smrg if (!instr->need_emit) 2297ec681f3Smrg continue; 2307ec681f3Smrg if (is_export(instr)) 2317ec681f3Smrg export = MIN2(export, export_buf(instr->alu.export)); 2327ec681f3Smrg } 2337ec681f3Smrg 2347ec681f3Smrg ir2_foreach_instr (instr, ctx) { 2357ec681f3Smrg if (!instr->need_emit) 2367ec681f3Smrg continue; 2377ec681f3Smrg 2387ec681f3Smrg /* dont mix exports */ 2397ec681f3Smrg if (is_export(instr) && export_buf(instr->alu.export) != export) 2407ec681f3Smrg continue; 2417ec681f3Smrg 2427ec681f3Smrg if (block_idx < 0) 2437ec681f3Smrg block_idx = instr->block_idx; 2447ec681f3Smrg else if (block_idx != instr->block_idx || /* must be same block */ 2457ec681f3Smrg instr->type == IR2_CF || /* CF/MEM must be alone */ 2467ec681f3Smrg (is_export(instr) && export == SQ_MEMORY)) 2477ec681f3Smrg break; 2487ec681f3Smrg /* it works because IR2_CF is always at end of block 2497ec681f3Smrg * and somewhat same idea with MEM exports, which might not be alone 2507ec681f3Smrg * but will end up in-order at least 2517ec681f3Smrg */ 2527ec681f3Smrg 2537ec681f3Smrg /* check if dependencies are satisfied */ 2547ec681f3Smrg bool is_ok = true; 2557ec681f3Smrg ir2_foreach_src (src, instr) { 2567ec681f3Smrg if (src->type == IR2_SRC_REG) { 2577ec681f3Smrg /* need to check if all previous instructions in the block 2587ec681f3Smrg * which write the reg have been emitted 2597ec681f3Smrg * slow.. 2607ec681f3Smrg * XXX: check components instead of whole register 2617ec681f3Smrg */ 2627ec681f3Smrg struct ir2_reg *reg = get_reg_src(ctx, src); 2637ec681f3Smrg ir2_foreach_instr (p, ctx) { 2647ec681f3Smrg if (!p->is_ssa && p->reg == reg && p->idx < instr->idx) 2657ec681f3Smrg is_ok &= !p->need_emit; 2667ec681f3Smrg } 2677ec681f3Smrg } else if (src->type == IR2_SRC_SSA) { 2687ec681f3Smrg /* in this case its easy, just check need_emit */ 2697ec681f3Smrg is_ok &= !ctx->instr[src->num].need_emit; 2707ec681f3Smrg } 2717ec681f3Smrg } 2727ec681f3Smrg /* don't reorder non-ssa write before read */ 2737ec681f3Smrg if (!instr->is_ssa) { 2747ec681f3Smrg ir2_foreach_instr (p, ctx) { 2757ec681f3Smrg if (!p->need_emit || p->idx >= instr->idx) 2767ec681f3Smrg continue; 2777ec681f3Smrg 2787ec681f3Smrg ir2_foreach_src (src, p) { 2797ec681f3Smrg if (get_reg_src(ctx, src) == instr->reg) 2807ec681f3Smrg is_ok = false; 2817ec681f3Smrg } 2827ec681f3Smrg } 2837ec681f3Smrg } 2847ec681f3Smrg /* don't reorder across predicates */ 2857ec681f3Smrg if (avail_count && instr->pred != avail[0]->pred) 2867ec681f3Smrg is_ok = false; 2877ec681f3Smrg 2887ec681f3Smrg if (!is_ok) 2897ec681f3Smrg continue; 2907ec681f3Smrg 2917ec681f3Smrg avail[avail_count++] = instr; 2927ec681f3Smrg } 2937ec681f3Smrg 2947ec681f3Smrg if (!avail_count) { 2957ec681f3Smrg assert(block_idx == -1); 2967ec681f3Smrg return -1; 2977ec681f3Smrg } 2987ec681f3Smrg 2997ec681f3Smrg /* priority to FETCH instructions */ 3007ec681f3Smrg ir2_foreach_avail (instr) { 3017ec681f3Smrg if (instr->type == IR2_ALU) 3027ec681f3Smrg continue; 3037ec681f3Smrg 3047ec681f3Smrg ra_src_free(ctx, instr); 3057ec681f3Smrg ra_reg(ctx, get_reg(instr), -1, false, 0); 3067ec681f3Smrg 3077ec681f3Smrg instr->need_emit = false; 3087ec681f3Smrg sched->instr = instr; 3097ec681f3Smrg sched->instr_s = NULL; 3107ec681f3Smrg return block_idx; 3117ec681f3Smrg } 3127ec681f3Smrg 3137ec681f3Smrg /* TODO precompute priorities */ 3147ec681f3Smrg 3157ec681f3Smrg unsigned prio_v = ~0u, prio_s = ~0u, prio; 3167ec681f3Smrg ir2_foreach_avail (instr) { 3177ec681f3Smrg prio = alu_vector_prio(instr); 3187ec681f3Smrg if (prio < prio_v) { 3197ec681f3Smrg instr_v = instr; 3207ec681f3Smrg prio_v = prio; 3217ec681f3Smrg } 3227ec681f3Smrg } 3237ec681f3Smrg 3247ec681f3Smrg /* TODO can still insert scalar if src_count=3, if smart about it */ 3257ec681f3Smrg if (!instr_v || instr_v->src_count < 3) { 3267ec681f3Smrg ir2_foreach_avail (instr) { 3277ec681f3Smrg bool compat = is_alu_compatible(instr_v, instr); 3287ec681f3Smrg 3297ec681f3Smrg prio = alu_scalar_prio(instr); 3307ec681f3Smrg if (prio >= prio_v && !compat) 3317ec681f3Smrg continue; 3327ec681f3Smrg 3337ec681f3Smrg if (prio < prio_s) { 3347ec681f3Smrg instr_s = instr; 3357ec681f3Smrg prio_s = prio; 3367ec681f3Smrg if (!compat) 3377ec681f3Smrg instr_v = NULL; 3387ec681f3Smrg } 3397ec681f3Smrg } 3407ec681f3Smrg } 3417ec681f3Smrg 3427ec681f3Smrg assert(instr_v || instr_s); 3437ec681f3Smrg 3447ec681f3Smrg /* now, we try more complex insertion of vector instruction as scalar 3457ec681f3Smrg * TODO: if we are smart we can still insert if instr_v->src_count==3 3467ec681f3Smrg */ 3477ec681f3Smrg if (!instr_s && instr_v->src_count < 3) { 3487ec681f3Smrg ir2_foreach_avail (instr) { 3497ec681f3Smrg if (!is_alu_compatible(instr_v, instr) || !scalar_possible(instr)) 3507ec681f3Smrg continue; 3517ec681f3Smrg 3527ec681f3Smrg /* at this point, src_count should always be 2 */ 3537ec681f3Smrg assert(instr->src_count == 2); 3547ec681f3Smrg 3557ec681f3Smrg if (scalarize_case1(ctx, instr, 0)) { 3567ec681f3Smrg instr_s = instr; 3577ec681f3Smrg break; 3587ec681f3Smrg } 3597ec681f3Smrg if (scalarize_case1(ctx, instr, 1)) { 3607ec681f3Smrg instr_s = instr; 3617ec681f3Smrg break; 3627ec681f3Smrg } 3637ec681f3Smrg } 3647ec681f3Smrg } 3657ec681f3Smrg 3667ec681f3Smrg /* free src registers */ 3677ec681f3Smrg if (instr_v) { 3687ec681f3Smrg instr_v->need_emit = false; 3697ec681f3Smrg ra_src_free(ctx, instr_v); 3707ec681f3Smrg } 3717ec681f3Smrg 3727ec681f3Smrg if (instr_s) { 3737ec681f3Smrg instr_s->need_emit = false; 3747ec681f3Smrg ra_src_free(ctx, instr_s); 3757ec681f3Smrg } 3767ec681f3Smrg 3777ec681f3Smrg /* allocate dst registers */ 3787ec681f3Smrg if (instr_v) 3797ec681f3Smrg ra_reg(ctx, get_reg(instr_v), -1, is_export(instr_v), 3807ec681f3Smrg instr_v->alu.write_mask); 3817ec681f3Smrg 3827ec681f3Smrg if (instr_s) 3837ec681f3Smrg ra_reg(ctx, get_reg(instr_s), -1, is_export(instr_s), 3847ec681f3Smrg instr_s->alu.write_mask); 3857ec681f3Smrg 3867ec681f3Smrg sched->instr = instr_v; 3877ec681f3Smrg sched->instr_s = instr_s; 3887ec681f3Smrg return block_idx; 3899f464c52Smaya} 3909f464c52Smaya 3919f464c52Smaya/* scheduling: determine order of instructions */ 3927ec681f3Smrgstatic void 3937ec681f3Smrgschedule_instrs(struct ir2_context *ctx) 3949f464c52Smaya{ 3957ec681f3Smrg struct ir2_sched_instr *sched; 3967ec681f3Smrg int block_idx; 3977ec681f3Smrg 3987ec681f3Smrg /* allocate input registers */ 3997ec681f3Smrg for (unsigned idx = 0; idx < ARRAY_SIZE(ctx->input); idx++) 4007ec681f3Smrg if (ctx->input[idx].initialized) 4017ec681f3Smrg ra_reg(ctx, &ctx->input[idx], idx, false, 0); 4027ec681f3Smrg 4037ec681f3Smrg for (;;) { 4047ec681f3Smrg sched = &ctx->instr_sched[ctx->instr_sched_count++]; 4057ec681f3Smrg block_idx = sched_next(ctx, sched); 4067ec681f3Smrg if (block_idx < 0) 4077ec681f3Smrg break; 4087ec681f3Smrg memcpy(sched->reg_state, ctx->reg_state, sizeof(ctx->reg_state)); 4097ec681f3Smrg 4107ec681f3Smrg /* catch texture fetch after scheduling and insert the 4117ec681f3Smrg * SET_TEX_LOD right before it if necessary 4127ec681f3Smrg * TODO clean this up 4137ec681f3Smrg */ 4147ec681f3Smrg struct ir2_instr *instr = sched->instr, *tex_lod; 4157ec681f3Smrg if (instr && instr->type == IR2_FETCH && instr->fetch.opc == TEX_FETCH && 4167ec681f3Smrg instr->src_count == 2) { 4177ec681f3Smrg /* generate the SET_LOD instruction */ 4187ec681f3Smrg tex_lod = &ctx->instr[ctx->instr_count++]; 4197ec681f3Smrg tex_lod->type = IR2_FETCH; 4207ec681f3Smrg tex_lod->block_idx = instr->block_idx; 4217ec681f3Smrg tex_lod->pred = instr->pred; 4227ec681f3Smrg tex_lod->fetch.opc = TEX_SET_TEX_LOD; 4237ec681f3Smrg tex_lod->src[0] = instr->src[1]; 4247ec681f3Smrg tex_lod->src_count = 1; 4257ec681f3Smrg 4267ec681f3Smrg sched[1] = sched[0]; 4277ec681f3Smrg sched->instr = tex_lod; 4287ec681f3Smrg ctx->instr_sched_count++; 4297ec681f3Smrg } 4307ec681f3Smrg 4317ec681f3Smrg bool free_block = true; 4327ec681f3Smrg ir2_foreach_instr (instr, ctx) 4337ec681f3Smrg free_block &= instr->block_idx != block_idx; 4347ec681f3Smrg if (free_block) 4357ec681f3Smrg ra_block_free(ctx, block_idx); 4367ec681f3Smrg }; 4377ec681f3Smrg ctx->instr_sched_count--; 4389f464c52Smaya} 4399f464c52Smaya 4409f464c52Smayavoid 4419f464c52Smayair2_compile(struct fd2_shader_stateobj *so, unsigned variant, 4427ec681f3Smrg struct fd2_shader_stateobj *fp) 4439f464c52Smaya{ 4447ec681f3Smrg struct ir2_context ctx = {}; 4457ec681f3Smrg bool binning = !fp && so->type == MESA_SHADER_VERTEX; 4469f464c52Smaya 4477ec681f3Smrg if (fp) 4487ec681f3Smrg so->variant[variant].f = fp->variant[0].f; 4499f464c52Smaya 4507ec681f3Smrg ctx.so = so; 4517ec681f3Smrg ctx.info = &so->variant[variant].info; 4527ec681f3Smrg ctx.f = &so->variant[variant].f; 4537ec681f3Smrg ctx.info->max_reg = -1; 4549f464c52Smaya 4557ec681f3Smrg /* convert nir to internal representation */ 4567ec681f3Smrg ir2_nir_compile(&ctx, binning); 4579f464c52Smaya 4587ec681f3Smrg /* copy propagate srcs */ 4597ec681f3Smrg cp_src(&ctx); 4609f464c52Smaya 4617ec681f3Smrg /* get ref_counts and kill non-needed instructions */ 4627ec681f3Smrg ra_count_refs(&ctx); 4639f464c52Smaya 4647ec681f3Smrg /* remove movs used to write outputs */ 4657ec681f3Smrg cp_export(&ctx); 4669f464c52Smaya 4677ec681f3Smrg /* instruction order.. and vector->scalar conversions */ 4687ec681f3Smrg schedule_instrs(&ctx); 4699f464c52Smaya 4707ec681f3Smrg /* finally, assemble to bitcode */ 4717ec681f3Smrg assemble(&ctx, binning); 4729f464c52Smaya} 473