/*
 * Copyright (C) 2018 Jonathan Marek <jonathan@marek.ca>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Jonathan Marek <jonathan@marek.ca>
 */

279f464c52Smaya#include "ir2_private.h"
289f464c52Smaya
297ec681f3Smrgstatic bool
307ec681f3Smrgscalar_possible(struct ir2_instr *instr)
319f464c52Smaya{
327ec681f3Smrg   if (instr->alu.scalar_opc == SCALAR_NONE)
337ec681f3Smrg      return false;
349f464c52Smaya
357ec681f3Smrg   return src_ncomp(instr) == 1;
369f464c52Smaya}
379f464c52Smaya
387ec681f3Smrgstatic bool
397ec681f3Smrgis_alu_compatible(struct ir2_instr *a, struct ir2_instr *b)
409f464c52Smaya{
417ec681f3Smrg   if (!a)
427ec681f3Smrg      return true;
439f464c52Smaya
447ec681f3Smrg   /* dont use same instruction twice */
457ec681f3Smrg   if (a == b)
467ec681f3Smrg      return false;
479f464c52Smaya
487ec681f3Smrg   /* PRED_SET must be alone */
497ec681f3Smrg   if (b->alu.scalar_opc >= PRED_SETEs &&
507ec681f3Smrg       b->alu.scalar_opc <= PRED_SET_RESTOREs)
517ec681f3Smrg      return false;
529f464c52Smaya
537ec681f3Smrg   /* must write to same export (issues otherwise?) */
547ec681f3Smrg   return a->alu.export == b->alu.export;
559f464c52Smaya}
569f464c52Smaya
579f464c52Smaya/* priority of vector instruction for scheduling (lower=higher prio) */
587ec681f3Smrgstatic unsigned
597ec681f3Smrgalu_vector_prio(struct ir2_instr *instr)
609f464c52Smaya{
617ec681f3Smrg   if (instr->alu.vector_opc == VECTOR_NONE)
627ec681f3Smrg      return ~0u;
639f464c52Smaya
647ec681f3Smrg   if (is_export(instr))
657ec681f3Smrg      return 4;
669f464c52Smaya
677ec681f3Smrg   /* TODO check src type and ncomps */
687ec681f3Smrg   if (instr->src_count == 3)
697ec681f3Smrg      return 0;
709f464c52Smaya
717ec681f3Smrg   if (!scalar_possible(instr))
727ec681f3Smrg      return 1;
739f464c52Smaya
747ec681f3Smrg   return instr->src_count == 2 ? 2 : 3;
759f464c52Smaya}
769f464c52Smaya
779f464c52Smaya/* priority of scalar instruction for scheduling (lower=higher prio) */
787ec681f3Smrgstatic unsigned
797ec681f3Smrgalu_scalar_prio(struct ir2_instr *instr)
809f464c52Smaya{
817ec681f3Smrg   if (!scalar_possible(instr))
827ec681f3Smrg      return ~0u;
839f464c52Smaya
847ec681f3Smrg   /* this case is dealt with later */
857ec681f3Smrg   if (instr->src_count > 1)
867ec681f3Smrg      return ~0u;
879f464c52Smaya
887ec681f3Smrg   if (is_export(instr))
897ec681f3Smrg      return 4;
909f464c52Smaya
917ec681f3Smrg   /* PRED to end of block */
927ec681f3Smrg   if (instr->alu.scalar_opc >= PRED_SETEs &&
937ec681f3Smrg       instr->alu.scalar_opc <= PRED_SET_RESTOREs)
947ec681f3Smrg      return 5;
959f464c52Smaya
967ec681f3Smrg   /* scalar only have highest priority */
977ec681f3Smrg   return instr->alu.vector_opc == VECTOR_NONE ? 0 : 3;
989f464c52Smaya}
999f464c52Smaya
/* this is a bit messy:
 * we want to find a slot where we can insert a scalar MOV with
 * a vector instruction that was already scheduled
 *
 * scans backwards from the end of the current schedule for the earliest
 * slot (within block_idx) with a free scalar lane where a mov reading
 * src1 can be placed, and picks a free component of register reg_idx
 * (returned through *comp) for the mov to write.  returns NULL when no
 * such slot exists.
 */
static struct ir2_sched_instr *
insert(struct ir2_context *ctx, unsigned block_idx, unsigned reg_idx,
       struct ir2_src src1, unsigned *comp)
{
   struct ir2_sched_instr *sched = NULL, *s;
   unsigned i, mask = 0xf;

   /* go first earliest point where the mov can be inserted */
   for (i = ctx->instr_sched_count - 1; i > 0; i--) {
      s = &ctx->instr_sched[i - 1];

      /* can't move the mov into a different block */
      if (s->instr && s->instr->block_idx != block_idx)
         break;
      if (s->instr_s && s->instr_s->block_idx != block_idx)
         break;

      /* can't insert before the instruction that produces src1 */
      if (src1.type == IR2_SRC_SSA) {
         if ((s->instr && s->instr->idx == src1.num) ||
             (s->instr_s && s->instr_s->idx == src1.num))
            break;
      }

      /* narrow the candidate components of reg_idx to those unused at
       * this slot; stop once no component survives
       * (reg_state packs 4 component bits per register, 8 regs per word)
       */
      unsigned mr = ~(s->reg_state[reg_idx / 8] >> reg_idx % 8 * 4 & 0xf);
      if ((mask & mr) == 0)
         break;

      mask &= mr;
      /* slot needs a free scalar lane: no scalar instr already present,
       * and the vector instr must not occupy all three source slots */
      if (s->instr_s || s->instr->src_count == 3)
         continue;

      /* only co-issue with a non-export ALU instruction */
      if (s->instr->type != IR2_ALU || s->instr->alu.export >= 0)
         continue;

      sched = s;
   }
   /* pick the lowest surviving component */
   *comp = ffs(mask) - 1;

   if (sched) {
      /* mark the chosen component live from the insertion point onward */
      for (s = sched; s != &ctx->instr_sched[ctx->instr_sched_count]; s++)
         s->reg_state[reg_idx / 8] |= 1 << (*comp + reg_idx % 8 * 4);
   }

   return sched;
}
1489f464c52Smaya
/* case1:
 * in this case, insert a mov to place the 2nd src into to same reg
 * (scalar sources come from the same register)
 *
 * this is a common case which works when one of the srcs is input/const
 * but for instrs which have 2 ssa/reg srcs, then its not ideal
 *
 * 'order' selects which source stays (src0) and which is copied by the
 * inserted mov (src1).  returns true if the mov was inserted and instr
 * was rewritten to read both operands from src0's register.
 */
static bool
scalarize_case1(struct ir2_context *ctx, struct ir2_instr *instr, bool order)
{
   struct ir2_src src0 = instr->src[order];
   struct ir2_src src1 = instr->src[!order];
   struct ir2_sched_instr *sched;
   struct ir2_instr *ins;
   struct ir2_reg *reg;
   unsigned idx, comp;

   /* src0's register will be overwritten below, so it can't be a
    * const/input register */
   switch (src0.type) {
   case IR2_SRC_CONST:
   case IR2_SRC_INPUT:
      return false;
   default:
      break;
   }

   /* TODO, insert needs logic for this */
   if (src1.type == IR2_SRC_REG)
      return false;

   /* we could do something if they match src1.. */
   if (src0.negate || src0.abs)
      return false;

   reg = get_reg_src(ctx, &src0);

   /* result not used more since we will overwrite */
   for (int i = 0; i < 4; i++)
      if (reg->comp[i].ref_count != !!(instr->alu.write_mask & 1 << i))
         return false;

   /* find a place to insert the mov */
   sched = insert(ctx, instr->block_idx, reg->idx, src1, &comp);
   if (!sched)
      return false;

   /* build the scalar mov (MAXs serves as the mov here), writing src1
    * into component 'comp' of src0's register */
   ins = &ctx->instr[idx = ctx->instr_count++];
   ins->idx = idx;
   ins->type = IR2_ALU;
   ins->src[0] = src1;
   ins->src_count = 1;
   ins->is_ssa = true;
   ins->ssa.idx = reg->idx;
   ins->ssa.ncomp = 1;
   ins->ssa.comp[0].c = comp;
   ins->alu.scalar_opc = MAXs;
   ins->alu.export = -1;
   ins->alu.write_mask = 1;
   ins->pred = instr->pred;
   ins->block_idx = instr->block_idx;

   /* both sources now come from the same register */
   instr->src[0] = src0;
   instr->alu.src1_swizzle = comp;

   sched->instr_s = ins;
   return true;
}
2159f464c52Smaya
/* fill sched with next fetch or (vector and/or scalar) alu instruction
 *
 * picks from the instructions still needing emission: a FETCH alone, or a
 * vector ALU instruction optionally paired with a compatible scalar one.
 * returns the block index the scheduled instruction(s) belong to, or -1
 * when nothing remains to schedule.
 */
static int
sched_next(struct ir2_context *ctx, struct ir2_sched_instr *sched)
{
   struct ir2_instr *avail[0x100], *instr_v = NULL, *instr_s = NULL;
   unsigned avail_count = 0;

   instr_alloc_type_t export = ~0u;
   int block_idx = -1;

   /* XXX merge this loop with the other one somehow? */
   /* pick the lowest pending export buffer, so exports to different
    * buffers are never mixed in one scheduling step */
   ir2_foreach_instr (instr, ctx) {
      if (!instr->need_emit)
         continue;
      if (is_export(instr))
         export = MIN2(export, export_buf(instr->alu.export));
   }

   /* collect the instructions that are ready to be scheduled now */
   ir2_foreach_instr (instr, ctx) {
      if (!instr->need_emit)
         continue;

      /* dont mix exports */
      if (is_export(instr) && export_buf(instr->alu.export) != export)
         continue;

      if (block_idx < 0)
         block_idx = instr->block_idx;
      else if (block_idx != instr->block_idx || /* must be same block */
               instr->type == IR2_CF ||         /* CF/MEM must be alone */
               (is_export(instr) && export == SQ_MEMORY))
         break;
      /* it works because IR2_CF is always at end of block
       * and somewhat same idea with MEM exports, which might not be alone
       * but will end up in-order at least
       */

      /* check if dependencies are satisfied */
      bool is_ok = true;
      ir2_foreach_src (src, instr) {
         if (src->type == IR2_SRC_REG) {
            /* need to check if all previous instructions in the block
             * which write the reg have been emitted
             * slow..
             * XXX: check components instead of whole register
             */
            struct ir2_reg *reg = get_reg_src(ctx, src);
            ir2_foreach_instr (p, ctx) {
               if (!p->is_ssa && p->reg == reg && p->idx < instr->idx)
                  is_ok &= !p->need_emit;
            }
         } else if (src->type == IR2_SRC_SSA) {
            /* in this case its easy, just check need_emit */
            is_ok &= !ctx->instr[src->num].need_emit;
         }
      }
      /* don't reorder non-ssa write before read */
      if (!instr->is_ssa) {
         ir2_foreach_instr (p, ctx) {
            if (!p->need_emit || p->idx >= instr->idx)
               continue;

            ir2_foreach_src (src, p) {
               if (get_reg_src(ctx, src) == instr->reg)
                  is_ok = false;
            }
         }
      }
      /* don't reorder across predicates */
      if (avail_count && instr->pred != avail[0]->pred)
         is_ok = false;

      if (!is_ok)
         continue;

      avail[avail_count++] = instr;
   }

   if (!avail_count) {
      /* nothing ready implies nothing pending at all */
      assert(block_idx == -1);
      return -1;
   }

   /* priority to FETCH instructions */
   ir2_foreach_avail (instr) {
      if (instr->type == IR2_ALU)
         continue;

      /* FETCH is scheduled alone: free its srcs, allocate its dst */
      ra_src_free(ctx, instr);
      ra_reg(ctx, get_reg(instr), -1, false, 0);

      instr->need_emit = false;
      sched->instr = instr;
      sched->instr_s = NULL;
      return block_idx;
   }

   /* TODO precompute priorities */

   /* pick the best vector instruction */
   unsigned prio_v = ~0u, prio_s = ~0u, prio;
   ir2_foreach_avail (instr) {
      prio = alu_vector_prio(instr);
      if (prio < prio_v) {
         instr_v = instr;
         prio_v = prio;
      }
   }

   /* pick a scalar instruction to co-issue; an incompatible scalar may
    * still win and evict the vector pick when its priority is better */
   /* TODO can still insert scalar if src_count=3, if smart about it */
   if (!instr_v || instr_v->src_count < 3) {
      ir2_foreach_avail (instr) {
         bool compat = is_alu_compatible(instr_v, instr);

         prio = alu_scalar_prio(instr);
         if (prio >= prio_v && !compat)
            continue;

         if (prio < prio_s) {
            instr_s = instr;
            prio_s = prio;
            if (!compat)
               instr_v = NULL;
         }
      }
   }

   assert(instr_v || instr_s);

   /* now, we try more complex insertion of vector instruction as scalar
    * TODO: if we are smart we can still insert if instr_v->src_count==3
    */
   if (!instr_s && instr_v->src_count < 3) {
      ir2_foreach_avail (instr) {
         if (!is_alu_compatible(instr_v, instr) || !scalar_possible(instr))
            continue;

         /* at this point, src_count should always be 2 */
         assert(instr->src_count == 2);

         if (scalarize_case1(ctx, instr, 0)) {
            instr_s = instr;
            break;
         }
         if (scalarize_case1(ctx, instr, 1)) {
            instr_s = instr;
            break;
         }
      }
   }

   /* free src registers */
   if (instr_v) {
      instr_v->need_emit = false;
      ra_src_free(ctx, instr_v);
   }

   if (instr_s) {
      instr_s->need_emit = false;
      ra_src_free(ctx, instr_s);
   }

   /* allocate dst registers */
   if (instr_v)
      ra_reg(ctx, get_reg(instr_v), -1, is_export(instr_v),
             instr_v->alu.write_mask);

   if (instr_s)
      ra_reg(ctx, get_reg(instr_s), -1, is_export(instr_s),
             instr_s->alu.write_mask);

   sched->instr = instr_v;
   sched->instr_s = instr_s;
   return block_idx;
}
3909f464c52Smaya
3919f464c52Smaya/* scheduling: determine order of instructions */
3927ec681f3Smrgstatic void
3937ec681f3Smrgschedule_instrs(struct ir2_context *ctx)
3949f464c52Smaya{
3957ec681f3Smrg   struct ir2_sched_instr *sched;
3967ec681f3Smrg   int block_idx;
3977ec681f3Smrg
3987ec681f3Smrg   /* allocate input registers */
3997ec681f3Smrg   for (unsigned idx = 0; idx < ARRAY_SIZE(ctx->input); idx++)
4007ec681f3Smrg      if (ctx->input[idx].initialized)
4017ec681f3Smrg         ra_reg(ctx, &ctx->input[idx], idx, false, 0);
4027ec681f3Smrg
4037ec681f3Smrg   for (;;) {
4047ec681f3Smrg      sched = &ctx->instr_sched[ctx->instr_sched_count++];
4057ec681f3Smrg      block_idx = sched_next(ctx, sched);
4067ec681f3Smrg      if (block_idx < 0)
4077ec681f3Smrg         break;
4087ec681f3Smrg      memcpy(sched->reg_state, ctx->reg_state, sizeof(ctx->reg_state));
4097ec681f3Smrg
4107ec681f3Smrg      /* catch texture fetch after scheduling and insert the
4117ec681f3Smrg       * SET_TEX_LOD right before it if necessary
4127ec681f3Smrg       * TODO clean this up
4137ec681f3Smrg       */
4147ec681f3Smrg      struct ir2_instr *instr = sched->instr, *tex_lod;
4157ec681f3Smrg      if (instr && instr->type == IR2_FETCH && instr->fetch.opc == TEX_FETCH &&
4167ec681f3Smrg          instr->src_count == 2) {
4177ec681f3Smrg         /* generate the SET_LOD instruction */
4187ec681f3Smrg         tex_lod = &ctx->instr[ctx->instr_count++];
4197ec681f3Smrg         tex_lod->type = IR2_FETCH;
4207ec681f3Smrg         tex_lod->block_idx = instr->block_idx;
4217ec681f3Smrg         tex_lod->pred = instr->pred;
4227ec681f3Smrg         tex_lod->fetch.opc = TEX_SET_TEX_LOD;
4237ec681f3Smrg         tex_lod->src[0] = instr->src[1];
4247ec681f3Smrg         tex_lod->src_count = 1;
4257ec681f3Smrg
4267ec681f3Smrg         sched[1] = sched[0];
4277ec681f3Smrg         sched->instr = tex_lod;
4287ec681f3Smrg         ctx->instr_sched_count++;
4297ec681f3Smrg      }
4307ec681f3Smrg
4317ec681f3Smrg      bool free_block = true;
4327ec681f3Smrg      ir2_foreach_instr (instr, ctx)
4337ec681f3Smrg         free_block &= instr->block_idx != block_idx;
4347ec681f3Smrg      if (free_block)
4357ec681f3Smrg         ra_block_free(ctx, block_idx);
4367ec681f3Smrg   };
4377ec681f3Smrg   ctx->instr_sched_count--;
4389f464c52Smaya}
4399f464c52Smaya
4409f464c52Smayavoid
4419f464c52Smayair2_compile(struct fd2_shader_stateobj *so, unsigned variant,
4427ec681f3Smrg            struct fd2_shader_stateobj *fp)
4439f464c52Smaya{
4447ec681f3Smrg   struct ir2_context ctx = {};
4457ec681f3Smrg   bool binning = !fp && so->type == MESA_SHADER_VERTEX;
4469f464c52Smaya
4477ec681f3Smrg   if (fp)
4487ec681f3Smrg      so->variant[variant].f = fp->variant[0].f;
4499f464c52Smaya
4507ec681f3Smrg   ctx.so = so;
4517ec681f3Smrg   ctx.info = &so->variant[variant].info;
4527ec681f3Smrg   ctx.f = &so->variant[variant].f;
4537ec681f3Smrg   ctx.info->max_reg = -1;
4549f464c52Smaya
4557ec681f3Smrg   /* convert nir to internal representation */
4567ec681f3Smrg   ir2_nir_compile(&ctx, binning);
4579f464c52Smaya
4587ec681f3Smrg   /* copy propagate srcs */
4597ec681f3Smrg   cp_src(&ctx);
4609f464c52Smaya
4617ec681f3Smrg   /* get ref_counts and kill non-needed instructions */
4627ec681f3Smrg   ra_count_refs(&ctx);
4639f464c52Smaya
4647ec681f3Smrg   /* remove movs used to write outputs */
4657ec681f3Smrg   cp_export(&ctx);
4669f464c52Smaya
4677ec681f3Smrg   /* instruction order.. and vector->scalar conversions */
4687ec681f3Smrg   schedule_instrs(&ctx);
4699f464c52Smaya
4707ec681f3Smrg   /* finally, assemble to bitcode */
4717ec681f3Smrg   assemble(&ctx, binning);
4729f464c52Smaya}
473