13464ebd5Sriastradh/*
23464ebd5Sriastradh * Mesa 3-D graphics library
33464ebd5Sriastradh *
43464ebd5Sriastradh * Copyright (C) 2009  VMware, Inc.  All Rights Reserved.
53464ebd5Sriastradh *
63464ebd5Sriastradh * Permission is hereby granted, free of charge, to any person obtaining a
73464ebd5Sriastradh * copy of this software and associated documentation files (the "Software"),
83464ebd5Sriastradh * to deal in the Software without restriction, including without limitation
93464ebd5Sriastradh * the rights to use, copy, modify, merge, publish, distribute, sublicense,
103464ebd5Sriastradh * and/or sell copies of the Software, and to permit persons to whom the
113464ebd5Sriastradh * Software is furnished to do so, subject to the following conditions:
123464ebd5Sriastradh *
133464ebd5Sriastradh * The above copyright notice and this permission notice shall be included
143464ebd5Sriastradh * in all copies or substantial portions of the Software.
153464ebd5Sriastradh *
163464ebd5Sriastradh * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
173464ebd5Sriastradh * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
183464ebd5Sriastradh * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
193464ebd5Sriastradh * VMWARE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
203464ebd5Sriastradh * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
213464ebd5Sriastradh * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
223464ebd5Sriastradh */
233464ebd5Sriastradh
243464ebd5Sriastradh
253464ebd5Sriastradh
263464ebd5Sriastradh#include "main/glheader.h"
273464ebd5Sriastradh#include "main/context.h"
283464ebd5Sriastradh#include "main/macros.h"
293464ebd5Sriastradh#include "program.h"
303464ebd5Sriastradh#include "prog_instruction.h"
313464ebd5Sriastradh#include "prog_optimize.h"
323464ebd5Sriastradh#include "prog_print.h"
333464ebd5Sriastradh
343464ebd5Sriastradh
353464ebd5Sriastradh#define MAX_LOOP_NESTING 50
363464ebd5Sriastradh/* MAX_PROGRAM_TEMPS is a low number (256), and we want to be able to
373464ebd5Sriastradh * register allocate many temporary values into that small number of
383464ebd5Sriastradh * temps.  So allow large temporary indices coming into the register
393464ebd5Sriastradh * allocator.
403464ebd5Sriastradh */
413464ebd5Sriastradh#define REG_ALLOCATE_MAX_PROGRAM_TEMPS	((1 << INST_INDEX_BITS) - 1)
423464ebd5Sriastradh
433464ebd5Sriastradhstatic GLboolean dbg = GL_FALSE;
443464ebd5Sriastradh
453464ebd5Sriastradh#define NO_MASK 0xf
463464ebd5Sriastradh
473464ebd5Sriastradh/**
483464ebd5Sriastradh * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which
493464ebd5Sriastradh * are read from the given src in this instruction, We also provide
503464ebd5Sriastradh * one optional masks which may mask other components in the dst
513464ebd5Sriastradh * register
523464ebd5Sriastradh */
533464ebd5Sriastradhstatic GLuint
543464ebd5Sriastradhget_src_arg_mask(const struct prog_instruction *inst,
553464ebd5Sriastradh                 GLuint arg, GLuint dst_mask)
563464ebd5Sriastradh{
573464ebd5Sriastradh   GLuint read_mask, channel_mask;
583464ebd5Sriastradh   GLuint comp;
593464ebd5Sriastradh
6001e04c3fSmrg   assert(arg < _mesa_num_inst_src_regs(inst->Opcode));
613464ebd5Sriastradh
623464ebd5Sriastradh   /* Form the dst register, find the written channels */
6301e04c3fSmrg   switch (inst->Opcode) {
6401e04c3fSmrg   case OPCODE_MOV:
6501e04c3fSmrg   case OPCODE_MIN:
6601e04c3fSmrg   case OPCODE_MAX:
6701e04c3fSmrg   case OPCODE_ABS:
6801e04c3fSmrg   case OPCODE_ADD:
6901e04c3fSmrg   case OPCODE_MAD:
7001e04c3fSmrg   case OPCODE_MUL:
7101e04c3fSmrg   case OPCODE_SUB:
7201e04c3fSmrg   case OPCODE_CMP:
7301e04c3fSmrg   case OPCODE_FLR:
7401e04c3fSmrg   case OPCODE_FRC:
7501e04c3fSmrg   case OPCODE_LRP:
7601e04c3fSmrg   case OPCODE_SGE:
7701e04c3fSmrg   case OPCODE_SLT:
7801e04c3fSmrg   case OPCODE_SSG:
7901e04c3fSmrg      channel_mask = inst->DstReg.WriteMask & dst_mask;
8001e04c3fSmrg      break;
8101e04c3fSmrg   case OPCODE_RCP:
8201e04c3fSmrg   case OPCODE_SIN:
8301e04c3fSmrg   case OPCODE_COS:
8401e04c3fSmrg   case OPCODE_RSQ:
8501e04c3fSmrg   case OPCODE_POW:
8601e04c3fSmrg   case OPCODE_EX2:
8701e04c3fSmrg   case OPCODE_LOG:
8801e04c3fSmrg      channel_mask = WRITEMASK_X;
8901e04c3fSmrg      break;
9001e04c3fSmrg   case OPCODE_DP2:
9101e04c3fSmrg      channel_mask = WRITEMASK_XY;
9201e04c3fSmrg      break;
9301e04c3fSmrg   case OPCODE_DP3:
9401e04c3fSmrg   case OPCODE_XPD:
9501e04c3fSmrg      channel_mask = WRITEMASK_XYZ;
9601e04c3fSmrg      break;
9701e04c3fSmrg   default:
983464ebd5Sriastradh      channel_mask = WRITEMASK_XYZW;
9901e04c3fSmrg      break;
1003464ebd5Sriastradh   }
1013464ebd5Sriastradh
1023464ebd5Sriastradh   /* Now, given the src swizzle and the written channels, find which
1033464ebd5Sriastradh    * components are actually read
1043464ebd5Sriastradh    */
1053464ebd5Sriastradh   read_mask = 0x0;
1063464ebd5Sriastradh   for (comp = 0; comp < 4; ++comp) {
1073464ebd5Sriastradh      const GLuint coord = GET_SWZ(inst->SrcReg[arg].Swizzle, comp);
1083464ebd5Sriastradh      if (channel_mask & (1 << comp) && coord <= SWIZZLE_W)
1093464ebd5Sriastradh         read_mask |= 1 << coord;
1103464ebd5Sriastradh   }
1113464ebd5Sriastradh
1123464ebd5Sriastradh   return read_mask;
1133464ebd5Sriastradh}
1143464ebd5Sriastradh
1153464ebd5Sriastradh
1163464ebd5Sriastradh/**
1173464ebd5Sriastradh * For a MOV instruction, compute a write mask when src register also has
1183464ebd5Sriastradh * a mask
1193464ebd5Sriastradh */
1203464ebd5Sriastradhstatic GLuint
1213464ebd5Sriastradhget_dst_mask_for_mov(const struct prog_instruction *mov, GLuint src_mask)
1223464ebd5Sriastradh{
1233464ebd5Sriastradh   const GLuint mask = mov->DstReg.WriteMask;
1243464ebd5Sriastradh   GLuint comp;
1253464ebd5Sriastradh   GLuint updated_mask = 0x0;
1263464ebd5Sriastradh
12701e04c3fSmrg   assert(mov->Opcode == OPCODE_MOV);
1283464ebd5Sriastradh
1293464ebd5Sriastradh   for (comp = 0; comp < 4; ++comp) {
1303464ebd5Sriastradh      GLuint src_comp;
1313464ebd5Sriastradh      if ((mask & (1 << comp)) == 0)
1323464ebd5Sriastradh         continue;
1333464ebd5Sriastradh      src_comp = GET_SWZ(mov->SrcReg[0].Swizzle, comp);
1343464ebd5Sriastradh      if ((src_mask & (1 << src_comp)) == 0)
1353464ebd5Sriastradh         continue;
1363464ebd5Sriastradh      updated_mask |= 1 << comp;
1373464ebd5Sriastradh   }
1383464ebd5Sriastradh
1393464ebd5Sriastradh   return updated_mask;
1403464ebd5Sriastradh}
1413464ebd5Sriastradh
1423464ebd5Sriastradh
1433464ebd5Sriastradh/**
1443464ebd5Sriastradh * Ensure that the swizzle is regular.  That is, all of the swizzle
1453464ebd5Sriastradh * terms are SWIZZLE_X,Y,Z,W and not SWIZZLE_ZERO or SWIZZLE_ONE.
1463464ebd5Sriastradh */
1473464ebd5Sriastradhstatic GLboolean
1483464ebd5Sriastradhis_swizzle_regular(GLuint swz)
1493464ebd5Sriastradh{
1503464ebd5Sriastradh   return GET_SWZ(swz,0) <= SWIZZLE_W &&
1513464ebd5Sriastradh          GET_SWZ(swz,1) <= SWIZZLE_W &&
1523464ebd5Sriastradh          GET_SWZ(swz,2) <= SWIZZLE_W &&
1533464ebd5Sriastradh          GET_SWZ(swz,3) <= SWIZZLE_W;
1543464ebd5Sriastradh}
1553464ebd5Sriastradh
1563464ebd5Sriastradh
1573464ebd5Sriastradh/**
1583464ebd5Sriastradh * In 'prog' remove instruction[i] if removeFlags[i] == TRUE.
1593464ebd5Sriastradh * \return number of instructions removed
1603464ebd5Sriastradh */
1613464ebd5Sriastradhstatic GLuint
16201e04c3fSmrgremove_instructions(struct gl_program *prog, const GLboolean *removeFlags,
16301e04c3fSmrg                    void *mem_ctx)
1643464ebd5Sriastradh{
1653464ebd5Sriastradh   GLint i, removeEnd = 0, removeCount = 0;
1663464ebd5Sriastradh   GLuint totalRemoved = 0;
1673464ebd5Sriastradh
1683464ebd5Sriastradh   /* go backward */
16901e04c3fSmrg   for (i = prog->arb.NumInstructions - 1; i >= 0; i--) {
1703464ebd5Sriastradh      if (removeFlags[i]) {
1713464ebd5Sriastradh         totalRemoved++;
1723464ebd5Sriastradh         if (removeCount == 0) {
1733464ebd5Sriastradh            /* begin a run of instructions to remove */
1743464ebd5Sriastradh            removeEnd = i;
1753464ebd5Sriastradh            removeCount = 1;
1763464ebd5Sriastradh         }
1773464ebd5Sriastradh         else {
1783464ebd5Sriastradh            /* extend the run of instructions to remove */
1793464ebd5Sriastradh            removeCount++;
1803464ebd5Sriastradh         }
1813464ebd5Sriastradh      }
1823464ebd5Sriastradh      else {
1833464ebd5Sriastradh         /* don't remove this instruction, but check if the preceeding
1843464ebd5Sriastradh          * instructions are to be removed.
1853464ebd5Sriastradh          */
1863464ebd5Sriastradh         if (removeCount > 0) {
1873464ebd5Sriastradh            GLint removeStart = removeEnd - removeCount + 1;
18801e04c3fSmrg            _mesa_delete_instructions(prog, removeStart, removeCount, mem_ctx);
1893464ebd5Sriastradh            removeStart = removeCount = 0; /* reset removal info */
1903464ebd5Sriastradh         }
1913464ebd5Sriastradh      }
1923464ebd5Sriastradh   }
1933464ebd5Sriastradh   /* Finish removing if the first instruction was to be removed. */
1943464ebd5Sriastradh   if (removeCount > 0) {
1953464ebd5Sriastradh      GLint removeStart = removeEnd - removeCount + 1;
19601e04c3fSmrg      _mesa_delete_instructions(prog, removeStart, removeCount, mem_ctx);
1973464ebd5Sriastradh   }
1983464ebd5Sriastradh   return totalRemoved;
1993464ebd5Sriastradh}
2003464ebd5Sriastradh
2013464ebd5Sriastradh
2023464ebd5Sriastradh/**
2033464ebd5Sriastradh * Remap register indexes according to map.
2043464ebd5Sriastradh * \param prog  the program to search/replace
2053464ebd5Sriastradh * \param file  the type of register file to search/replace
2063464ebd5Sriastradh * \param map  maps old register indexes to new indexes
2073464ebd5Sriastradh */
2083464ebd5Sriastradhstatic void
2093464ebd5Sriastradhreplace_regs(struct gl_program *prog, gl_register_file file, const GLint map[])
2103464ebd5Sriastradh{
2113464ebd5Sriastradh   GLuint i;
2123464ebd5Sriastradh
21301e04c3fSmrg   for (i = 0; i < prog->arb.NumInstructions; i++) {
21401e04c3fSmrg      struct prog_instruction *inst = prog->arb.Instructions + i;
2153464ebd5Sriastradh      const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
2163464ebd5Sriastradh      GLuint j;
2173464ebd5Sriastradh      for (j = 0; j < numSrc; j++) {
2183464ebd5Sriastradh         if (inst->SrcReg[j].File == file) {
2193464ebd5Sriastradh            GLuint index = inst->SrcReg[j].Index;
22001e04c3fSmrg            assert(map[index] >= 0);
2213464ebd5Sriastradh            inst->SrcReg[j].Index = map[index];
2223464ebd5Sriastradh         }
2233464ebd5Sriastradh      }
2243464ebd5Sriastradh      if (inst->DstReg.File == file) {
2253464ebd5Sriastradh         const GLuint index = inst->DstReg.Index;
22601e04c3fSmrg         assert(map[index] >= 0);
2273464ebd5Sriastradh         inst->DstReg.Index = map[index];
2283464ebd5Sriastradh      }
2293464ebd5Sriastradh   }
2303464ebd5Sriastradh}
2313464ebd5Sriastradh
2323464ebd5Sriastradh
2333464ebd5Sriastradh/**
2343464ebd5Sriastradh * Remove dead instructions from the given program.
2353464ebd5Sriastradh * This is very primitive for now.  Basically look for temp registers
2363464ebd5Sriastradh * that are written to but never read.  Remove any instructions that
2373464ebd5Sriastradh * write to such registers.  Be careful with condition code setters.
2383464ebd5Sriastradh */
2393464ebd5Sriastradhstatic GLboolean
24001e04c3fSmrg_mesa_remove_dead_code_global(struct gl_program *prog, void *mem_ctx)
2413464ebd5Sriastradh{
2423464ebd5Sriastradh   GLboolean tempRead[REG_ALLOCATE_MAX_PROGRAM_TEMPS][4];
2433464ebd5Sriastradh   GLboolean *removeInst; /* per-instruction removal flag */
2443464ebd5Sriastradh   GLuint i, rem = 0, comp;
2453464ebd5Sriastradh
2463464ebd5Sriastradh   memset(tempRead, 0, sizeof(tempRead));
2473464ebd5Sriastradh
2483464ebd5Sriastradh   if (dbg) {
2493464ebd5Sriastradh      printf("Optimize: Begin dead code removal\n");
2503464ebd5Sriastradh      /*_mesa_print_program(prog);*/
2513464ebd5Sriastradh   }
2523464ebd5Sriastradh
253af69d88dSmrg   removeInst =
25401e04c3fSmrg      calloc(prog->arb.NumInstructions, sizeof(GLboolean));
2553464ebd5Sriastradh
2563464ebd5Sriastradh   /* Determine which temps are read and written */
25701e04c3fSmrg   for (i = 0; i < prog->arb.NumInstructions; i++) {
25801e04c3fSmrg      const struct prog_instruction *inst = prog->arb.Instructions + i;
2593464ebd5Sriastradh      const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
2603464ebd5Sriastradh      GLuint j;
2613464ebd5Sriastradh
2623464ebd5Sriastradh      /* check src regs */
2633464ebd5Sriastradh      for (j = 0; j < numSrc; j++) {
2643464ebd5Sriastradh         if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
2653464ebd5Sriastradh            const GLuint index = inst->SrcReg[j].Index;
2663464ebd5Sriastradh            GLuint read_mask;
26701e04c3fSmrg            assert(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
2683464ebd5Sriastradh	    read_mask = get_src_arg_mask(inst, j, NO_MASK);
2693464ebd5Sriastradh
2703464ebd5Sriastradh            if (inst->SrcReg[j].RelAddr) {
2713464ebd5Sriastradh               if (dbg)
2723464ebd5Sriastradh                  printf("abort remove dead code (indirect temp)\n");
2733464ebd5Sriastradh               goto done;
2743464ebd5Sriastradh            }
2753464ebd5Sriastradh
2763464ebd5Sriastradh	    for (comp = 0; comp < 4; comp++) {
2773464ebd5Sriastradh	       const GLuint swz = GET_SWZ(inst->SrcReg[j].Swizzle, comp);
278af69d88dSmrg               if (swz <= SWIZZLE_W) {
279af69d88dSmrg                  if ((read_mask & (1 << swz)) == 0)
280af69d88dSmrg                     continue;
2813464ebd5Sriastradh                  tempRead[index][swz] = GL_TRUE;
282af69d88dSmrg               }
2833464ebd5Sriastradh	    }
2843464ebd5Sriastradh         }
2853464ebd5Sriastradh      }
2863464ebd5Sriastradh
2873464ebd5Sriastradh      /* check dst reg */
2883464ebd5Sriastradh      if (inst->DstReg.File == PROGRAM_TEMPORARY) {
28901e04c3fSmrg         assert(inst->DstReg.Index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
2903464ebd5Sriastradh
2913464ebd5Sriastradh         if (inst->DstReg.RelAddr) {
2923464ebd5Sriastradh            if (dbg)
2933464ebd5Sriastradh               printf("abort remove dead code (indirect temp)\n");
2943464ebd5Sriastradh            goto done;
2953464ebd5Sriastradh         }
2963464ebd5Sriastradh      }
2973464ebd5Sriastradh   }
2983464ebd5Sriastradh
2993464ebd5Sriastradh   /* find instructions that write to dead registers, flag for removal */
30001e04c3fSmrg   for (i = 0; i < prog->arb.NumInstructions; i++) {
30101e04c3fSmrg      struct prog_instruction *inst = prog->arb.Instructions + i;
3023464ebd5Sriastradh      const GLuint numDst = _mesa_num_inst_dst_regs(inst->Opcode);
3033464ebd5Sriastradh
3043464ebd5Sriastradh      if (numDst != 0 && inst->DstReg.File == PROGRAM_TEMPORARY) {
3053464ebd5Sriastradh         GLint chan, index = inst->DstReg.Index;
3063464ebd5Sriastradh
3073464ebd5Sriastradh	 for (chan = 0; chan < 4; chan++) {
3083464ebd5Sriastradh	    if (!tempRead[index][chan] &&
3093464ebd5Sriastradh		inst->DstReg.WriteMask & (1 << chan)) {
3103464ebd5Sriastradh	       if (dbg) {
3113464ebd5Sriastradh		  printf("Remove writemask on %u.%c\n", i,
3123464ebd5Sriastradh			       chan == 3 ? 'w' : 'x' + chan);
3133464ebd5Sriastradh	       }
3143464ebd5Sriastradh	       inst->DstReg.WriteMask &= ~(1 << chan);
3153464ebd5Sriastradh	       rem++;
3163464ebd5Sriastradh	    }
3173464ebd5Sriastradh	 }
3183464ebd5Sriastradh
3193464ebd5Sriastradh	 if (inst->DstReg.WriteMask == 0) {
3203464ebd5Sriastradh	    /* If we cleared all writes, the instruction can be removed. */
3213464ebd5Sriastradh	    if (dbg)
3223464ebd5Sriastradh	       printf("Remove instruction %u: \n", i);
3233464ebd5Sriastradh	    removeInst[i] = GL_TRUE;
3243464ebd5Sriastradh	 }
3253464ebd5Sriastradh      }
3263464ebd5Sriastradh   }
3273464ebd5Sriastradh
3283464ebd5Sriastradh   /* now remove the instructions which aren't needed */
32901e04c3fSmrg   rem = remove_instructions(prog, removeInst, mem_ctx);
3303464ebd5Sriastradh
3313464ebd5Sriastradh   if (dbg) {
3323464ebd5Sriastradh      printf("Optimize: End dead code removal.\n");
3333464ebd5Sriastradh      printf("  %u channel writes removed\n", rem);
3343464ebd5Sriastradh      printf("  %u instructions removed\n", rem);
3353464ebd5Sriastradh      /*_mesa_print_program(prog);*/
3363464ebd5Sriastradh   }
3373464ebd5Sriastradh
3383464ebd5Sriastradhdone:
3393464ebd5Sriastradh   free(removeInst);
3403464ebd5Sriastradh   return rem != 0;
3413464ebd5Sriastradh}
3423464ebd5Sriastradh
3433464ebd5Sriastradh
3443464ebd5Sriastradhenum inst_use
3453464ebd5Sriastradh{
3463464ebd5Sriastradh   READ,
3473464ebd5Sriastradh   WRITE,
3483464ebd5Sriastradh   FLOW,
3493464ebd5Sriastradh   END
3503464ebd5Sriastradh};
3513464ebd5Sriastradh
3523464ebd5Sriastradh
3533464ebd5Sriastradh/**
3543464ebd5Sriastradh * Scan forward in program from 'start' for the next occurances of TEMP[index].
3553464ebd5Sriastradh * We look if an instruction reads the component given by the masks and if they
3563464ebd5Sriastradh * are overwritten.
3573464ebd5Sriastradh * Return READ, WRITE, FLOW or END to indicate the next usage or an indicator
3583464ebd5Sriastradh * that we can't look further.
3593464ebd5Sriastradh */
3603464ebd5Sriastradhstatic enum inst_use
3613464ebd5Sriastradhfind_next_use(const struct gl_program *prog,
3623464ebd5Sriastradh              GLuint start,
3633464ebd5Sriastradh              GLuint index,
3643464ebd5Sriastradh              GLuint mask)
3653464ebd5Sriastradh{
3663464ebd5Sriastradh   GLuint i;
3673464ebd5Sriastradh
36801e04c3fSmrg   for (i = start; i < prog->arb.NumInstructions; i++) {
36901e04c3fSmrg      const struct prog_instruction *inst = prog->arb.Instructions + i;
3703464ebd5Sriastradh      switch (inst->Opcode) {
3713464ebd5Sriastradh      case OPCODE_BGNLOOP:
3723464ebd5Sriastradh      case OPCODE_BGNSUB:
3733464ebd5Sriastradh      case OPCODE_CAL:
3743464ebd5Sriastradh      case OPCODE_CONT:
3753464ebd5Sriastradh      case OPCODE_IF:
3763464ebd5Sriastradh      case OPCODE_ELSE:
3773464ebd5Sriastradh      case OPCODE_ENDIF:
3783464ebd5Sriastradh      case OPCODE_ENDLOOP:
3793464ebd5Sriastradh      case OPCODE_ENDSUB:
3803464ebd5Sriastradh      case OPCODE_RET:
3813464ebd5Sriastradh         return FLOW;
3823464ebd5Sriastradh      case OPCODE_END:
3833464ebd5Sriastradh         return END;
3843464ebd5Sriastradh      default:
3853464ebd5Sriastradh         {
3863464ebd5Sriastradh            const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
3873464ebd5Sriastradh            GLuint j;
3883464ebd5Sriastradh            for (j = 0; j < numSrc; j++) {
3893464ebd5Sriastradh               if (inst->SrcReg[j].RelAddr ||
3903464ebd5Sriastradh                   (inst->SrcReg[j].File == PROGRAM_TEMPORARY &&
39101e04c3fSmrg                   inst->SrcReg[j].Index == (GLint)index &&
3923464ebd5Sriastradh                   (get_src_arg_mask(inst,j,NO_MASK) & mask)))
3933464ebd5Sriastradh                  return READ;
3943464ebd5Sriastradh            }
3953464ebd5Sriastradh            if (_mesa_num_inst_dst_regs(inst->Opcode) == 1 &&
3963464ebd5Sriastradh                inst->DstReg.File == PROGRAM_TEMPORARY &&
3973464ebd5Sriastradh                inst->DstReg.Index == index) {
3983464ebd5Sriastradh               mask &= ~inst->DstReg.WriteMask;
3993464ebd5Sriastradh               if (mask == 0)
4003464ebd5Sriastradh                  return WRITE;
4013464ebd5Sriastradh            }
4023464ebd5Sriastradh         }
4033464ebd5Sriastradh      }
4043464ebd5Sriastradh   }
4053464ebd5Sriastradh   return END;
4063464ebd5Sriastradh}
4073464ebd5Sriastradh
4083464ebd5Sriastradh
4093464ebd5Sriastradh/**
4103464ebd5Sriastradh * Is the given instruction opcode a flow-control opcode?
4113464ebd5Sriastradh * XXX maybe move this into prog_instruction.[ch]
4123464ebd5Sriastradh */
4133464ebd5Sriastradhstatic GLboolean
4143464ebd5Sriastradh_mesa_is_flow_control_opcode(enum prog_opcode opcode)
4153464ebd5Sriastradh{
4163464ebd5Sriastradh   switch (opcode) {
4173464ebd5Sriastradh   case OPCODE_BGNLOOP:
4183464ebd5Sriastradh   case OPCODE_BGNSUB:
4193464ebd5Sriastradh   case OPCODE_CAL:
4203464ebd5Sriastradh   case OPCODE_CONT:
4213464ebd5Sriastradh   case OPCODE_IF:
4223464ebd5Sriastradh   case OPCODE_ELSE:
4233464ebd5Sriastradh   case OPCODE_END:
4243464ebd5Sriastradh   case OPCODE_ENDIF:
4253464ebd5Sriastradh   case OPCODE_ENDLOOP:
4263464ebd5Sriastradh   case OPCODE_ENDSUB:
4273464ebd5Sriastradh   case OPCODE_RET:
4283464ebd5Sriastradh      return GL_TRUE;
4293464ebd5Sriastradh   default:
4303464ebd5Sriastradh      return GL_FALSE;
4313464ebd5Sriastradh   }
4323464ebd5Sriastradh}
4333464ebd5Sriastradh
4343464ebd5Sriastradh
4353464ebd5Sriastradh/**
4363464ebd5Sriastradh * Test if the given instruction is a simple MOV (no conditional updating,
4373464ebd5Sriastradh * not relative addressing, no negation/abs, etc).
4383464ebd5Sriastradh */
4393464ebd5Sriastradhstatic GLboolean
4403464ebd5Sriastradhcan_downward_mov_be_modifed(const struct prog_instruction *mov)
4413464ebd5Sriastradh{
4423464ebd5Sriastradh   return
4433464ebd5Sriastradh      mov->Opcode == OPCODE_MOV &&
4443464ebd5Sriastradh      mov->SrcReg[0].RelAddr == 0 &&
4453464ebd5Sriastradh      mov->SrcReg[0].Negate == 0 &&
44601e04c3fSmrg      mov->DstReg.RelAddr == 0;
4473464ebd5Sriastradh}
4483464ebd5Sriastradh
4493464ebd5Sriastradh
4503464ebd5Sriastradhstatic GLboolean
4513464ebd5Sriastradhcan_upward_mov_be_modifed(const struct prog_instruction *mov)
4523464ebd5Sriastradh{
4533464ebd5Sriastradh   return
4543464ebd5Sriastradh      can_downward_mov_be_modifed(mov) &&
455af69d88dSmrg      mov->DstReg.File == PROGRAM_TEMPORARY &&
45601e04c3fSmrg      !mov->Saturate;
4573464ebd5Sriastradh}
4583464ebd5Sriastradh
4593464ebd5Sriastradh
4603464ebd5Sriastradh/**
4613464ebd5Sriastradh * Try to remove use of extraneous MOV instructions, to free them up for dead
4623464ebd5Sriastradh * code removal.
4633464ebd5Sriastradh */
4643464ebd5Sriastradhstatic void
4653464ebd5Sriastradh_mesa_remove_extra_move_use(struct gl_program *prog)
4663464ebd5Sriastradh{
4673464ebd5Sriastradh   GLuint i, j;
4683464ebd5Sriastradh
4693464ebd5Sriastradh   if (dbg) {
4703464ebd5Sriastradh      printf("Optimize: Begin remove extra move use\n");
4713464ebd5Sriastradh      _mesa_print_program(prog);
4723464ebd5Sriastradh   }
4733464ebd5Sriastradh
4743464ebd5Sriastradh   /*
4753464ebd5Sriastradh    * Look for sequences such as this:
4763464ebd5Sriastradh    *    MOV tmpX, arg0;
4773464ebd5Sriastradh    *    ...
4783464ebd5Sriastradh    *    FOO tmpY, tmpX, arg1;
4793464ebd5Sriastradh    * and convert into:
4803464ebd5Sriastradh    *    MOV tmpX, arg0;
4813464ebd5Sriastradh    *    ...
4823464ebd5Sriastradh    *    FOO tmpY, arg0, arg1;
4833464ebd5Sriastradh    */
4843464ebd5Sriastradh
48501e04c3fSmrg   for (i = 0; i + 1 < prog->arb.NumInstructions; i++) {
48601e04c3fSmrg      const struct prog_instruction *mov = prog->arb.Instructions + i;
4873464ebd5Sriastradh      GLuint dst_mask, src_mask;
4883464ebd5Sriastradh      if (can_upward_mov_be_modifed(mov) == GL_FALSE)
4893464ebd5Sriastradh         continue;
4903464ebd5Sriastradh
4913464ebd5Sriastradh      /* Scanning the code, we maintain the components which are still active in
4923464ebd5Sriastradh       * these two masks
4933464ebd5Sriastradh       */
4943464ebd5Sriastradh      dst_mask = mov->DstReg.WriteMask;
4953464ebd5Sriastradh      src_mask = get_src_arg_mask(mov, 0, NO_MASK);
4963464ebd5Sriastradh
4973464ebd5Sriastradh      /* Walk through remaining instructions until the or src reg gets
4983464ebd5Sriastradh       * rewritten or we get into some flow-control, eliminating the use of
4993464ebd5Sriastradh       * this MOV.
5003464ebd5Sriastradh       */
50101e04c3fSmrg      for (j = i + 1; j < prog->arb.NumInstructions; j++) {
50201e04c3fSmrg         struct prog_instruction *inst2 = prog->arb.Instructions + j;
5033464ebd5Sriastradh         GLuint arg;
5043464ebd5Sriastradh
5053464ebd5Sriastradh	 if (_mesa_is_flow_control_opcode(inst2->Opcode))
5063464ebd5Sriastradh	     break;
5073464ebd5Sriastradh
5083464ebd5Sriastradh	 /* First rewrite this instruction's args if appropriate. */
5093464ebd5Sriastradh	 for (arg = 0; arg < _mesa_num_inst_src_regs(inst2->Opcode); arg++) {
5103464ebd5Sriastradh	    GLuint comp, read_mask;
5113464ebd5Sriastradh
5123464ebd5Sriastradh	    if (inst2->SrcReg[arg].File != mov->DstReg.File ||
5133464ebd5Sriastradh		inst2->SrcReg[arg].Index != mov->DstReg.Index ||
51401e04c3fSmrg		inst2->SrcReg[arg].RelAddr)
5153464ebd5Sriastradh	       continue;
5163464ebd5Sriastradh            read_mask = get_src_arg_mask(inst2, arg, NO_MASK);
5173464ebd5Sriastradh
5183464ebd5Sriastradh	    /* Adjust the swizzles of inst2 to point at MOV's source if ALL the
5193464ebd5Sriastradh             * components read still come from the mov instructions
5203464ebd5Sriastradh             */
5213464ebd5Sriastradh            if (is_swizzle_regular(inst2->SrcReg[arg].Swizzle) &&
5223464ebd5Sriastradh               (read_mask & dst_mask) == read_mask) {
5233464ebd5Sriastradh               for (comp = 0; comp < 4; comp++) {
5243464ebd5Sriastradh                  const GLuint inst2_swz =
5253464ebd5Sriastradh                     GET_SWZ(inst2->SrcReg[arg].Swizzle, comp);
5263464ebd5Sriastradh                  const GLuint s = GET_SWZ(mov->SrcReg[0].Swizzle, inst2_swz);
5273464ebd5Sriastradh                  inst2->SrcReg[arg].Swizzle &= ~(7 << (3 * comp));
5283464ebd5Sriastradh                  inst2->SrcReg[arg].Swizzle |= s << (3 * comp);
5293464ebd5Sriastradh                  inst2->SrcReg[arg].Negate ^= (((mov->SrcReg[0].Negate >>
5303464ebd5Sriastradh                                                  inst2_swz) & 0x1) << comp);
5313464ebd5Sriastradh               }
5323464ebd5Sriastradh               inst2->SrcReg[arg].File = mov->SrcReg[0].File;
5333464ebd5Sriastradh               inst2->SrcReg[arg].Index = mov->SrcReg[0].Index;
5343464ebd5Sriastradh            }
5353464ebd5Sriastradh	 }
5363464ebd5Sriastradh
5373464ebd5Sriastradh	 /* The source of MOV is written. This potentially deactivates some
5383464ebd5Sriastradh          * components from the src and dst of the MOV instruction
5393464ebd5Sriastradh          */
5403464ebd5Sriastradh	 if (inst2->DstReg.File == mov->DstReg.File &&
5413464ebd5Sriastradh	     (inst2->DstReg.RelAddr ||
5423464ebd5Sriastradh	      inst2->DstReg.Index == mov->DstReg.Index)) {
5433464ebd5Sriastradh            dst_mask &= ~inst2->DstReg.WriteMask;
5443464ebd5Sriastradh            src_mask = get_src_arg_mask(mov, 0, dst_mask);
5453464ebd5Sriastradh         }
5463464ebd5Sriastradh
5473464ebd5Sriastradh         /* Idem when the destination of mov is written */
5483464ebd5Sriastradh	 if (inst2->DstReg.File == mov->SrcReg[0].File &&
5493464ebd5Sriastradh	     (inst2->DstReg.RelAddr ||
5503464ebd5Sriastradh	      inst2->DstReg.Index == mov->SrcReg[0].Index)) {
5513464ebd5Sriastradh            src_mask &= ~inst2->DstReg.WriteMask;
5523464ebd5Sriastradh            dst_mask &= get_dst_mask_for_mov(mov, src_mask);
5533464ebd5Sriastradh         }
5543464ebd5Sriastradh         if (dst_mask == 0)
5553464ebd5Sriastradh            break;
5563464ebd5Sriastradh      }
5573464ebd5Sriastradh   }
5583464ebd5Sriastradh
5593464ebd5Sriastradh   if (dbg) {
5603464ebd5Sriastradh      printf("Optimize: End remove extra move use.\n");
5613464ebd5Sriastradh      /*_mesa_print_program(prog);*/
5623464ebd5Sriastradh   }
5633464ebd5Sriastradh}
5643464ebd5Sriastradh
5653464ebd5Sriastradh
5663464ebd5Sriastradh/**
5673464ebd5Sriastradh * Complements dead_code_global. Try to remove code in block of code by
5683464ebd5Sriastradh * carefully monitoring the swizzles. Both functions should be merged into one
5693464ebd5Sriastradh * with a proper control flow graph
5703464ebd5Sriastradh */
5713464ebd5Sriastradhstatic GLboolean
57201e04c3fSmrg_mesa_remove_dead_code_local(struct gl_program *prog, void *mem_ctx)
5733464ebd5Sriastradh{
5743464ebd5Sriastradh   GLboolean *removeInst;
5753464ebd5Sriastradh   GLuint i, arg, rem = 0;
5763464ebd5Sriastradh
577af69d88dSmrg   removeInst =
57801e04c3fSmrg      calloc(prog->arb.NumInstructions, sizeof(GLboolean));
5793464ebd5Sriastradh
58001e04c3fSmrg   for (i = 0; i < prog->arb.NumInstructions; i++) {
58101e04c3fSmrg      const struct prog_instruction *inst = prog->arb.Instructions + i;
5823464ebd5Sriastradh      const GLuint index = inst->DstReg.Index;
5833464ebd5Sriastradh      const GLuint mask = inst->DstReg.WriteMask;
5843464ebd5Sriastradh      enum inst_use use;
5853464ebd5Sriastradh
5863464ebd5Sriastradh      /* We must deactivate the pass as soon as some indirection is used */
5873464ebd5Sriastradh      if (inst->DstReg.RelAddr)
5883464ebd5Sriastradh         goto done;
5893464ebd5Sriastradh      for (arg = 0; arg < _mesa_num_inst_src_regs(inst->Opcode); arg++)
5903464ebd5Sriastradh         if (inst->SrcReg[arg].RelAddr)
5913464ebd5Sriastradh            goto done;
5923464ebd5Sriastradh
5933464ebd5Sriastradh      if (_mesa_is_flow_control_opcode(inst->Opcode) ||
5943464ebd5Sriastradh          _mesa_num_inst_dst_regs(inst->Opcode) == 0 ||
5953464ebd5Sriastradh          inst->DstReg.File != PROGRAM_TEMPORARY ||
5963464ebd5Sriastradh          inst->DstReg.RelAddr)
5973464ebd5Sriastradh         continue;
5983464ebd5Sriastradh
5993464ebd5Sriastradh      use = find_next_use(prog, i+1, index, mask);
6003464ebd5Sriastradh      if (use == WRITE || use == END)
6013464ebd5Sriastradh         removeInst[i] = GL_TRUE;
6023464ebd5Sriastradh   }
6033464ebd5Sriastradh
60401e04c3fSmrg   rem = remove_instructions(prog, removeInst, mem_ctx);
6053464ebd5Sriastradh
6063464ebd5Sriastradhdone:
6073464ebd5Sriastradh   free(removeInst);
6083464ebd5Sriastradh   return rem != 0;
6093464ebd5Sriastradh}
6103464ebd5Sriastradh
6113464ebd5Sriastradh
6123464ebd5Sriastradh/**
6133464ebd5Sriastradh * Try to inject the destination of mov as the destination of inst and recompute
6143464ebd5Sriastradh * the swizzles operators for the sources of inst if required. Return GL_TRUE
6153464ebd5Sriastradh * of the substitution was possible, GL_FALSE otherwise
6163464ebd5Sriastradh */
6173464ebd5Sriastradhstatic GLboolean
6183464ebd5Sriastradh_mesa_merge_mov_into_inst(struct prog_instruction *inst,
6193464ebd5Sriastradh                          const struct prog_instruction *mov)
6203464ebd5Sriastradh{
6213464ebd5Sriastradh   /* Indirection table which associates destination and source components for
6223464ebd5Sriastradh    * the mov instruction
6233464ebd5Sriastradh    */
6243464ebd5Sriastradh   const GLuint mask = get_src_arg_mask(mov, 0, NO_MASK);
6253464ebd5Sriastradh
6263464ebd5Sriastradh   /* Some components are not written by inst. We cannot remove the mov */
6273464ebd5Sriastradh   if (mask != (inst->DstReg.WriteMask & mask))
6283464ebd5Sriastradh      return GL_FALSE;
6293464ebd5Sriastradh
63001e04c3fSmrg   inst->Saturate |= mov->Saturate;
631af69d88dSmrg
6323464ebd5Sriastradh   /* Depending on the instruction, we may need to recompute the swizzles.
6333464ebd5Sriastradh    * Also, some other instructions (like TEX) are not linear. We will only
6343464ebd5Sriastradh    * consider completely active sources and destinations
6353464ebd5Sriastradh    */
6363464ebd5Sriastradh   switch (inst->Opcode) {
6373464ebd5Sriastradh
6383464ebd5Sriastradh   /* Carstesian instructions: we compute the swizzle */
6393464ebd5Sriastradh   case OPCODE_MOV:
6403464ebd5Sriastradh   case OPCODE_MIN:
6413464ebd5Sriastradh   case OPCODE_MAX:
6423464ebd5Sriastradh   case OPCODE_ABS:
6433464ebd5Sriastradh   case OPCODE_ADD:
6443464ebd5Sriastradh   case OPCODE_MAD:
6453464ebd5Sriastradh   case OPCODE_MUL:
6463464ebd5Sriastradh   case OPCODE_SUB:
6473464ebd5Sriastradh   {
6483464ebd5Sriastradh      GLuint dst_to_src_comp[4] = {0,0,0,0};
6493464ebd5Sriastradh      GLuint dst_comp, arg;
6503464ebd5Sriastradh      for (dst_comp = 0; dst_comp < 4; ++dst_comp) {
6513464ebd5Sriastradh         if (mov->DstReg.WriteMask & (1 << dst_comp)) {
6523464ebd5Sriastradh            const GLuint src_comp = GET_SWZ(mov->SrcReg[0].Swizzle, dst_comp);
65301e04c3fSmrg            assert(src_comp < 4);
6543464ebd5Sriastradh            dst_to_src_comp[dst_comp] = src_comp;
6553464ebd5Sriastradh         }
6563464ebd5Sriastradh      }
6573464ebd5Sriastradh
6583464ebd5Sriastradh      /* Patch each source of the instruction */
6593464ebd5Sriastradh      for (arg = 0; arg < _mesa_num_inst_src_regs(inst->Opcode); arg++) {
6603464ebd5Sriastradh         const GLuint arg_swz = inst->SrcReg[arg].Swizzle;
6613464ebd5Sriastradh         inst->SrcReg[arg].Swizzle = 0;
6623464ebd5Sriastradh
6633464ebd5Sriastradh         /* Reset each active component of the swizzle */
6643464ebd5Sriastradh         for (dst_comp = 0; dst_comp < 4; ++dst_comp) {
6653464ebd5Sriastradh            GLuint src_comp, arg_comp;
6663464ebd5Sriastradh            if ((mov->DstReg.WriteMask & (1 << dst_comp)) == 0)
6673464ebd5Sriastradh               continue;
6683464ebd5Sriastradh            src_comp = dst_to_src_comp[dst_comp];
66901e04c3fSmrg            assert(src_comp < 4);
6703464ebd5Sriastradh            arg_comp = GET_SWZ(arg_swz, src_comp);
67101e04c3fSmrg            assert(arg_comp < 4);
6723464ebd5Sriastradh            inst->SrcReg[arg].Swizzle |= arg_comp << (3*dst_comp);
6733464ebd5Sriastradh         }
6743464ebd5Sriastradh      }
6753464ebd5Sriastradh      inst->DstReg = mov->DstReg;
6763464ebd5Sriastradh      return GL_TRUE;
6773464ebd5Sriastradh   }
6783464ebd5Sriastradh
6793464ebd5Sriastradh   /* Dot products and scalar instructions: we only change the destination */
6803464ebd5Sriastradh   case OPCODE_RCP:
6813464ebd5Sriastradh   case OPCODE_SIN:
6823464ebd5Sriastradh   case OPCODE_COS:
6833464ebd5Sriastradh   case OPCODE_RSQ:
6843464ebd5Sriastradh   case OPCODE_POW:
6853464ebd5Sriastradh   case OPCODE_EX2:
6863464ebd5Sriastradh   case OPCODE_LOG:
6873464ebd5Sriastradh   case OPCODE_DP2:
6883464ebd5Sriastradh   case OPCODE_DP3:
6893464ebd5Sriastradh   case OPCODE_DP4:
6903464ebd5Sriastradh      inst->DstReg = mov->DstReg;
6913464ebd5Sriastradh      return GL_TRUE;
6923464ebd5Sriastradh
6933464ebd5Sriastradh   /* All other instructions require fully active components with no swizzle */
6943464ebd5Sriastradh   default:
6953464ebd5Sriastradh      if (mov->SrcReg[0].Swizzle != SWIZZLE_XYZW ||
6963464ebd5Sriastradh          inst->DstReg.WriteMask != WRITEMASK_XYZW)
6973464ebd5Sriastradh         return GL_FALSE;
6983464ebd5Sriastradh      inst->DstReg = mov->DstReg;
6993464ebd5Sriastradh      return GL_TRUE;
7003464ebd5Sriastradh   }
7013464ebd5Sriastradh}
7023464ebd5Sriastradh
7033464ebd5Sriastradh
7043464ebd5Sriastradh/**
7053464ebd5Sriastradh * Try to remove extraneous MOV instructions from the given program.
7063464ebd5Sriastradh */
7073464ebd5Sriastradhstatic GLboolean
70801e04c3fSmrg_mesa_remove_extra_moves(struct gl_program *prog, void *mem_ctx)
7093464ebd5Sriastradh{
7103464ebd5Sriastradh   GLboolean *removeInst; /* per-instruction removal flag */
7113464ebd5Sriastradh   GLuint i, rem = 0, nesting = 0;
7123464ebd5Sriastradh
7133464ebd5Sriastradh   if (dbg) {
7143464ebd5Sriastradh      printf("Optimize: Begin remove extra moves\n");
7153464ebd5Sriastradh      _mesa_print_program(prog);
7163464ebd5Sriastradh   }
7173464ebd5Sriastradh
718af69d88dSmrg   removeInst =
71901e04c3fSmrg      calloc(prog->arb.NumInstructions, sizeof(GLboolean));
7203464ebd5Sriastradh
7213464ebd5Sriastradh   /*
7223464ebd5Sriastradh    * Look for sequences such as this:
7233464ebd5Sriastradh    *    FOO tmpX, arg0, arg1;
7243464ebd5Sriastradh    *    MOV tmpY, tmpX;
7253464ebd5Sriastradh    * and convert into:
7263464ebd5Sriastradh    *    FOO tmpY, arg0, arg1;
7273464ebd5Sriastradh    */
7283464ebd5Sriastradh
72901e04c3fSmrg   for (i = 0; i < prog->arb.NumInstructions; i++) {
73001e04c3fSmrg      const struct prog_instruction *mov = prog->arb.Instructions + i;
7313464ebd5Sriastradh
7323464ebd5Sriastradh      switch (mov->Opcode) {
7333464ebd5Sriastradh      case OPCODE_BGNLOOP:
7343464ebd5Sriastradh      case OPCODE_BGNSUB:
7353464ebd5Sriastradh      case OPCODE_IF:
7363464ebd5Sriastradh         nesting++;
7373464ebd5Sriastradh         break;
7383464ebd5Sriastradh      case OPCODE_ENDLOOP:
7393464ebd5Sriastradh      case OPCODE_ENDSUB:
7403464ebd5Sriastradh      case OPCODE_ENDIF:
7413464ebd5Sriastradh         nesting--;
7423464ebd5Sriastradh         break;
7433464ebd5Sriastradh      case OPCODE_MOV:
7443464ebd5Sriastradh         if (i > 0 &&
7453464ebd5Sriastradh             can_downward_mov_be_modifed(mov) &&
7463464ebd5Sriastradh             mov->SrcReg[0].File == PROGRAM_TEMPORARY &&
7473464ebd5Sriastradh             nesting == 0)
7483464ebd5Sriastradh         {
7493464ebd5Sriastradh
7503464ebd5Sriastradh            /* see if this MOV can be removed */
7513464ebd5Sriastradh            const GLuint id = mov->SrcReg[0].Index;
7523464ebd5Sriastradh            struct prog_instruction *prevInst;
7533464ebd5Sriastradh            GLuint prevI;
7543464ebd5Sriastradh
7553464ebd5Sriastradh            /* get pointer to previous instruction */
7563464ebd5Sriastradh            prevI = i - 1;
7573464ebd5Sriastradh            while (prevI > 0 && removeInst[prevI])
7583464ebd5Sriastradh               prevI--;
75901e04c3fSmrg            prevInst = prog->arb.Instructions + prevI;
7603464ebd5Sriastradh
7613464ebd5Sriastradh            if (prevInst->DstReg.File == PROGRAM_TEMPORARY &&
7623464ebd5Sriastradh                prevInst->DstReg.Index == id &&
76301e04c3fSmrg                prevInst->DstReg.RelAddr == 0) {
7643464ebd5Sriastradh
7653464ebd5Sriastradh               const GLuint dst_mask = prevInst->DstReg.WriteMask;
7663464ebd5Sriastradh               enum inst_use next_use = find_next_use(prog, i+1, id, dst_mask);
7673464ebd5Sriastradh
7683464ebd5Sriastradh               if (next_use == WRITE || next_use == END) {
7693464ebd5Sriastradh                  /* OK, we can safely remove this MOV instruction.
7703464ebd5Sriastradh                   * Transform:
7713464ebd5Sriastradh                   *   prevI: FOO tempIndex, x, y;
7723464ebd5Sriastradh                   *       i: MOV z, tempIndex;
7733464ebd5Sriastradh                   * Into:
7743464ebd5Sriastradh                   *   prevI: FOO z, x, y;
7753464ebd5Sriastradh                   */
7763464ebd5Sriastradh                  if (_mesa_merge_mov_into_inst(prevInst, mov)) {
7773464ebd5Sriastradh                     removeInst[i] = GL_TRUE;
7783464ebd5Sriastradh                     if (dbg) {
7793464ebd5Sriastradh                        printf("Remove MOV at %u\n", i);
7803464ebd5Sriastradh                        printf("new prev inst %u: ", prevI);
7813464ebd5Sriastradh                        _mesa_print_instruction(prevInst);
7823464ebd5Sriastradh                     }
7833464ebd5Sriastradh                  }
7843464ebd5Sriastradh               }
7853464ebd5Sriastradh            }
7863464ebd5Sriastradh         }
7873464ebd5Sriastradh         break;
7883464ebd5Sriastradh      default:
7893464ebd5Sriastradh         ; /* nothing */
7903464ebd5Sriastradh      }
7913464ebd5Sriastradh   }
7923464ebd5Sriastradh
7933464ebd5Sriastradh   /* now remove the instructions which aren't needed */
79401e04c3fSmrg   rem = remove_instructions(prog, removeInst, mem_ctx);
7953464ebd5Sriastradh
7963464ebd5Sriastradh   free(removeInst);
7973464ebd5Sriastradh
7983464ebd5Sriastradh   if (dbg) {
7993464ebd5Sriastradh      printf("Optimize: End remove extra moves.  %u instructions removed\n", rem);
8003464ebd5Sriastradh      /*_mesa_print_program(prog);*/
8013464ebd5Sriastradh   }
8023464ebd5Sriastradh
8033464ebd5Sriastradh   return rem != 0;
8043464ebd5Sriastradh}
8053464ebd5Sriastradh
8063464ebd5Sriastradh
8073464ebd5Sriastradh/** A live register interval */
8083464ebd5Sriastradhstruct interval
8093464ebd5Sriastradh{
8103464ebd5Sriastradh   GLuint Reg;         /** The temporary register index */
8113464ebd5Sriastradh   GLuint Start, End;  /** Start/end instruction numbers */
8123464ebd5Sriastradh};
8133464ebd5Sriastradh
8143464ebd5Sriastradh
8153464ebd5Sriastradh/** A list of register intervals */
8163464ebd5Sriastradhstruct interval_list
8173464ebd5Sriastradh{
8183464ebd5Sriastradh   GLuint Num;
8193464ebd5Sriastradh   struct interval Intervals[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
8203464ebd5Sriastradh};
8213464ebd5Sriastradh
8223464ebd5Sriastradh
8233464ebd5Sriastradhstatic void
8243464ebd5Sriastradhappend_interval(struct interval_list *list, const struct interval *inv)
8253464ebd5Sriastradh{
8263464ebd5Sriastradh   list->Intervals[list->Num++] = *inv;
8273464ebd5Sriastradh}
8283464ebd5Sriastradh
8293464ebd5Sriastradh
8303464ebd5Sriastradh/** Insert interval inv into list, sorted by interval end */
8313464ebd5Sriastradhstatic void
8323464ebd5Sriastradhinsert_interval_by_end(struct interval_list *list, const struct interval *inv)
8333464ebd5Sriastradh{
8343464ebd5Sriastradh   /* XXX we could do a binary search insertion here since list is sorted */
8353464ebd5Sriastradh   GLint i = list->Num - 1;
8363464ebd5Sriastradh   while (i >= 0 && list->Intervals[i].End > inv->End) {
8373464ebd5Sriastradh      list->Intervals[i + 1] = list->Intervals[i];
8383464ebd5Sriastradh      i--;
8393464ebd5Sriastradh   }
8403464ebd5Sriastradh   list->Intervals[i + 1] = *inv;
8413464ebd5Sriastradh   list->Num++;
8423464ebd5Sriastradh
8433464ebd5Sriastradh#ifdef DEBUG
8443464ebd5Sriastradh   {
8453464ebd5Sriastradh      GLuint i;
8463464ebd5Sriastradh      for (i = 0; i + 1 < list->Num; i++) {
84701e04c3fSmrg         assert(list->Intervals[i].End <= list->Intervals[i + 1].End);
8483464ebd5Sriastradh      }
8493464ebd5Sriastradh   }
8503464ebd5Sriastradh#endif
8513464ebd5Sriastradh}
8523464ebd5Sriastradh
8533464ebd5Sriastradh
8543464ebd5Sriastradh/** Remove the given interval from the interval list */
8553464ebd5Sriastradhstatic void
8563464ebd5Sriastradhremove_interval(struct interval_list *list, const struct interval *inv)
8573464ebd5Sriastradh{
8583464ebd5Sriastradh   /* XXX we could binary search since list is sorted */
8593464ebd5Sriastradh   GLuint k;
8603464ebd5Sriastradh   for (k = 0; k < list->Num; k++) {
8613464ebd5Sriastradh      if (list->Intervals[k].Reg == inv->Reg) {
8623464ebd5Sriastradh         /* found, remove it */
86301e04c3fSmrg         assert(list->Intervals[k].Start == inv->Start);
86401e04c3fSmrg         assert(list->Intervals[k].End == inv->End);
8653464ebd5Sriastradh         while (k < list->Num - 1) {
8663464ebd5Sriastradh            list->Intervals[k] = list->Intervals[k + 1];
8673464ebd5Sriastradh            k++;
8683464ebd5Sriastradh         }
8693464ebd5Sriastradh         list->Num--;
8703464ebd5Sriastradh         return;
8713464ebd5Sriastradh      }
8723464ebd5Sriastradh   }
8733464ebd5Sriastradh}
8743464ebd5Sriastradh
8753464ebd5Sriastradh
8763464ebd5Sriastradh/** called by qsort() */
8773464ebd5Sriastradhstatic int
8783464ebd5Sriastradhcompare_start(const void *a, const void *b)
8793464ebd5Sriastradh{
8803464ebd5Sriastradh   const struct interval *ia = (const struct interval *) a;
8813464ebd5Sriastradh   const struct interval *ib = (const struct interval *) b;
8823464ebd5Sriastradh   if (ia->Start < ib->Start)
8833464ebd5Sriastradh      return -1;
8843464ebd5Sriastradh   else if (ia->Start > ib->Start)
8853464ebd5Sriastradh      return +1;
8863464ebd5Sriastradh   else
8873464ebd5Sriastradh      return 0;
8883464ebd5Sriastradh}
8893464ebd5Sriastradh
8903464ebd5Sriastradh
8913464ebd5Sriastradh/** sort the interval list according to interval starts */
8923464ebd5Sriastradhstatic void
8933464ebd5Sriastradhsort_interval_list_by_start(struct interval_list *list)
8943464ebd5Sriastradh{
8953464ebd5Sriastradh   qsort(list->Intervals, list->Num, sizeof(struct interval), compare_start);
8963464ebd5Sriastradh#ifdef DEBUG
8973464ebd5Sriastradh   {
8983464ebd5Sriastradh      GLuint i;
8993464ebd5Sriastradh      for (i = 0; i + 1 < list->Num; i++) {
90001e04c3fSmrg         assert(list->Intervals[i].Start <= list->Intervals[i + 1].Start);
9013464ebd5Sriastradh      }
9023464ebd5Sriastradh   }
9033464ebd5Sriastradh#endif
9043464ebd5Sriastradh}
9053464ebd5Sriastradh
9063464ebd5Sriastradhstruct loop_info
9073464ebd5Sriastradh{
9083464ebd5Sriastradh   GLuint Start, End;  /**< Start, end instructions of loop */
9093464ebd5Sriastradh};
9103464ebd5Sriastradh
9113464ebd5Sriastradh/**
9123464ebd5Sriastradh * Update the intermediate interval info for register 'index' and
9133464ebd5Sriastradh * instruction 'ic'.
9143464ebd5Sriastradh */
9153464ebd5Sriastradhstatic void
9163464ebd5Sriastradhupdate_interval(GLint intBegin[], GLint intEnd[],
9173464ebd5Sriastradh		struct loop_info *loopStack, GLuint loopStackDepth,
9183464ebd5Sriastradh		GLuint index, GLuint ic)
9193464ebd5Sriastradh{
92001e04c3fSmrg   unsigned i;
9213464ebd5Sriastradh   GLuint begin = ic;
9223464ebd5Sriastradh   GLuint end = ic;
9233464ebd5Sriastradh
9243464ebd5Sriastradh   /* If the register is used in a loop, extend its lifetime through the end
9253464ebd5Sriastradh    * of the outermost loop that doesn't contain its definition.
9263464ebd5Sriastradh    */
9273464ebd5Sriastradh   for (i = 0; i < loopStackDepth; i++) {
9283464ebd5Sriastradh      if (intBegin[index] < loopStack[i].Start) {
9293464ebd5Sriastradh	 end = loopStack[i].End;
9303464ebd5Sriastradh	 break;
9313464ebd5Sriastradh      }
9323464ebd5Sriastradh   }
9333464ebd5Sriastradh
9343464ebd5Sriastradh   /* Variables that are live at the end of a loop will also be live at the
9353464ebd5Sriastradh    * beginning, so an instruction inside of a loop should have its live
9363464ebd5Sriastradh    * interval begin at the start of the outermost loop.
9373464ebd5Sriastradh    */
9383464ebd5Sriastradh   if (loopStackDepth > 0 && ic > loopStack[0].Start && ic < loopStack[0].End) {
9393464ebd5Sriastradh      begin = loopStack[0].Start;
9403464ebd5Sriastradh   }
9413464ebd5Sriastradh
94201e04c3fSmrg   assert(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
9433464ebd5Sriastradh   if (intBegin[index] == -1) {
94401e04c3fSmrg      assert(intEnd[index] == -1);
9453464ebd5Sriastradh      intBegin[index] = begin;
9463464ebd5Sriastradh      intEnd[index] = end;
9473464ebd5Sriastradh   }
9483464ebd5Sriastradh   else {
9493464ebd5Sriastradh      intEnd[index] = end;
9503464ebd5Sriastradh   }
9513464ebd5Sriastradh}
9523464ebd5Sriastradh
9533464ebd5Sriastradh
9543464ebd5Sriastradh/**
9553464ebd5Sriastradh * Find first/last instruction that references each temporary register.
9563464ebd5Sriastradh */
9577ec681f3Smrgstatic bool
9587ec681f3Smrgfind_temp_intervals(const struct prog_instruction *instructions,
9597ec681f3Smrg                    GLuint numInstructions,
9607ec681f3Smrg                    GLint intBegin[REG_ALLOCATE_MAX_PROGRAM_TEMPS],
9617ec681f3Smrg                    GLint intEnd[REG_ALLOCATE_MAX_PROGRAM_TEMPS])
9623464ebd5Sriastradh{
9633464ebd5Sriastradh   struct loop_info loopStack[MAX_LOOP_NESTING];
9643464ebd5Sriastradh   GLuint loopStackDepth = 0;
9653464ebd5Sriastradh   GLuint i;
9663464ebd5Sriastradh
9673464ebd5Sriastradh   for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++){
9683464ebd5Sriastradh      intBegin[i] = intEnd[i] = -1;
9693464ebd5Sriastradh   }
9703464ebd5Sriastradh
9713464ebd5Sriastradh   /* Scan instructions looking for temporary registers */
9723464ebd5Sriastradh   for (i = 0; i < numInstructions; i++) {
9733464ebd5Sriastradh      const struct prog_instruction *inst = instructions + i;
9743464ebd5Sriastradh      if (inst->Opcode == OPCODE_BGNLOOP) {
9753464ebd5Sriastradh         loopStack[loopStackDepth].Start = i;
9763464ebd5Sriastradh         loopStack[loopStackDepth].End = inst->BranchTarget;
9773464ebd5Sriastradh         loopStackDepth++;
9783464ebd5Sriastradh      }
9793464ebd5Sriastradh      else if (inst->Opcode == OPCODE_ENDLOOP) {
9803464ebd5Sriastradh         loopStackDepth--;
9813464ebd5Sriastradh      }
9823464ebd5Sriastradh      else if (inst->Opcode == OPCODE_CAL) {
9833464ebd5Sriastradh         return GL_FALSE;
9843464ebd5Sriastradh      }
9853464ebd5Sriastradh      else {
9863464ebd5Sriastradh         const GLuint numSrc = 3;/*_mesa_num_inst_src_regs(inst->Opcode);*/
9873464ebd5Sriastradh         GLuint j;
9883464ebd5Sriastradh         for (j = 0; j < numSrc; j++) {
9893464ebd5Sriastradh            if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
9903464ebd5Sriastradh               const GLuint index = inst->SrcReg[j].Index;
9913464ebd5Sriastradh               if (inst->SrcReg[j].RelAddr)
9923464ebd5Sriastradh                  return GL_FALSE;
9933464ebd5Sriastradh               update_interval(intBegin, intEnd, loopStack, loopStackDepth,
9943464ebd5Sriastradh			       index, i);
9953464ebd5Sriastradh            }
9963464ebd5Sriastradh         }
9973464ebd5Sriastradh         if (inst->DstReg.File == PROGRAM_TEMPORARY) {
9983464ebd5Sriastradh            const GLuint index = inst->DstReg.Index;
9993464ebd5Sriastradh            if (inst->DstReg.RelAddr)
10003464ebd5Sriastradh               return GL_FALSE;
10013464ebd5Sriastradh            update_interval(intBegin, intEnd, loopStack, loopStackDepth,
10023464ebd5Sriastradh			    index, i);
10033464ebd5Sriastradh         }
10043464ebd5Sriastradh      }
10053464ebd5Sriastradh   }
10063464ebd5Sriastradh
10073464ebd5Sriastradh   return GL_TRUE;
10083464ebd5Sriastradh}
10093464ebd5Sriastradh
10103464ebd5Sriastradh
10113464ebd5Sriastradh/**
10123464ebd5Sriastradh * Find the live intervals for each temporary register in the program.
10133464ebd5Sriastradh * For register R, the interval [A,B] indicates that R is referenced
10143464ebd5Sriastradh * from instruction A through instruction B.
10153464ebd5Sriastradh * Special consideration is needed for loops and subroutines.
10163464ebd5Sriastradh * \return GL_TRUE if success, GL_FALSE if we cannot proceed for some reason
10173464ebd5Sriastradh */
10183464ebd5Sriastradhstatic GLboolean
10193464ebd5Sriastradhfind_live_intervals(struct gl_program *prog,
10203464ebd5Sriastradh                    struct interval_list *liveIntervals)
10213464ebd5Sriastradh{
10223464ebd5Sriastradh   GLint intBegin[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
10233464ebd5Sriastradh   GLint intEnd[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
10243464ebd5Sriastradh   GLuint i;
10253464ebd5Sriastradh
10263464ebd5Sriastradh   /*
10273464ebd5Sriastradh    * Note: we'll return GL_FALSE below if we find relative indexing
10283464ebd5Sriastradh    * into the TEMP register file.  We can't handle that yet.
10293464ebd5Sriastradh    * We also give up on subroutines for now.
10303464ebd5Sriastradh    */
10313464ebd5Sriastradh
10323464ebd5Sriastradh   if (dbg) {
10333464ebd5Sriastradh      printf("Optimize: Begin find intervals\n");
10343464ebd5Sriastradh   }
10353464ebd5Sriastradh
10363464ebd5Sriastradh   /* build intermediate arrays */
10377ec681f3Smrg   if (!find_temp_intervals(prog->arb.Instructions, prog->arb.NumInstructions,
10387ec681f3Smrg                            intBegin, intEnd))
10393464ebd5Sriastradh      return GL_FALSE;
10403464ebd5Sriastradh
10413464ebd5Sriastradh   /* Build live intervals list from intermediate arrays */
10423464ebd5Sriastradh   liveIntervals->Num = 0;
10433464ebd5Sriastradh   for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++) {
10443464ebd5Sriastradh      if (intBegin[i] >= 0) {
10453464ebd5Sriastradh         struct interval inv;
10463464ebd5Sriastradh         inv.Reg = i;
10473464ebd5Sriastradh         inv.Start = intBegin[i];
10483464ebd5Sriastradh         inv.End = intEnd[i];
10493464ebd5Sriastradh         append_interval(liveIntervals, &inv);
10503464ebd5Sriastradh      }
10513464ebd5Sriastradh   }
10523464ebd5Sriastradh
10533464ebd5Sriastradh   /* Sort the list according to interval starts */
10543464ebd5Sriastradh   sort_interval_list_by_start(liveIntervals);
10553464ebd5Sriastradh
10563464ebd5Sriastradh   if (dbg) {
10573464ebd5Sriastradh      /* print interval info */
10583464ebd5Sriastradh      for (i = 0; i < liveIntervals->Num; i++) {
10593464ebd5Sriastradh         const struct interval *inv = liveIntervals->Intervals + i;
10603464ebd5Sriastradh         printf("Reg[%d] live [%d, %d]:",
10613464ebd5Sriastradh                      inv->Reg, inv->Start, inv->End);
10623464ebd5Sriastradh         if (1) {
10633464ebd5Sriastradh            GLuint j;
10643464ebd5Sriastradh            for (j = 0; j < inv->Start; j++)
10653464ebd5Sriastradh               printf(" ");
10663464ebd5Sriastradh            for (j = inv->Start; j <= inv->End; j++)
10673464ebd5Sriastradh               printf("x");
10683464ebd5Sriastradh         }
10693464ebd5Sriastradh         printf("\n");
10703464ebd5Sriastradh      }
10713464ebd5Sriastradh   }
10723464ebd5Sriastradh
10733464ebd5Sriastradh   return GL_TRUE;
10743464ebd5Sriastradh}
10753464ebd5Sriastradh
10763464ebd5Sriastradh
10773464ebd5Sriastradh/** Scan the array of used register flags to find free entry */
10783464ebd5Sriastradhstatic GLint
10793464ebd5Sriastradhalloc_register(GLboolean usedRegs[REG_ALLOCATE_MAX_PROGRAM_TEMPS])
10803464ebd5Sriastradh{
10813464ebd5Sriastradh   GLuint k;
10823464ebd5Sriastradh   for (k = 0; k < REG_ALLOCATE_MAX_PROGRAM_TEMPS; k++) {
10833464ebd5Sriastradh      if (!usedRegs[k]) {
10843464ebd5Sriastradh         usedRegs[k] = GL_TRUE;
10853464ebd5Sriastradh         return k;
10863464ebd5Sriastradh      }
10873464ebd5Sriastradh   }
10883464ebd5Sriastradh   return -1;
10893464ebd5Sriastradh}
10903464ebd5Sriastradh
10913464ebd5Sriastradh
10923464ebd5Sriastradh/**
10933464ebd5Sriastradh * This function implements "Linear Scan Register Allocation" to reduce
10943464ebd5Sriastradh * the number of temporary registers used by the program.
10953464ebd5Sriastradh *
10963464ebd5Sriastradh * We compute the "live interval" for all temporary registers then
10973464ebd5Sriastradh * examine the overlap of the intervals to allocate new registers.
10983464ebd5Sriastradh * Basically, if two intervals do not overlap, they can use the same register.
10993464ebd5Sriastradh */
11003464ebd5Sriastradhstatic void
11013464ebd5Sriastradh_mesa_reallocate_registers(struct gl_program *prog)
11023464ebd5Sriastradh{
11033464ebd5Sriastradh   struct interval_list liveIntervals;
11043464ebd5Sriastradh   GLint registerMap[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
11053464ebd5Sriastradh   GLboolean usedRegs[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
11063464ebd5Sriastradh   GLuint i;
11073464ebd5Sriastradh   GLint maxTemp = -1;
11083464ebd5Sriastradh
11093464ebd5Sriastradh   if (dbg) {
11103464ebd5Sriastradh      printf("Optimize: Begin live-interval register reallocation\n");
11113464ebd5Sriastradh      _mesa_print_program(prog);
11123464ebd5Sriastradh   }
11133464ebd5Sriastradh
11143464ebd5Sriastradh   for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++){
11153464ebd5Sriastradh      registerMap[i] = -1;
11163464ebd5Sriastradh      usedRegs[i] = GL_FALSE;
11173464ebd5Sriastradh   }
11183464ebd5Sriastradh
11193464ebd5Sriastradh   if (!find_live_intervals(prog, &liveIntervals)) {
11203464ebd5Sriastradh      if (dbg)
11213464ebd5Sriastradh         printf("Aborting register reallocation\n");
11223464ebd5Sriastradh      return;
11233464ebd5Sriastradh   }
11243464ebd5Sriastradh
11253464ebd5Sriastradh   {
11263464ebd5Sriastradh      struct interval_list activeIntervals;
11273464ebd5Sriastradh      activeIntervals.Num = 0;
11283464ebd5Sriastradh
11293464ebd5Sriastradh      /* loop over live intervals, allocating a new register for each */
11303464ebd5Sriastradh      for (i = 0; i < liveIntervals.Num; i++) {
11313464ebd5Sriastradh         const struct interval *live = liveIntervals.Intervals + i;
11323464ebd5Sriastradh
11333464ebd5Sriastradh         if (dbg)
11343464ebd5Sriastradh            printf("Consider register %u\n", live->Reg);
11353464ebd5Sriastradh
11363464ebd5Sriastradh         /* Expire old intervals.  Intervals which have ended with respect
11373464ebd5Sriastradh          * to the live interval can have their remapped registers freed.
11383464ebd5Sriastradh          */
11393464ebd5Sriastradh         {
11403464ebd5Sriastradh            GLint j;
11413464ebd5Sriastradh            for (j = 0; j < (GLint) activeIntervals.Num; j++) {
11423464ebd5Sriastradh               const struct interval *inv = activeIntervals.Intervals + j;
11433464ebd5Sriastradh               if (inv->End >= live->Start) {
11443464ebd5Sriastradh                  /* Stop now.  Since the activeInterval list is sorted
11453464ebd5Sriastradh                   * we know we don't have to go further.
11463464ebd5Sriastradh                   */
11473464ebd5Sriastradh                  break;
11483464ebd5Sriastradh               }
11493464ebd5Sriastradh               else {
11503464ebd5Sriastradh                  /* Interval 'inv' has expired */
11513464ebd5Sriastradh                  const GLint regNew = registerMap[inv->Reg];
115201e04c3fSmrg                  assert(regNew >= 0);
11533464ebd5Sriastradh
11543464ebd5Sriastradh                  if (dbg)
11553464ebd5Sriastradh                     printf("  expire interval for reg %u\n", inv->Reg);
11563464ebd5Sriastradh
11573464ebd5Sriastradh                  /* remove interval j from active list */
11583464ebd5Sriastradh                  remove_interval(&activeIntervals, inv);
11593464ebd5Sriastradh                  j--;  /* counter-act j++ in for-loop above */
11603464ebd5Sriastradh
11613464ebd5Sriastradh                  /* return register regNew to the free pool */
11623464ebd5Sriastradh                  if (dbg)
11633464ebd5Sriastradh                     printf("  free reg %d\n", regNew);
116401e04c3fSmrg                  assert(usedRegs[regNew] == GL_TRUE);
11653464ebd5Sriastradh                  usedRegs[regNew] = GL_FALSE;
11663464ebd5Sriastradh               }
11673464ebd5Sriastradh            }
11683464ebd5Sriastradh         }
11693464ebd5Sriastradh
11703464ebd5Sriastradh         /* find a free register for this live interval */
11713464ebd5Sriastradh         {
11723464ebd5Sriastradh            const GLint k = alloc_register(usedRegs);
11733464ebd5Sriastradh            if (k < 0) {
11743464ebd5Sriastradh               /* out of registers, give up */
11753464ebd5Sriastradh               return;
11763464ebd5Sriastradh            }
11773464ebd5Sriastradh            registerMap[live->Reg] = k;
11783464ebd5Sriastradh            maxTemp = MAX2(maxTemp, k);
11793464ebd5Sriastradh            if (dbg)
11803464ebd5Sriastradh               printf("  remap register %u -> %d\n", live->Reg, k);
11813464ebd5Sriastradh         }
11823464ebd5Sriastradh
11833464ebd5Sriastradh         /* Insert this live interval into the active list which is sorted
11843464ebd5Sriastradh          * by increasing end points.
11853464ebd5Sriastradh          */
11863464ebd5Sriastradh         insert_interval_by_end(&activeIntervals, live);
11873464ebd5Sriastradh      }
11883464ebd5Sriastradh   }
11893464ebd5Sriastradh
11903464ebd5Sriastradh   if (maxTemp + 1 < (GLint) liveIntervals.Num) {
11913464ebd5Sriastradh      /* OK, we've reduced the number of registers needed.
11923464ebd5Sriastradh       * Scan the program and replace all the old temporary register
11933464ebd5Sriastradh       * indexes with the new indexes.
11943464ebd5Sriastradh       */
11953464ebd5Sriastradh      replace_regs(prog, PROGRAM_TEMPORARY, registerMap);
11963464ebd5Sriastradh
119701e04c3fSmrg      prog->arb.NumTemporaries = maxTemp + 1;
11983464ebd5Sriastradh   }
11993464ebd5Sriastradh
12003464ebd5Sriastradh   if (dbg) {
12013464ebd5Sriastradh      printf("Optimize: End live-interval register reallocation\n");
12023464ebd5Sriastradh      printf("Num temp regs before: %u  after: %u\n",
12033464ebd5Sriastradh                   liveIntervals.Num, maxTemp + 1);
12043464ebd5Sriastradh      _mesa_print_program(prog);
12053464ebd5Sriastradh   }
12063464ebd5Sriastradh}
12073464ebd5Sriastradh
12083464ebd5Sriastradh
12093464ebd5Sriastradh#if 0
12103464ebd5Sriastradhstatic void
12113464ebd5Sriastradhprint_it(struct gl_context *ctx, struct gl_program *program, const char *txt) {
121201e04c3fSmrg   fprintf(stderr, "%s (%u inst):\n", txt, program->arb.NumInstructions);
12133464ebd5Sriastradh   _mesa_print_program(program);
12143464ebd5Sriastradh   _mesa_print_program_parameters(ctx, program);
12153464ebd5Sriastradh   fprintf(stderr, "\n\n");
12163464ebd5Sriastradh}
12173464ebd5Sriastradh#endif
12183464ebd5Sriastradh
12193464ebd5Sriastradh/**
12203464ebd5Sriastradh * This pass replaces CMP T0, T1 T2 T0 with MOV T0, T2 when the CMP
12213464ebd5Sriastradh * instruction is the first instruction to write to register T0.  The are
12223464ebd5Sriastradh * several lowering passes done in GLSL IR (e.g. branches and
12233464ebd5Sriastradh * relative addressing) that create a large number of conditional assignments
12243464ebd5Sriastradh * that ir_to_mesa converts to CMP instructions like the one mentioned above.
12253464ebd5Sriastradh *
12263464ebd5Sriastradh * Here is why this conversion is safe:
12273464ebd5Sriastradh * CMP T0, T1 T2 T0 can be expanded to:
12283464ebd5Sriastradh * if (T1 < 0.0)
12293464ebd5Sriastradh * 	MOV T0, T2;
12303464ebd5Sriastradh * else
12313464ebd5Sriastradh * 	MOV T0, T0;
12323464ebd5Sriastradh *
12333464ebd5Sriastradh * If (T1 < 0.0) evaluates to true then our replacement MOV T0, T2 is the same
12343464ebd5Sriastradh * as the original program.  If (T1 < 0.0) evaluates to false, executing
12353464ebd5Sriastradh * MOV T0, T0 will store a garbage value in T0 since T0 is uninitialized.
12363464ebd5Sriastradh * Therefore, it doesn't matter that we are replacing MOV T0, T0 with MOV T0, T2
12373464ebd5Sriastradh * because any instruction that was going to read from T0 after this was going
12383464ebd5Sriastradh * to read a garbage value anyway.
12393464ebd5Sriastradh */
12403464ebd5Sriastradhstatic void
12413464ebd5Sriastradh_mesa_simplify_cmp(struct gl_program * program)
12423464ebd5Sriastradh{
12433464ebd5Sriastradh   GLuint tempWrites[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
12443464ebd5Sriastradh   GLuint outputWrites[MAX_PROGRAM_OUTPUTS];
12453464ebd5Sriastradh   GLuint i;
12463464ebd5Sriastradh
12473464ebd5Sriastradh   if (dbg) {
12483464ebd5Sriastradh      printf("Optimize: Begin reads without writes\n");
12493464ebd5Sriastradh      _mesa_print_program(program);
12503464ebd5Sriastradh   }
12513464ebd5Sriastradh
12523464ebd5Sriastradh   for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++) {
12533464ebd5Sriastradh      tempWrites[i] = 0;
12543464ebd5Sriastradh   }
12553464ebd5Sriastradh
12563464ebd5Sriastradh   for (i = 0; i < MAX_PROGRAM_OUTPUTS; i++) {
12573464ebd5Sriastradh      outputWrites[i] = 0;
12583464ebd5Sriastradh   }
12593464ebd5Sriastradh
126001e04c3fSmrg   for (i = 0; i < program->arb.NumInstructions; i++) {
126101e04c3fSmrg      struct prog_instruction *inst = program->arb.Instructions + i;
12623464ebd5Sriastradh      GLuint prevWriteMask;
12633464ebd5Sriastradh
12643464ebd5Sriastradh      /* Give up if we encounter relative addressing or flow control. */
12653464ebd5Sriastradh      if (_mesa_is_flow_control_opcode(inst->Opcode) || inst->DstReg.RelAddr) {
12663464ebd5Sriastradh         return;
12673464ebd5Sriastradh      }
12683464ebd5Sriastradh
12693464ebd5Sriastradh      if (inst->DstReg.File == PROGRAM_OUTPUT) {
12703464ebd5Sriastradh         assert(inst->DstReg.Index < MAX_PROGRAM_OUTPUTS);
12713464ebd5Sriastradh         prevWriteMask = outputWrites[inst->DstReg.Index];
12723464ebd5Sriastradh         outputWrites[inst->DstReg.Index] |= inst->DstReg.WriteMask;
12733464ebd5Sriastradh      } else if (inst->DstReg.File == PROGRAM_TEMPORARY) {
12743464ebd5Sriastradh         assert(inst->DstReg.Index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
12753464ebd5Sriastradh         prevWriteMask = tempWrites[inst->DstReg.Index];
12763464ebd5Sriastradh         tempWrites[inst->DstReg.Index] |= inst->DstReg.WriteMask;
12773464ebd5Sriastradh      } else {
12783464ebd5Sriastradh         /* No other register type can be a destination register. */
12793464ebd5Sriastradh         continue;
12803464ebd5Sriastradh      }
12813464ebd5Sriastradh
12823464ebd5Sriastradh      /* For a CMP to be considered a conditional write, the destination
12833464ebd5Sriastradh       * register and source register two must be the same. */
12843464ebd5Sriastradh      if (inst->Opcode == OPCODE_CMP
12853464ebd5Sriastradh          && !(inst->DstReg.WriteMask & prevWriteMask)
12863464ebd5Sriastradh          && inst->SrcReg[2].File == inst->DstReg.File
12873464ebd5Sriastradh          && inst->SrcReg[2].Index == inst->DstReg.Index
12883464ebd5Sriastradh          && inst->DstReg.WriteMask == get_src_arg_mask(inst, 2, NO_MASK)) {
12893464ebd5Sriastradh
12903464ebd5Sriastradh         inst->Opcode = OPCODE_MOV;
12913464ebd5Sriastradh         inst->SrcReg[0] = inst->SrcReg[1];
12923464ebd5Sriastradh
12933464ebd5Sriastradh	 /* Unused operands are expected to have the file set to
12943464ebd5Sriastradh	  * PROGRAM_UNDEFINED.  This is how _mesa_init_instructions initializes
12953464ebd5Sriastradh	  * all of the sources.
12963464ebd5Sriastradh	  */
12973464ebd5Sriastradh	 inst->SrcReg[1].File = PROGRAM_UNDEFINED;
12983464ebd5Sriastradh	 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
12993464ebd5Sriastradh	 inst->SrcReg[2].File = PROGRAM_UNDEFINED;
13003464ebd5Sriastradh	 inst->SrcReg[2].Swizzle = SWIZZLE_NOOP;
13013464ebd5Sriastradh      }
13023464ebd5Sriastradh   }
13033464ebd5Sriastradh   if (dbg) {
13043464ebd5Sriastradh      printf("Optimize: End reads without writes\n");
13053464ebd5Sriastradh      _mesa_print_program(program);
13063464ebd5Sriastradh   }
13073464ebd5Sriastradh}
13083464ebd5Sriastradh
13093464ebd5Sriastradh/**
13103464ebd5Sriastradh * Apply optimizations to the given program to eliminate unnecessary
13113464ebd5Sriastradh * instructions, temp regs, etc.
13123464ebd5Sriastradh */
13133464ebd5Sriastradhvoid
131401e04c3fSmrg_mesa_optimize_program(struct gl_program *program, void *mem_ctx)
13153464ebd5Sriastradh{
13163464ebd5Sriastradh   GLboolean any_change;
13173464ebd5Sriastradh
13183464ebd5Sriastradh   _mesa_simplify_cmp(program);
13193464ebd5Sriastradh   /* Stop when no modifications were output */
13203464ebd5Sriastradh   do {
13213464ebd5Sriastradh      any_change = GL_FALSE;
13223464ebd5Sriastradh      _mesa_remove_extra_move_use(program);
132301e04c3fSmrg      if (_mesa_remove_dead_code_global(program, mem_ctx))
13243464ebd5Sriastradh         any_change = GL_TRUE;
132501e04c3fSmrg      if (_mesa_remove_extra_moves(program, mem_ctx))
13263464ebd5Sriastradh         any_change = GL_TRUE;
132701e04c3fSmrg      if (_mesa_remove_dead_code_local(program, mem_ctx))
13283464ebd5Sriastradh         any_change = GL_TRUE;
1329af69d88dSmrg
1330af69d88dSmrg      any_change = _mesa_constant_fold(program) || any_change;
13313464ebd5Sriastradh      _mesa_reallocate_registers(program);
13323464ebd5Sriastradh   } while (any_change);
13333464ebd5Sriastradh}
13343464ebd5Sriastradh
1335