13464ebd5Sriastradh/* 23464ebd5Sriastradh * Mesa 3-D graphics library 33464ebd5Sriastradh * 43464ebd5Sriastradh * Copyright (C) 2009 VMware, Inc. All Rights Reserved. 53464ebd5Sriastradh * 63464ebd5Sriastradh * Permission is hereby granted, free of charge, to any person obtaining a 73464ebd5Sriastradh * copy of this software and associated documentation files (the "Software"), 83464ebd5Sriastradh * to deal in the Software without restriction, including without limitation 93464ebd5Sriastradh * the rights to use, copy, modify, merge, publish, distribute, sublicense, 103464ebd5Sriastradh * and/or sell copies of the Software, and to permit persons to whom the 113464ebd5Sriastradh * Software is furnished to do so, subject to the following conditions: 123464ebd5Sriastradh * 133464ebd5Sriastradh * The above copyright notice and this permission notice shall be included 143464ebd5Sriastradh * in all copies or substantial portions of the Software. 153464ebd5Sriastradh * 163464ebd5Sriastradh * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 173464ebd5Sriastradh * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 183464ebd5Sriastradh * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 193464ebd5Sriastradh * VMWARE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN 203464ebd5Sriastradh * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 213464ebd5Sriastradh * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 223464ebd5Sriastradh */ 233464ebd5Sriastradh 243464ebd5Sriastradh 253464ebd5Sriastradh 263464ebd5Sriastradh#include "main/glheader.h" 273464ebd5Sriastradh#include "main/context.h" 283464ebd5Sriastradh#include "main/macros.h" 293464ebd5Sriastradh#include "program.h" 303464ebd5Sriastradh#include "prog_instruction.h" 313464ebd5Sriastradh#include "prog_optimize.h" 323464ebd5Sriastradh#include "prog_print.h" 333464ebd5Sriastradh 343464ebd5Sriastradh 353464ebd5Sriastradh#define MAX_LOOP_NESTING 50 363464ebd5Sriastradh/* MAX_PROGRAM_TEMPS is a low number (256), and we want to be able to 373464ebd5Sriastradh * register allocate many temporary values into that small number of 383464ebd5Sriastradh * temps. So allow large temporary indices coming into the register 393464ebd5Sriastradh * allocator. 403464ebd5Sriastradh */ 413464ebd5Sriastradh#define REG_ALLOCATE_MAX_PROGRAM_TEMPS ((1 << INST_INDEX_BITS) - 1) 423464ebd5Sriastradh 433464ebd5Sriastradhstatic GLboolean dbg = GL_FALSE; 443464ebd5Sriastradh 453464ebd5Sriastradh#define NO_MASK 0xf 463464ebd5Sriastradh 473464ebd5Sriastradh/** 483464ebd5Sriastradh * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which 493464ebd5Sriastradh * are read from the given src in this instruction, We also provide 503464ebd5Sriastradh * one optional masks which may mask other components in the dst 513464ebd5Sriastradh * register 523464ebd5Sriastradh */ 533464ebd5Sriastradhstatic GLuint 543464ebd5Sriastradhget_src_arg_mask(const struct prog_instruction *inst, 553464ebd5Sriastradh GLuint arg, GLuint dst_mask) 563464ebd5Sriastradh{ 573464ebd5Sriastradh GLuint read_mask, channel_mask; 583464ebd5Sriastradh GLuint comp; 593464ebd5Sriastradh 6001e04c3fSmrg assert(arg < _mesa_num_inst_src_regs(inst->Opcode)); 613464ebd5Sriastradh 623464ebd5Sriastradh /* Form the dst register, find the written channels */ 6301e04c3fSmrg switch (inst->Opcode) { 6401e04c3fSmrg case OPCODE_MOV: 6501e04c3fSmrg case OPCODE_MIN: 6601e04c3fSmrg case OPCODE_MAX: 6701e04c3fSmrg case OPCODE_ABS: 6801e04c3fSmrg case OPCODE_ADD: 6901e04c3fSmrg case OPCODE_MAD: 7001e04c3fSmrg case OPCODE_MUL: 7101e04c3fSmrg case OPCODE_SUB: 7201e04c3fSmrg case OPCODE_CMP: 7301e04c3fSmrg case OPCODE_FLR: 7401e04c3fSmrg case OPCODE_FRC: 7501e04c3fSmrg case OPCODE_LRP: 7601e04c3fSmrg case OPCODE_SGE: 7701e04c3fSmrg case OPCODE_SLT: 7801e04c3fSmrg case OPCODE_SSG: 7901e04c3fSmrg channel_mask = inst->DstReg.WriteMask & dst_mask; 8001e04c3fSmrg break; 8101e04c3fSmrg case OPCODE_RCP: 8201e04c3fSmrg case OPCODE_SIN: 8301e04c3fSmrg case OPCODE_COS: 8401e04c3fSmrg case OPCODE_RSQ: 8501e04c3fSmrg case OPCODE_POW: 8601e04c3fSmrg case OPCODE_EX2: 8701e04c3fSmrg case OPCODE_LOG: 8801e04c3fSmrg channel_mask = WRITEMASK_X; 8901e04c3fSmrg break; 9001e04c3fSmrg case OPCODE_DP2: 9101e04c3fSmrg channel_mask = WRITEMASK_XY; 9201e04c3fSmrg break; 9301e04c3fSmrg case OPCODE_DP3: 9401e04c3fSmrg case OPCODE_XPD: 9501e04c3fSmrg channel_mask = WRITEMASK_XYZ; 9601e04c3fSmrg break; 9701e04c3fSmrg default: 983464ebd5Sriastradh channel_mask = WRITEMASK_XYZW; 9901e04c3fSmrg break; 1003464ebd5Sriastradh } 1013464ebd5Sriastradh 1023464ebd5Sriastradh /* Now, given the src swizzle and the written channels, find which 1033464ebd5Sriastradh * components are actually read 1043464ebd5Sriastradh */ 1053464ebd5Sriastradh read_mask = 0x0; 1063464ebd5Sriastradh for (comp = 0; comp < 4; ++comp) { 1073464ebd5Sriastradh const GLuint coord = GET_SWZ(inst->SrcReg[arg].Swizzle, comp); 1083464ebd5Sriastradh if (channel_mask & (1 << comp) && coord <= SWIZZLE_W) 1093464ebd5Sriastradh read_mask |= 1 << coord; 1103464ebd5Sriastradh } 1113464ebd5Sriastradh 1123464ebd5Sriastradh return read_mask; 1133464ebd5Sriastradh} 1143464ebd5Sriastradh 1153464ebd5Sriastradh 1163464ebd5Sriastradh/** 1173464ebd5Sriastradh * For a MOV instruction, compute a write mask when src register also has 1183464ebd5Sriastradh * a mask 1193464ebd5Sriastradh */ 1203464ebd5Sriastradhstatic GLuint 1213464ebd5Sriastradhget_dst_mask_for_mov(const struct prog_instruction *mov, GLuint src_mask) 1223464ebd5Sriastradh{ 1233464ebd5Sriastradh const GLuint mask = mov->DstReg.WriteMask; 1243464ebd5Sriastradh GLuint comp; 1253464ebd5Sriastradh GLuint updated_mask = 0x0; 1263464ebd5Sriastradh 12701e04c3fSmrg assert(mov->Opcode == OPCODE_MOV); 1283464ebd5Sriastradh 1293464ebd5Sriastradh for (comp = 0; comp < 4; ++comp) { 1303464ebd5Sriastradh GLuint src_comp; 1313464ebd5Sriastradh if ((mask & (1 << comp)) == 0) 1323464ebd5Sriastradh continue; 1333464ebd5Sriastradh src_comp = GET_SWZ(mov->SrcReg[0].Swizzle, comp); 1343464ebd5Sriastradh if ((src_mask & (1 << src_comp)) == 0) 1353464ebd5Sriastradh continue; 1363464ebd5Sriastradh updated_mask |= 1 << comp; 1373464ebd5Sriastradh } 1383464ebd5Sriastradh 1393464ebd5Sriastradh return updated_mask; 1403464ebd5Sriastradh} 1413464ebd5Sriastradh 1423464ebd5Sriastradh 1433464ebd5Sriastradh/** 1443464ebd5Sriastradh * Ensure that the swizzle is regular. That is, all of the swizzle 1453464ebd5Sriastradh * terms are SWIZZLE_X,Y,Z,W and not SWIZZLE_ZERO or SWIZZLE_ONE. 1463464ebd5Sriastradh */ 1473464ebd5Sriastradhstatic GLboolean 1483464ebd5Sriastradhis_swizzle_regular(GLuint swz) 1493464ebd5Sriastradh{ 1503464ebd5Sriastradh return GET_SWZ(swz,0) <= SWIZZLE_W && 1513464ebd5Sriastradh GET_SWZ(swz,1) <= SWIZZLE_W && 1523464ebd5Sriastradh GET_SWZ(swz,2) <= SWIZZLE_W && 1533464ebd5Sriastradh GET_SWZ(swz,3) <= SWIZZLE_W; 1543464ebd5Sriastradh} 1553464ebd5Sriastradh 1563464ebd5Sriastradh 1573464ebd5Sriastradh/** 1583464ebd5Sriastradh * In 'prog' remove instruction[i] if removeFlags[i] == TRUE. 1593464ebd5Sriastradh * \return number of instructions removed 1603464ebd5Sriastradh */ 1613464ebd5Sriastradhstatic GLuint 16201e04c3fSmrgremove_instructions(struct gl_program *prog, const GLboolean *removeFlags, 16301e04c3fSmrg void *mem_ctx) 1643464ebd5Sriastradh{ 1653464ebd5Sriastradh GLint i, removeEnd = 0, removeCount = 0; 1663464ebd5Sriastradh GLuint totalRemoved = 0; 1673464ebd5Sriastradh 1683464ebd5Sriastradh /* go backward */ 16901e04c3fSmrg for (i = prog->arb.NumInstructions - 1; i >= 0; i--) { 1703464ebd5Sriastradh if (removeFlags[i]) { 1713464ebd5Sriastradh totalRemoved++; 1723464ebd5Sriastradh if (removeCount == 0) { 1733464ebd5Sriastradh /* begin a run of instructions to remove */ 1743464ebd5Sriastradh removeEnd = i; 1753464ebd5Sriastradh removeCount = 1; 1763464ebd5Sriastradh } 1773464ebd5Sriastradh else { 1783464ebd5Sriastradh /* extend the run of instructions to remove */ 1793464ebd5Sriastradh removeCount++; 1803464ebd5Sriastradh } 1813464ebd5Sriastradh } 1823464ebd5Sriastradh else { 1833464ebd5Sriastradh /* don't remove this instruction, but check if the preceeding 1843464ebd5Sriastradh * instructions are to be removed. 1853464ebd5Sriastradh */ 1863464ebd5Sriastradh if (removeCount > 0) { 1873464ebd5Sriastradh GLint removeStart = removeEnd - removeCount + 1; 18801e04c3fSmrg _mesa_delete_instructions(prog, removeStart, removeCount, mem_ctx); 1893464ebd5Sriastradh removeStart = removeCount = 0; /* reset removal info */ 1903464ebd5Sriastradh } 1913464ebd5Sriastradh } 1923464ebd5Sriastradh } 1933464ebd5Sriastradh /* Finish removing if the first instruction was to be removed. */ 1943464ebd5Sriastradh if (removeCount > 0) { 1953464ebd5Sriastradh GLint removeStart = removeEnd - removeCount + 1; 19601e04c3fSmrg _mesa_delete_instructions(prog, removeStart, removeCount, mem_ctx); 1973464ebd5Sriastradh } 1983464ebd5Sriastradh return totalRemoved; 1993464ebd5Sriastradh} 2003464ebd5Sriastradh 2013464ebd5Sriastradh 2023464ebd5Sriastradh/** 2033464ebd5Sriastradh * Remap register indexes according to map. 2043464ebd5Sriastradh * \param prog the program to search/replace 2053464ebd5Sriastradh * \param file the type of register file to search/replace 2063464ebd5Sriastradh * \param map maps old register indexes to new indexes 2073464ebd5Sriastradh */ 2083464ebd5Sriastradhstatic void 2093464ebd5Sriastradhreplace_regs(struct gl_program *prog, gl_register_file file, const GLint map[]) 2103464ebd5Sriastradh{ 2113464ebd5Sriastradh GLuint i; 2123464ebd5Sriastradh 21301e04c3fSmrg for (i = 0; i < prog->arb.NumInstructions; i++) { 21401e04c3fSmrg struct prog_instruction *inst = prog->arb.Instructions + i; 2153464ebd5Sriastradh const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode); 2163464ebd5Sriastradh GLuint j; 2173464ebd5Sriastradh for (j = 0; j < numSrc; j++) { 2183464ebd5Sriastradh if (inst->SrcReg[j].File == file) { 2193464ebd5Sriastradh GLuint index = inst->SrcReg[j].Index; 22001e04c3fSmrg assert(map[index] >= 0); 2213464ebd5Sriastradh inst->SrcReg[j].Index = map[index]; 2223464ebd5Sriastradh } 2233464ebd5Sriastradh } 2243464ebd5Sriastradh if (inst->DstReg.File == file) { 2253464ebd5Sriastradh const GLuint index = inst->DstReg.Index; 22601e04c3fSmrg assert(map[index] >= 0); 2273464ebd5Sriastradh inst->DstReg.Index = map[index]; 2283464ebd5Sriastradh } 2293464ebd5Sriastradh } 2303464ebd5Sriastradh} 2313464ebd5Sriastradh 2323464ebd5Sriastradh 2333464ebd5Sriastradh/** 2343464ebd5Sriastradh * Remove dead instructions from the given program. 2353464ebd5Sriastradh * This is very primitive for now. Basically look for temp registers 2363464ebd5Sriastradh * that are written to but never read. Remove any instructions that 2373464ebd5Sriastradh * write to such registers. Be careful with condition code setters. 2383464ebd5Sriastradh */ 2393464ebd5Sriastradhstatic GLboolean 24001e04c3fSmrg_mesa_remove_dead_code_global(struct gl_program *prog, void *mem_ctx) 2413464ebd5Sriastradh{ 2423464ebd5Sriastradh GLboolean tempRead[REG_ALLOCATE_MAX_PROGRAM_TEMPS][4]; 2433464ebd5Sriastradh GLboolean *removeInst; /* per-instruction removal flag */ 2443464ebd5Sriastradh GLuint i, rem = 0, comp; 2453464ebd5Sriastradh 2463464ebd5Sriastradh memset(tempRead, 0, sizeof(tempRead)); 2473464ebd5Sriastradh 2483464ebd5Sriastradh if (dbg) { 2493464ebd5Sriastradh printf("Optimize: Begin dead code removal\n"); 2503464ebd5Sriastradh /*_mesa_print_program(prog);*/ 2513464ebd5Sriastradh } 2523464ebd5Sriastradh 253af69d88dSmrg removeInst = 25401e04c3fSmrg calloc(prog->arb.NumInstructions, sizeof(GLboolean)); 2553464ebd5Sriastradh 2563464ebd5Sriastradh /* Determine which temps are read and written */ 25701e04c3fSmrg for (i = 0; i < prog->arb.NumInstructions; i++) { 25801e04c3fSmrg const struct prog_instruction *inst = prog->arb.Instructions + i; 2593464ebd5Sriastradh const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode); 2603464ebd5Sriastradh GLuint j; 2613464ebd5Sriastradh 2623464ebd5Sriastradh /* check src regs */ 2633464ebd5Sriastradh for (j = 0; j < numSrc; j++) { 2643464ebd5Sriastradh if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) { 2653464ebd5Sriastradh const GLuint index = inst->SrcReg[j].Index; 2663464ebd5Sriastradh GLuint read_mask; 26701e04c3fSmrg assert(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS); 2683464ebd5Sriastradh read_mask = get_src_arg_mask(inst, j, NO_MASK); 2693464ebd5Sriastradh 2703464ebd5Sriastradh if (inst->SrcReg[j].RelAddr) { 2713464ebd5Sriastradh if (dbg) 2723464ebd5Sriastradh printf("abort remove dead code (indirect temp)\n"); 2733464ebd5Sriastradh goto done; 2743464ebd5Sriastradh } 2753464ebd5Sriastradh 2763464ebd5Sriastradh for (comp = 0; comp < 4; comp++) { 2773464ebd5Sriastradh const GLuint swz = GET_SWZ(inst->SrcReg[j].Swizzle, comp); 278af69d88dSmrg if (swz <= SWIZZLE_W) { 279af69d88dSmrg if ((read_mask & (1 << swz)) == 0) 280af69d88dSmrg continue; 2813464ebd5Sriastradh tempRead[index][swz] = GL_TRUE; 282af69d88dSmrg } 2833464ebd5Sriastradh } 2843464ebd5Sriastradh } 2853464ebd5Sriastradh } 2863464ebd5Sriastradh 2873464ebd5Sriastradh /* check dst reg */ 2883464ebd5Sriastradh if (inst->DstReg.File == PROGRAM_TEMPORARY) { 28901e04c3fSmrg assert(inst->DstReg.Index < REG_ALLOCATE_MAX_PROGRAM_TEMPS); 2903464ebd5Sriastradh 2913464ebd5Sriastradh if (inst->DstReg.RelAddr) { 2923464ebd5Sriastradh if (dbg) 2933464ebd5Sriastradh printf("abort remove dead code (indirect temp)\n"); 2943464ebd5Sriastradh goto done; 2953464ebd5Sriastradh } 2963464ebd5Sriastradh } 2973464ebd5Sriastradh } 2983464ebd5Sriastradh 2993464ebd5Sriastradh /* find instructions that write to dead registers, flag for removal */ 30001e04c3fSmrg for (i = 0; i < prog->arb.NumInstructions; i++) { 30101e04c3fSmrg struct prog_instruction *inst = prog->arb.Instructions + i; 3023464ebd5Sriastradh const GLuint numDst = _mesa_num_inst_dst_regs(inst->Opcode); 3033464ebd5Sriastradh 3043464ebd5Sriastradh if (numDst != 0 && inst->DstReg.File == PROGRAM_TEMPORARY) { 3053464ebd5Sriastradh GLint chan, index = inst->DstReg.Index; 3063464ebd5Sriastradh 3073464ebd5Sriastradh for (chan = 0; chan < 4; chan++) { 3083464ebd5Sriastradh if (!tempRead[index][chan] && 3093464ebd5Sriastradh inst->DstReg.WriteMask & (1 << chan)) { 3103464ebd5Sriastradh if (dbg) { 3113464ebd5Sriastradh printf("Remove writemask on %u.%c\n", i, 3123464ebd5Sriastradh chan == 3 ? 'w' : 'x' + chan); 3133464ebd5Sriastradh } 3143464ebd5Sriastradh inst->DstReg.WriteMask &= ~(1 << chan); 3153464ebd5Sriastradh rem++; 3163464ebd5Sriastradh } 3173464ebd5Sriastradh } 3183464ebd5Sriastradh 3193464ebd5Sriastradh if (inst->DstReg.WriteMask == 0) { 3203464ebd5Sriastradh /* If we cleared all writes, the instruction can be removed. */ 3213464ebd5Sriastradh if (dbg) 3223464ebd5Sriastradh printf("Remove instruction %u: \n", i); 3233464ebd5Sriastradh removeInst[i] = GL_TRUE; 3243464ebd5Sriastradh } 3253464ebd5Sriastradh } 3263464ebd5Sriastradh } 3273464ebd5Sriastradh 3283464ebd5Sriastradh /* now remove the instructions which aren't needed */ 32901e04c3fSmrg rem = remove_instructions(prog, removeInst, mem_ctx); 3303464ebd5Sriastradh 3313464ebd5Sriastradh if (dbg) { 3323464ebd5Sriastradh printf("Optimize: End dead code removal.\n"); 3333464ebd5Sriastradh printf(" %u channel writes removed\n", rem); 3343464ebd5Sriastradh printf(" %u instructions removed\n", rem); 3353464ebd5Sriastradh /*_mesa_print_program(prog);*/ 3363464ebd5Sriastradh } 3373464ebd5Sriastradh 3383464ebd5Sriastradhdone: 3393464ebd5Sriastradh free(removeInst); 3403464ebd5Sriastradh return rem != 0; 3413464ebd5Sriastradh} 3423464ebd5Sriastradh 3433464ebd5Sriastradh 3443464ebd5Sriastradhenum inst_use 3453464ebd5Sriastradh{ 3463464ebd5Sriastradh READ, 3473464ebd5Sriastradh WRITE, 3483464ebd5Sriastradh FLOW, 3493464ebd5Sriastradh END 3503464ebd5Sriastradh}; 3513464ebd5Sriastradh 3523464ebd5Sriastradh 3533464ebd5Sriastradh/** 3543464ebd5Sriastradh * Scan forward in program from 'start' for the next occurances of TEMP[index]. 3553464ebd5Sriastradh * We look if an instruction reads the component given by the masks and if they 3563464ebd5Sriastradh * are overwritten. 3573464ebd5Sriastradh * Return READ, WRITE, FLOW or END to indicate the next usage or an indicator 3583464ebd5Sriastradh * that we can't look further. 3593464ebd5Sriastradh */ 3603464ebd5Sriastradhstatic enum inst_use 3613464ebd5Sriastradhfind_next_use(const struct gl_program *prog, 3623464ebd5Sriastradh GLuint start, 3633464ebd5Sriastradh GLuint index, 3643464ebd5Sriastradh GLuint mask) 3653464ebd5Sriastradh{ 3663464ebd5Sriastradh GLuint i; 3673464ebd5Sriastradh 36801e04c3fSmrg for (i = start; i < prog->arb.NumInstructions; i++) { 36901e04c3fSmrg const struct prog_instruction *inst = prog->arb.Instructions + i; 3703464ebd5Sriastradh switch (inst->Opcode) { 3713464ebd5Sriastradh case OPCODE_BGNLOOP: 3723464ebd5Sriastradh case OPCODE_BGNSUB: 3733464ebd5Sriastradh case OPCODE_CAL: 3743464ebd5Sriastradh case OPCODE_CONT: 3753464ebd5Sriastradh case OPCODE_IF: 3763464ebd5Sriastradh case OPCODE_ELSE: 3773464ebd5Sriastradh case OPCODE_ENDIF: 3783464ebd5Sriastradh case OPCODE_ENDLOOP: 3793464ebd5Sriastradh case OPCODE_ENDSUB: 3803464ebd5Sriastradh case OPCODE_RET: 3813464ebd5Sriastradh return FLOW; 3823464ebd5Sriastradh case OPCODE_END: 3833464ebd5Sriastradh return END; 3843464ebd5Sriastradh default: 3853464ebd5Sriastradh { 3863464ebd5Sriastradh const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode); 3873464ebd5Sriastradh GLuint j; 3883464ebd5Sriastradh for (j = 0; j < numSrc; j++) { 3893464ebd5Sriastradh if (inst->SrcReg[j].RelAddr || 3903464ebd5Sriastradh (inst->SrcReg[j].File == PROGRAM_TEMPORARY && 39101e04c3fSmrg inst->SrcReg[j].Index == (GLint)index && 3923464ebd5Sriastradh (get_src_arg_mask(inst,j,NO_MASK) & mask))) 3933464ebd5Sriastradh return READ; 3943464ebd5Sriastradh } 3953464ebd5Sriastradh if (_mesa_num_inst_dst_regs(inst->Opcode) == 1 && 3963464ebd5Sriastradh inst->DstReg.File == PROGRAM_TEMPORARY && 3973464ebd5Sriastradh inst->DstReg.Index == index) { 3983464ebd5Sriastradh mask &= ~inst->DstReg.WriteMask; 3993464ebd5Sriastradh if (mask == 0) 4003464ebd5Sriastradh return WRITE; 4013464ebd5Sriastradh } 4023464ebd5Sriastradh } 4033464ebd5Sriastradh } 4043464ebd5Sriastradh } 4053464ebd5Sriastradh return END; 4063464ebd5Sriastradh} 4073464ebd5Sriastradh 4083464ebd5Sriastradh 4093464ebd5Sriastradh/** 4103464ebd5Sriastradh * Is the given instruction opcode a flow-control opcode? 4113464ebd5Sriastradh * XXX maybe move this into prog_instruction.[ch] 4123464ebd5Sriastradh */ 4133464ebd5Sriastradhstatic GLboolean 4143464ebd5Sriastradh_mesa_is_flow_control_opcode(enum prog_opcode opcode) 4153464ebd5Sriastradh{ 4163464ebd5Sriastradh switch (opcode) { 4173464ebd5Sriastradh case OPCODE_BGNLOOP: 4183464ebd5Sriastradh case OPCODE_BGNSUB: 4193464ebd5Sriastradh case OPCODE_CAL: 4203464ebd5Sriastradh case OPCODE_CONT: 4213464ebd5Sriastradh case OPCODE_IF: 4223464ebd5Sriastradh case OPCODE_ELSE: 4233464ebd5Sriastradh case OPCODE_END: 4243464ebd5Sriastradh case OPCODE_ENDIF: 4253464ebd5Sriastradh case OPCODE_ENDLOOP: 4263464ebd5Sriastradh case OPCODE_ENDSUB: 4273464ebd5Sriastradh case OPCODE_RET: 4283464ebd5Sriastradh return GL_TRUE; 4293464ebd5Sriastradh default: 4303464ebd5Sriastradh return GL_FALSE; 4313464ebd5Sriastradh } 4323464ebd5Sriastradh} 4333464ebd5Sriastradh 4343464ebd5Sriastradh 4353464ebd5Sriastradh/** 4363464ebd5Sriastradh * Test if the given instruction is a simple MOV (no conditional updating, 4373464ebd5Sriastradh * not relative addressing, no negation/abs, etc). 4383464ebd5Sriastradh */ 4393464ebd5Sriastradhstatic GLboolean 4403464ebd5Sriastradhcan_downward_mov_be_modifed(const struct prog_instruction *mov) 4413464ebd5Sriastradh{ 4423464ebd5Sriastradh return 4433464ebd5Sriastradh mov->Opcode == OPCODE_MOV && 4443464ebd5Sriastradh mov->SrcReg[0].RelAddr == 0 && 4453464ebd5Sriastradh mov->SrcReg[0].Negate == 0 && 44601e04c3fSmrg mov->DstReg.RelAddr == 0; 4473464ebd5Sriastradh} 4483464ebd5Sriastradh 4493464ebd5Sriastradh 4503464ebd5Sriastradhstatic GLboolean 4513464ebd5Sriastradhcan_upward_mov_be_modifed(const struct prog_instruction *mov) 4523464ebd5Sriastradh{ 4533464ebd5Sriastradh return 4543464ebd5Sriastradh can_downward_mov_be_modifed(mov) && 455af69d88dSmrg mov->DstReg.File == PROGRAM_TEMPORARY && 45601e04c3fSmrg !mov->Saturate; 4573464ebd5Sriastradh} 4583464ebd5Sriastradh 4593464ebd5Sriastradh 4603464ebd5Sriastradh/** 4613464ebd5Sriastradh * Try to remove use of extraneous MOV instructions, to free them up for dead 4623464ebd5Sriastradh * code removal. 4633464ebd5Sriastradh */ 4643464ebd5Sriastradhstatic void 4653464ebd5Sriastradh_mesa_remove_extra_move_use(struct gl_program *prog) 4663464ebd5Sriastradh{ 4673464ebd5Sriastradh GLuint i, j; 4683464ebd5Sriastradh 4693464ebd5Sriastradh if (dbg) { 4703464ebd5Sriastradh printf("Optimize: Begin remove extra move use\n"); 4713464ebd5Sriastradh _mesa_print_program(prog); 4723464ebd5Sriastradh } 4733464ebd5Sriastradh 4743464ebd5Sriastradh /* 4753464ebd5Sriastradh * Look for sequences such as this: 4763464ebd5Sriastradh * MOV tmpX, arg0; 4773464ebd5Sriastradh * ... 4783464ebd5Sriastradh * FOO tmpY, tmpX, arg1; 4793464ebd5Sriastradh * and convert into: 4803464ebd5Sriastradh * MOV tmpX, arg0; 4813464ebd5Sriastradh * ... 4823464ebd5Sriastradh * FOO tmpY, arg0, arg1; 4833464ebd5Sriastradh */ 4843464ebd5Sriastradh 48501e04c3fSmrg for (i = 0; i + 1 < prog->arb.NumInstructions; i++) { 48601e04c3fSmrg const struct prog_instruction *mov = prog->arb.Instructions + i; 4873464ebd5Sriastradh GLuint dst_mask, src_mask; 4883464ebd5Sriastradh if (can_upward_mov_be_modifed(mov) == GL_FALSE) 4893464ebd5Sriastradh continue; 4903464ebd5Sriastradh 4913464ebd5Sriastradh /* Scanning the code, we maintain the components which are still active in 4923464ebd5Sriastradh * these two masks 4933464ebd5Sriastradh */ 4943464ebd5Sriastradh dst_mask = mov->DstReg.WriteMask; 4953464ebd5Sriastradh src_mask = get_src_arg_mask(mov, 0, NO_MASK); 4963464ebd5Sriastradh 4973464ebd5Sriastradh /* Walk through remaining instructions until the or src reg gets 4983464ebd5Sriastradh * rewritten or we get into some flow-control, eliminating the use of 4993464ebd5Sriastradh * this MOV. 5003464ebd5Sriastradh */ 50101e04c3fSmrg for (j = i + 1; j < prog->arb.NumInstructions; j++) { 50201e04c3fSmrg struct prog_instruction *inst2 = prog->arb.Instructions + j; 5033464ebd5Sriastradh GLuint arg; 5043464ebd5Sriastradh 5053464ebd5Sriastradh if (_mesa_is_flow_control_opcode(inst2->Opcode)) 5063464ebd5Sriastradh break; 5073464ebd5Sriastradh 5083464ebd5Sriastradh /* First rewrite this instruction's args if appropriate. */ 5093464ebd5Sriastradh for (arg = 0; arg < _mesa_num_inst_src_regs(inst2->Opcode); arg++) { 5103464ebd5Sriastradh GLuint comp, read_mask; 5113464ebd5Sriastradh 5123464ebd5Sriastradh if (inst2->SrcReg[arg].File != mov->DstReg.File || 5133464ebd5Sriastradh inst2->SrcReg[arg].Index != mov->DstReg.Index || 51401e04c3fSmrg inst2->SrcReg[arg].RelAddr) 5153464ebd5Sriastradh continue; 5163464ebd5Sriastradh read_mask = get_src_arg_mask(inst2, arg, NO_MASK); 5173464ebd5Sriastradh 5183464ebd5Sriastradh /* Adjust the swizzles of inst2 to point at MOV's source if ALL the 5193464ebd5Sriastradh * components read still come from the mov instructions 5203464ebd5Sriastradh */ 5213464ebd5Sriastradh if (is_swizzle_regular(inst2->SrcReg[arg].Swizzle) && 5223464ebd5Sriastradh (read_mask & dst_mask) == read_mask) { 5233464ebd5Sriastradh for (comp = 0; comp < 4; comp++) { 5243464ebd5Sriastradh const GLuint inst2_swz = 5253464ebd5Sriastradh GET_SWZ(inst2->SrcReg[arg].Swizzle, comp); 5263464ebd5Sriastradh const GLuint s = GET_SWZ(mov->SrcReg[0].Swizzle, inst2_swz); 5273464ebd5Sriastradh inst2->SrcReg[arg].Swizzle &= ~(7 << (3 * comp)); 5283464ebd5Sriastradh inst2->SrcReg[arg].Swizzle |= s << (3 * comp); 5293464ebd5Sriastradh inst2->SrcReg[arg].Negate ^= (((mov->SrcReg[0].Negate >> 5303464ebd5Sriastradh inst2_swz) & 0x1) << comp); 5313464ebd5Sriastradh } 5323464ebd5Sriastradh inst2->SrcReg[arg].File = mov->SrcReg[0].File; 5333464ebd5Sriastradh inst2->SrcReg[arg].Index = mov->SrcReg[0].Index; 5343464ebd5Sriastradh } 5353464ebd5Sriastradh } 5363464ebd5Sriastradh 5373464ebd5Sriastradh /* The source of MOV is written. This potentially deactivates some 5383464ebd5Sriastradh * components from the src and dst of the MOV instruction 5393464ebd5Sriastradh */ 5403464ebd5Sriastradh if (inst2->DstReg.File == mov->DstReg.File && 5413464ebd5Sriastradh (inst2->DstReg.RelAddr || 5423464ebd5Sriastradh inst2->DstReg.Index == mov->DstReg.Index)) { 5433464ebd5Sriastradh dst_mask &= ~inst2->DstReg.WriteMask; 5443464ebd5Sriastradh src_mask = get_src_arg_mask(mov, 0, dst_mask); 5453464ebd5Sriastradh } 5463464ebd5Sriastradh 5473464ebd5Sriastradh /* Idem when the destination of mov is written */ 5483464ebd5Sriastradh if (inst2->DstReg.File == mov->SrcReg[0].File && 5493464ebd5Sriastradh (inst2->DstReg.RelAddr || 5503464ebd5Sriastradh inst2->DstReg.Index == mov->SrcReg[0].Index)) { 5513464ebd5Sriastradh src_mask &= ~inst2->DstReg.WriteMask; 5523464ebd5Sriastradh dst_mask &= get_dst_mask_for_mov(mov, src_mask); 5533464ebd5Sriastradh } 5543464ebd5Sriastradh if (dst_mask == 0) 5553464ebd5Sriastradh break; 5563464ebd5Sriastradh } 5573464ebd5Sriastradh } 5583464ebd5Sriastradh 5593464ebd5Sriastradh if (dbg) { 5603464ebd5Sriastradh printf("Optimize: End remove extra move use.\n"); 5613464ebd5Sriastradh /*_mesa_print_program(prog);*/ 5623464ebd5Sriastradh } 5633464ebd5Sriastradh} 5643464ebd5Sriastradh 5653464ebd5Sriastradh 5663464ebd5Sriastradh/** 5673464ebd5Sriastradh * Complements dead_code_global. Try to remove code in block of code by 5683464ebd5Sriastradh * carefully monitoring the swizzles. Both functions should be merged into one 5693464ebd5Sriastradh * with a proper control flow graph 5703464ebd5Sriastradh */ 5713464ebd5Sriastradhstatic GLboolean 57201e04c3fSmrg_mesa_remove_dead_code_local(struct gl_program *prog, void *mem_ctx) 5733464ebd5Sriastradh{ 5743464ebd5Sriastradh GLboolean *removeInst; 5753464ebd5Sriastradh GLuint i, arg, rem = 0; 5763464ebd5Sriastradh 577af69d88dSmrg removeInst = 57801e04c3fSmrg calloc(prog->arb.NumInstructions, sizeof(GLboolean)); 5793464ebd5Sriastradh 58001e04c3fSmrg for (i = 0; i < prog->arb.NumInstructions; i++) { 58101e04c3fSmrg const struct prog_instruction *inst = prog->arb.Instructions + i; 5823464ebd5Sriastradh const GLuint index = inst->DstReg.Index; 5833464ebd5Sriastradh const GLuint mask = inst->DstReg.WriteMask; 5843464ebd5Sriastradh enum inst_use use; 5853464ebd5Sriastradh 5863464ebd5Sriastradh /* We must deactivate the pass as soon as some indirection is used */ 5873464ebd5Sriastradh if (inst->DstReg.RelAddr) 5883464ebd5Sriastradh goto done; 5893464ebd5Sriastradh for (arg = 0; arg < _mesa_num_inst_src_regs(inst->Opcode); arg++) 5903464ebd5Sriastradh if (inst->SrcReg[arg].RelAddr) 5913464ebd5Sriastradh goto done; 5923464ebd5Sriastradh 5933464ebd5Sriastradh if (_mesa_is_flow_control_opcode(inst->Opcode) || 5943464ebd5Sriastradh _mesa_num_inst_dst_regs(inst->Opcode) == 0 || 5953464ebd5Sriastradh inst->DstReg.File != PROGRAM_TEMPORARY || 5963464ebd5Sriastradh inst->DstReg.RelAddr) 5973464ebd5Sriastradh continue; 5983464ebd5Sriastradh 5993464ebd5Sriastradh use = find_next_use(prog, i+1, index, mask); 6003464ebd5Sriastradh if (use == WRITE || use == END) 6013464ebd5Sriastradh removeInst[i] = GL_TRUE; 6023464ebd5Sriastradh } 6033464ebd5Sriastradh 60401e04c3fSmrg rem = remove_instructions(prog, removeInst, mem_ctx); 6053464ebd5Sriastradh 6063464ebd5Sriastradhdone: 6073464ebd5Sriastradh free(removeInst); 6083464ebd5Sriastradh return rem != 0; 6093464ebd5Sriastradh} 6103464ebd5Sriastradh 6113464ebd5Sriastradh 6123464ebd5Sriastradh/** 6133464ebd5Sriastradh * Try to inject the destination of mov as the destination of inst and recompute 6143464ebd5Sriastradh * the swizzles operators for the sources of inst if required. Return GL_TRUE 6153464ebd5Sriastradh * of the substitution was possible, GL_FALSE otherwise 6163464ebd5Sriastradh */ 6173464ebd5Sriastradhstatic GLboolean 6183464ebd5Sriastradh_mesa_merge_mov_into_inst(struct prog_instruction *inst, 6193464ebd5Sriastradh const struct prog_instruction *mov) 6203464ebd5Sriastradh{ 6213464ebd5Sriastradh /* Indirection table which associates destination and source components for 6223464ebd5Sriastradh * the mov instruction 6233464ebd5Sriastradh */ 6243464ebd5Sriastradh const GLuint mask = get_src_arg_mask(mov, 0, NO_MASK); 6253464ebd5Sriastradh 6263464ebd5Sriastradh /* Some components are not written by inst. We cannot remove the mov */ 6273464ebd5Sriastradh if (mask != (inst->DstReg.WriteMask & mask)) 6283464ebd5Sriastradh return GL_FALSE; 6293464ebd5Sriastradh 63001e04c3fSmrg inst->Saturate |= mov->Saturate; 631af69d88dSmrg 6323464ebd5Sriastradh /* Depending on the instruction, we may need to recompute the swizzles. 6333464ebd5Sriastradh * Also, some other instructions (like TEX) are not linear. We will only 6343464ebd5Sriastradh * consider completely active sources and destinations 6353464ebd5Sriastradh */ 6363464ebd5Sriastradh switch (inst->Opcode) { 6373464ebd5Sriastradh 6383464ebd5Sriastradh /* Carstesian instructions: we compute the swizzle */ 6393464ebd5Sriastradh case OPCODE_MOV: 6403464ebd5Sriastradh case OPCODE_MIN: 6413464ebd5Sriastradh case OPCODE_MAX: 6423464ebd5Sriastradh case OPCODE_ABS: 6433464ebd5Sriastradh case OPCODE_ADD: 6443464ebd5Sriastradh case OPCODE_MAD: 6453464ebd5Sriastradh case OPCODE_MUL: 6463464ebd5Sriastradh case OPCODE_SUB: 6473464ebd5Sriastradh { 6483464ebd5Sriastradh GLuint dst_to_src_comp[4] = {0,0,0,0}; 6493464ebd5Sriastradh GLuint dst_comp, arg; 6503464ebd5Sriastradh for (dst_comp = 0; dst_comp < 4; ++dst_comp) { 6513464ebd5Sriastradh if (mov->DstReg.WriteMask & (1 << dst_comp)) { 6523464ebd5Sriastradh const GLuint src_comp = GET_SWZ(mov->SrcReg[0].Swizzle, dst_comp); 65301e04c3fSmrg assert(src_comp < 4); 6543464ebd5Sriastradh dst_to_src_comp[dst_comp] = src_comp; 6553464ebd5Sriastradh } 6563464ebd5Sriastradh } 6573464ebd5Sriastradh 6583464ebd5Sriastradh /* Patch each source of the instruction */ 6593464ebd5Sriastradh for (arg = 0; arg < _mesa_num_inst_src_regs(inst->Opcode); arg++) { 6603464ebd5Sriastradh const GLuint arg_swz = inst->SrcReg[arg].Swizzle; 6613464ebd5Sriastradh inst->SrcReg[arg].Swizzle = 0; 6623464ebd5Sriastradh 6633464ebd5Sriastradh /* Reset each active component of the swizzle */ 6643464ebd5Sriastradh for (dst_comp = 0; dst_comp < 4; ++dst_comp) { 6653464ebd5Sriastradh GLuint src_comp, arg_comp; 6663464ebd5Sriastradh if ((mov->DstReg.WriteMask & (1 << dst_comp)) == 0) 6673464ebd5Sriastradh continue; 6683464ebd5Sriastradh src_comp = dst_to_src_comp[dst_comp]; 66901e04c3fSmrg assert(src_comp < 4); 6703464ebd5Sriastradh arg_comp = GET_SWZ(arg_swz, src_comp); 67101e04c3fSmrg assert(arg_comp < 4); 6723464ebd5Sriastradh inst->SrcReg[arg].Swizzle |= arg_comp << (3*dst_comp); 6733464ebd5Sriastradh } 6743464ebd5Sriastradh } 6753464ebd5Sriastradh inst->DstReg = mov->DstReg; 6763464ebd5Sriastradh return GL_TRUE; 6773464ebd5Sriastradh } 6783464ebd5Sriastradh 6793464ebd5Sriastradh /* Dot products and scalar instructions: we only change the destination */ 6803464ebd5Sriastradh case OPCODE_RCP: 6813464ebd5Sriastradh case OPCODE_SIN: 6823464ebd5Sriastradh case OPCODE_COS: 6833464ebd5Sriastradh case OPCODE_RSQ: 6843464ebd5Sriastradh case OPCODE_POW: 6853464ebd5Sriastradh case OPCODE_EX2: 6863464ebd5Sriastradh case OPCODE_LOG: 6873464ebd5Sriastradh case OPCODE_DP2: 6883464ebd5Sriastradh case OPCODE_DP3: 6893464ebd5Sriastradh case OPCODE_DP4: 6903464ebd5Sriastradh inst->DstReg = mov->DstReg; 6913464ebd5Sriastradh return GL_TRUE; 6923464ebd5Sriastradh 6933464ebd5Sriastradh /* All other instructions require fully active components with no swizzle */ 6943464ebd5Sriastradh default: 6953464ebd5Sriastradh if (mov->SrcReg[0].Swizzle != SWIZZLE_XYZW || 6963464ebd5Sriastradh inst->DstReg.WriteMask != WRITEMASK_XYZW) 6973464ebd5Sriastradh return GL_FALSE; 6983464ebd5Sriastradh inst->DstReg = mov->DstReg; 6993464ebd5Sriastradh return GL_TRUE; 7003464ebd5Sriastradh } 7013464ebd5Sriastradh} 7023464ebd5Sriastradh 7033464ebd5Sriastradh 7043464ebd5Sriastradh/** 7053464ebd5Sriastradh * Try to remove extraneous MOV instructions from the given program. 7063464ebd5Sriastradh */ 7073464ebd5Sriastradhstatic GLboolean 70801e04c3fSmrg_mesa_remove_extra_moves(struct gl_program *prog, void *mem_ctx) 7093464ebd5Sriastradh{ 7103464ebd5Sriastradh GLboolean *removeInst; /* per-instruction removal flag */ 7113464ebd5Sriastradh GLuint i, rem = 0, nesting = 0; 7123464ebd5Sriastradh 7133464ebd5Sriastradh if (dbg) { 7143464ebd5Sriastradh printf("Optimize: Begin remove extra moves\n"); 7153464ebd5Sriastradh _mesa_print_program(prog); 7163464ebd5Sriastradh } 7173464ebd5Sriastradh 718af69d88dSmrg removeInst = 71901e04c3fSmrg calloc(prog->arb.NumInstructions, sizeof(GLboolean)); 7203464ebd5Sriastradh 7213464ebd5Sriastradh /* 7223464ebd5Sriastradh * Look for sequences such as this: 7233464ebd5Sriastradh * FOO tmpX, arg0, arg1; 7243464ebd5Sriastradh * MOV tmpY, tmpX; 7253464ebd5Sriastradh * and convert into: 7263464ebd5Sriastradh * FOO tmpY, arg0, arg1; 7273464ebd5Sriastradh */ 7283464ebd5Sriastradh 72901e04c3fSmrg for (i = 0; i < prog->arb.NumInstructions; i++) { 73001e04c3fSmrg const struct prog_instruction *mov = prog->arb.Instructions + i; 7313464ebd5Sriastradh 7323464ebd5Sriastradh switch (mov->Opcode) { 7333464ebd5Sriastradh case OPCODE_BGNLOOP: 7343464ebd5Sriastradh case OPCODE_BGNSUB: 7353464ebd5Sriastradh case OPCODE_IF: 7363464ebd5Sriastradh nesting++; 7373464ebd5Sriastradh break; 7383464ebd5Sriastradh case OPCODE_ENDLOOP: 7393464ebd5Sriastradh case OPCODE_ENDSUB: 7403464ebd5Sriastradh case OPCODE_ENDIF: 7413464ebd5Sriastradh nesting--; 7423464ebd5Sriastradh break; 7433464ebd5Sriastradh case OPCODE_MOV: 7443464ebd5Sriastradh if (i > 0 && 7453464ebd5Sriastradh can_downward_mov_be_modifed(mov) && 7463464ebd5Sriastradh mov->SrcReg[0].File == PROGRAM_TEMPORARY && 7473464ebd5Sriastradh nesting == 0) 7483464ebd5Sriastradh { 7493464ebd5Sriastradh 7503464ebd5Sriastradh /* see if this MOV can be removed */ 7513464ebd5Sriastradh const GLuint id = mov->SrcReg[0].Index; 7523464ebd5Sriastradh struct prog_instruction *prevInst; 7533464ebd5Sriastradh GLuint prevI; 7543464ebd5Sriastradh 7553464ebd5Sriastradh /* get pointer to previous instruction */ 7563464ebd5Sriastradh prevI = i - 1; 7573464ebd5Sriastradh while (prevI > 0 && removeInst[prevI]) 7583464ebd5Sriastradh prevI--; 75901e04c3fSmrg prevInst = prog->arb.Instructions + prevI; 7603464ebd5Sriastradh 7613464ebd5Sriastradh if (prevInst->DstReg.File == PROGRAM_TEMPORARY && 7623464ebd5Sriastradh prevInst->DstReg.Index == id && 76301e04c3fSmrg prevInst->DstReg.RelAddr == 0) { 7643464ebd5Sriastradh 7653464ebd5Sriastradh const GLuint dst_mask = prevInst->DstReg.WriteMask; 7663464ebd5Sriastradh enum inst_use next_use = find_next_use(prog, i+1, id, dst_mask); 7673464ebd5Sriastradh 7683464ebd5Sriastradh if (next_use == WRITE || next_use == END) { 7693464ebd5Sriastradh /* OK, we can safely remove this MOV instruction. 7703464ebd5Sriastradh * Transform: 7713464ebd5Sriastradh * prevI: FOO tempIndex, x, y; 7723464ebd5Sriastradh * i: MOV z, tempIndex; 7733464ebd5Sriastradh * Into: 7743464ebd5Sriastradh * prevI: FOO z, x, y; 7753464ebd5Sriastradh */ 7763464ebd5Sriastradh if (_mesa_merge_mov_into_inst(prevInst, mov)) { 7773464ebd5Sriastradh removeInst[i] = GL_TRUE; 7783464ebd5Sriastradh if (dbg) { 7793464ebd5Sriastradh printf("Remove MOV at %u\n", i); 7803464ebd5Sriastradh printf("new prev inst %u: ", prevI); 7813464ebd5Sriastradh _mesa_print_instruction(prevInst); 7823464ebd5Sriastradh } 7833464ebd5Sriastradh } 7843464ebd5Sriastradh } 7853464ebd5Sriastradh } 7863464ebd5Sriastradh } 7873464ebd5Sriastradh break; 7883464ebd5Sriastradh default: 7893464ebd5Sriastradh ; /* nothing */ 7903464ebd5Sriastradh } 7913464ebd5Sriastradh } 7923464ebd5Sriastradh 7933464ebd5Sriastradh /* now remove the instructions which aren't needed */ 79401e04c3fSmrg rem = remove_instructions(prog, removeInst, mem_ctx); 7953464ebd5Sriastradh 7963464ebd5Sriastradh free(removeInst); 7973464ebd5Sriastradh 7983464ebd5Sriastradh if (dbg) { 7993464ebd5Sriastradh printf("Optimize: End remove extra moves. %u instructions removed\n", rem); 8003464ebd5Sriastradh /*_mesa_print_program(prog);*/ 8013464ebd5Sriastradh } 8023464ebd5Sriastradh 8033464ebd5Sriastradh return rem != 0; 8043464ebd5Sriastradh} 8053464ebd5Sriastradh 8063464ebd5Sriastradh 8073464ebd5Sriastradh/** A live register interval */ 8083464ebd5Sriastradhstruct interval 8093464ebd5Sriastradh{ 8103464ebd5Sriastradh GLuint Reg; /** The temporary register index */ 8113464ebd5Sriastradh GLuint Start, End; /** Start/end instruction numbers */ 8123464ebd5Sriastradh}; 8133464ebd5Sriastradh 8143464ebd5Sriastradh 8153464ebd5Sriastradh/** A list of register intervals */ 8163464ebd5Sriastradhstruct interval_list 8173464ebd5Sriastradh{ 8183464ebd5Sriastradh GLuint Num; 8193464ebd5Sriastradh struct interval Intervals[REG_ALLOCATE_MAX_PROGRAM_TEMPS]; 8203464ebd5Sriastradh}; 8213464ebd5Sriastradh 8223464ebd5Sriastradh 8233464ebd5Sriastradhstatic void 8243464ebd5Sriastradhappend_interval(struct interval_list *list, const struct interval *inv) 8253464ebd5Sriastradh{ 8263464ebd5Sriastradh list->Intervals[list->Num++] = *inv; 8273464ebd5Sriastradh} 8283464ebd5Sriastradh 8293464ebd5Sriastradh 8303464ebd5Sriastradh/** Insert interval inv into list, sorted by interval end */ 8313464ebd5Sriastradhstatic void 8323464ebd5Sriastradhinsert_interval_by_end(struct interval_list *list, const struct interval *inv) 8333464ebd5Sriastradh{ 8343464ebd5Sriastradh /* XXX we could do a binary search insertion here since list is sorted */ 8353464ebd5Sriastradh GLint i = list->Num - 1; 8363464ebd5Sriastradh while (i >= 0 && list->Intervals[i].End > inv->End) { 8373464ebd5Sriastradh list->Intervals[i + 1] = list->Intervals[i]; 8383464ebd5Sriastradh i--; 8393464ebd5Sriastradh } 8403464ebd5Sriastradh list->Intervals[i + 1] = *inv; 8413464ebd5Sriastradh list->Num++; 8423464ebd5Sriastradh 8433464ebd5Sriastradh#ifdef DEBUG 8443464ebd5Sriastradh { 8453464ebd5Sriastradh GLuint i; 8463464ebd5Sriastradh for (i = 0; i + 1 < list->Num; i++) { 84701e04c3fSmrg assert(list->Intervals[i].End <= list->Intervals[i + 1].End); 8483464ebd5Sriastradh } 8493464ebd5Sriastradh } 8503464ebd5Sriastradh#endif 8513464ebd5Sriastradh} 8523464ebd5Sriastradh 8533464ebd5Sriastradh 8543464ebd5Sriastradh/** Remove the given interval from the interval list */ 8553464ebd5Sriastradhstatic void 8563464ebd5Sriastradhremove_interval(struct interval_list *list, const struct interval *inv) 8573464ebd5Sriastradh{ 8583464ebd5Sriastradh /* XXX we could binary search since list is sorted */ 8593464ebd5Sriastradh GLuint k; 8603464ebd5Sriastradh for (k = 0; k < list->Num; k++) { 8613464ebd5Sriastradh if (list->Intervals[k].Reg == inv->Reg) { 8623464ebd5Sriastradh /* found, remove it */ 86301e04c3fSmrg assert(list->Intervals[k].Start == inv->Start); 86401e04c3fSmrg assert(list->Intervals[k].End == inv->End); 8653464ebd5Sriastradh while (k < list->Num - 1) { 8663464ebd5Sriastradh list->Intervals[k] = list->Intervals[k + 1]; 8673464ebd5Sriastradh k++; 8683464ebd5Sriastradh } 8693464ebd5Sriastradh list->Num--; 8703464ebd5Sriastradh return; 8713464ebd5Sriastradh } 8723464ebd5Sriastradh } 8733464ebd5Sriastradh} 8743464ebd5Sriastradh 8753464ebd5Sriastradh 8763464ebd5Sriastradh/** called by qsort() */ 8773464ebd5Sriastradhstatic int 8783464ebd5Sriastradhcompare_start(const void *a, const void *b) 8793464ebd5Sriastradh{ 8803464ebd5Sriastradh const struct interval *ia = (const struct interval *) a; 8813464ebd5Sriastradh const struct interval *ib = (const struct interval *) b; 8823464ebd5Sriastradh if (ia->Start < ib->Start) 8833464ebd5Sriastradh return -1; 8843464ebd5Sriastradh else if (ia->Start > ib->Start) 8853464ebd5Sriastradh return +1; 8863464ebd5Sriastradh else 8873464ebd5Sriastradh return 0; 8883464ebd5Sriastradh} 8893464ebd5Sriastradh 8903464ebd5Sriastradh 8913464ebd5Sriastradh/** sort the interval list according to interval starts */ 8923464ebd5Sriastradhstatic void 8933464ebd5Sriastradhsort_interval_list_by_start(struct interval_list *list) 8943464ebd5Sriastradh{ 8953464ebd5Sriastradh qsort(list->Intervals, list->Num, sizeof(struct interval), compare_start); 8963464ebd5Sriastradh#ifdef DEBUG 8973464ebd5Sriastradh { 8983464ebd5Sriastradh GLuint i; 8993464ebd5Sriastradh for (i = 0; i + 1 < list->Num; i++) { 90001e04c3fSmrg assert(list->Intervals[i].Start <= list->Intervals[i + 1].Start); 9013464ebd5Sriastradh } 9023464ebd5Sriastradh } 9033464ebd5Sriastradh#endif 9043464ebd5Sriastradh} 9053464ebd5Sriastradh 9063464ebd5Sriastradhstruct loop_info 9073464ebd5Sriastradh{ 9083464ebd5Sriastradh GLuint Start, End; /**< Start, end instructions of loop */ 9093464ebd5Sriastradh}; 9103464ebd5Sriastradh 9113464ebd5Sriastradh/** 9123464ebd5Sriastradh * Update the intermediate interval info for register 'index' and 9133464ebd5Sriastradh * instruction 'ic'. 9143464ebd5Sriastradh */ 9153464ebd5Sriastradhstatic void 9163464ebd5Sriastradhupdate_interval(GLint intBegin[], GLint intEnd[], 9173464ebd5Sriastradh struct loop_info *loopStack, GLuint loopStackDepth, 9183464ebd5Sriastradh GLuint index, GLuint ic) 9193464ebd5Sriastradh{ 92001e04c3fSmrg unsigned i; 9213464ebd5Sriastradh GLuint begin = ic; 9223464ebd5Sriastradh GLuint end = ic; 9233464ebd5Sriastradh 9243464ebd5Sriastradh /* If the register is used in a loop, extend its lifetime through the end 9253464ebd5Sriastradh * of the outermost loop that doesn't contain its definition. 9263464ebd5Sriastradh */ 9273464ebd5Sriastradh for (i = 0; i < loopStackDepth; i++) { 9283464ebd5Sriastradh if (intBegin[index] < loopStack[i].Start) { 9293464ebd5Sriastradh end = loopStack[i].End; 9303464ebd5Sriastradh break; 9313464ebd5Sriastradh } 9323464ebd5Sriastradh } 9333464ebd5Sriastradh 9343464ebd5Sriastradh /* Variables that are live at the end of a loop will also be live at the 9353464ebd5Sriastradh * beginning, so an instruction inside of a loop should have its live 9363464ebd5Sriastradh * interval begin at the start of the outermost loop. 9373464ebd5Sriastradh */ 9383464ebd5Sriastradh if (loopStackDepth > 0 && ic > loopStack[0].Start && ic < loopStack[0].End) { 9393464ebd5Sriastradh begin = loopStack[0].Start; 9403464ebd5Sriastradh } 9413464ebd5Sriastradh 94201e04c3fSmrg assert(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS); 9433464ebd5Sriastradh if (intBegin[index] == -1) { 94401e04c3fSmrg assert(intEnd[index] == -1); 9453464ebd5Sriastradh intBegin[index] = begin; 9463464ebd5Sriastradh intEnd[index] = end; 9473464ebd5Sriastradh } 9483464ebd5Sriastradh else { 9493464ebd5Sriastradh intEnd[index] = end; 9503464ebd5Sriastradh } 9513464ebd5Sriastradh} 9523464ebd5Sriastradh 9533464ebd5Sriastradh 9543464ebd5Sriastradh/** 9553464ebd5Sriastradh * Find first/last instruction that references each temporary register. 9563464ebd5Sriastradh */ 9577ec681f3Smrgstatic bool 9587ec681f3Smrgfind_temp_intervals(const struct prog_instruction *instructions, 9597ec681f3Smrg GLuint numInstructions, 9607ec681f3Smrg GLint intBegin[REG_ALLOCATE_MAX_PROGRAM_TEMPS], 9617ec681f3Smrg GLint intEnd[REG_ALLOCATE_MAX_PROGRAM_TEMPS]) 9623464ebd5Sriastradh{ 9633464ebd5Sriastradh struct loop_info loopStack[MAX_LOOP_NESTING]; 9643464ebd5Sriastradh GLuint loopStackDepth = 0; 9653464ebd5Sriastradh GLuint i; 9663464ebd5Sriastradh 9673464ebd5Sriastradh for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++){ 9683464ebd5Sriastradh intBegin[i] = intEnd[i] = -1; 9693464ebd5Sriastradh } 9703464ebd5Sriastradh 9713464ebd5Sriastradh /* Scan instructions looking for temporary registers */ 9723464ebd5Sriastradh for (i = 0; i < numInstructions; i++) { 9733464ebd5Sriastradh const struct prog_instruction *inst = instructions + i; 9743464ebd5Sriastradh if (inst->Opcode == OPCODE_BGNLOOP) { 9753464ebd5Sriastradh loopStack[loopStackDepth].Start = i; 9763464ebd5Sriastradh loopStack[loopStackDepth].End = inst->BranchTarget; 9773464ebd5Sriastradh loopStackDepth++; 9783464ebd5Sriastradh } 9793464ebd5Sriastradh else if (inst->Opcode == OPCODE_ENDLOOP) { 9803464ebd5Sriastradh loopStackDepth--; 9813464ebd5Sriastradh } 9823464ebd5Sriastradh else if (inst->Opcode == OPCODE_CAL) { 9833464ebd5Sriastradh return GL_FALSE; 9843464ebd5Sriastradh } 9853464ebd5Sriastradh else { 9863464ebd5Sriastradh const GLuint numSrc = 3;/*_mesa_num_inst_src_regs(inst->Opcode);*/ 9873464ebd5Sriastradh GLuint j; 9883464ebd5Sriastradh for (j = 0; j < numSrc; j++) { 9893464ebd5Sriastradh if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) { 9903464ebd5Sriastradh const GLuint index = inst->SrcReg[j].Index; 9913464ebd5Sriastradh if (inst->SrcReg[j].RelAddr) 9923464ebd5Sriastradh return GL_FALSE; 9933464ebd5Sriastradh update_interval(intBegin, intEnd, loopStack, loopStackDepth, 9943464ebd5Sriastradh index, i); 9953464ebd5Sriastradh } 9963464ebd5Sriastradh } 9973464ebd5Sriastradh if (inst->DstReg.File == PROGRAM_TEMPORARY) { 9983464ebd5Sriastradh const GLuint index = inst->DstReg.Index; 9993464ebd5Sriastradh if (inst->DstReg.RelAddr) 10003464ebd5Sriastradh return GL_FALSE; 10013464ebd5Sriastradh update_interval(intBegin, intEnd, loopStack, loopStackDepth, 10023464ebd5Sriastradh index, i); 10033464ebd5Sriastradh } 10043464ebd5Sriastradh } 10053464ebd5Sriastradh } 10063464ebd5Sriastradh 10073464ebd5Sriastradh return GL_TRUE; 10083464ebd5Sriastradh} 10093464ebd5Sriastradh 10103464ebd5Sriastradh 10113464ebd5Sriastradh/** 10123464ebd5Sriastradh * Find the live intervals for each temporary register in the program. 10133464ebd5Sriastradh * For register R, the interval [A,B] indicates that R is referenced 10143464ebd5Sriastradh * from instruction A through instruction B. 10153464ebd5Sriastradh * Special consideration is needed for loops and subroutines. 10163464ebd5Sriastradh * \return GL_TRUE if success, GL_FALSE if we cannot proceed for some reason 10173464ebd5Sriastradh */ 10183464ebd5Sriastradhstatic GLboolean 10193464ebd5Sriastradhfind_live_intervals(struct gl_program *prog, 10203464ebd5Sriastradh struct interval_list *liveIntervals) 10213464ebd5Sriastradh{ 10223464ebd5Sriastradh GLint intBegin[REG_ALLOCATE_MAX_PROGRAM_TEMPS]; 10233464ebd5Sriastradh GLint intEnd[REG_ALLOCATE_MAX_PROGRAM_TEMPS]; 10243464ebd5Sriastradh GLuint i; 10253464ebd5Sriastradh 10263464ebd5Sriastradh /* 10273464ebd5Sriastradh * Note: we'll return GL_FALSE below if we find relative indexing 10283464ebd5Sriastradh * into the TEMP register file. We can't handle that yet. 10293464ebd5Sriastradh * We also give up on subroutines for now. 10303464ebd5Sriastradh */ 10313464ebd5Sriastradh 10323464ebd5Sriastradh if (dbg) { 10333464ebd5Sriastradh printf("Optimize: Begin find intervals\n"); 10343464ebd5Sriastradh } 10353464ebd5Sriastradh 10363464ebd5Sriastradh /* build intermediate arrays */ 10377ec681f3Smrg if (!find_temp_intervals(prog->arb.Instructions, prog->arb.NumInstructions, 10387ec681f3Smrg intBegin, intEnd)) 10393464ebd5Sriastradh return GL_FALSE; 10403464ebd5Sriastradh 10413464ebd5Sriastradh /* Build live intervals list from intermediate arrays */ 10423464ebd5Sriastradh liveIntervals->Num = 0; 10433464ebd5Sriastradh for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++) { 10443464ebd5Sriastradh if (intBegin[i] >= 0) { 10453464ebd5Sriastradh struct interval inv; 10463464ebd5Sriastradh inv.Reg = i; 10473464ebd5Sriastradh inv.Start = intBegin[i]; 10483464ebd5Sriastradh inv.End = intEnd[i]; 10493464ebd5Sriastradh append_interval(liveIntervals, &inv); 10503464ebd5Sriastradh } 10513464ebd5Sriastradh } 10523464ebd5Sriastradh 10533464ebd5Sriastradh /* Sort the list according to interval starts */ 10543464ebd5Sriastradh sort_interval_list_by_start(liveIntervals); 10553464ebd5Sriastradh 10563464ebd5Sriastradh if (dbg) { 10573464ebd5Sriastradh /* print interval info */ 10583464ebd5Sriastradh for (i = 0; i < liveIntervals->Num; i++) { 10593464ebd5Sriastradh const struct interval *inv = liveIntervals->Intervals + i; 10603464ebd5Sriastradh printf("Reg[%d] live [%d, %d]:", 10613464ebd5Sriastradh inv->Reg, inv->Start, inv->End); 10623464ebd5Sriastradh if (1) { 10633464ebd5Sriastradh GLuint j; 10643464ebd5Sriastradh for (j = 0; j < inv->Start; j++) 10653464ebd5Sriastradh printf(" "); 10663464ebd5Sriastradh for (j = inv->Start; j <= inv->End; j++) 10673464ebd5Sriastradh printf("x"); 10683464ebd5Sriastradh } 10693464ebd5Sriastradh printf("\n"); 10703464ebd5Sriastradh } 10713464ebd5Sriastradh } 10723464ebd5Sriastradh 10733464ebd5Sriastradh return GL_TRUE; 10743464ebd5Sriastradh} 10753464ebd5Sriastradh 10763464ebd5Sriastradh 10773464ebd5Sriastradh/** Scan the array of used register flags to find free entry */ 10783464ebd5Sriastradhstatic GLint 10793464ebd5Sriastradhalloc_register(GLboolean usedRegs[REG_ALLOCATE_MAX_PROGRAM_TEMPS]) 10803464ebd5Sriastradh{ 10813464ebd5Sriastradh GLuint k; 10823464ebd5Sriastradh for (k = 0; k < REG_ALLOCATE_MAX_PROGRAM_TEMPS; k++) { 10833464ebd5Sriastradh if (!usedRegs[k]) { 10843464ebd5Sriastradh usedRegs[k] = GL_TRUE; 10853464ebd5Sriastradh return k; 10863464ebd5Sriastradh } 10873464ebd5Sriastradh } 10883464ebd5Sriastradh return -1; 10893464ebd5Sriastradh} 10903464ebd5Sriastradh 10913464ebd5Sriastradh 10923464ebd5Sriastradh/** 10933464ebd5Sriastradh * This function implements "Linear Scan Register Allocation" to reduce 10943464ebd5Sriastradh * the number of temporary registers used by the program. 10953464ebd5Sriastradh * 10963464ebd5Sriastradh * We compute the "live interval" for all temporary registers then 10973464ebd5Sriastradh * examine the overlap of the intervals to allocate new registers. 10983464ebd5Sriastradh * Basically, if two intervals do not overlap, they can use the same register. 10993464ebd5Sriastradh */ 11003464ebd5Sriastradhstatic void 11013464ebd5Sriastradh_mesa_reallocate_registers(struct gl_program *prog) 11023464ebd5Sriastradh{ 11033464ebd5Sriastradh struct interval_list liveIntervals; 11043464ebd5Sriastradh GLint registerMap[REG_ALLOCATE_MAX_PROGRAM_TEMPS]; 11053464ebd5Sriastradh GLboolean usedRegs[REG_ALLOCATE_MAX_PROGRAM_TEMPS]; 11063464ebd5Sriastradh GLuint i; 11073464ebd5Sriastradh GLint maxTemp = -1; 11083464ebd5Sriastradh 11093464ebd5Sriastradh if (dbg) { 11103464ebd5Sriastradh printf("Optimize: Begin live-interval register reallocation\n"); 11113464ebd5Sriastradh _mesa_print_program(prog); 11123464ebd5Sriastradh } 11133464ebd5Sriastradh 11143464ebd5Sriastradh for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++){ 11153464ebd5Sriastradh registerMap[i] = -1; 11163464ebd5Sriastradh usedRegs[i] = GL_FALSE; 11173464ebd5Sriastradh } 11183464ebd5Sriastradh 11193464ebd5Sriastradh if (!find_live_intervals(prog, &liveIntervals)) { 11203464ebd5Sriastradh if (dbg) 11213464ebd5Sriastradh printf("Aborting register reallocation\n"); 11223464ebd5Sriastradh return; 11233464ebd5Sriastradh } 11243464ebd5Sriastradh 11253464ebd5Sriastradh { 11263464ebd5Sriastradh struct interval_list activeIntervals; 11273464ebd5Sriastradh activeIntervals.Num = 0; 11283464ebd5Sriastradh 11293464ebd5Sriastradh /* loop over live intervals, allocating a new register for each */ 11303464ebd5Sriastradh for (i = 0; i < liveIntervals.Num; i++) { 11313464ebd5Sriastradh const struct interval *live = liveIntervals.Intervals + i; 11323464ebd5Sriastradh 11333464ebd5Sriastradh if (dbg) 11343464ebd5Sriastradh printf("Consider register %u\n", live->Reg); 11353464ebd5Sriastradh 11363464ebd5Sriastradh /* Expire old intervals. Intervals which have ended with respect 11373464ebd5Sriastradh * to the live interval can have their remapped registers freed. 11383464ebd5Sriastradh */ 11393464ebd5Sriastradh { 11403464ebd5Sriastradh GLint j; 11413464ebd5Sriastradh for (j = 0; j < (GLint) activeIntervals.Num; j++) { 11423464ebd5Sriastradh const struct interval *inv = activeIntervals.Intervals + j; 11433464ebd5Sriastradh if (inv->End >= live->Start) { 11443464ebd5Sriastradh /* Stop now. Since the activeInterval list is sorted 11453464ebd5Sriastradh * we know we don't have to go further. 11463464ebd5Sriastradh */ 11473464ebd5Sriastradh break; 11483464ebd5Sriastradh } 11493464ebd5Sriastradh else { 11503464ebd5Sriastradh /* Interval 'inv' has expired */ 11513464ebd5Sriastradh const GLint regNew = registerMap[inv->Reg]; 115201e04c3fSmrg assert(regNew >= 0); 11533464ebd5Sriastradh 11543464ebd5Sriastradh if (dbg) 11553464ebd5Sriastradh printf(" expire interval for reg %u\n", inv->Reg); 11563464ebd5Sriastradh 11573464ebd5Sriastradh /* remove interval j from active list */ 11583464ebd5Sriastradh remove_interval(&activeIntervals, inv); 11593464ebd5Sriastradh j--; /* counter-act j++ in for-loop above */ 11603464ebd5Sriastradh 11613464ebd5Sriastradh /* return register regNew to the free pool */ 11623464ebd5Sriastradh if (dbg) 11633464ebd5Sriastradh printf(" free reg %d\n", regNew); 116401e04c3fSmrg assert(usedRegs[regNew] == GL_TRUE); 11653464ebd5Sriastradh usedRegs[regNew] = GL_FALSE; 11663464ebd5Sriastradh } 11673464ebd5Sriastradh } 11683464ebd5Sriastradh } 11693464ebd5Sriastradh 11703464ebd5Sriastradh /* find a free register for this live interval */ 11713464ebd5Sriastradh { 11723464ebd5Sriastradh const GLint k = alloc_register(usedRegs); 11733464ebd5Sriastradh if (k < 0) { 11743464ebd5Sriastradh /* out of registers, give up */ 11753464ebd5Sriastradh return; 11763464ebd5Sriastradh } 11773464ebd5Sriastradh registerMap[live->Reg] = k; 11783464ebd5Sriastradh maxTemp = MAX2(maxTemp, k); 11793464ebd5Sriastradh if (dbg) 11803464ebd5Sriastradh printf(" remap register %u -> %d\n", live->Reg, k); 11813464ebd5Sriastradh } 11823464ebd5Sriastradh 11833464ebd5Sriastradh /* Insert this live interval into the active list which is sorted 11843464ebd5Sriastradh * by increasing end points. 11853464ebd5Sriastradh */ 11863464ebd5Sriastradh insert_interval_by_end(&activeIntervals, live); 11873464ebd5Sriastradh } 11883464ebd5Sriastradh } 11893464ebd5Sriastradh 11903464ebd5Sriastradh if (maxTemp + 1 < (GLint) liveIntervals.Num) { 11913464ebd5Sriastradh /* OK, we've reduced the number of registers needed. 11923464ebd5Sriastradh * Scan the program and replace all the old temporary register 11933464ebd5Sriastradh * indexes with the new indexes. 11943464ebd5Sriastradh */ 11953464ebd5Sriastradh replace_regs(prog, PROGRAM_TEMPORARY, registerMap); 11963464ebd5Sriastradh 119701e04c3fSmrg prog->arb.NumTemporaries = maxTemp + 1; 11983464ebd5Sriastradh } 11993464ebd5Sriastradh 12003464ebd5Sriastradh if (dbg) { 12013464ebd5Sriastradh printf("Optimize: End live-interval register reallocation\n"); 12023464ebd5Sriastradh printf("Num temp regs before: %u after: %u\n", 12033464ebd5Sriastradh liveIntervals.Num, maxTemp + 1); 12043464ebd5Sriastradh _mesa_print_program(prog); 12053464ebd5Sriastradh } 12063464ebd5Sriastradh} 12073464ebd5Sriastradh 12083464ebd5Sriastradh 12093464ebd5Sriastradh#if 0 12103464ebd5Sriastradhstatic void 12113464ebd5Sriastradhprint_it(struct gl_context *ctx, struct gl_program *program, const char *txt) { 121201e04c3fSmrg fprintf(stderr, "%s (%u inst):\n", txt, program->arb.NumInstructions); 12133464ebd5Sriastradh _mesa_print_program(program); 12143464ebd5Sriastradh _mesa_print_program_parameters(ctx, program); 12153464ebd5Sriastradh fprintf(stderr, "\n\n"); 12163464ebd5Sriastradh} 12173464ebd5Sriastradh#endif 12183464ebd5Sriastradh 12193464ebd5Sriastradh/** 12203464ebd5Sriastradh * This pass replaces CMP T0, T1 T2 T0 with MOV T0, T2 when the CMP 12213464ebd5Sriastradh * instruction is the first instruction to write to register T0. The are 12223464ebd5Sriastradh * several lowering passes done in GLSL IR (e.g. branches and 12233464ebd5Sriastradh * relative addressing) that create a large number of conditional assignments 12243464ebd5Sriastradh * that ir_to_mesa converts to CMP instructions like the one mentioned above. 12253464ebd5Sriastradh * 12263464ebd5Sriastradh * Here is why this conversion is safe: 12273464ebd5Sriastradh * CMP T0, T1 T2 T0 can be expanded to: 12283464ebd5Sriastradh * if (T1 < 0.0) 12293464ebd5Sriastradh * MOV T0, T2; 12303464ebd5Sriastradh * else 12313464ebd5Sriastradh * MOV T0, T0; 12323464ebd5Sriastradh * 12333464ebd5Sriastradh * If (T1 < 0.0) evaluates to true then our replacement MOV T0, T2 is the same 12343464ebd5Sriastradh * as the original program. If (T1 < 0.0) evaluates to false, executing 12353464ebd5Sriastradh * MOV T0, T0 will store a garbage value in T0 since T0 is uninitialized. 12363464ebd5Sriastradh * Therefore, it doesn't matter that we are replacing MOV T0, T0 with MOV T0, T2 12373464ebd5Sriastradh * because any instruction that was going to read from T0 after this was going 12383464ebd5Sriastradh * to read a garbage value anyway. 12393464ebd5Sriastradh */ 12403464ebd5Sriastradhstatic void 12413464ebd5Sriastradh_mesa_simplify_cmp(struct gl_program * program) 12423464ebd5Sriastradh{ 12433464ebd5Sriastradh GLuint tempWrites[REG_ALLOCATE_MAX_PROGRAM_TEMPS]; 12443464ebd5Sriastradh GLuint outputWrites[MAX_PROGRAM_OUTPUTS]; 12453464ebd5Sriastradh GLuint i; 12463464ebd5Sriastradh 12473464ebd5Sriastradh if (dbg) { 12483464ebd5Sriastradh printf("Optimize: Begin reads without writes\n"); 12493464ebd5Sriastradh _mesa_print_program(program); 12503464ebd5Sriastradh } 12513464ebd5Sriastradh 12523464ebd5Sriastradh for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++) { 12533464ebd5Sriastradh tempWrites[i] = 0; 12543464ebd5Sriastradh } 12553464ebd5Sriastradh 12563464ebd5Sriastradh for (i = 0; i < MAX_PROGRAM_OUTPUTS; i++) { 12573464ebd5Sriastradh outputWrites[i] = 0; 12583464ebd5Sriastradh } 12593464ebd5Sriastradh 126001e04c3fSmrg for (i = 0; i < program->arb.NumInstructions; i++) { 126101e04c3fSmrg struct prog_instruction *inst = program->arb.Instructions + i; 12623464ebd5Sriastradh GLuint prevWriteMask; 12633464ebd5Sriastradh 12643464ebd5Sriastradh /* Give up if we encounter relative addressing or flow control. */ 12653464ebd5Sriastradh if (_mesa_is_flow_control_opcode(inst->Opcode) || inst->DstReg.RelAddr) { 12663464ebd5Sriastradh return; 12673464ebd5Sriastradh } 12683464ebd5Sriastradh 12693464ebd5Sriastradh if (inst->DstReg.File == PROGRAM_OUTPUT) { 12703464ebd5Sriastradh assert(inst->DstReg.Index < MAX_PROGRAM_OUTPUTS); 12713464ebd5Sriastradh prevWriteMask = outputWrites[inst->DstReg.Index]; 12723464ebd5Sriastradh outputWrites[inst->DstReg.Index] |= inst->DstReg.WriteMask; 12733464ebd5Sriastradh } else if (inst->DstReg.File == PROGRAM_TEMPORARY) { 12743464ebd5Sriastradh assert(inst->DstReg.Index < REG_ALLOCATE_MAX_PROGRAM_TEMPS); 12753464ebd5Sriastradh prevWriteMask = tempWrites[inst->DstReg.Index]; 12763464ebd5Sriastradh tempWrites[inst->DstReg.Index] |= inst->DstReg.WriteMask; 12773464ebd5Sriastradh } else { 12783464ebd5Sriastradh /* No other register type can be a destination register. */ 12793464ebd5Sriastradh continue; 12803464ebd5Sriastradh } 12813464ebd5Sriastradh 12823464ebd5Sriastradh /* For a CMP to be considered a conditional write, the destination 12833464ebd5Sriastradh * register and source register two must be the same. */ 12843464ebd5Sriastradh if (inst->Opcode == OPCODE_CMP 12853464ebd5Sriastradh && !(inst->DstReg.WriteMask & prevWriteMask) 12863464ebd5Sriastradh && inst->SrcReg[2].File == inst->DstReg.File 12873464ebd5Sriastradh && inst->SrcReg[2].Index == inst->DstReg.Index 12883464ebd5Sriastradh && inst->DstReg.WriteMask == get_src_arg_mask(inst, 2, NO_MASK)) { 12893464ebd5Sriastradh 12903464ebd5Sriastradh inst->Opcode = OPCODE_MOV; 12913464ebd5Sriastradh inst->SrcReg[0] = inst->SrcReg[1]; 12923464ebd5Sriastradh 12933464ebd5Sriastradh /* Unused operands are expected to have the file set to 12943464ebd5Sriastradh * PROGRAM_UNDEFINED. This is how _mesa_init_instructions initializes 12953464ebd5Sriastradh * all of the sources. 12963464ebd5Sriastradh */ 12973464ebd5Sriastradh inst->SrcReg[1].File = PROGRAM_UNDEFINED; 12983464ebd5Sriastradh inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; 12993464ebd5Sriastradh inst->SrcReg[2].File = PROGRAM_UNDEFINED; 13003464ebd5Sriastradh inst->SrcReg[2].Swizzle = SWIZZLE_NOOP; 13013464ebd5Sriastradh } 13023464ebd5Sriastradh } 13033464ebd5Sriastradh if (dbg) { 13043464ebd5Sriastradh printf("Optimize: End reads without writes\n"); 13053464ebd5Sriastradh _mesa_print_program(program); 13063464ebd5Sriastradh } 13073464ebd5Sriastradh} 13083464ebd5Sriastradh 13093464ebd5Sriastradh/** 13103464ebd5Sriastradh * Apply optimizations to the given program to eliminate unnecessary 13113464ebd5Sriastradh * instructions, temp regs, etc. 13123464ebd5Sriastradh */ 13133464ebd5Sriastradhvoid 131401e04c3fSmrg_mesa_optimize_program(struct gl_program *program, void *mem_ctx) 13153464ebd5Sriastradh{ 13163464ebd5Sriastradh GLboolean any_change; 13173464ebd5Sriastradh 13183464ebd5Sriastradh _mesa_simplify_cmp(program); 13193464ebd5Sriastradh /* Stop when no modifications were output */ 13203464ebd5Sriastradh do { 13213464ebd5Sriastradh any_change = GL_FALSE; 13223464ebd5Sriastradh _mesa_remove_extra_move_use(program); 132301e04c3fSmrg if (_mesa_remove_dead_code_global(program, mem_ctx)) 13243464ebd5Sriastradh any_change = GL_TRUE; 132501e04c3fSmrg if (_mesa_remove_extra_moves(program, mem_ctx)) 13263464ebd5Sriastradh any_change = GL_TRUE; 132701e04c3fSmrg if (_mesa_remove_dead_code_local(program, mem_ctx)) 13283464ebd5Sriastradh any_change = GL_TRUE; 1329af69d88dSmrg 1330af69d88dSmrg any_change = _mesa_constant_fold(program) || any_change; 13313464ebd5Sriastradh _mesa_reallocate_registers(program); 13323464ebd5Sriastradh } while (any_change); 13333464ebd5Sriastradh} 13343464ebd5Sriastradh 1335