1/*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2009  VMware, Inc.  All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19 * VMWARE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
20 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23
24
25
26#include "main/glheader.h"
27#include "main/context.h"
28#include "main/macros.h"
29#include "program.h"
30#include "prog_instruction.h"
31#include "prog_optimize.h"
32#include "prog_print.h"
33
34
35#define MAX_LOOP_NESTING 50
36/* MAX_PROGRAM_TEMPS is a low number (256), and we want to be able to
37 * register allocate many temporary values into that small number of
38 * temps.  So allow large temporary indices coming into the register
39 * allocator.
40 */
41#define REG_ALLOCATE_MAX_PROGRAM_TEMPS	((1 << INST_INDEX_BITS) - 1)
42
43static GLboolean dbg = GL_FALSE;
44
45#define NO_MASK 0xf
46
47/**
48 * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which
49 * are read from the given src in this instruction, We also provide
50 * one optional masks which may mask other components in the dst
51 * register
52 */
53static GLuint
54get_src_arg_mask(const struct prog_instruction *inst,
55                 GLuint arg, GLuint dst_mask)
56{
57   GLuint read_mask, channel_mask;
58   GLuint comp;
59
60   assert(arg < _mesa_num_inst_src_regs(inst->Opcode));
61
62   /* Form the dst register, find the written channels */
63   switch (inst->Opcode) {
64   case OPCODE_MOV:
65   case OPCODE_MIN:
66   case OPCODE_MAX:
67   case OPCODE_ABS:
68   case OPCODE_ADD:
69   case OPCODE_MAD:
70   case OPCODE_MUL:
71   case OPCODE_SUB:
72   case OPCODE_CMP:
73   case OPCODE_FLR:
74   case OPCODE_FRC:
75   case OPCODE_LRP:
76   case OPCODE_SGE:
77   case OPCODE_SLT:
78   case OPCODE_SSG:
79      channel_mask = inst->DstReg.WriteMask & dst_mask;
80      break;
81   case OPCODE_RCP:
82   case OPCODE_SIN:
83   case OPCODE_COS:
84   case OPCODE_RSQ:
85   case OPCODE_POW:
86   case OPCODE_EX2:
87   case OPCODE_LOG:
88      channel_mask = WRITEMASK_X;
89      break;
90   case OPCODE_DP2:
91      channel_mask = WRITEMASK_XY;
92      break;
93   case OPCODE_DP3:
94   case OPCODE_XPD:
95      channel_mask = WRITEMASK_XYZ;
96      break;
97   default:
98      channel_mask = WRITEMASK_XYZW;
99      break;
100   }
101
102   /* Now, given the src swizzle and the written channels, find which
103    * components are actually read
104    */
105   read_mask = 0x0;
106   for (comp = 0; comp < 4; ++comp) {
107      const GLuint coord = GET_SWZ(inst->SrcReg[arg].Swizzle, comp);
108      if (channel_mask & (1 << comp) && coord <= SWIZZLE_W)
109         read_mask |= 1 << coord;
110   }
111
112   return read_mask;
113}
114
115
116/**
117 * For a MOV instruction, compute a write mask when src register also has
118 * a mask
119 */
120static GLuint
121get_dst_mask_for_mov(const struct prog_instruction *mov, GLuint src_mask)
122{
123   const GLuint mask = mov->DstReg.WriteMask;
124   GLuint comp;
125   GLuint updated_mask = 0x0;
126
127   assert(mov->Opcode == OPCODE_MOV);
128
129   for (comp = 0; comp < 4; ++comp) {
130      GLuint src_comp;
131      if ((mask & (1 << comp)) == 0)
132         continue;
133      src_comp = GET_SWZ(mov->SrcReg[0].Swizzle, comp);
134      if ((src_mask & (1 << src_comp)) == 0)
135         continue;
136      updated_mask |= 1 << comp;
137   }
138
139   return updated_mask;
140}
141
142
143/**
144 * Ensure that the swizzle is regular.  That is, all of the swizzle
145 * terms are SWIZZLE_X,Y,Z,W and not SWIZZLE_ZERO or SWIZZLE_ONE.
146 */
147static GLboolean
148is_swizzle_regular(GLuint swz)
149{
150   return GET_SWZ(swz,0) <= SWIZZLE_W &&
151          GET_SWZ(swz,1) <= SWIZZLE_W &&
152          GET_SWZ(swz,2) <= SWIZZLE_W &&
153          GET_SWZ(swz,3) <= SWIZZLE_W;
154}
155
156
157/**
158 * In 'prog' remove instruction[i] if removeFlags[i] == TRUE.
159 * \return number of instructions removed
160 */
161static GLuint
162remove_instructions(struct gl_program *prog, const GLboolean *removeFlags,
163                    void *mem_ctx)
164{
165   GLint i, removeEnd = 0, removeCount = 0;
166   GLuint totalRemoved = 0;
167
168   /* go backward */
169   for (i = prog->arb.NumInstructions - 1; i >= 0; i--) {
170      if (removeFlags[i]) {
171         totalRemoved++;
172         if (removeCount == 0) {
173            /* begin a run of instructions to remove */
174            removeEnd = i;
175            removeCount = 1;
176         }
177         else {
178            /* extend the run of instructions to remove */
179            removeCount++;
180         }
181      }
182      else {
183         /* don't remove this instruction, but check if the preceeding
184          * instructions are to be removed.
185          */
186         if (removeCount > 0) {
187            GLint removeStart = removeEnd - removeCount + 1;
188            _mesa_delete_instructions(prog, removeStart, removeCount, mem_ctx);
189            removeStart = removeCount = 0; /* reset removal info */
190         }
191      }
192   }
193   /* Finish removing if the first instruction was to be removed. */
194   if (removeCount > 0) {
195      GLint removeStart = removeEnd - removeCount + 1;
196      _mesa_delete_instructions(prog, removeStart, removeCount, mem_ctx);
197   }
198   return totalRemoved;
199}
200
201
202/**
203 * Remap register indexes according to map.
204 * \param prog  the program to search/replace
205 * \param file  the type of register file to search/replace
206 * \param map  maps old register indexes to new indexes
207 */
208static void
209replace_regs(struct gl_program *prog, gl_register_file file, const GLint map[])
210{
211   GLuint i;
212
213   for (i = 0; i < prog->arb.NumInstructions; i++) {
214      struct prog_instruction *inst = prog->arb.Instructions + i;
215      const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
216      GLuint j;
217      for (j = 0; j < numSrc; j++) {
218         if (inst->SrcReg[j].File == file) {
219            GLuint index = inst->SrcReg[j].Index;
220            assert(map[index] >= 0);
221            inst->SrcReg[j].Index = map[index];
222         }
223      }
224      if (inst->DstReg.File == file) {
225         const GLuint index = inst->DstReg.Index;
226         assert(map[index] >= 0);
227         inst->DstReg.Index = map[index];
228      }
229   }
230}
231
232
233/**
234 * Remove dead instructions from the given program.
235 * This is very primitive for now.  Basically look for temp registers
236 * that are written to but never read.  Remove any instructions that
237 * write to such registers.  Be careful with condition code setters.
238 */
239static GLboolean
240_mesa_remove_dead_code_global(struct gl_program *prog, void *mem_ctx)
241{
242   GLboolean tempRead[REG_ALLOCATE_MAX_PROGRAM_TEMPS][4];
243   GLboolean *removeInst; /* per-instruction removal flag */
244   GLuint i, rem = 0, comp;
245
246   memset(tempRead, 0, sizeof(tempRead));
247
248   if (dbg) {
249      printf("Optimize: Begin dead code removal\n");
250      /*_mesa_print_program(prog);*/
251   }
252
253   removeInst =
254      calloc(prog->arb.NumInstructions, sizeof(GLboolean));
255
256   /* Determine which temps are read and written */
257   for (i = 0; i < prog->arb.NumInstructions; i++) {
258      const struct prog_instruction *inst = prog->arb.Instructions + i;
259      const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
260      GLuint j;
261
262      /* check src regs */
263      for (j = 0; j < numSrc; j++) {
264         if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
265            const GLuint index = inst->SrcReg[j].Index;
266            GLuint read_mask;
267            assert(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
268	    read_mask = get_src_arg_mask(inst, j, NO_MASK);
269
270            if (inst->SrcReg[j].RelAddr) {
271               if (dbg)
272                  printf("abort remove dead code (indirect temp)\n");
273               goto done;
274            }
275
276	    for (comp = 0; comp < 4; comp++) {
277	       const GLuint swz = GET_SWZ(inst->SrcReg[j].Swizzle, comp);
278               if (swz <= SWIZZLE_W) {
279                  if ((read_mask & (1 << swz)) == 0)
280                     continue;
281                  tempRead[index][swz] = GL_TRUE;
282               }
283	    }
284         }
285      }
286
287      /* check dst reg */
288      if (inst->DstReg.File == PROGRAM_TEMPORARY) {
289         assert(inst->DstReg.Index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
290
291         if (inst->DstReg.RelAddr) {
292            if (dbg)
293               printf("abort remove dead code (indirect temp)\n");
294            goto done;
295         }
296      }
297   }
298
299   /* find instructions that write to dead registers, flag for removal */
300   for (i = 0; i < prog->arb.NumInstructions; i++) {
301      struct prog_instruction *inst = prog->arb.Instructions + i;
302      const GLuint numDst = _mesa_num_inst_dst_regs(inst->Opcode);
303
304      if (numDst != 0 && inst->DstReg.File == PROGRAM_TEMPORARY) {
305         GLint chan, index = inst->DstReg.Index;
306
307	 for (chan = 0; chan < 4; chan++) {
308	    if (!tempRead[index][chan] &&
309		inst->DstReg.WriteMask & (1 << chan)) {
310	       if (dbg) {
311		  printf("Remove writemask on %u.%c\n", i,
312			       chan == 3 ? 'w' : 'x' + chan);
313	       }
314	       inst->DstReg.WriteMask &= ~(1 << chan);
315	       rem++;
316	    }
317	 }
318
319	 if (inst->DstReg.WriteMask == 0) {
320	    /* If we cleared all writes, the instruction can be removed. */
321	    if (dbg)
322	       printf("Remove instruction %u: \n", i);
323	    removeInst[i] = GL_TRUE;
324	 }
325      }
326   }
327
328   /* now remove the instructions which aren't needed */
329   rem = remove_instructions(prog, removeInst, mem_ctx);
330
331   if (dbg) {
332      printf("Optimize: End dead code removal.\n");
333      printf("  %u channel writes removed\n", rem);
334      printf("  %u instructions removed\n", rem);
335      /*_mesa_print_program(prog);*/
336   }
337
338done:
339   free(removeInst);
340   return rem != 0;
341}
342
343
344enum inst_use
345{
346   READ,
347   WRITE,
348   FLOW,
349   END
350};
351
352
353/**
354 * Scan forward in program from 'start' for the next occurances of TEMP[index].
355 * We look if an instruction reads the component given by the masks and if they
356 * are overwritten.
357 * Return READ, WRITE, FLOW or END to indicate the next usage or an indicator
358 * that we can't look further.
359 */
360static enum inst_use
361find_next_use(const struct gl_program *prog,
362              GLuint start,
363              GLuint index,
364              GLuint mask)
365{
366   GLuint i;
367
368   for (i = start; i < prog->arb.NumInstructions; i++) {
369      const struct prog_instruction *inst = prog->arb.Instructions + i;
370      switch (inst->Opcode) {
371      case OPCODE_BGNLOOP:
372      case OPCODE_BGNSUB:
373      case OPCODE_CAL:
374      case OPCODE_CONT:
375      case OPCODE_IF:
376      case OPCODE_ELSE:
377      case OPCODE_ENDIF:
378      case OPCODE_ENDLOOP:
379      case OPCODE_ENDSUB:
380      case OPCODE_RET:
381         return FLOW;
382      case OPCODE_END:
383         return END;
384      default:
385         {
386            const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
387            GLuint j;
388            for (j = 0; j < numSrc; j++) {
389               if (inst->SrcReg[j].RelAddr ||
390                   (inst->SrcReg[j].File == PROGRAM_TEMPORARY &&
391                   inst->SrcReg[j].Index == (GLint)index &&
392                   (get_src_arg_mask(inst,j,NO_MASK) & mask)))
393                  return READ;
394            }
395            if (_mesa_num_inst_dst_regs(inst->Opcode) == 1 &&
396                inst->DstReg.File == PROGRAM_TEMPORARY &&
397                inst->DstReg.Index == index) {
398               mask &= ~inst->DstReg.WriteMask;
399               if (mask == 0)
400                  return WRITE;
401            }
402         }
403      }
404   }
405   return END;
406}
407
408
409/**
410 * Is the given instruction opcode a flow-control opcode?
411 * XXX maybe move this into prog_instruction.[ch]
412 */
413static GLboolean
414_mesa_is_flow_control_opcode(enum prog_opcode opcode)
415{
416   switch (opcode) {
417   case OPCODE_BGNLOOP:
418   case OPCODE_BGNSUB:
419   case OPCODE_CAL:
420   case OPCODE_CONT:
421   case OPCODE_IF:
422   case OPCODE_ELSE:
423   case OPCODE_END:
424   case OPCODE_ENDIF:
425   case OPCODE_ENDLOOP:
426   case OPCODE_ENDSUB:
427   case OPCODE_RET:
428      return GL_TRUE;
429   default:
430      return GL_FALSE;
431   }
432}
433
434
435/**
436 * Test if the given instruction is a simple MOV (no conditional updating,
437 * not relative addressing, no negation/abs, etc).
438 */
439static GLboolean
440can_downward_mov_be_modifed(const struct prog_instruction *mov)
441{
442   return
443      mov->Opcode == OPCODE_MOV &&
444      mov->SrcReg[0].RelAddr == 0 &&
445      mov->SrcReg[0].Negate == 0 &&
446      mov->DstReg.RelAddr == 0;
447}
448
449
450static GLboolean
451can_upward_mov_be_modifed(const struct prog_instruction *mov)
452{
453   return
454      can_downward_mov_be_modifed(mov) &&
455      mov->DstReg.File == PROGRAM_TEMPORARY &&
456      !mov->Saturate;
457}
458
459
460/**
461 * Try to remove use of extraneous MOV instructions, to free them up for dead
462 * code removal.
463 */
464static void
465_mesa_remove_extra_move_use(struct gl_program *prog)
466{
467   GLuint i, j;
468
469   if (dbg) {
470      printf("Optimize: Begin remove extra move use\n");
471      _mesa_print_program(prog);
472   }
473
474   /*
475    * Look for sequences such as this:
476    *    MOV tmpX, arg0;
477    *    ...
478    *    FOO tmpY, tmpX, arg1;
479    * and convert into:
480    *    MOV tmpX, arg0;
481    *    ...
482    *    FOO tmpY, arg0, arg1;
483    */
484
485   for (i = 0; i + 1 < prog->arb.NumInstructions; i++) {
486      const struct prog_instruction *mov = prog->arb.Instructions + i;
487      GLuint dst_mask, src_mask;
488      if (can_upward_mov_be_modifed(mov) == GL_FALSE)
489         continue;
490
491      /* Scanning the code, we maintain the components which are still active in
492       * these two masks
493       */
494      dst_mask = mov->DstReg.WriteMask;
495      src_mask = get_src_arg_mask(mov, 0, NO_MASK);
496
497      /* Walk through remaining instructions until the or src reg gets
498       * rewritten or we get into some flow-control, eliminating the use of
499       * this MOV.
500       */
501      for (j = i + 1; j < prog->arb.NumInstructions; j++) {
502         struct prog_instruction *inst2 = prog->arb.Instructions + j;
503         GLuint arg;
504
505	 if (_mesa_is_flow_control_opcode(inst2->Opcode))
506	     break;
507
508	 /* First rewrite this instruction's args if appropriate. */
509	 for (arg = 0; arg < _mesa_num_inst_src_regs(inst2->Opcode); arg++) {
510	    GLuint comp, read_mask;
511
512	    if (inst2->SrcReg[arg].File != mov->DstReg.File ||
513		inst2->SrcReg[arg].Index != mov->DstReg.Index ||
514		inst2->SrcReg[arg].RelAddr)
515	       continue;
516            read_mask = get_src_arg_mask(inst2, arg, NO_MASK);
517
518	    /* Adjust the swizzles of inst2 to point at MOV's source if ALL the
519             * components read still come from the mov instructions
520             */
521            if (is_swizzle_regular(inst2->SrcReg[arg].Swizzle) &&
522               (read_mask & dst_mask) == read_mask) {
523               for (comp = 0; comp < 4; comp++) {
524                  const GLuint inst2_swz =
525                     GET_SWZ(inst2->SrcReg[arg].Swizzle, comp);
526                  const GLuint s = GET_SWZ(mov->SrcReg[0].Swizzle, inst2_swz);
527                  inst2->SrcReg[arg].Swizzle &= ~(7 << (3 * comp));
528                  inst2->SrcReg[arg].Swizzle |= s << (3 * comp);
529                  inst2->SrcReg[arg].Negate ^= (((mov->SrcReg[0].Negate >>
530                                                  inst2_swz) & 0x1) << comp);
531               }
532               inst2->SrcReg[arg].File = mov->SrcReg[0].File;
533               inst2->SrcReg[arg].Index = mov->SrcReg[0].Index;
534            }
535	 }
536
537	 /* The source of MOV is written. This potentially deactivates some
538          * components from the src and dst of the MOV instruction
539          */
540	 if (inst2->DstReg.File == mov->DstReg.File &&
541	     (inst2->DstReg.RelAddr ||
542	      inst2->DstReg.Index == mov->DstReg.Index)) {
543            dst_mask &= ~inst2->DstReg.WriteMask;
544            src_mask = get_src_arg_mask(mov, 0, dst_mask);
545         }
546
547         /* Idem when the destination of mov is written */
548	 if (inst2->DstReg.File == mov->SrcReg[0].File &&
549	     (inst2->DstReg.RelAddr ||
550	      inst2->DstReg.Index == mov->SrcReg[0].Index)) {
551            src_mask &= ~inst2->DstReg.WriteMask;
552            dst_mask &= get_dst_mask_for_mov(mov, src_mask);
553         }
554         if (dst_mask == 0)
555            break;
556      }
557   }
558
559   if (dbg) {
560      printf("Optimize: End remove extra move use.\n");
561      /*_mesa_print_program(prog);*/
562   }
563}
564
565
566/**
567 * Complements dead_code_global. Try to remove code in block of code by
568 * carefully monitoring the swizzles. Both functions should be merged into one
569 * with a proper control flow graph
570 */
571static GLboolean
572_mesa_remove_dead_code_local(struct gl_program *prog, void *mem_ctx)
573{
574   GLboolean *removeInst;
575   GLuint i, arg, rem = 0;
576
577   removeInst =
578      calloc(prog->arb.NumInstructions, sizeof(GLboolean));
579
580   for (i = 0; i < prog->arb.NumInstructions; i++) {
581      const struct prog_instruction *inst = prog->arb.Instructions + i;
582      const GLuint index = inst->DstReg.Index;
583      const GLuint mask = inst->DstReg.WriteMask;
584      enum inst_use use;
585
586      /* We must deactivate the pass as soon as some indirection is used */
587      if (inst->DstReg.RelAddr)
588         goto done;
589      for (arg = 0; arg < _mesa_num_inst_src_regs(inst->Opcode); arg++)
590         if (inst->SrcReg[arg].RelAddr)
591            goto done;
592
593      if (_mesa_is_flow_control_opcode(inst->Opcode) ||
594          _mesa_num_inst_dst_regs(inst->Opcode) == 0 ||
595          inst->DstReg.File != PROGRAM_TEMPORARY ||
596          inst->DstReg.RelAddr)
597         continue;
598
599      use = find_next_use(prog, i+1, index, mask);
600      if (use == WRITE || use == END)
601         removeInst[i] = GL_TRUE;
602   }
603
604   rem = remove_instructions(prog, removeInst, mem_ctx);
605
606done:
607   free(removeInst);
608   return rem != 0;
609}
610
611
612/**
613 * Try to inject the destination of mov as the destination of inst and recompute
614 * the swizzles operators for the sources of inst if required. Return GL_TRUE
615 * of the substitution was possible, GL_FALSE otherwise
616 */
617static GLboolean
618_mesa_merge_mov_into_inst(struct prog_instruction *inst,
619                          const struct prog_instruction *mov)
620{
621   /* Indirection table which associates destination and source components for
622    * the mov instruction
623    */
624   const GLuint mask = get_src_arg_mask(mov, 0, NO_MASK);
625
626   /* Some components are not written by inst. We cannot remove the mov */
627   if (mask != (inst->DstReg.WriteMask & mask))
628      return GL_FALSE;
629
630   inst->Saturate |= mov->Saturate;
631
632   /* Depending on the instruction, we may need to recompute the swizzles.
633    * Also, some other instructions (like TEX) are not linear. We will only
634    * consider completely active sources and destinations
635    */
636   switch (inst->Opcode) {
637
638   /* Carstesian instructions: we compute the swizzle */
639   case OPCODE_MOV:
640   case OPCODE_MIN:
641   case OPCODE_MAX:
642   case OPCODE_ABS:
643   case OPCODE_ADD:
644   case OPCODE_MAD:
645   case OPCODE_MUL:
646   case OPCODE_SUB:
647   {
648      GLuint dst_to_src_comp[4] = {0,0,0,0};
649      GLuint dst_comp, arg;
650      for (dst_comp = 0; dst_comp < 4; ++dst_comp) {
651         if (mov->DstReg.WriteMask & (1 << dst_comp)) {
652            const GLuint src_comp = GET_SWZ(mov->SrcReg[0].Swizzle, dst_comp);
653            assert(src_comp < 4);
654            dst_to_src_comp[dst_comp] = src_comp;
655         }
656      }
657
658      /* Patch each source of the instruction */
659      for (arg = 0; arg < _mesa_num_inst_src_regs(inst->Opcode); arg++) {
660         const GLuint arg_swz = inst->SrcReg[arg].Swizzle;
661         inst->SrcReg[arg].Swizzle = 0;
662
663         /* Reset each active component of the swizzle */
664         for (dst_comp = 0; dst_comp < 4; ++dst_comp) {
665            GLuint src_comp, arg_comp;
666            if ((mov->DstReg.WriteMask & (1 << dst_comp)) == 0)
667               continue;
668            src_comp = dst_to_src_comp[dst_comp];
669            assert(src_comp < 4);
670            arg_comp = GET_SWZ(arg_swz, src_comp);
671            assert(arg_comp < 4);
672            inst->SrcReg[arg].Swizzle |= arg_comp << (3*dst_comp);
673         }
674      }
675      inst->DstReg = mov->DstReg;
676      return GL_TRUE;
677   }
678
679   /* Dot products and scalar instructions: we only change the destination */
680   case OPCODE_RCP:
681   case OPCODE_SIN:
682   case OPCODE_COS:
683   case OPCODE_RSQ:
684   case OPCODE_POW:
685   case OPCODE_EX2:
686   case OPCODE_LOG:
687   case OPCODE_DP2:
688   case OPCODE_DP3:
689   case OPCODE_DP4:
690      inst->DstReg = mov->DstReg;
691      return GL_TRUE;
692
693   /* All other instructions require fully active components with no swizzle */
694   default:
695      if (mov->SrcReg[0].Swizzle != SWIZZLE_XYZW ||
696          inst->DstReg.WriteMask != WRITEMASK_XYZW)
697         return GL_FALSE;
698      inst->DstReg = mov->DstReg;
699      return GL_TRUE;
700   }
701}
702
703
704/**
705 * Try to remove extraneous MOV instructions from the given program.
706 */
707static GLboolean
708_mesa_remove_extra_moves(struct gl_program *prog, void *mem_ctx)
709{
710   GLboolean *removeInst; /* per-instruction removal flag */
711   GLuint i, rem = 0, nesting = 0;
712
713   if (dbg) {
714      printf("Optimize: Begin remove extra moves\n");
715      _mesa_print_program(prog);
716   }
717
718   removeInst =
719      calloc(prog->arb.NumInstructions, sizeof(GLboolean));
720
721   /*
722    * Look for sequences such as this:
723    *    FOO tmpX, arg0, arg1;
724    *    MOV tmpY, tmpX;
725    * and convert into:
726    *    FOO tmpY, arg0, arg1;
727    */
728
729   for (i = 0; i < prog->arb.NumInstructions; i++) {
730      const struct prog_instruction *mov = prog->arb.Instructions + i;
731
732      switch (mov->Opcode) {
733      case OPCODE_BGNLOOP:
734      case OPCODE_BGNSUB:
735      case OPCODE_IF:
736         nesting++;
737         break;
738      case OPCODE_ENDLOOP:
739      case OPCODE_ENDSUB:
740      case OPCODE_ENDIF:
741         nesting--;
742         break;
743      case OPCODE_MOV:
744         if (i > 0 &&
745             can_downward_mov_be_modifed(mov) &&
746             mov->SrcReg[0].File == PROGRAM_TEMPORARY &&
747             nesting == 0)
748         {
749
750            /* see if this MOV can be removed */
751            const GLuint id = mov->SrcReg[0].Index;
752            struct prog_instruction *prevInst;
753            GLuint prevI;
754
755            /* get pointer to previous instruction */
756            prevI = i - 1;
757            while (prevI > 0 && removeInst[prevI])
758               prevI--;
759            prevInst = prog->arb.Instructions + prevI;
760
761            if (prevInst->DstReg.File == PROGRAM_TEMPORARY &&
762                prevInst->DstReg.Index == id &&
763                prevInst->DstReg.RelAddr == 0) {
764
765               const GLuint dst_mask = prevInst->DstReg.WriteMask;
766               enum inst_use next_use = find_next_use(prog, i+1, id, dst_mask);
767
768               if (next_use == WRITE || next_use == END) {
769                  /* OK, we can safely remove this MOV instruction.
770                   * Transform:
771                   *   prevI: FOO tempIndex, x, y;
772                   *       i: MOV z, tempIndex;
773                   * Into:
774                   *   prevI: FOO z, x, y;
775                   */
776                  if (_mesa_merge_mov_into_inst(prevInst, mov)) {
777                     removeInst[i] = GL_TRUE;
778                     if (dbg) {
779                        printf("Remove MOV at %u\n", i);
780                        printf("new prev inst %u: ", prevI);
781                        _mesa_print_instruction(prevInst);
782                     }
783                  }
784               }
785            }
786         }
787         break;
788      default:
789         ; /* nothing */
790      }
791   }
792
793   /* now remove the instructions which aren't needed */
794   rem = remove_instructions(prog, removeInst, mem_ctx);
795
796   free(removeInst);
797
798   if (dbg) {
799      printf("Optimize: End remove extra moves.  %u instructions removed\n", rem);
800      /*_mesa_print_program(prog);*/
801   }
802
803   return rem != 0;
804}
805
806
807/** A live register interval */
808struct interval
809{
810   GLuint Reg;         /** The temporary register index */
811   GLuint Start, End;  /** Start/end instruction numbers */
812};
813
814
815/** A list of register intervals */
816struct interval_list
817{
818   GLuint Num;
819   struct interval Intervals[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
820};
821
822
823static void
824append_interval(struct interval_list *list, const struct interval *inv)
825{
826   list->Intervals[list->Num++] = *inv;
827}
828
829
830/** Insert interval inv into list, sorted by interval end */
831static void
832insert_interval_by_end(struct interval_list *list, const struct interval *inv)
833{
834   /* XXX we could do a binary search insertion here since list is sorted */
835   GLint i = list->Num - 1;
836   while (i >= 0 && list->Intervals[i].End > inv->End) {
837      list->Intervals[i + 1] = list->Intervals[i];
838      i--;
839   }
840   list->Intervals[i + 1] = *inv;
841   list->Num++;
842
843#ifdef DEBUG
844   {
845      GLuint i;
846      for (i = 0; i + 1 < list->Num; i++) {
847         assert(list->Intervals[i].End <= list->Intervals[i + 1].End);
848      }
849   }
850#endif
851}
852
853
854/** Remove the given interval from the interval list */
855static void
856remove_interval(struct interval_list *list, const struct interval *inv)
857{
858   /* XXX we could binary search since list is sorted */
859   GLuint k;
860   for (k = 0; k < list->Num; k++) {
861      if (list->Intervals[k].Reg == inv->Reg) {
862         /* found, remove it */
863         assert(list->Intervals[k].Start == inv->Start);
864         assert(list->Intervals[k].End == inv->End);
865         while (k < list->Num - 1) {
866            list->Intervals[k] = list->Intervals[k + 1];
867            k++;
868         }
869         list->Num--;
870         return;
871      }
872   }
873}
874
875
876/** called by qsort() */
877static int
878compare_start(const void *a, const void *b)
879{
880   const struct interval *ia = (const struct interval *) a;
881   const struct interval *ib = (const struct interval *) b;
882   if (ia->Start < ib->Start)
883      return -1;
884   else if (ia->Start > ib->Start)
885      return +1;
886   else
887      return 0;
888}
889
890
891/** sort the interval list according to interval starts */
892static void
893sort_interval_list_by_start(struct interval_list *list)
894{
895   qsort(list->Intervals, list->Num, sizeof(struct interval), compare_start);
896#ifdef DEBUG
897   {
898      GLuint i;
899      for (i = 0; i + 1 < list->Num; i++) {
900         assert(list->Intervals[i].Start <= list->Intervals[i + 1].Start);
901      }
902   }
903#endif
904}
905
906struct loop_info
907{
908   GLuint Start, End;  /**< Start, end instructions of loop */
909};
910
911/**
912 * Update the intermediate interval info for register 'index' and
913 * instruction 'ic'.
914 */
915static void
916update_interval(GLint intBegin[], GLint intEnd[],
917		struct loop_info *loopStack, GLuint loopStackDepth,
918		GLuint index, GLuint ic)
919{
920   unsigned i;
921   GLuint begin = ic;
922   GLuint end = ic;
923
924   /* If the register is used in a loop, extend its lifetime through the end
925    * of the outermost loop that doesn't contain its definition.
926    */
927   for (i = 0; i < loopStackDepth; i++) {
928      if (intBegin[index] < loopStack[i].Start) {
929	 end = loopStack[i].End;
930	 break;
931      }
932   }
933
934   /* Variables that are live at the end of a loop will also be live at the
935    * beginning, so an instruction inside of a loop should have its live
936    * interval begin at the start of the outermost loop.
937    */
938   if (loopStackDepth > 0 && ic > loopStack[0].Start && ic < loopStack[0].End) {
939      begin = loopStack[0].Start;
940   }
941
942   assert(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
943   if (intBegin[index] == -1) {
944      assert(intEnd[index] == -1);
945      intBegin[index] = begin;
946      intEnd[index] = end;
947   }
948   else {
949      intEnd[index] = end;
950   }
951}
952
953
954/**
955 * Find first/last instruction that references each temporary register.
956 */
957static bool
958find_temp_intervals(const struct prog_instruction *instructions,
959                    GLuint numInstructions,
960                    GLint intBegin[REG_ALLOCATE_MAX_PROGRAM_TEMPS],
961                    GLint intEnd[REG_ALLOCATE_MAX_PROGRAM_TEMPS])
962{
963   struct loop_info loopStack[MAX_LOOP_NESTING];
964   GLuint loopStackDepth = 0;
965   GLuint i;
966
967   for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++){
968      intBegin[i] = intEnd[i] = -1;
969   }
970
971   /* Scan instructions looking for temporary registers */
972   for (i = 0; i < numInstructions; i++) {
973      const struct prog_instruction *inst = instructions + i;
974      if (inst->Opcode == OPCODE_BGNLOOP) {
975         loopStack[loopStackDepth].Start = i;
976         loopStack[loopStackDepth].End = inst->BranchTarget;
977         loopStackDepth++;
978      }
979      else if (inst->Opcode == OPCODE_ENDLOOP) {
980         loopStackDepth--;
981      }
982      else if (inst->Opcode == OPCODE_CAL) {
983         return GL_FALSE;
984      }
985      else {
986         const GLuint numSrc = 3;/*_mesa_num_inst_src_regs(inst->Opcode);*/
987         GLuint j;
988         for (j = 0; j < numSrc; j++) {
989            if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
990               const GLuint index = inst->SrcReg[j].Index;
991               if (inst->SrcReg[j].RelAddr)
992                  return GL_FALSE;
993               update_interval(intBegin, intEnd, loopStack, loopStackDepth,
994			       index, i);
995            }
996         }
997         if (inst->DstReg.File == PROGRAM_TEMPORARY) {
998            const GLuint index = inst->DstReg.Index;
999            if (inst->DstReg.RelAddr)
1000               return GL_FALSE;
1001            update_interval(intBegin, intEnd, loopStack, loopStackDepth,
1002			    index, i);
1003         }
1004      }
1005   }
1006
1007   return GL_TRUE;
1008}
1009
1010
1011/**
1012 * Find the live intervals for each temporary register in the program.
1013 * For register R, the interval [A,B] indicates that R is referenced
1014 * from instruction A through instruction B.
1015 * Special consideration is needed for loops and subroutines.
1016 * \return GL_TRUE if success, GL_FALSE if we cannot proceed for some reason
1017 */
1018static GLboolean
1019find_live_intervals(struct gl_program *prog,
1020                    struct interval_list *liveIntervals)
1021{
1022   GLint intBegin[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
1023   GLint intEnd[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
1024   GLuint i;
1025
1026   /*
1027    * Note: we'll return GL_FALSE below if we find relative indexing
1028    * into the TEMP register file.  We can't handle that yet.
1029    * We also give up on subroutines for now.
1030    */
1031
1032   if (dbg) {
1033      printf("Optimize: Begin find intervals\n");
1034   }
1035
1036   /* build intermediate arrays */
1037   if (!find_temp_intervals(prog->arb.Instructions, prog->arb.NumInstructions,
1038                            intBegin, intEnd))
1039      return GL_FALSE;
1040
1041   /* Build live intervals list from intermediate arrays */
1042   liveIntervals->Num = 0;
1043   for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++) {
1044      if (intBegin[i] >= 0) {
1045         struct interval inv;
1046         inv.Reg = i;
1047         inv.Start = intBegin[i];
1048         inv.End = intEnd[i];
1049         append_interval(liveIntervals, &inv);
1050      }
1051   }
1052
1053   /* Sort the list according to interval starts */
1054   sort_interval_list_by_start(liveIntervals);
1055
1056   if (dbg) {
1057      /* print interval info */
1058      for (i = 0; i < liveIntervals->Num; i++) {
1059         const struct interval *inv = liveIntervals->Intervals + i;
1060         printf("Reg[%d] live [%d, %d]:",
1061                      inv->Reg, inv->Start, inv->End);
1062         if (1) {
1063            GLuint j;
1064            for (j = 0; j < inv->Start; j++)
1065               printf(" ");
1066            for (j = inv->Start; j <= inv->End; j++)
1067               printf("x");
1068         }
1069         printf("\n");
1070      }
1071   }
1072
1073   return GL_TRUE;
1074}
1075
1076
1077/** Scan the array of used register flags to find free entry */
1078static GLint
1079alloc_register(GLboolean usedRegs[REG_ALLOCATE_MAX_PROGRAM_TEMPS])
1080{
1081   GLuint k;
1082   for (k = 0; k < REG_ALLOCATE_MAX_PROGRAM_TEMPS; k++) {
1083      if (!usedRegs[k]) {
1084         usedRegs[k] = GL_TRUE;
1085         return k;
1086      }
1087   }
1088   return -1;
1089}
1090
1091
1092/**
1093 * This function implements "Linear Scan Register Allocation" to reduce
1094 * the number of temporary registers used by the program.
1095 *
1096 * We compute the "live interval" for all temporary registers then
1097 * examine the overlap of the intervals to allocate new registers.
1098 * Basically, if two intervals do not overlap, they can use the same register.
1099 */
1100static void
1101_mesa_reallocate_registers(struct gl_program *prog)
1102{
1103   struct interval_list liveIntervals;
1104   GLint registerMap[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
1105   GLboolean usedRegs[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
1106   GLuint i;
1107   GLint maxTemp = -1;
1108
1109   if (dbg) {
1110      printf("Optimize: Begin live-interval register reallocation\n");
1111      _mesa_print_program(prog);
1112   }
1113
1114   for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++){
1115      registerMap[i] = -1;
1116      usedRegs[i] = GL_FALSE;
1117   }
1118
1119   if (!find_live_intervals(prog, &liveIntervals)) {
1120      if (dbg)
1121         printf("Aborting register reallocation\n");
1122      return;
1123   }
1124
1125   {
1126      struct interval_list activeIntervals;
1127      activeIntervals.Num = 0;
1128
1129      /* loop over live intervals, allocating a new register for each */
1130      for (i = 0; i < liveIntervals.Num; i++) {
1131         const struct interval *live = liveIntervals.Intervals + i;
1132
1133         if (dbg)
1134            printf("Consider register %u\n", live->Reg);
1135
1136         /* Expire old intervals.  Intervals which have ended with respect
1137          * to the live interval can have their remapped registers freed.
1138          */
1139         {
1140            GLint j;
1141            for (j = 0; j < (GLint) activeIntervals.Num; j++) {
1142               const struct interval *inv = activeIntervals.Intervals + j;
1143               if (inv->End >= live->Start) {
1144                  /* Stop now.  Since the activeInterval list is sorted
1145                   * we know we don't have to go further.
1146                   */
1147                  break;
1148               }
1149               else {
1150                  /* Interval 'inv' has expired */
1151                  const GLint regNew = registerMap[inv->Reg];
1152                  assert(regNew >= 0);
1153
1154                  if (dbg)
1155                     printf("  expire interval for reg %u\n", inv->Reg);
1156
1157                  /* remove interval j from active list */
1158                  remove_interval(&activeIntervals, inv);
1159                  j--;  /* counter-act j++ in for-loop above */
1160
1161                  /* return register regNew to the free pool */
1162                  if (dbg)
1163                     printf("  free reg %d\n", regNew);
1164                  assert(usedRegs[regNew] == GL_TRUE);
1165                  usedRegs[regNew] = GL_FALSE;
1166               }
1167            }
1168         }
1169
1170         /* find a free register for this live interval */
1171         {
1172            const GLint k = alloc_register(usedRegs);
1173            if (k < 0) {
1174               /* out of registers, give up */
1175               return;
1176            }
1177            registerMap[live->Reg] = k;
1178            maxTemp = MAX2(maxTemp, k);
1179            if (dbg)
1180               printf("  remap register %u -> %d\n", live->Reg, k);
1181         }
1182
1183         /* Insert this live interval into the active list which is sorted
1184          * by increasing end points.
1185          */
1186         insert_interval_by_end(&activeIntervals, live);
1187      }
1188   }
1189
1190   if (maxTemp + 1 < (GLint) liveIntervals.Num) {
1191      /* OK, we've reduced the number of registers needed.
1192       * Scan the program and replace all the old temporary register
1193       * indexes with the new indexes.
1194       */
1195      replace_regs(prog, PROGRAM_TEMPORARY, registerMap);
1196
1197      prog->arb.NumTemporaries = maxTemp + 1;
1198   }
1199
1200   if (dbg) {
1201      printf("Optimize: End live-interval register reallocation\n");
1202      printf("Num temp regs before: %u  after: %u\n",
1203                   liveIntervals.Num, maxTemp + 1);
1204      _mesa_print_program(prog);
1205   }
1206}
1207
1208
1209#if 0
1210static void
1211print_it(struct gl_context *ctx, struct gl_program *program, const char *txt) {
1212   fprintf(stderr, "%s (%u inst):\n", txt, program->arb.NumInstructions);
1213   _mesa_print_program(program);
1214   _mesa_print_program_parameters(ctx, program);
1215   fprintf(stderr, "\n\n");
1216}
1217#endif
1218
1219/**
1220 * This pass replaces CMP T0, T1 T2 T0 with MOV T0, T2 when the CMP
1221 * instruction is the first instruction to write to register T0.  The are
1222 * several lowering passes done in GLSL IR (e.g. branches and
1223 * relative addressing) that create a large number of conditional assignments
1224 * that ir_to_mesa converts to CMP instructions like the one mentioned above.
1225 *
1226 * Here is why this conversion is safe:
1227 * CMP T0, T1 T2 T0 can be expanded to:
1228 * if (T1 < 0.0)
1229 * 	MOV T0, T2;
1230 * else
1231 * 	MOV T0, T0;
1232 *
1233 * If (T1 < 0.0) evaluates to true then our replacement MOV T0, T2 is the same
1234 * as the original program.  If (T1 < 0.0) evaluates to false, executing
1235 * MOV T0, T0 will store a garbage value in T0 since T0 is uninitialized.
1236 * Therefore, it doesn't matter that we are replacing MOV T0, T0 with MOV T0, T2
1237 * because any instruction that was going to read from T0 after this was going
1238 * to read a garbage value anyway.
1239 */
1240static void
1241_mesa_simplify_cmp(struct gl_program * program)
1242{
1243   GLuint tempWrites[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
1244   GLuint outputWrites[MAX_PROGRAM_OUTPUTS];
1245   GLuint i;
1246
1247   if (dbg) {
1248      printf("Optimize: Begin reads without writes\n");
1249      _mesa_print_program(program);
1250   }
1251
1252   for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++) {
1253      tempWrites[i] = 0;
1254   }
1255
1256   for (i = 0; i < MAX_PROGRAM_OUTPUTS; i++) {
1257      outputWrites[i] = 0;
1258   }
1259
1260   for (i = 0; i < program->arb.NumInstructions; i++) {
1261      struct prog_instruction *inst = program->arb.Instructions + i;
1262      GLuint prevWriteMask;
1263
1264      /* Give up if we encounter relative addressing or flow control. */
1265      if (_mesa_is_flow_control_opcode(inst->Opcode) || inst->DstReg.RelAddr) {
1266         return;
1267      }
1268
1269      if (inst->DstReg.File == PROGRAM_OUTPUT) {
1270         assert(inst->DstReg.Index < MAX_PROGRAM_OUTPUTS);
1271         prevWriteMask = outputWrites[inst->DstReg.Index];
1272         outputWrites[inst->DstReg.Index] |= inst->DstReg.WriteMask;
1273      } else if (inst->DstReg.File == PROGRAM_TEMPORARY) {
1274         assert(inst->DstReg.Index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
1275         prevWriteMask = tempWrites[inst->DstReg.Index];
1276         tempWrites[inst->DstReg.Index] |= inst->DstReg.WriteMask;
1277      } else {
1278         /* No other register type can be a destination register. */
1279         continue;
1280      }
1281
1282      /* For a CMP to be considered a conditional write, the destination
1283       * register and source register two must be the same. */
1284      if (inst->Opcode == OPCODE_CMP
1285          && !(inst->DstReg.WriteMask & prevWriteMask)
1286          && inst->SrcReg[2].File == inst->DstReg.File
1287          && inst->SrcReg[2].Index == inst->DstReg.Index
1288          && inst->DstReg.WriteMask == get_src_arg_mask(inst, 2, NO_MASK)) {
1289
1290         inst->Opcode = OPCODE_MOV;
1291         inst->SrcReg[0] = inst->SrcReg[1];
1292
1293	 /* Unused operands are expected to have the file set to
1294	  * PROGRAM_UNDEFINED.  This is how _mesa_init_instructions initializes
1295	  * all of the sources.
1296	  */
1297	 inst->SrcReg[1].File = PROGRAM_UNDEFINED;
1298	 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
1299	 inst->SrcReg[2].File = PROGRAM_UNDEFINED;
1300	 inst->SrcReg[2].Swizzle = SWIZZLE_NOOP;
1301      }
1302   }
1303   if (dbg) {
1304      printf("Optimize: End reads without writes\n");
1305      _mesa_print_program(program);
1306   }
1307}
1308
1309/**
1310 * Apply optimizations to the given program to eliminate unnecessary
1311 * instructions, temp regs, etc.
1312 */
1313void
1314_mesa_optimize_program(struct gl_program *program, void *mem_ctx)
1315{
1316   GLboolean any_change;
1317
1318   _mesa_simplify_cmp(program);
1319   /* Stop when no modifications were output */
1320   do {
1321      any_change = GL_FALSE;
1322      _mesa_remove_extra_move_use(program);
1323      if (_mesa_remove_dead_code_global(program, mem_ctx))
1324         any_change = GL_TRUE;
1325      if (_mesa_remove_extra_moves(program, mem_ctx))
1326         any_change = GL_TRUE;
1327      if (_mesa_remove_dead_code_local(program, mem_ctx))
1328         any_change = GL_TRUE;
1329
1330      any_change = _mesa_constant_fold(program) || any_change;
1331      _mesa_reallocate_registers(program);
1332   } while (any_change);
1333}
1334
1335