1/*
2 * Copyright 2011 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23#include "codegen/nv50_ir.h"
24#include "codegen/nv50_ir_target.h"
25#include "codegen/nv50_ir_driver.h"
26
27extern "C" {
28#include "nouveau_debug.h"
29#include "nv50/nv50_program.h"
30}
31
32namespace nv50_ir {
33
34Modifier::Modifier(operation op)
35{
36   switch (op) {
37   case OP_NEG: bits = NV50_IR_MOD_NEG; break;
38   case OP_ABS: bits = NV50_IR_MOD_ABS; break;
39   case OP_SAT: bits = NV50_IR_MOD_SAT; break;
40   case OP_NOT: bits = NV50_IR_MOD_NOT; break;
41   default:
42      bits = 0;
43      break;
44   }
45}
46
47Modifier Modifier::operator*(const Modifier m) const
48{
49   unsigned int a, b, c;
50
51   b = m.bits;
52   if (this->bits & NV50_IR_MOD_ABS)
53      b &= ~NV50_IR_MOD_NEG;
54
55   a = (this->bits ^ b)      & (NV50_IR_MOD_NOT | NV50_IR_MOD_NEG);
56   c = (this->bits | m.bits) & (NV50_IR_MOD_ABS | NV50_IR_MOD_SAT);
57
58   return Modifier(a | c);
59}
60
61ValueRef::ValueRef(Value *v) : value(NULL), insn(NULL)
62{
63   indirect[0] = -1;
64   indirect[1] = -1;
65   usedAsPtr = false;
66   set(v);
67}
68
69ValueRef::ValueRef(const ValueRef& ref) : value(NULL), insn(ref.insn)
70{
71   set(ref);
72   usedAsPtr = ref.usedAsPtr;
73}
74
75ValueRef::~ValueRef()
76{
77   this->set(NULL);
78}
79
80bool ValueRef::getImmediate(ImmediateValue &imm) const
81{
82   const ValueRef *src = this;
83   Modifier m;
84   DataType type = src->insn->sType;
85
86   while (src) {
87      if (src->mod) {
88         if (src->insn->sType != type)
89            break;
90         m *= src->mod;
91      }
92      if (src->getFile() == FILE_IMMEDIATE) {
93         imm = *(src->value->asImm());
94         // The immediate's type isn't required to match its use, it's
95         // more of a hint; applying a modifier makes use of that hint.
96         imm.reg.type = type;
97         m.applyTo(imm);
98         return true;
99      }
100
101      Instruction *insn = src->value->getUniqueInsn();
102
103      if (insn && insn->op == OP_MOV) {
104         src = &insn->src(0);
105         if (src->mod)
106            WARN("OP_MOV with modifier encountered !\n");
107      } else {
108         src = NULL;
109      }
110   }
111   return false;
112}
113
114ValueDef::ValueDef(Value *v) : value(NULL), insn(NULL)
115{
116   set(v);
117}
118
119ValueDef::ValueDef(const ValueDef& def) : value(NULL), insn(NULL)
120{
121   set(def.get());
122}
123
124ValueDef::~ValueDef()
125{
126   this->set(NULL);
127}
128
129void
130ValueRef::set(const ValueRef &ref)
131{
132   this->set(ref.get());
133   mod = ref.mod;
134   indirect[0] = ref.indirect[0];
135   indirect[1] = ref.indirect[1];
136}
137
138void
139ValueRef::set(Value *refVal)
140{
141   if (value == refVal)
142      return;
143   if (value)
144      value->uses.erase(this);
145   if (refVal)
146      refVal->uses.insert(this);
147
148   value = refVal;
149}
150
151void
152ValueDef::set(Value *defVal)
153{
154   if (value == defVal)
155      return;
156   if (value)
157      value->defs.remove(this);
158   if (defVal)
159      defVal->defs.push_back(this);
160
161   value = defVal;
162}
163
164// Check if we can replace this definition's value by the value in @rep,
165// including the source modifiers, i.e. make sure that all uses support
166// @rep.mod.
167bool
168ValueDef::mayReplace(const ValueRef &rep)
169{
170   if (!rep.mod)
171      return true;
172
173   if (!insn || !insn->bb) // Unbound instruction ?
174      return false;
175
176   const Target *target = insn->bb->getProgram()->getTarget();
177
178   for (Value::UseIterator it = value->uses.begin(); it != value->uses.end();
179        ++it) {
180      Instruction *insn = (*it)->getInsn();
181      int s = -1;
182
183      for (int i = 0; insn->srcExists(i); ++i) {
184         if (insn->src(i).get() == value) {
185            // If there are multiple references to us we'd have to check if the
186            // combination of mods is still supported, but just bail for now.
187            if (&insn->src(i) != (*it))
188               return false;
189            s = i;
190         }
191      }
192      assert(s >= 0); // integrity of uses list
193
194      if (!target->isModSupported(insn, s, rep.mod))
195         return false;
196   }
197   return true;
198}
199
200void
201ValueDef::replace(const ValueRef &repVal, bool doSet)
202{
203   assert(mayReplace(repVal));
204
205   if (value == repVal.get())
206      return;
207
208   while (!value->uses.empty()) {
209      ValueRef *ref = *value->uses.begin();
210      ref->set(repVal.get());
211      ref->mod *= repVal.mod;
212   }
213
214   if (doSet)
215      set(repVal.get());
216}
217
218Value::Value()
219{
220  join = this;
221  memset(&reg, 0, sizeof(reg));
222  reg.size = 4;
223}
224
225LValue::LValue(Function *fn, DataFile file)
226{
227   reg.file = file;
228   reg.size = (file != FILE_PREDICATE) ? 4 : 1;
229   reg.data.id = -1;
230
231   compMask = 0;
232   compound = 0;
233   ssa = 0;
234   fixedReg = 0;
235   noSpill = 0;
236
237   fn->add(this, this->id);
238}
239
240LValue::LValue(Function *fn, LValue *lval)
241{
242   assert(lval);
243
244   reg.file = lval->reg.file;
245   reg.size = lval->reg.size;
246   reg.data.id = -1;
247
248   compMask = 0;
249   compound = 0;
250   ssa = 0;
251   fixedReg = 0;
252   noSpill = 0;
253
254   fn->add(this, this->id);
255}
256
257LValue *
258LValue::clone(ClonePolicy<Function>& pol) const
259{
260   LValue *that = new_LValue(pol.context(), reg.file);
261
262   pol.set<Value>(this, that);
263
264   that->reg.size = this->reg.size;
265   that->reg.type = this->reg.type;
266   that->reg.data = this->reg.data;
267
268   return that;
269}
270
271bool
272LValue::isUniform() const
273{
274   if (defs.size() > 1)
275      return false;
276   Instruction *insn = getInsn();
277   // let's not try too hard here for now ...
278   return !insn->srcExists(1) && insn->getSrc(0)->isUniform();
279}
280
281Symbol::Symbol(Program *prog, DataFile f, ubyte fidx)
282{
283   baseSym = NULL;
284
285   reg.file = f;
286   reg.fileIndex = fidx;
287   reg.data.offset = 0;
288
289   prog->add(this, this->id);
290}
291
292Symbol *
293Symbol::clone(ClonePolicy<Function>& pol) const
294{
295   Program *prog = pol.context()->getProgram();
296
297   Symbol *that = new_Symbol(prog, reg.file, reg.fileIndex);
298
299   pol.set<Value>(this, that);
300
301   that->reg.size = this->reg.size;
302   that->reg.type = this->reg.type;
303   that->reg.data = this->reg.data;
304
305   that->baseSym = this->baseSym;
306
307   return that;
308}
309
310bool
311Symbol::isUniform() const
312{
313   return
314      reg.file != FILE_SYSTEM_VALUE &&
315      reg.file != FILE_MEMORY_LOCAL &&
316      reg.file != FILE_SHADER_INPUT;
317}
318
319ImmediateValue::ImmediateValue(Program *prog, uint32_t uval)
320{
321   memset(&reg, 0, sizeof(reg));
322
323   reg.file = FILE_IMMEDIATE;
324   reg.size = 4;
325   reg.type = TYPE_U32;
326
327   reg.data.u32 = uval;
328
329   prog->add(this, this->id);
330}
331
332ImmediateValue::ImmediateValue(Program *prog, float fval)
333{
334   memset(&reg, 0, sizeof(reg));
335
336   reg.file = FILE_IMMEDIATE;
337   reg.size = 4;
338   reg.type = TYPE_F32;
339
340   reg.data.f32 = fval;
341
342   prog->add(this, this->id);
343}
344
345ImmediateValue::ImmediateValue(Program *prog, double dval)
346{
347   memset(&reg, 0, sizeof(reg));
348
349   reg.file = FILE_IMMEDIATE;
350   reg.size = 8;
351   reg.type = TYPE_F64;
352
353   reg.data.f64 = dval;
354
355   prog->add(this, this->id);
356}
357
358ImmediateValue::ImmediateValue(const ImmediateValue *proto, DataType ty)
359{
360   reg = proto->reg;
361
362   reg.type = ty;
363   reg.size = typeSizeof(ty);
364}
365
366ImmediateValue *
367ImmediateValue::clone(ClonePolicy<Function>& pol) const
368{
369   Program *prog = pol.context()->getProgram();
370   ImmediateValue *that = new_ImmediateValue(prog, 0u);
371
372   pol.set<Value>(this, that);
373
374   that->reg.size = this->reg.size;
375   that->reg.type = this->reg.type;
376   that->reg.data = this->reg.data;
377
378   return that;
379}
380
381bool
382ImmediateValue::isInteger(const int i) const
383{
384   switch (reg.type) {
385   case TYPE_S8:
386      return reg.data.s8 == i;
387   case TYPE_U8:
388      return reg.data.u8 == i;
389   case TYPE_S16:
390      return reg.data.s16 == i;
391   case TYPE_U16:
392      return reg.data.u16 == i;
393   case TYPE_S32:
394   case TYPE_U32:
395      return reg.data.s32 == i; // as if ...
396   case TYPE_S64:
397   case TYPE_U64:
398      return reg.data.s64 == i; // as if ...
399   case TYPE_F32:
400      return reg.data.f32 == static_cast<float>(i);
401   case TYPE_F64:
402      return reg.data.f64 == static_cast<double>(i);
403   default:
404      return false;
405   }
406}
407
408bool
409ImmediateValue::isNegative() const
410{
411   switch (reg.type) {
412   case TYPE_S8:  return reg.data.s8 < 0;
413   case TYPE_S16: return reg.data.s16 < 0;
414   case TYPE_S32:
415   case TYPE_U32: return reg.data.s32 < 0;
416   case TYPE_F32: return reg.data.u32 & (1 << 31);
417   case TYPE_F64: return reg.data.u64 & (1ULL << 63);
418   default:
419      return false;
420   }
421}
422
423bool
424ImmediateValue::isPow2() const
425{
426   if (reg.type == TYPE_U64 || reg.type == TYPE_S64)
427      return util_is_power_of_two_or_zero64(reg.data.u64);
428   else
429      return util_is_power_of_two_or_zero(reg.data.u32);
430}
431
432void
433ImmediateValue::applyLog2()
434{
435   switch (reg.type) {
436   case TYPE_S8:
437   case TYPE_S16:
438   case TYPE_S32:
439      assert(!this->isNegative());
440      // fall through
441   case TYPE_U8:
442   case TYPE_U16:
443   case TYPE_U32:
444      reg.data.u32 = util_logbase2(reg.data.u32);
445      break;
446   case TYPE_S64:
447      assert(!this->isNegative());
448      // fall through
449   case TYPE_U64:
450      reg.data.u64 = util_logbase2_64(reg.data.u64);
451      break;
452   case TYPE_F32:
453      reg.data.f32 = log2f(reg.data.f32);
454      break;
455   case TYPE_F64:
456      reg.data.f64 = log2(reg.data.f64);
457      break;
458   default:
459      assert(0);
460      break;
461   }
462}
463
464bool
465ImmediateValue::compare(CondCode cc, float fval) const
466{
467   if (reg.type != TYPE_F32)
468      ERROR("immediate value is not of type f32");
469
470   switch (static_cast<CondCode>(cc & 7)) {
471   case CC_TR: return true;
472   case CC_FL: return false;
473   case CC_LT: return reg.data.f32 <  fval;
474   case CC_LE: return reg.data.f32 <= fval;
475   case CC_GT: return reg.data.f32 >  fval;
476   case CC_GE: return reg.data.f32 >= fval;
477   case CC_EQ: return reg.data.f32 == fval;
478   case CC_NE: return reg.data.f32 != fval;
479   default:
480      assert(0);
481      return false;
482   }
483}
484
485ImmediateValue&
486ImmediateValue::operator=(const ImmediateValue &that)
487{
488   this->reg = that.reg;
489   return (*this);
490}
491
492bool
493Value::interfers(const Value *that) const
494{
495   uint32_t idA, idB;
496
497   if (that->reg.file != reg.file || that->reg.fileIndex != reg.fileIndex)
498      return false;
499   if (this->asImm())
500      return false;
501
502   if (this->asSym()) {
503      idA = this->join->reg.data.offset;
504      idB = that->join->reg.data.offset;
505   } else {
506      idA = this->join->reg.data.id * MIN2(this->reg.size, 4);
507      idB = that->join->reg.data.id * MIN2(that->reg.size, 4);
508   }
509
510   if (idA < idB)
511      return (idA + this->reg.size > idB);
512   else
513   if (idA > idB)
514      return (idB + that->reg.size > idA);
515   else
516      return (idA == idB);
517}
518
519bool
520Value::equals(const Value *that, bool strict) const
521{
522   if (strict)
523      return this == that;
524
525   if (that->reg.file != reg.file || that->reg.fileIndex != reg.fileIndex)
526      return false;
527   if (that->reg.size != this->reg.size)
528      return false;
529
530   if (that->reg.data.id != this->reg.data.id)
531      return false;
532
533   return true;
534}
535
536bool
537ImmediateValue::equals(const Value *that, bool strict) const
538{
539   const ImmediateValue *imm = that->asImm();
540   if (!imm)
541      return false;
542   return reg.data.u64 == imm->reg.data.u64;
543}
544
545bool
546Symbol::equals(const Value *that, bool strict) const
547{
548   if (reg.file != that->reg.file || reg.fileIndex != that->reg.fileIndex)
549      return false;
550   assert(that->asSym());
551
552   if (this->baseSym != that->asSym()->baseSym)
553      return false;
554
555   if (reg.file == FILE_SYSTEM_VALUE)
556      return (this->reg.data.sv.sv    == that->reg.data.sv.sv &&
557              this->reg.data.sv.index == that->reg.data.sv.index);
558   return this->reg.data.offset == that->reg.data.offset;
559}
560
561void Instruction::init()
562{
563   next = prev = 0;
564
565   cc = CC_ALWAYS;
566   rnd = ROUND_N;
567   cache = CACHE_CA;
568   subOp = 0;
569
570   saturate = 0;
571   join = 0;
572   exit = 0;
573   terminator = 0;
574   ftz = 0;
575   dnz = 0;
576   perPatch = 0;
577   fixed = 0;
578   encSize = 0;
579   ipa = 0;
580   mask = 0;
581   precise = 0;
582
583   lanes = 0xf;
584
585   postFactor = 0;
586
587   predSrc = -1;
588   flagsDef = -1;
589   flagsSrc = -1;
590}
591
592Instruction::Instruction()
593{
594   init();
595
596   op = OP_NOP;
597   dType = sType = TYPE_F32;
598
599   id = -1;
600   bb = 0;
601}
602
603Instruction::Instruction(Function *fn, operation opr, DataType ty)
604{
605   init();
606
607   op = opr;
608   dType = sType = ty;
609
610   fn->add(this, id);
611}
612
613Instruction::~Instruction()
614{
615   if (bb) {
616      Function *fn = bb->getFunction();
617      bb->remove(this);
618      fn->allInsns.remove(id);
619   }
620
621   for (int s = 0; srcExists(s); ++s)
622      setSrc(s, NULL);
623   // must unlink defs too since the list pointers will get deallocated
624   for (int d = 0; defExists(d); ++d)
625      setDef(d, NULL);
626}
627
628void
629Instruction::setDef(int i, Value *val)
630{
631   int size = defs.size();
632   if (i >= size) {
633      defs.resize(i + 1);
634      while (size <= i)
635         defs[size++].setInsn(this);
636   }
637   defs[i].set(val);
638}
639
640void
641Instruction::setSrc(int s, Value *val)
642{
643   int size = srcs.size();
644   if (s >= size) {
645      srcs.resize(s + 1);
646      while (size <= s)
647         srcs[size++].setInsn(this);
648   }
649   srcs[s].set(val);
650}
651
652void
653Instruction::setSrc(int s, const ValueRef& ref)
654{
655   setSrc(s, ref.get());
656   srcs[s].mod = ref.mod;
657}
658
659void
660Instruction::swapSources(int a, int b)
661{
662   Value *value = srcs[a].get();
663   Modifier m = srcs[a].mod;
664
665   setSrc(a, srcs[b]);
666
667   srcs[b].set(value);
668   srcs[b].mod = m;
669}
670
// Update a stored source index for a move of sources [@s, ...) by @delta:
// indices at or above @s shift by @delta; when moving down, indices in the
// range that gets overwritten ([@s + @delta, @s)) are invalidated (-1).
static inline void moveSourcesAdjustIndex(int8_t &index, int s, int delta)
{
   if (index >= s) {
      index += delta;
      return;
   }

   const bool erased = (delta < 0) && (index >= (s + delta));
   if (erased)
      index = -1;
}
679
// Moves sources [@s,last_source] by @delta.
// If @delta < 0, sources [@s - abs(@delta), @s) are erased.
//
// All stored source indices (per-source indirect indices, predSrc, flagsSrc
// and, for texture instructions, the r/s indirect indices) are adjusted
// before the sources themselves are moved. @s + @delta must not be negative.
void
Instruction::moveSources(const int s, const int delta)
{
   if (delta == 0)
      return;
   assert(s + delta >= 0);

   int k;

   // Fix up the indirect indices of every source; once this loop is done,
   // k is the index of the first source slot that does not exist.
   for (k = 0; srcExists(k); ++k) {
      for (int i = 0; i < 2; ++i)
         moveSourcesAdjustIndex(src(k).indirect[i], s, delta);
   }
   moveSourcesAdjustIndex(predSrc, s, delta);
   moveSourcesAdjustIndex(flagsSrc, s, delta);
   if (asTex()) {
      TexInstruction *tex = asTex();
      moveSourcesAdjustIndex(tex->tex.rIndirectSrc, s, delta);
      moveSourcesAdjustIndex(tex->tex.sIndirectSrc, s, delta);
   }

   if (delta > 0) {
      // Moving up: copy from the last source down to @s so that no source is
      // overwritten before it has been copied. Slots [@s, @s + @delta) keep
      // their previous contents.
      --k;
      for (int p = k + delta; k >= s; --k, --p)
         setSrc(p, src(k));
   } else {
      // Moving down: copy upwards starting at @s ...
      int p;
      for (p = s; p < k; ++p)
         setSrc(p + delta, src(p));
      // ... then clear the slots left over at the tail.
      for (; (p + delta) < k; ++p)
         setSrc(p + delta, NULL);
   }
}
715
716void
717Instruction::takeExtraSources(int s, Value *values[3])
718{
719   values[0] = getIndirect(s, 0);
720   if (values[0])
721      setIndirect(s, 0, NULL);
722
723   values[1] = getIndirect(s, 1);
724   if (values[1])
725      setIndirect(s, 1, NULL);
726
727   values[2] = getPredicate();
728   if (values[2])
729      setPredicate(cc, NULL);
730}
731
732void
733Instruction::putExtraSources(int s, Value *values[3])
734{
735   if (values[0])
736      setIndirect(s, 0, values[0]);
737   if (values[1])
738      setIndirect(s, 1, values[1]);
739   if (values[2])
740      setPredicate(cc, values[2]);
741}
742
743Instruction *
744Instruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
745{
746   if (!i)
747      i = new_Instruction(pol.context(), op, dType);
748#ifndef NDEBUG // non-conformant assert, so this is required
749   assert(typeid(*i) == typeid(*this));
750#endif
751
752   pol.set<Instruction>(this, i);
753
754   i->sType = sType;
755
756   i->rnd = rnd;
757   i->cache = cache;
758   i->subOp = subOp;
759
760   i->saturate = saturate;
761   i->join = join;
762   i->exit = exit;
763   i->mask = mask;
764   i->ftz = ftz;
765   i->dnz = dnz;
766   i->ipa = ipa;
767   i->lanes = lanes;
768   i->perPatch = perPatch;
769
770   i->postFactor = postFactor;
771
772   for (int d = 0; defExists(d); ++d)
773      i->setDef(d, pol.get(getDef(d)));
774
775   for (int s = 0; srcExists(s); ++s) {
776      i->setSrc(s, pol.get(getSrc(s)));
777      i->src(s).mod = src(s).mod;
778   }
779
780   i->cc = cc;
781   i->predSrc = predSrc;
782   i->flagsDef = flagsDef;
783   i->flagsSrc = flagsSrc;
784
785   return i;
786}
787
788unsigned int
789Instruction::defCount(unsigned int mask, bool singleFile) const
790{
791   unsigned int i, n;
792
793   if (singleFile) {
794      unsigned int d = ffs(mask);
795      if (!d)
796         return 0;
797      for (i = d--; defExists(i); ++i)
798         if (getDef(i)->reg.file != getDef(d)->reg.file)
799            mask &= ~(1 << i);
800   }
801
802   for (n = 0, i = 0; this->defExists(i); ++i, mask >>= 1)
803      n += mask & 1;
804   return n;
805}
806
807unsigned int
808Instruction::srcCount(unsigned int mask, bool singleFile) const
809{
810   unsigned int i, n;
811
812   if (singleFile) {
813      unsigned int s = ffs(mask);
814      if (!s)
815         return 0;
816      for (i = s--; srcExists(i); ++i)
817         if (getSrc(i)->reg.file != getSrc(s)->reg.file)
818            mask &= ~(1 << i);
819   }
820
821   for (n = 0, i = 0; this->srcExists(i); ++i, mask >>= 1)
822      n += mask & 1;
823   return n;
824}
825
826bool
827Instruction::setIndirect(int s, int dim, Value *value)
828{
829   assert(this->srcExists(s));
830
831   int p = srcs[s].indirect[dim];
832   if (p < 0) {
833      if (!value)
834         return true;
835      p = srcs.size();
836      while (p > 0 && !srcExists(p - 1))
837         --p;
838   }
839   setSrc(p, value);
840   srcs[p].usedAsPtr = (value != 0);
841   srcs[s].indirect[dim] = value ? p : -1;
842   return true;
843}
844
845bool
846Instruction::setPredicate(CondCode ccode, Value *value)
847{
848   cc = ccode;
849
850   if (!value) {
851      if (predSrc >= 0) {
852         srcs[predSrc].set(NULL);
853         predSrc = -1;
854      }
855      return true;
856   }
857
858   if (predSrc < 0) {
859      predSrc = srcs.size();
860      while (predSrc > 0 && !srcExists(predSrc - 1))
861         --predSrc;
862   }
863
864   setSrc(predSrc, value);
865   return true;
866}
867
868bool
869Instruction::writesPredicate() const
870{
871   for (int d = 0; defExists(d); ++d)
872      if (getDef(d)->inFile(FILE_PREDICATE) || getDef(d)->inFile(FILE_FLAGS))
873         return true;
874   return false;
875}
876
877bool
878Instruction::canCommuteDefSrc(const Instruction *i) const
879{
880   for (int d = 0; defExists(d); ++d)
881      for (int s = 0; i->srcExists(s); ++s)
882         if (getDef(d)->interfers(i->getSrc(s)))
883            return false;
884   return true;
885}
886
887bool
888Instruction::canCommuteDefDef(const Instruction *i) const
889{
890   for (int d = 0; defExists(d); ++d)
891      for (int c = 0; i->defExists(c); ++c)
892         if (getDef(d)->interfers(i->getDef(c)))
893            return false;
894   return true;
895}
896
897bool
898Instruction::isCommutationLegal(const Instruction *i) const
899{
900   return canCommuteDefDef(i) &&
901      canCommuteDefSrc(i) &&
902      i->canCommuteDefSrc(this);
903}
904
905TexInstruction::TexInstruction(Function *fn, operation op)
906   : Instruction(fn, op, TYPE_F32)
907{
908   memset(&tex, 0, sizeof(tex));
909
910   tex.rIndirectSrc = -1;
911   tex.sIndirectSrc = -1;
912
913   if (op == OP_TXF)
914      sType = TYPE_U32;
915}
916
917TexInstruction::~TexInstruction()
918{
919   for (int c = 0; c < 3; ++c) {
920      dPdx[c].set(NULL);
921      dPdy[c].set(NULL);
922   }
923   for (int n = 0; n < 4; ++n)
924      for (int c = 0; c < 3; ++c)
925         offset[n][c].set(NULL);
926}
927
928TexInstruction *
929TexInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
930{
931   TexInstruction *tex = (i ? static_cast<TexInstruction *>(i) :
932                          new_TexInstruction(pol.context(), op));
933
934   Instruction::clone(pol, tex);
935
936   tex->tex = this->tex;
937
938   if (op == OP_TXD) {
939      for (unsigned int c = 0; c < tex->tex.target.getDim(); ++c) {
940         tex->dPdx[c].set(dPdx[c]);
941         tex->dPdy[c].set(dPdy[c]);
942      }
943   }
944
945   for (int n = 0; n < tex->tex.useOffsets; ++n)
946      for (int c = 0; c < 3; ++c)
947         tex->offset[n][c].set(offset[n][c]);
948
949   return tex;
950}
951
// Static description of every texture target, one row per target.
// NOTE(review): the columns presumably are { name, dimensions, argument
// count, isArray, isCube, isShadow } and the row order presumably has to
// match the texture target enumeration - confirm against the declaration of
// TexInstruction::Target::Desc.
const struct TexInstruction::Target::Desc TexInstruction::Target::descTable[] =
{
   { "1D",                1, 1, false, false, false },
   { "2D",                2, 2, false, false, false },
   { "2D_MS",             2, 3, false, false, false },
   { "3D",                3, 3, false, false, false },
   { "CUBE",              2, 3, false, true,  false },
   { "1D_SHADOW",         1, 1, false, false, true  },
   { "2D_SHADOW",         2, 2, false, false, true  },
   { "CUBE_SHADOW",       2, 3, false, true,  true  },
   { "1D_ARRAY",          1, 2, true,  false, false },
   { "2D_ARRAY",          2, 3, true,  false, false },
   { "2D_MS_ARRAY",       2, 4, true,  false, false },
   { "CUBE_ARRAY",        2, 4, true,  true,  false },
   { "1D_ARRAY_SHADOW",   1, 2, true,  false, true  },
   { "2D_ARRAY_SHADOW",   2, 3, true,  false, true  },
   { "RECT",              2, 2, false, false, false },
   { "RECT_SHADOW",       2, 2, false, false, true  },
   { "CUBE_ARRAY_SHADOW", 2, 4, true,  true,  true  },
   { "BUFFER",            1, 1, false, false, false },
};
973
// Static description of the supported image formats, grouped by numeric
// class (float, unsigned int, signed int, unorm, snorm, then BGRA8).
// NOTE(review): the columns presumably are { name, component count, bits
// per component, numeric type, optional BGRA flag } and the row order
// presumably has to match the image format enumeration - confirm against
// the declaration of TexInstruction::ImgFormatDesc.
const struct TexInstruction::ImgFormatDesc TexInstruction::formatTable[] =
{
   { "NONE",         0, {  0,  0,  0,  0 },  UINT },

   { "RGBA32F",      4, { 32, 32, 32, 32 }, FLOAT },
   { "RGBA16F",      4, { 16, 16, 16, 16 }, FLOAT },
   { "RG32F",        2, { 32, 32,  0,  0 }, FLOAT },
   { "RG16F",        2, { 16, 16,  0,  0 }, FLOAT },
   { "R11G11B10F",   3, { 11, 11, 10,  0 }, FLOAT },
   { "R32F",         1, { 32,  0,  0,  0 }, FLOAT },
   { "R16F",         1, { 16,  0,  0,  0 }, FLOAT },

   { "RGBA32UI",     4, { 32, 32, 32, 32 },  UINT },
   { "RGBA16UI",     4, { 16, 16, 16, 16 },  UINT },
   { "RGB10A2UI",    4, { 10, 10, 10,  2 },  UINT },
   { "RGBA8UI",      4, {  8,  8,  8,  8 },  UINT },
   { "RG32UI",       2, { 32, 32,  0,  0 },  UINT },
   { "RG16UI",       2, { 16, 16,  0,  0 },  UINT },
   { "RG8UI",        2, {  8,  8,  0,  0 },  UINT },
   { "R32UI",        1, { 32,  0,  0,  0 },  UINT },
   { "R16UI",        1, { 16,  0,  0,  0 },  UINT },
   { "R8UI",         1, {  8,  0,  0,  0 },  UINT },

   { "RGBA32I",      4, { 32, 32, 32, 32 },  SINT },
   { "RGBA16I",      4, { 16, 16, 16, 16 },  SINT },
   { "RGBA8I",       4, {  8,  8,  8,  8 },  SINT },
   { "RG32I",        2, { 32, 32,  0,  0 },  SINT },
   { "RG16I",        2, { 16, 16,  0,  0 },  SINT },
   { "RG8I",         2, {  8,  8,  0,  0 },  SINT },
   { "R32I",         1, { 32,  0,  0,  0 },  SINT },
   { "R16I",         1, { 16,  0,  0,  0 },  SINT },
   { "R8I",          1, {  8,  0,  0,  0 },  SINT },

   { "RGBA16",       4, { 16, 16, 16, 16 }, UNORM },
   { "RGB10A2",      4, { 10, 10, 10,  2 }, UNORM },
   { "RGBA8",        4, {  8,  8,  8,  8 }, UNORM },
   { "RG16",         2, { 16, 16,  0,  0 }, UNORM },
   { "RG8",          2, {  8,  8,  0,  0 }, UNORM },
   { "R16",          1, { 16,  0,  0,  0 }, UNORM },
   { "R8",           1, {  8,  0,  0,  0 }, UNORM },

   { "RGBA16_SNORM", 4, { 16, 16, 16, 16 }, SNORM },
   { "RGBA8_SNORM",  4, {  8,  8,  8,  8 }, SNORM },
   { "RG16_SNORM",   2, { 16, 16,  0,  0 }, SNORM },
   { "RG8_SNORM",    2, {  8,  8,  0,  0 }, SNORM },
   { "R16_SNORM",    1, { 16,  0,  0,  0 }, SNORM },
   { "R8_SNORM",     1, {  8,  0,  0,  0 }, SNORM },

   { "BGRA8",        4, {  8,  8,  8,  8 }, UNORM, true },
};
1024
1025void
1026TexInstruction::setIndirectR(Value *v)
1027{
1028   int p = ((tex.rIndirectSrc < 0) && v) ? srcs.size() : tex.rIndirectSrc;
1029   if (p >= 0) {
1030      tex.rIndirectSrc = p;
1031      setSrc(p, v);
1032      srcs[p].usedAsPtr = !!v;
1033   }
1034}
1035
1036void
1037TexInstruction::setIndirectS(Value *v)
1038{
1039   int p = ((tex.sIndirectSrc < 0) && v) ? srcs.size() : tex.sIndirectSrc;
1040   if (p >= 0) {
1041      tex.sIndirectSrc = p;
1042      setSrc(p, v);
1043      srcs[p].usedAsPtr = !!v;
1044   }
1045}
1046
1047CmpInstruction::CmpInstruction(Function *fn, operation op)
1048   : Instruction(fn, op, TYPE_F32)
1049{
1050   setCond = CC_ALWAYS;
1051}
1052
1053CmpInstruction *
1054CmpInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
1055{
1056   CmpInstruction *cmp = (i ? static_cast<CmpInstruction *>(i) :
1057                          new_CmpInstruction(pol.context(), op));
1058   cmp->dType = dType;
1059   Instruction::clone(pol, cmp);
1060   cmp->setCond = setCond;
1061   return cmp;
1062}
1063
1064FlowInstruction::FlowInstruction(Function *fn, operation op, void *targ)
1065   : Instruction(fn, op, TYPE_NONE)
1066{
1067   if (op == OP_CALL)
1068      target.fn = reinterpret_cast<Function *>(targ);
1069   else
1070      target.bb = reinterpret_cast<BasicBlock *>(targ);
1071
1072   if (op == OP_BRA ||
1073       op == OP_CONT || op == OP_BREAK ||
1074       op == OP_RET || op == OP_EXIT)
1075      terminator = 1;
1076   else
1077   if (op == OP_JOIN)
1078      terminator = targ ? 1 : 0;
1079
1080   allWarp = absolute = limit = builtin = indirect = 0;
1081}
1082
1083FlowInstruction *
1084FlowInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
1085{
1086   FlowInstruction *flow = (i ? static_cast<FlowInstruction *>(i) :
1087                            new_FlowInstruction(pol.context(), op, NULL));
1088
1089   Instruction::clone(pol, flow);
1090   flow->allWarp = allWarp;
1091   flow->absolute = absolute;
1092   flow->limit = limit;
1093   flow->builtin = builtin;
1094
1095   if (builtin)
1096      flow->target.builtin = target.builtin;
1097   else
1098   if (op == OP_CALL)
1099      flow->target.fn = target.fn;
1100   else
1101   if (target.bb)
1102      flow->target.bb = pol.get<BasicBlock>(target.bb);
1103
1104   return flow;
1105}
1106
1107Program::Program(Type type, Target *arch)
1108   : progType(type),
1109     target(arch),
1110     mem_Instruction(sizeof(Instruction), 6),
1111     mem_CmpInstruction(sizeof(CmpInstruction), 4),
1112     mem_TexInstruction(sizeof(TexInstruction), 4),
1113     mem_FlowInstruction(sizeof(FlowInstruction), 4),
1114     mem_LValue(sizeof(LValue), 8),
1115     mem_Symbol(sizeof(Symbol), 7),
1116     mem_ImmediateValue(sizeof(ImmediateValue), 7)
1117{
1118   code = NULL;
1119   binSize = 0;
1120
1121   maxGPR = -1;
1122   fp64 = false;
1123
1124   main = new Function(this, "MAIN", ~0);
1125   calls.insert(&main->call);
1126
1127   dbgFlags = 0;
1128   optLevel = 0;
1129
1130   targetPriv = NULL;
1131}
1132
1133Program::~Program()
1134{
1135   for (ArrayList::Iterator it = allFuncs.iterator(); !it.end(); it.next())
1136      delete reinterpret_cast<Function *>(it.get());
1137
1138   for (ArrayList::Iterator it = allRValues.iterator(); !it.end(); it.next())
1139      releaseValue(reinterpret_cast<Value *>(it.get()));
1140}
1141
1142void Program::releaseInstruction(Instruction *insn)
1143{
1144   // TODO: make this not suck so much
1145
1146   insn->~Instruction();
1147
1148   if (insn->asCmp())
1149      mem_CmpInstruction.release(insn);
1150   else
1151   if (insn->asTex())
1152      mem_TexInstruction.release(insn);
1153   else
1154   if (insn->asFlow())
1155      mem_FlowInstruction.release(insn);
1156   else
1157      mem_Instruction.release(insn);
1158}
1159
1160void Program::releaseValue(Value *value)
1161{
1162   value->~Value();
1163
1164   if (value->asLValue())
1165      mem_LValue.release(value);
1166   else
1167   if (value->asImm())
1168      mem_ImmediateValue.release(value);
1169   else
1170   if (value->asSym())
1171      mem_Symbol.release(value);
1172}
1173
1174
1175} // namespace nv50_ir
1176
1177extern "C" {
1178
1179static void
1180nv50_ir_init_prog_info(struct nv50_ir_prog_info *info)
1181{
1182   if (info->type == PIPE_SHADER_TESS_CTRL || info->type == PIPE_SHADER_TESS_EVAL) {
1183      info->prop.tp.domain = PIPE_PRIM_MAX;
1184      info->prop.tp.outputPrim = PIPE_PRIM_MAX;
1185   }
1186   if (info->type == PIPE_SHADER_GEOMETRY) {
1187      info->prop.gp.instanceCount = 1;
1188      info->prop.gp.maxVertices = 1;
1189   }
1190   if (info->type == PIPE_SHADER_COMPUTE) {
1191      info->prop.cp.numThreads[0] =
1192      info->prop.cp.numThreads[1] =
1193      info->prop.cp.numThreads[2] = 1;
1194   }
1195   info->io.pointSize = 0xff;
1196   info->io.instanceId = 0xff;
1197   info->io.vertexId = 0xff;
1198   info->io.edgeFlagIn = 0xff;
1199   info->io.edgeFlagOut = 0xff;
1200   info->io.fragDepth = 0xff;
1201   info->io.sampleMask = 0xff;
1202   info->io.backFaceColor[0] = info->io.backFaceColor[1] = 0xff;
1203}
1204
/* Compile the shader described by @info and store the result in @info->bin.
 *
 * The pipeline is: build the IR from the NIR or TGSI source, legalize
 * (pre-SSA), convert to SSA, optimize, legalize (SSA), allocate registers,
 * legalize (post-RA), optimize post-RA and finally emit the binary.
 *
 * Returns 0 on success or a negative value on failure:
 *   -1  unsupported program type, target creation failure or unknown
 *       source representation
 *   -2  building the IR from the source failed
 *   -4  register allocation failed
 *   -5  emitting the binary failed
 *
 * Once the program object exists, the bin.maxGPR, bin.code, bin.codeSize
 * and bin.tlsSpace fields of @info are written on both the success and the
 * failure path.
 */
int
nv50_ir_generate_code(struct nv50_ir_prog_info *info)
{
   int ret = 0;

   nv50_ir::Program::Type type;

   nv50_ir_init_prog_info(info);

   /* Map the gallium shader type to the IR program type. */
#define PROG_TYPE_CASE(a, b)                                      \
   case PIPE_SHADER_##a: type = nv50_ir::Program::TYPE_##b; break

   switch (info->type) {
   PROG_TYPE_CASE(VERTEX, VERTEX);
   PROG_TYPE_CASE(TESS_CTRL, TESSELLATION_CONTROL);
   PROG_TYPE_CASE(TESS_EVAL, TESSELLATION_EVAL);
   PROG_TYPE_CASE(GEOMETRY, GEOMETRY);
   PROG_TYPE_CASE(FRAGMENT, FRAGMENT);
   PROG_TYPE_CASE(COMPUTE, COMPUTE);
   default:
      INFO_DBG(info->dbgFlags, VERBOSE, "unsupported program type %u\n", info->type);
      return -1;
   }
   INFO_DBG(info->dbgFlags, VERBOSE, "translating program of type %u\n", type);

   nv50_ir::Target *targ = nv50_ir::Target::create(info->target);
   if (!targ)
      return -1;

   nv50_ir::Program *prog = new nv50_ir::Program(type, targ);
   if (!prog) {
      nv50_ir::Target::destroy(targ);
      return -1;
   }
   prog->driver = info;
   prog->dbgFlags = info->dbgFlags;
   prog->optLevel = info->optLevel;

   /* Front end: translate the source representation into IR. */
   switch (info->bin.sourceRep) {
   case PIPE_SHADER_IR_NIR:
      ret = prog->makeFromNIR(info) ? 0 : -2;
      break;
   case PIPE_SHADER_IR_TGSI:
      ret = prog->makeFromTGSI(info) ? 0 : -2;
      break;
   default:
      ret = -1;
      break;
   }
   if (ret < 0)
      goto out;
   if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
      prog->print();

   targ->parseDriverInfo(info);
   prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_PRE_SSA);

   prog->convertToSSA();

   if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
      prog->print();

   prog->optimizeSSA(info->optLevel);
   prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_SSA);

   if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
      prog->print();

   if (!prog->registerAllocation()) {
      ret = -4;
      goto out;
   }
   prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_POST_RA);

   prog->optimizePostRA(info->optLevel);

   if (!prog->emitBinary(info)) {
      ret = -5;
      goto out;
   }

   /* Common exit: publish the results, then tear down program and target. */
out:
   INFO_DBG(prog->dbgFlags, VERBOSE, "nv50_ir_generate_code: ret = %i\n", ret);

   info->bin.maxGPR = prog->maxGPR;
   info->bin.code = prog->code;
   info->bin.codeSize = prog->binSize;
   info->bin.tlsSpace = prog->tlsSize;

   delete prog;
   nv50_ir::Target::destroy(targ);

   return ret;
}
1299
1300} // extern "C"
1301