1/*
2 * Copyright 2011 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23#include "codegen/nv50_ir.h"
24#include "codegen/nv50_ir_target.h"
25#include "codegen/nv50_ir_driver.h"
26
27extern "C" {
28#include "nouveau_debug.h"
29}
30
31namespace nv50_ir {
32
33Modifier::Modifier(operation op)
34{
35   switch (op) {
36   case OP_NEG: bits = NV50_IR_MOD_NEG; break;
37   case OP_ABS: bits = NV50_IR_MOD_ABS; break;
38   case OP_SAT: bits = NV50_IR_MOD_SAT; break;
39   case OP_NOT: bits = NV50_IR_MOD_NOT; break;
40   default:
41      bits = 0;
42      break;
43   }
44}
45
46Modifier Modifier::operator*(const Modifier m) const
47{
48   unsigned int a, b, c;
49
50   b = m.bits;
51   if (this->bits & NV50_IR_MOD_ABS)
52      b &= ~NV50_IR_MOD_NEG;
53
54   a = (this->bits ^ b)      & (NV50_IR_MOD_NOT | NV50_IR_MOD_NEG);
55   c = (this->bits | m.bits) & (NV50_IR_MOD_ABS | NV50_IR_MOD_SAT);
56
57   return Modifier(a | c);
58}
59
60ValueRef::ValueRef(Value *v) : value(NULL), insn(NULL)
61{
62   indirect[0] = -1;
63   indirect[1] = -1;
64   usedAsPtr = false;
65   set(v);
66}
67
68ValueRef::ValueRef(const ValueRef& ref) : value(NULL), insn(ref.insn)
69{
70   set(ref);
71   usedAsPtr = ref.usedAsPtr;
72}
73
74ValueRef::~ValueRef()
75{
76   this->set(NULL);
77}
78
79bool ValueRef::getImmediate(ImmediateValue &imm) const
80{
81   const ValueRef *src = this;
82   Modifier m;
83   DataType type = src->insn->sType;
84
85   while (src) {
86      if (src->mod) {
87         if (src->insn->sType != type)
88            break;
89         m *= src->mod;
90      }
91      if (src->getFile() == FILE_IMMEDIATE) {
92         imm = *(src->value->asImm());
93         // The immediate's type isn't required to match its use, it's
94         // more of a hint; applying a modifier makes use of that hint.
95         imm.reg.type = type;
96         m.applyTo(imm);
97         return true;
98      }
99
100      Instruction *insn = src->value->getUniqueInsn();
101
102      if (insn && insn->op == OP_MOV) {
103         src = &insn->src(0);
104         if (src->mod)
105            WARN("OP_MOV with modifier encountered !\n");
106      } else {
107         src = NULL;
108      }
109   }
110   return false;
111}
112
113ValueDef::ValueDef(Value *v) : value(NULL), origin(NULL), insn(NULL)
114{
115   set(v);
116}
117
118ValueDef::ValueDef(const ValueDef& def) : value(NULL), origin(NULL), insn(NULL)
119{
120   set(def.get());
121}
122
123ValueDef::~ValueDef()
124{
125   this->set(NULL);
126}
127
128void
129ValueRef::set(const ValueRef &ref)
130{
131   this->set(ref.get());
132   mod = ref.mod;
133   indirect[0] = ref.indirect[0];
134   indirect[1] = ref.indirect[1];
135}
136
137void
138ValueRef::set(Value *refVal)
139{
140   if (value == refVal)
141      return;
142   if (value)
143      value->uses.erase(this);
144   if (refVal)
145      refVal->uses.insert(this);
146
147   value = refVal;
148}
149
150void
151ValueDef::set(Value *defVal)
152{
153   if (value == defVal)
154      return;
155   if (value)
156      value->defs.remove(this);
157   if (defVal)
158      defVal->defs.push_back(this);
159
160   value = defVal;
161}
162
163// Check if we can replace this definition's value by the value in @rep,
164// including the source modifiers, i.e. make sure that all uses support
165// @rep.mod.
166bool
167ValueDef::mayReplace(const ValueRef &rep)
168{
169   if (!rep.mod)
170      return true;
171
172   if (!insn || !insn->bb) // Unbound instruction ?
173      return false;
174
175   const Target *target = insn->bb->getProgram()->getTarget();
176
177   for (Value::UseIterator it = value->uses.begin(); it != value->uses.end();
178        ++it) {
179      Instruction *insn = (*it)->getInsn();
180      int s = -1;
181
182      for (int i = 0; insn->srcExists(i); ++i) {
183         if (insn->src(i).get() == value) {
184            // If there are multiple references to us we'd have to check if the
185            // combination of mods is still supported, but just bail for now.
186            if (&insn->src(i) != (*it))
187               return false;
188            s = i;
189         }
190      }
191      assert(s >= 0); // integrity of uses list
192
193      if (!target->isModSupported(insn, s, rep.mod))
194         return false;
195   }
196   return true;
197}
198
199void
200ValueDef::replace(const ValueRef &repVal, bool doSet)
201{
202   assert(mayReplace(repVal));
203
204   if (value == repVal.get())
205      return;
206
207   while (!value->uses.empty()) {
208      ValueRef *ref = *value->uses.begin();
209      ref->set(repVal.get());
210      ref->mod *= repVal.mod;
211   }
212
213   if (doSet)
214      set(repVal.get());
215}
216
217Value::Value() : id(-1)
218{
219  join = this;
220  memset(&reg, 0, sizeof(reg));
221  reg.size = 4;
222}
223
224LValue::LValue(Function *fn, DataFile file)
225{
226   reg.file = file;
227   reg.size = (file != FILE_PREDICATE) ? 4 : 1;
228   reg.data.id = -1;
229
230   compMask = 0;
231   compound = 0;
232   ssa = 0;
233   fixedReg = 0;
234   noSpill = 0;
235
236   fn->add(this, this->id);
237}
238
239LValue::LValue(Function *fn, LValue *lval)
240{
241   assert(lval);
242
243   reg.file = lval->reg.file;
244   reg.size = lval->reg.size;
245   reg.data.id = -1;
246
247   compMask = 0;
248   compound = 0;
249   ssa = 0;
250   fixedReg = 0;
251   noSpill = 0;
252
253   fn->add(this, this->id);
254}
255
256LValue *
257LValue::clone(ClonePolicy<Function>& pol) const
258{
259   LValue *that = new_LValue(pol.context(), reg.file);
260
261   pol.set<Value>(this, that);
262
263   that->reg.size = this->reg.size;
264   that->reg.type = this->reg.type;
265   that->reg.data = this->reg.data;
266
267   return that;
268}
269
270bool
271LValue::isUniform() const
272{
273   if (defs.size() > 1)
274      return false;
275   Instruction *insn = getInsn();
276   if (!insn)
277      return false;
278   // let's not try too hard here for now ...
279   return !insn->srcExists(1) && insn->getSrc(0)->isUniform();
280}
281
282Symbol::Symbol(Program *prog, DataFile f, ubyte fidx)
283{
284   baseSym = NULL;
285
286   reg.file = f;
287   reg.fileIndex = fidx;
288   reg.data.offset = 0;
289
290   prog->add(this, this->id);
291}
292
293Symbol *
294Symbol::clone(ClonePolicy<Function>& pol) const
295{
296   Program *prog = pol.context()->getProgram();
297
298   Symbol *that = new_Symbol(prog, reg.file, reg.fileIndex);
299
300   pol.set<Value>(this, that);
301
302   that->reg.size = this->reg.size;
303   that->reg.type = this->reg.type;
304   that->reg.data = this->reg.data;
305
306   that->baseSym = this->baseSym;
307
308   return that;
309}
310
311bool
312Symbol::isUniform() const
313{
314   return
315      reg.file != FILE_SYSTEM_VALUE &&
316      reg.file != FILE_MEMORY_LOCAL &&
317      reg.file != FILE_SHADER_INPUT;
318}
319
320ImmediateValue::ImmediateValue(Program *prog, uint32_t uval)
321{
322   memset(&reg, 0, sizeof(reg));
323
324   reg.file = FILE_IMMEDIATE;
325   reg.size = 4;
326   reg.type = TYPE_U32;
327
328   reg.data.u32 = uval;
329
330   prog->add(this, this->id);
331}
332
333ImmediateValue::ImmediateValue(Program *prog, float fval)
334{
335   memset(&reg, 0, sizeof(reg));
336
337   reg.file = FILE_IMMEDIATE;
338   reg.size = 4;
339   reg.type = TYPE_F32;
340
341   reg.data.f32 = fval;
342
343   prog->add(this, this->id);
344}
345
346ImmediateValue::ImmediateValue(Program *prog, double dval)
347{
348   memset(&reg, 0, sizeof(reg));
349
350   reg.file = FILE_IMMEDIATE;
351   reg.size = 8;
352   reg.type = TYPE_F64;
353
354   reg.data.f64 = dval;
355
356   prog->add(this, this->id);
357}
358
359ImmediateValue::ImmediateValue(const ImmediateValue *proto, DataType ty)
360{
361   reg = proto->reg;
362
363   reg.type = ty;
364   reg.size = typeSizeof(ty);
365}
366
367ImmediateValue *
368ImmediateValue::clone(ClonePolicy<Function>& pol) const
369{
370   Program *prog = pol.context()->getProgram();
371   ImmediateValue *that = new_ImmediateValue(prog, 0u);
372
373   pol.set<Value>(this, that);
374
375   that->reg.size = this->reg.size;
376   that->reg.type = this->reg.type;
377   that->reg.data = this->reg.data;
378
379   return that;
380}
381
382bool
383ImmediateValue::isInteger(const int i) const
384{
385   switch (reg.type) {
386   case TYPE_S8:
387      return reg.data.s8 == i;
388   case TYPE_U8:
389      return reg.data.u8 == i;
390   case TYPE_S16:
391      return reg.data.s16 == i;
392   case TYPE_U16:
393      return reg.data.u16 == i;
394   case TYPE_S32:
395   case TYPE_U32:
396      return reg.data.s32 == i; // as if ...
397   case TYPE_S64:
398   case TYPE_U64:
399      return reg.data.s64 == i; // as if ...
400   case TYPE_F32:
401      return reg.data.f32 == static_cast<float>(i);
402   case TYPE_F64:
403      return reg.data.f64 == static_cast<double>(i);
404   default:
405      return false;
406   }
407}
408
409bool
410ImmediateValue::isNegative() const
411{
412   switch (reg.type) {
413   case TYPE_S8:  return reg.data.s8 < 0;
414   case TYPE_S16: return reg.data.s16 < 0;
415   case TYPE_S32:
416   case TYPE_U32: return reg.data.s32 < 0;
417   case TYPE_F32: return reg.data.u32 & (1 << 31);
418   case TYPE_F64: return reg.data.u64 & (1ULL << 63);
419   default:
420      return false;
421   }
422}
423
424bool
425ImmediateValue::isPow2() const
426{
427   if (reg.type == TYPE_U64 || reg.type == TYPE_S64)
428      return util_is_power_of_two_or_zero64(reg.data.u64);
429   else
430      return util_is_power_of_two_or_zero(reg.data.u32);
431}
432
433void
434ImmediateValue::applyLog2()
435{
436   switch (reg.type) {
437   case TYPE_S8:
438   case TYPE_S16:
439   case TYPE_S32:
440      assert(!this->isNegative());
441      FALLTHROUGH;
442   case TYPE_U8:
443   case TYPE_U16:
444   case TYPE_U32:
445      reg.data.u32 = util_logbase2(reg.data.u32);
446      break;
447   case TYPE_S64:
448      assert(!this->isNegative());
449      FALLTHROUGH;
450   case TYPE_U64:
451      reg.data.u64 = util_logbase2_64(reg.data.u64);
452      break;
453   case TYPE_F32:
454      reg.data.f32 = log2f(reg.data.f32);
455      break;
456   case TYPE_F64:
457      reg.data.f64 = log2(reg.data.f64);
458      break;
459   default:
460      assert(0);
461      break;
462   }
463}
464
465bool
466ImmediateValue::compare(CondCode cc, float fval) const
467{
468   if (reg.type != TYPE_F32)
469      ERROR("immediate value is not of type f32");
470
471   switch (static_cast<CondCode>(cc & 7)) {
472   case CC_TR: return true;
473   case CC_FL: return false;
474   case CC_LT: return reg.data.f32 <  fval;
475   case CC_LE: return reg.data.f32 <= fval;
476   case CC_GT: return reg.data.f32 >  fval;
477   case CC_GE: return reg.data.f32 >= fval;
478   case CC_EQ: return reg.data.f32 == fval;
479   case CC_NE: return reg.data.f32 != fval;
480   default:
481      assert(0);
482      return false;
483   }
484}
485
486ImmediateValue&
487ImmediateValue::operator=(const ImmediateValue &that)
488{
489   this->reg = that.reg;
490   return (*this);
491}
492
493bool
494Value::interfers(const Value *that) const
495{
496   uint32_t idA, idB;
497
498   if (that->reg.file != reg.file || that->reg.fileIndex != reg.fileIndex)
499      return false;
500   if (this->asImm())
501      return false;
502
503   if (this->asSym()) {
504      idA = this->join->reg.data.offset;
505      idB = that->join->reg.data.offset;
506   } else {
507      idA = this->join->reg.data.id * MIN2(this->reg.size, 4);
508      idB = that->join->reg.data.id * MIN2(that->reg.size, 4);
509   }
510
511   if (idA < idB)
512      return (idA + this->reg.size > idB);
513   else
514   if (idA > idB)
515      return (idB + that->reg.size > idA);
516   else
517      return (idA == idB);
518}
519
520bool
521Value::equals(const Value *that, bool strict) const
522{
523   if (strict)
524      return this == that;
525
526   if (that->reg.file != reg.file || that->reg.fileIndex != reg.fileIndex)
527      return false;
528   if (that->reg.size != this->reg.size)
529      return false;
530
531   if (that->reg.data.id != this->reg.data.id)
532      return false;
533
534   return true;
535}
536
537bool
538ImmediateValue::equals(const Value *that, bool strict) const
539{
540   const ImmediateValue *imm = that->asImm();
541   if (!imm)
542      return false;
543   return reg.data.u64 == imm->reg.data.u64;
544}
545
546bool
547Symbol::equals(const Value *that, bool strict) const
548{
549   if (reg.file != that->reg.file || reg.fileIndex != that->reg.fileIndex)
550      return false;
551   assert(that->asSym());
552
553   if (this->baseSym != that->asSym()->baseSym)
554      return false;
555
556   if (reg.file == FILE_SYSTEM_VALUE)
557      return (this->reg.data.sv.sv    == that->reg.data.sv.sv &&
558              this->reg.data.sv.index == that->reg.data.sv.index);
559   return this->reg.data.offset == that->reg.data.offset;
560}
561
562void Instruction::init()
563{
564   next = prev = 0;
565   serial = 0;
566
567   cc = CC_ALWAYS;
568   rnd = ROUND_N;
569   cache = CACHE_CA;
570   subOp = 0;
571
572   saturate = 0;
573   join = 0;
574   exit = 0;
575   terminator = 0;
576   ftz = 0;
577   dnz = 0;
578   perPatch = 0;
579   fixed = 0;
580   encSize = 0;
581   ipa = 0;
582   mask = 0;
583   precise = 0;
584
585   lanes = 0xf;
586
587   postFactor = 0;
588
589   predSrc = -1;
590   flagsDef = -1;
591   flagsSrc = -1;
592
593   sched = 0;
594   bb = NULL;
595}
596
597Instruction::Instruction()
598{
599   init();
600
601   op = OP_NOP;
602   dType = sType = TYPE_F32;
603
604   id = -1;
605}
606
607Instruction::Instruction(Function *fn, operation opr, DataType ty)
608{
609   init();
610
611   op = opr;
612   dType = sType = ty;
613
614   fn->add(this, id);
615}
616
617Instruction::~Instruction()
618{
619   if (bb) {
620      Function *fn = bb->getFunction();
621      bb->remove(this);
622      fn->allInsns.remove(id);
623   }
624
625   for (int s = 0; srcExists(s); ++s)
626      setSrc(s, NULL);
627   // must unlink defs too since the list pointers will get deallocated
628   for (int d = 0; defExists(d); ++d)
629      setDef(d, NULL);
630}
631
632void
633Instruction::setDef(int i, Value *val)
634{
635   int size = defs.size();
636   if (i >= size) {
637      defs.resize(i + 1);
638      while (size <= i)
639         defs[size++].setInsn(this);
640   }
641   defs[i].set(val);
642}
643
644void
645Instruction::setSrc(int s, Value *val)
646{
647   int size = srcs.size();
648   if (s >= size) {
649      srcs.resize(s + 1);
650      while (size <= s)
651         srcs[size++].setInsn(this);
652   }
653   srcs[s].set(val);
654}
655
656void
657Instruction::setSrc(int s, const ValueRef& ref)
658{
659   setSrc(s, ref.get());
660   srcs[s].mod = ref.mod;
661}
662
663void
664Instruction::swapSources(int a, int b)
665{
666   Value *value = srcs[a].get();
667   Modifier m = srcs[a].mod;
668
669   setSrc(a, srcs[b]);
670
671   srcs[b].set(value);
672   srcs[b].mod = m;
673}
674
// Helper for Instruction::moveSources(): update a stored source slot
// @index for a move of sources [@s, ...) by @delta.
//  - Slots at or above @s shift along by @delta.
//  - For a negative @delta, slots in [@s + @delta, @s) are the ones being
//    erased, so an index pointing there is reset to -1.
//  - Anything else is left unchanged.
static inline void moveSourcesAdjustIndex(int8_t &index, int s, int delta)
{
   if (index >= s) {
      index += delta;
      return;
   }

   const bool erased = (delta < 0) && (index >= (s + delta));
   if (erased)
      index = -1;
}
683
// Moves sources [@s,last_source] by @delta.
// If @delta < 0, sources [@s - abs(@delta), @s) are erased.
//
// Before the sources themselves are moved, every stored slot index is fixed
// up: the per-source indirect[] entries, predSrc, flagsSrc and, for texture
// instructions, the r/s indirect source indices. Indices pointing at erased
// slots become -1.
void
Instruction::moveSources(const int s, const int delta)
{
   if (delta == 0)
      return;
   assert(s + delta >= 0);

   int k;

   // Adjust the indirect indices of every existing source; when the loop
   // ends, k is the index of the first non-existing source slot.
   for (k = 0; srcExists(k); ++k) {
      for (int i = 0; i < 2; ++i)
         moveSourcesAdjustIndex(src(k).indirect[i], s, delta);
   }
   moveSourcesAdjustIndex(predSrc, s, delta);
   moveSourcesAdjustIndex(flagsSrc, s, delta);
   if (asTex()) {
      TexInstruction *tex = asTex();
      moveSourcesAdjustIndex(tex->tex.rIndirectSrc, s, delta);
      moveSourcesAdjustIndex(tex->tex.sIndirectSrc, s, delta);
   }

   if (delta > 0) {
      // Moving up: copy from the last source backwards so that no source is
      // overwritten before it has been moved. The slots [s, s + delta) are
      // not cleared here and keep their previous contents.
      --k;
      for (int p = k + delta; k >= s; --k, --p)
         setSrc(p, src(k));
   } else {
      // Moving down: copy front to back, then clear the slots at the end
      // that are no longer occupied.
      int p;
      for (p = s; p < k; ++p)
         setSrc(p + delta, src(p));
      for (; (p + delta) < k; ++p)
         setSrc(p + delta, NULL);
   }
}
719
720void
721Instruction::takeExtraSources(int s, Value *values[3])
722{
723   values[0] = getIndirect(s, 0);
724   if (values[0])
725      setIndirect(s, 0, NULL);
726
727   values[1] = getIndirect(s, 1);
728   if (values[1])
729      setIndirect(s, 1, NULL);
730
731   values[2] = getPredicate();
732   if (values[2])
733      setPredicate(cc, NULL);
734}
735
736void
737Instruction::putExtraSources(int s, Value *values[3])
738{
739   if (values[0])
740      setIndirect(s, 0, values[0]);
741   if (values[1])
742      setIndirect(s, 1, values[1]);
743   if (values[2])
744      setPredicate(cc, values[2]);
745}
746
747Instruction *
748Instruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
749{
750   if (!i)
751      i = new_Instruction(pol.context(), op, dType);
752#if !defined(NDEBUG) && defined(__cpp_rtti)
753   assert(typeid(*i) == typeid(*this));
754#endif
755
756   pol.set<Instruction>(this, i);
757
758   i->sType = sType;
759
760   i->rnd = rnd;
761   i->cache = cache;
762   i->subOp = subOp;
763
764   i->saturate = saturate;
765   i->join = join;
766   i->exit = exit;
767   i->mask = mask;
768   i->ftz = ftz;
769   i->dnz = dnz;
770   i->ipa = ipa;
771   i->lanes = lanes;
772   i->perPatch = perPatch;
773
774   i->postFactor = postFactor;
775
776   for (int d = 0; defExists(d); ++d)
777      i->setDef(d, pol.get(getDef(d)));
778
779   for (int s = 0; srcExists(s); ++s) {
780      i->setSrc(s, pol.get(getSrc(s)));
781      i->src(s).mod = src(s).mod;
782   }
783
784   i->cc = cc;
785   i->predSrc = predSrc;
786   i->flagsDef = flagsDef;
787   i->flagsSrc = flagsSrc;
788
789   return i;
790}
791
792unsigned int
793Instruction::defCount(unsigned int mask, bool singleFile) const
794{
795   unsigned int i, n;
796
797   if (singleFile) {
798      unsigned int d = ffs(mask);
799      if (!d)
800         return 0;
801      for (i = d--; defExists(i); ++i)
802         if (getDef(i)->reg.file != getDef(d)->reg.file)
803            mask &= ~(1 << i);
804   }
805
806   for (n = 0, i = 0; this->defExists(i); ++i, mask >>= 1)
807      n += mask & 1;
808   return n;
809}
810
811unsigned int
812Instruction::srcCount(unsigned int mask, bool singleFile) const
813{
814   unsigned int i, n;
815
816   if (singleFile) {
817      unsigned int s = ffs(mask);
818      if (!s)
819         return 0;
820      for (i = s--; srcExists(i); ++i)
821         if (getSrc(i)->reg.file != getSrc(s)->reg.file)
822            mask &= ~(1 << i);
823   }
824
825   for (n = 0, i = 0; this->srcExists(i); ++i, mask >>= 1)
826      n += mask & 1;
827   return n;
828}
829
830bool
831Instruction::setIndirect(int s, int dim, Value *value)
832{
833   assert(this->srcExists(s));
834
835   int p = srcs[s].indirect[dim];
836   if (p < 0) {
837      if (!value)
838         return true;
839      p = srcs.size();
840      while (p > 0 && !srcExists(p - 1))
841         --p;
842   }
843   setSrc(p, value);
844   srcs[p].usedAsPtr = (value != 0);
845   srcs[s].indirect[dim] = value ? p : -1;
846   return true;
847}
848
849bool
850Instruction::setPredicate(CondCode ccode, Value *value)
851{
852   cc = ccode;
853
854   if (!value) {
855      if (predSrc >= 0) {
856         srcs[predSrc].set(NULL);
857         predSrc = -1;
858      }
859      return true;
860   }
861
862   if (predSrc < 0) {
863      predSrc = srcs.size();
864      while (predSrc > 0 && !srcExists(predSrc - 1))
865         --predSrc;
866   }
867
868   setSrc(predSrc, value);
869   return true;
870}
871
872bool
873Instruction::writesPredicate() const
874{
875   for (int d = 0; defExists(d); ++d)
876      if (getDef(d)->inFile(FILE_PREDICATE) || getDef(d)->inFile(FILE_FLAGS))
877         return true;
878   return false;
879}
880
881bool
882Instruction::canCommuteDefSrc(const Instruction *i) const
883{
884   for (int d = 0; defExists(d); ++d)
885      for (int s = 0; i->srcExists(s); ++s)
886         if (getDef(d)->interfers(i->getSrc(s)))
887            return false;
888   return true;
889}
890
891bool
892Instruction::canCommuteDefDef(const Instruction *i) const
893{
894   for (int d = 0; defExists(d); ++d)
895      for (int c = 0; i->defExists(c); ++c)
896         if (getDef(d)->interfers(i->getDef(c)))
897            return false;
898   return true;
899}
900
901bool
902Instruction::isCommutationLegal(const Instruction *i) const
903{
904   return canCommuteDefDef(i) &&
905      canCommuteDefSrc(i) &&
906      i->canCommuteDefSrc(this);
907}
908
909TexInstruction::TexInstruction(Function *fn, operation op)
910   : Instruction(fn, op, TYPE_F32), tex()
911{
912   tex.rIndirectSrc = -1;
913   tex.sIndirectSrc = -1;
914
915   if (op == OP_TXF)
916      sType = TYPE_U32;
917}
918
919TexInstruction::~TexInstruction()
920{
921   for (int c = 0; c < 3; ++c) {
922      dPdx[c].set(NULL);
923      dPdy[c].set(NULL);
924   }
925   for (int n = 0; n < 4; ++n)
926      for (int c = 0; c < 3; ++c)
927         offset[n][c].set(NULL);
928}
929
930TexInstruction *
931TexInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
932{
933   TexInstruction *tex = (i ? static_cast<TexInstruction *>(i) :
934                          new_TexInstruction(pol.context(), op));
935
936   Instruction::clone(pol, tex);
937
938   tex->tex = this->tex;
939
940   if (op == OP_TXD) {
941      for (unsigned int c = 0; c < tex->tex.target.getDim(); ++c) {
942         tex->dPdx[c].set(dPdx[c]);
943         tex->dPdy[c].set(dPdy[c]);
944      }
945   }
946
947   for (int n = 0; n < tex->tex.useOffsets; ++n)
948      for (int c = 0; c < 3; ++c)
949         tex->offset[n][c].set(offset[n][c]);
950
951   return tex;
952}
953
// Per-texture-target description table, one row per target.
// NOTE(review): the Desc declaration is not visible here; judging by the
// entries the columns are presumably
//    { name, dim, argc, isArray, isCube, isShadow }
// and the row order presumably has to match the texture target enum -
// confirm against the header before adding or reordering rows.
const struct TexInstruction::Target::Desc TexInstruction::Target::descTable[] =
{
   { "1D",                1, 1, false, false, false },
   { "2D",                2, 2, false, false, false },
   { "2D_MS",             2, 3, false, false, false },
   { "3D",                3, 3, false, false, false },
   { "CUBE",              2, 3, false, true,  false },
   { "1D_SHADOW",         1, 1, false, false, true  },
   { "2D_SHADOW",         2, 2, false, false, true  },
   { "CUBE_SHADOW",       2, 3, false, true,  true  },
   { "1D_ARRAY",          1, 2, true,  false, false },
   { "2D_ARRAY",          2, 3, true,  false, false },
   { "2D_MS_ARRAY",       2, 4, true,  false, false },
   { "CUBE_ARRAY",        2, 4, true,  true,  false },
   { "1D_ARRAY_SHADOW",   1, 2, true,  false, true  },
   { "2D_ARRAY_SHADOW",   2, 3, true,  false, true  },
   { "RECT",              2, 2, false, false, false },
   { "RECT_SHADOW",       2, 2, false, false, true  },
   { "CUBE_ARRAY_SHADOW", 2, 4, true,  true,  true  },
   { "BUFFER",            1, 1, false, false, false },
};
975
// Image format description table. translateImgFormat() indexes it with the
// nv50_ir::FMT_* constants, so the row order has to match those constants.
// NOTE(review): the ImgFormatDesc declaration is not visible here; judging
// by the entries the columns are presumably
//    { name, number of components, bits per component[4], type, bgra }
// with the last column defaulting to false when omitted - confirm against
// the header.
const struct TexInstruction::ImgFormatDesc TexInstruction::formatTable[] =
{
   { "NONE",         0, {  0,  0,  0,  0 },  UINT },

   // floating point formats
   { "RGBA32F",      4, { 32, 32, 32, 32 }, FLOAT },
   { "RGBA16F",      4, { 16, 16, 16, 16 }, FLOAT },
   { "RG32F",        2, { 32, 32,  0,  0 }, FLOAT },
   { "RG16F",        2, { 16, 16,  0,  0 }, FLOAT },
   { "R11G11B10F",   3, { 11, 11, 10,  0 }, FLOAT },
   { "R32F",         1, { 32,  0,  0,  0 }, FLOAT },
   { "R16F",         1, { 16,  0,  0,  0 }, FLOAT },

   // unsigned integer formats
   { "RGBA32UI",     4, { 32, 32, 32, 32 },  UINT },
   { "RGBA16UI",     4, { 16, 16, 16, 16 },  UINT },
   { "RGB10A2UI",    4, { 10, 10, 10,  2 },  UINT },
   { "RGBA8UI",      4, {  8,  8,  8,  8 },  UINT },
   { "RG32UI",       2, { 32, 32,  0,  0 },  UINT },
   { "RG16UI",       2, { 16, 16,  0,  0 },  UINT },
   { "RG8UI",        2, {  8,  8,  0,  0 },  UINT },
   { "R32UI",        1, { 32,  0,  0,  0 },  UINT },
   { "R16UI",        1, { 16,  0,  0,  0 },  UINT },
   { "R8UI",         1, {  8,  0,  0,  0 },  UINT },

   // signed integer formats
   { "RGBA32I",      4, { 32, 32, 32, 32 },  SINT },
   { "RGBA16I",      4, { 16, 16, 16, 16 },  SINT },
   { "RGBA8I",       4, {  8,  8,  8,  8 },  SINT },
   { "RG32I",        2, { 32, 32,  0,  0 },  SINT },
   { "RG16I",        2, { 16, 16,  0,  0 },  SINT },
   { "RG8I",         2, {  8,  8,  0,  0 },  SINT },
   { "R32I",         1, { 32,  0,  0,  0 },  SINT },
   { "R16I",         1, { 16,  0,  0,  0 },  SINT },
   { "R8I",          1, {  8,  0,  0,  0 },  SINT },

   // unsigned normalized formats
   { "RGBA16",       4, { 16, 16, 16, 16 }, UNORM },
   { "RGB10A2",      4, { 10, 10, 10,  2 }, UNORM },
   { "RGBA8",        4, {  8,  8,  8,  8 }, UNORM },
   { "RG16",         2, { 16, 16,  0,  0 }, UNORM },
   { "RG8",          2, {  8,  8,  0,  0 }, UNORM },
   { "R16",          1, { 16,  0,  0,  0 }, UNORM },
   { "R8",           1, {  8,  0,  0,  0 }, UNORM },

   // signed normalized formats
   { "RGBA16_SNORM", 4, { 16, 16, 16, 16 }, SNORM },
   { "RGBA8_SNORM",  4, {  8,  8,  8,  8 }, SNORM },
   { "RG16_SNORM",   2, { 16, 16,  0,  0 }, SNORM },
   { "RG8_SNORM",    2, {  8,  8,  0,  0 }, SNORM },
   { "R16_SNORM",    1, { 16,  0,  0,  0 }, SNORM },
   { "R8_SNORM",     1, {  8,  0,  0,  0 }, SNORM },

   // the only entry that sets the trailing flag
   { "BGRA8",        4, {  8,  8,  8,  8 }, UNORM, true },
};
1026
// Map a gallium pipe_format to the matching entry of formatTable.
// Formats without an entry trigger an assertion; in builds where assertions
// are compiled out the "NONE" entry is returned instead.
const struct TexInstruction::ImgFormatDesc *
TexInstruction::translateImgFormat(enum pipe_format format)
{

// FMT_CASE(a, b): "case PIPE_FORMAT_a" returns the formatTable entry at
// index nv50_ir::FMT_b. The macro is not #undef'd after the function.
#define FMT_CASE(a, b) \
  case PIPE_FORMAT_ ## a: return &formatTable[nv50_ir::FMT_ ## b]

   switch (format) {
   FMT_CASE(NONE, NONE);

   // floating point
   FMT_CASE(R32G32B32A32_FLOAT, RGBA32F);
   FMT_CASE(R16G16B16A16_FLOAT, RGBA16F);
   FMT_CASE(R32G32_FLOAT, RG32F);
   FMT_CASE(R16G16_FLOAT, RG16F);
   FMT_CASE(R11G11B10_FLOAT, R11G11B10F);
   FMT_CASE(R32_FLOAT, R32F);
   FMT_CASE(R16_FLOAT, R16F);

   // unsigned integer
   FMT_CASE(R32G32B32A32_UINT, RGBA32UI);
   FMT_CASE(R16G16B16A16_UINT, RGBA16UI);
   FMT_CASE(R10G10B10A2_UINT, RGB10A2UI);
   FMT_CASE(R8G8B8A8_UINT, RGBA8UI);
   FMT_CASE(R32G32_UINT, RG32UI);
   FMT_CASE(R16G16_UINT, RG16UI);
   FMT_CASE(R8G8_UINT, RG8UI);
   FMT_CASE(R32_UINT, R32UI);
   FMT_CASE(R16_UINT, R16UI);
   FMT_CASE(R8_UINT, R8UI);

   // signed integer
   FMT_CASE(R32G32B32A32_SINT, RGBA32I);
   FMT_CASE(R16G16B16A16_SINT, RGBA16I);
   FMT_CASE(R8G8B8A8_SINT, RGBA8I);
   FMT_CASE(R32G32_SINT, RG32I);
   FMT_CASE(R16G16_SINT, RG16I);
   FMT_CASE(R8G8_SINT, RG8I);
   FMT_CASE(R32_SINT, R32I);
   FMT_CASE(R16_SINT, R16I);
   FMT_CASE(R8_SINT, R8I);

   // unsigned normalized
   FMT_CASE(R16G16B16A16_UNORM, RGBA16);
   FMT_CASE(R10G10B10A2_UNORM, RGB10A2);
   FMT_CASE(R8G8B8A8_UNORM, RGBA8);
   FMT_CASE(R16G16_UNORM, RG16);
   FMT_CASE(R8G8_UNORM, RG8);
   FMT_CASE(R16_UNORM, R16);
   FMT_CASE(R8_UNORM, R8);

   // signed normalized
   FMT_CASE(R16G16B16A16_SNORM, RGBA16_SNORM);
   FMT_CASE(R8G8B8A8_SNORM, RGBA8_SNORM);
   FMT_CASE(R16G16_SNORM, RG16_SNORM);
   FMT_CASE(R8G8_SNORM, RG8_SNORM);
   FMT_CASE(R16_SNORM, R16_SNORM);
   FMT_CASE(R8_SNORM, R8_SNORM);

   FMT_CASE(B8G8R8A8_UNORM, BGRA8);

   default:
      // No table entry for this format.
      assert(!"Unexpected format");
      return &formatTable[nv50_ir::FMT_NONE];
   }
}
1088
1089void
1090TexInstruction::setIndirectR(Value *v)
1091{
1092   int p = ((tex.rIndirectSrc < 0) && v) ? srcs.size() : tex.rIndirectSrc;
1093   if (p >= 0) {
1094      tex.rIndirectSrc = p;
1095      setSrc(p, v);
1096      srcs[p].usedAsPtr = !!v;
1097   }
1098}
1099
1100void
1101TexInstruction::setIndirectS(Value *v)
1102{
1103   int p = ((tex.sIndirectSrc < 0) && v) ? srcs.size() : tex.sIndirectSrc;
1104   if (p >= 0) {
1105      tex.sIndirectSrc = p;
1106      setSrc(p, v);
1107      srcs[p].usedAsPtr = !!v;
1108   }
1109}
1110
1111CmpInstruction::CmpInstruction(Function *fn, operation op)
1112   : Instruction(fn, op, TYPE_F32)
1113{
1114   setCond = CC_ALWAYS;
1115}
1116
1117CmpInstruction *
1118CmpInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
1119{
1120   CmpInstruction *cmp = (i ? static_cast<CmpInstruction *>(i) :
1121                          new_CmpInstruction(pol.context(), op));
1122   cmp->dType = dType;
1123   Instruction::clone(pol, cmp);
1124   cmp->setCond = setCond;
1125   return cmp;
1126}
1127
1128FlowInstruction::FlowInstruction(Function *fn, operation op, void *targ)
1129   : Instruction(fn, op, TYPE_NONE)
1130{
1131   if (op == OP_CALL)
1132      target.fn = reinterpret_cast<Function *>(targ);
1133   else
1134      target.bb = reinterpret_cast<BasicBlock *>(targ);
1135
1136   if (op == OP_BRA ||
1137       op == OP_CONT || op == OP_BREAK ||
1138       op == OP_RET || op == OP_EXIT)
1139      terminator = 1;
1140   else
1141   if (op == OP_JOIN)
1142      terminator = targ ? 1 : 0;
1143
1144   allWarp = absolute = limit = builtin = indirect = 0;
1145}
1146
1147FlowInstruction *
1148FlowInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
1149{
1150   FlowInstruction *flow = (i ? static_cast<FlowInstruction *>(i) :
1151                            new_FlowInstruction(pol.context(), op, NULL));
1152
1153   Instruction::clone(pol, flow);
1154   flow->allWarp = allWarp;
1155   flow->absolute = absolute;
1156   flow->limit = limit;
1157   flow->builtin = builtin;
1158
1159   if (builtin)
1160      flow->target.builtin = target.builtin;
1161   else
1162   if (op == OP_CALL)
1163      flow->target.fn = target.fn;
1164   else
1165   if (target.bb)
1166      flow->target.bb = pol.get<BasicBlock>(target.bb);
1167
1168   return flow;
1169}
1170
1171Program::Program(Type type, Target *arch)
1172   : progType(type),
1173     target(arch),
1174     tlsSize(0),
1175     mem_Instruction(sizeof(Instruction), 6),
1176     mem_CmpInstruction(sizeof(CmpInstruction), 4),
1177     mem_TexInstruction(sizeof(TexInstruction), 4),
1178     mem_FlowInstruction(sizeof(FlowInstruction), 4),
1179     mem_LValue(sizeof(LValue), 8),
1180     mem_Symbol(sizeof(Symbol), 7),
1181     mem_ImmediateValue(sizeof(ImmediateValue), 7),
1182     driver(NULL),
1183     driver_out(NULL)
1184{
1185   code = NULL;
1186   binSize = 0;
1187
1188   maxGPR = -1;
1189   fp64 = false;
1190   persampleInvocation = false;
1191
1192   main = new Function(this, "MAIN", ~0);
1193   calls.insert(&main->call);
1194
1195   dbgFlags = 0;
1196   optLevel = 0;
1197
1198   targetPriv = NULL;
1199}
1200
1201Program::~Program()
1202{
1203   for (ArrayList::Iterator it = allFuncs.iterator(); !it.end(); it.next())
1204      delete reinterpret_cast<Function *>(it.get());
1205
1206   for (ArrayList::Iterator it = allRValues.iterator(); !it.end(); it.next())
1207      releaseValue(reinterpret_cast<Value *>(it.get()));
1208}
1209
1210void Program::releaseInstruction(Instruction *insn)
1211{
1212   // TODO: make this not suck so much
1213
1214   insn->~Instruction();
1215
1216   if (insn->asCmp())
1217      mem_CmpInstruction.release(insn);
1218   else
1219   if (insn->asTex())
1220      mem_TexInstruction.release(insn);
1221   else
1222   if (insn->asFlow())
1223      mem_FlowInstruction.release(insn);
1224   else
1225      mem_Instruction.release(insn);
1226}
1227
1228void Program::releaseValue(Value *value)
1229{
1230   value->~Value();
1231
1232   if (value->asLValue())
1233      mem_LValue.release(value);
1234   else
1235   if (value->asImm())
1236      mem_ImmediateValue.release(value);
1237   else
1238   if (value->asSym())
1239      mem_Symbol.release(value);
1240}
1241
1242
1243} // namespace nv50_ir
1244
1245extern "C" {
1246
1247static void
1248nv50_ir_init_prog_info(struct nv50_ir_prog_info *info,
1249                       struct nv50_ir_prog_info_out *info_out)
1250{
1251   info_out->target = info->target;
1252   info_out->type = info->type;
1253   if (info->type == PIPE_SHADER_TESS_CTRL || info->type == PIPE_SHADER_TESS_EVAL) {
1254      info_out->prop.tp.domain = PIPE_PRIM_MAX;
1255      info_out->prop.tp.outputPrim = PIPE_PRIM_MAX;
1256   }
1257   if (info->type == PIPE_SHADER_GEOMETRY) {
1258      info_out->prop.gp.instanceCount = 1;
1259      info_out->prop.gp.maxVertices = 1;
1260   }
1261   if (info->type == PIPE_SHADER_COMPUTE) {
1262      info->prop.cp.numThreads[0] =
1263      info->prop.cp.numThreads[1] =
1264      info->prop.cp.numThreads[2] = 1;
1265   }
1266   info_out->bin.smemSize = info->bin.smemSize;
1267   info_out->io.genUserClip = info->io.genUserClip;
1268   info_out->io.instanceId = 0xff;
1269   info_out->io.vertexId = 0xff;
1270   info_out->io.edgeFlagIn = 0xff;
1271   info_out->io.edgeFlagOut = 0xff;
1272   info_out->io.fragDepth = 0xff;
1273   info_out->io.sampleMask = 0xff;
1274}
1275
1276int
1277nv50_ir_generate_code(struct nv50_ir_prog_info *info,
1278                      struct nv50_ir_prog_info_out *info_out)
1279{
1280   int ret = 0;
1281
1282   nv50_ir::Program::Type type;
1283
1284   nv50_ir_init_prog_info(info, info_out);
1285
1286#define PROG_TYPE_CASE(a, b)                                      \
1287   case PIPE_SHADER_##a: type = nv50_ir::Program::TYPE_##b; break
1288
1289   switch (info->type) {
1290   PROG_TYPE_CASE(VERTEX, VERTEX);
1291   PROG_TYPE_CASE(TESS_CTRL, TESSELLATION_CONTROL);
1292   PROG_TYPE_CASE(TESS_EVAL, TESSELLATION_EVAL);
1293   PROG_TYPE_CASE(GEOMETRY, GEOMETRY);
1294   PROG_TYPE_CASE(FRAGMENT, FRAGMENT);
1295   PROG_TYPE_CASE(COMPUTE, COMPUTE);
1296   default:
1297      INFO_DBG(info->dbgFlags, VERBOSE, "unsupported program type %u\n", info->type);
1298      return -1;
1299   }
1300   INFO_DBG(info->dbgFlags, VERBOSE, "translating program of type %u\n", type);
1301
1302   nv50_ir::Target *targ = nv50_ir::Target::create(info->target);
1303   if (!targ)
1304      return -1;
1305
1306   nv50_ir::Program *prog = new nv50_ir::Program(type, targ);
1307   if (!prog) {
1308      nv50_ir::Target::destroy(targ);
1309      return -1;
1310   }
1311   prog->driver = info;
1312   prog->driver_out = info_out;
1313   prog->dbgFlags = info->dbgFlags;
1314   prog->optLevel = info->optLevel;
1315
1316   switch (info->bin.sourceRep) {
1317   case PIPE_SHADER_IR_NIR:
1318      ret = prog->makeFromNIR(info, info_out) ? 0 : -2;
1319      break;
1320   case PIPE_SHADER_IR_TGSI:
1321      ret = prog->makeFromTGSI(info, info_out) ? 0 : -2;
1322      break;
1323   default:
1324      ret = -1;
1325      break;
1326   }
1327   if (ret < 0)
1328      goto out;
1329   if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
1330      prog->print();
1331
1332   targ->parseDriverInfo(info, info_out);
1333   prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_PRE_SSA);
1334
1335   prog->convertToSSA();
1336
1337   if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
1338      prog->print();
1339
1340   prog->optimizeSSA(info->optLevel);
1341   prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_SSA);
1342
1343   if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
1344      prog->print();
1345
1346   if (!prog->registerAllocation()) {
1347      ret = -4;
1348      goto out;
1349   }
1350   prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_POST_RA);
1351
1352   prog->optimizePostRA(info->optLevel);
1353
1354   if (!prog->emitBinary(info_out)) {
1355      ret = -5;
1356      goto out;
1357   }
1358
1359out:
1360   INFO_DBG(prog->dbgFlags, VERBOSE, "nv50_ir_generate_code: ret = %i\n", ret);
1361
1362   info_out->bin.maxGPR = prog->maxGPR;
1363   info_out->bin.code = prog->code;
1364   info_out->bin.codeSize = prog->binSize;
1365   info_out->bin.tlsSpace = prog->tlsSize;
1366
1367   delete prog;
1368   nv50_ir::Target::destroy(targ);
1369
1370   return ret;
1371}
1372
1373} // extern "C"
1374