1/*
2 * Copyright 2011 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23#include "codegen/nv50_ir.h"
24#include "codegen/nv50_ir_target.h"
25
26namespace nv50_ir {
27
// Number of source operands of each operation.
//
// The table is positional: the trailing comment on each row names, in order,
// the opcodes the values on that row are meant for (presumably the table is
// indexed by the operation enum value; the enum itself is not defined in this
// file). Target::create() statically asserts that the table has exactly
// OP_LAST + 1 entries, so an entry must be added here, at the matching
// position, whenever an opcode is added.
const uint8_t Target::operationSrcNr[] =
{
   0, 0,                   // NOP, PHI
   0, 0, 0, 0,             // UNION, SPLIT, MERGE, CONSTRAINT
   1, 1, 2,                // MOV, LOAD, STORE
   2, 2, 2, 2, 2, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD
   3, 3,                   // SHLADD, XMAD
   1, 1, 1,                // ABS, NEG, NOT
   2, 2, 2, 2, 2,          // AND, OR, XOR, SHL, SHR
   2, 2, 1,                // MAX, MIN, SAT
   1, 1, 1, 1,             // CEIL, FLOOR, TRUNC, CVT
   3, 3, 3, 2, 3, 3,       // SET_AND, SET_OR, SET_XOR, SET, SELP, SLCT
   1, 1, 1, 1, 1, 1,       // RCP, RSQ, LG2, SIN, COS, EX2
   1, 1, 1, 1, 1, 2,       // EXP, LOG, PRESIN, PREEX2, SQRT, POW
   0, 0, 0, 0, 0,          // BRA, CALL, RET, CONT, BREAK,
   0, 0, 0,                // PRERET, PRECONT, PREBREAK
   0, 0, 0, 0, 0, 0,       // BRKPT, JOINAT, JOIN, DISCARD, EXIT, MEMBAR
   1, 1, 1, 2, 1, 2,       // VFETCH, PFETCH, AFETCH, EXPORT, LINTERP, PINTERP
   1, 1,                   // EMIT, RESTART
   1, 1, 1,                // TEX, TXB, TXL,
   1, 1, 1, 1, 1, 1, 2,    // TXF, TXQ, TXD, TXG, TXLQ, TEXCSAA, TEXPREP
   1, 1, 2, 2, 2, 2, 2,    // SULDB, SULDP, SUSTB, SUSTP, SUREDB, SUREDP, SULEA
   3, 3, 3, 1, 3,          // SUBFM, SUCLAMP, SUEAU, SUQ, MADSP
   0,                      // TEXBAR
   1, 1,                   // DFDX, DFDY
   1, 2, 1, 2, 0, 0,       // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP
   2, 3, 2, 1, 3,          // POPCNT, INSBF, EXTBF, BFIND, PERMT
   2, 2,                   // ATOM, BAR
   2, 2, 2, 2, 3, 2,       // VADD, VAVG, VMIN, VMAX, VSAD, VSET,
   2, 2, 2, 1,             // VSHR, VSHL, VSEL, CCTL
   3,                      // SHFL
   1,                      // VOTE
   1,                      // BUFQ
   0                       // LAST (sentinel entry)
};
63
// Coarse class (OPCLASS_*) of each operation.
//
// Like operationSrcNr this table is positional: the comment above each group
// lists, in order, the opcodes the following entries are meant for. Its size
// is statically checked against OP_LAST + 1 in Target::create(), so the two
// tables must be extended together and at the same position.
const OpClass Target::operationClass[] =
{
   // NOP; PHI; UNION, SPLIT, MERGE, CONSTRAINT
   OPCLASS_OTHER,
   OPCLASS_PSEUDO,
   OPCLASS_PSEUDO, OPCLASS_PSEUDO, OPCLASS_PSEUDO, OPCLASS_PSEUDO,
   // MOV; LOAD; STORE
   OPCLASS_MOVE,
   OPCLASS_LOAD,
   OPCLASS_STORE,
   // ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD, SHLADD, XMAD
   OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
   OPCLASS_ARITH, OPCLASS_ARITH,
   OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
   // ABS, NEG; NOT, AND, OR, XOR; SHL, SHR
   OPCLASS_CONVERT, OPCLASS_CONVERT,
   OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC,
   OPCLASS_SHIFT, OPCLASS_SHIFT,
   // MAX, MIN
   OPCLASS_COMPARE, OPCLASS_COMPARE,
   // SAT, CEIL, FLOOR, TRUNC; CVT
   OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_CONVERT,
   OPCLASS_CONVERT,
   // SET_AND, SET_OR, SET_XOR, SET; SELP, SLCT
   OPCLASS_COMPARE, OPCLASS_COMPARE, OPCLASS_COMPARE, OPCLASS_COMPARE,
   OPCLASS_COMPARE, OPCLASS_COMPARE,
   // RCP, RSQ, LG2, SIN, COS; EX2, EXP, LOG, PRESIN, PREEX2; SQRT, POW
   OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU,
   OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU,
   OPCLASS_SFU, OPCLASS_SFU,
   // BRA, CALL, RET; CONT, BREAK, PRERET, PRECONT, PREBREAK;
   // BRKPT, JOINAT, JOIN
   OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,
   OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,
   OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,
   // DISCARD, EXIT
   OPCLASS_FLOW, OPCLASS_FLOW,
   // MEMBAR
   OPCLASS_CONTROL,
   // VFETCH, PFETCH, AFETCH, EXPORT
   OPCLASS_LOAD, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_STORE,
   // LINTERP, PINTERP
   OPCLASS_SFU, OPCLASS_SFU,
   // EMIT, RESTART
   OPCLASS_CONTROL, OPCLASS_CONTROL,
   // TEX, TXB, TXL, TXF; TXQ, TXD, TXG, TXLQ; TEXCSAA, TEXPREP
   OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE,
   OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE,
   OPCLASS_TEXTURE, OPCLASS_TEXTURE,
   // SULDB, SULDP, SUSTB, SUSTP; SUREDB, SUREDP, SULEA
   // NOTE(review): the third entry (SUSTB) is OPCLASS_ATOMIC while every
   // other surface opcode in this group is OPCLASS_SURFACE - confirm that
   // this is intentional before relying on it.
   OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_ATOMIC, OPCLASS_SURFACE,
   OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_SURFACE,
   // SUBFM, SUCLAMP, SUEAU, SUQ, MADSP
   OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_ARITH,
   // TEXBAR
   OPCLASS_OTHER,
   // DFDX, DFDY, RDSV, WRSV; PIXLD, QUADOP, QUADON, QUADPOP
   OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER,
   OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_CONTROL, OPCLASS_CONTROL,
   // POPCNT, INSBF, EXTBF, BFIND; PERMT
   OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD,
   OPCLASS_BITFIELD,
   // ATOM, BAR
   OPCLASS_ATOMIC, OPCLASS_CONTROL,
   // VADD, VAVG, VMIN, VMAX
   OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR,
   // VSAD, VSET, VSHR, VSHL
   OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR,
   // VSEL, CCTL
   OPCLASS_VECTOR, OPCLASS_CONTROL,
   // SHFL
   OPCLASS_OTHER,
   // VOTE
   OPCLASS_OTHER,
   // BUFQ
   OPCLASS_OTHER,
   OPCLASS_PSEUDO // LAST (sentinel entry)
};
141
142
143extern Target *getTargetGM107(unsigned int chipset);
144extern Target *getTargetNVC0(unsigned int chipset);
145extern Target *getTargetNV50(unsigned int chipset);
146
147Target *Target::create(unsigned int chipset)
148{
149   STATIC_ASSERT(ARRAY_SIZE(operationSrcNr) == OP_LAST + 1);
150   STATIC_ASSERT(ARRAY_SIZE(operationClass) == OP_LAST + 1);
151   switch (chipset & ~0xf) {
152   case 0x110:
153   case 0x120:
154   case 0x130:
155      return getTargetGM107(chipset);
156   case 0xc0:
157   case 0xd0:
158   case 0xe0:
159   case 0xf0:
160   case 0x100:
161      return getTargetNVC0(chipset);
162   case 0x50:
163   case 0x80:
164   case 0x90:
165   case 0xa0:
166      return getTargetNV50(chipset);
167   default:
168      ERROR("unsupported target: NV%x\n", chipset);
169      return 0;
170   }
171}
172
173void Target::destroy(Target *targ)
174{
175   delete targ;
176}
177
178CodeEmitter::CodeEmitter(const Target *target) : targ(target), fixupInfo(NULL)
179{
180}
181
182void
183CodeEmitter::setCodeLocation(void *ptr, uint32_t size)
184{
185   code = reinterpret_cast<uint32_t *>(ptr);
186   codeSize = 0;
187   codeSizeLimit = size;
188}
189
190void
191CodeEmitter::printBinary() const
192{
193   uint32_t *bin = code - codeSize / 4;
194   INFO("program binary (%u bytes)", codeSize);
195   for (unsigned int pos = 0; pos < codeSize / 4; ++pos) {
196      if ((pos % 8) == 0)
197         INFO("\n");
198      INFO("%08x ", bin[pos]);
199   }
200   INFO("\n");
201}
202
// Number of 56-byte units needed to hold @size bytes, rounded up.
// The caller reserves 8 extra bytes for each unit returned here.
static inline uint32_t sizeToBundlesNVE4(uint32_t size)
{
   const uint32_t bytesPerBundle = 56;

   return (size + (bytesPerBundle - 1)) / bytesPerBundle;
}
207
208void
209CodeEmitter::prepareEmission(Program *prog)
210{
211   for (ArrayList::Iterator fi = prog->allFuncs.iterator();
212        !fi.end(); fi.next()) {
213      Function *func = reinterpret_cast<Function *>(fi.get());
214      func->binPos = prog->binSize;
215      prepareEmission(func);
216
217      // adjust sizes & positions for schedulding info:
218      if (prog->getTarget()->hasSWSched) {
219         uint32_t adjPos = func->binPos;
220         BasicBlock *bb = NULL;
221         for (int i = 0; i < func->bbCount; ++i) {
222            bb = func->bbArray[i];
223            int32_t adjSize = bb->binSize;
224            if (adjPos % 64) {
225               adjSize -= 64 - adjPos % 64;
226               if (adjSize < 0)
227                  adjSize = 0;
228            }
229            adjSize = bb->binSize + sizeToBundlesNVE4(adjSize) * 8;
230            bb->binPos = adjPos;
231            bb->binSize = adjSize;
232            adjPos += adjSize;
233         }
234         if (bb)
235            func->binSize = adjPos - func->binPos;
236      }
237
238      prog->binSize += func->binSize;
239   }
240}
241
242void
243CodeEmitter::prepareEmission(Function *func)
244{
245   func->bbCount = 0;
246   func->bbArray = new BasicBlock * [func->cfg.getSize()];
247
248   BasicBlock::get(func->cfg.getRoot())->binPos = func->binPos;
249
250   for (IteratorRef it = func->cfg.iteratorCFG(); !it->end(); it->next())
251      prepareEmission(BasicBlock::get(*it));
252}
253
// Lay out a single basic block: determine its position, append it to
// func->bbArray and compute the encoding size of each of its instructions.
//
// Blocks are expected to arrive in emission order (see
// prepareEmission(Function *)). A trailing OP_BRA of an already laid-out
// block that targets @bb is removed when only zero-sized blocks lie between
// that block and @bb. Instructions are then sized, trying to keep 4-byte
// encodings in pairs and widening to 8 bytes where a pair cannot be formed.
void
CodeEmitter::prepareEmission(BasicBlock *bb)
{
   Instruction *i, *next;
   Function *func = bb->getFunction();
   int j;
   unsigned int nShort;

   // skip over the most recently added blocks that occupy no space
   for (j = func->bbCount - 1; j >= 0 && !func->bbArray[j]->binSize; --j);

   for (; j >= 0; --j) {
      BasicBlock *in = func->bbArray[j];
      Instruction *exit = in->getExit();

      if (exit && exit->op == OP_BRA && exit->asFlow()->target.bb == bb) {
         // the branch is dropped, so give back its 8 bytes ...
         in->binSize -= 8;
         func->binSize -= 8;

         // ... and move every block recorded after `in` up by the same
         // amount. Note that this reuses j and leaves it at func->bbCount,
         // so if `in` is now empty the outer loop resumes scanning from the
         // last recorded block.
         for (++j; j < func->bbCount; ++j)
            func->bbArray[j]->binPos -= 8;

         in->remove(exit);
      }
      // bb directly follows the block just examined
      bb->binPos = in->binPos + in->binSize;
      if (in->binSize) // no more no-op branches to bb
         break;
   }
   func->bbArray[func->bbCount++] = bb;

   // nothing to size if the block has no exit instruction
   if (!bb->getExit())
      return;

   // determine encoding size, try to group short instructions
   // (nShort counts the short instructions of the current run; an odd value
   // means the last short instruction has no partner yet)
   nShort = 0;
   for (i = bb->getEntry(); i; i = next) {
      next = i->next;

      // drop memory barriers the target cannot encode
      if (i->op == OP_MEMBAR && !targ->isOpSupported(OP_MEMBAR, TYPE_NONE)) {
         bb->remove(i);
         continue;
      }

      i->encSize = getMinEncodingSize(i);
      if (next && i->encSize < 8)
         ++nShort;
      else
      if ((nShort & 1) && next && getMinEncodingSize(next) == 4) {
         // i is long, follows an unpaired short instruction and is itself
         // followed by one that could be short: try to reorder so that the
         // two short instructions become adjacent
         if (i->isCommutationLegal(i->next)) {
            // swap i with its successor; the successor is accounted for now
            // (via i = i->prev below) and i is revisited in the next
            // iteration
            bb->permuteAdjacent(i, next);
            next->encSize = 4;
            next = i;
            i = i->prev;
            ++nShort;
         } else
         if (i->isCommutationLegal(i->prev) && next->next) {
            // swap i with its predecessor instead; the short successor is
            // accounted for here (+4) and skipped by the iteration
            bb->permuteAdjacent(i->prev, i);
            next->encSize = 4;
            next = next->next;
            bb->binSize += 4;
            ++nShort;
         } else {
            // no legal reordering: widen the unpaired predecessor to 8 bytes
            i->encSize = 8;
            i->prev->encSize = 8;
            bb->binSize += 4;
            nShort = 0;
         }
      } else {
         // long encoding; an unpaired short predecessor has to be widened
         i->encSize = 8;
         if (nShort & 1) {
            i->prev->encSize = 8;
            bb->binSize += 4;
         }
         nShort = 0;
      }
      bb->binSize += i->encSize;
   }

   // the exit instruction must end up with the 8-byte encoding (asserted
   // below); widening it may leave its predecessor unpaired, in which case
   // that one is widened too
   if (bb->getExit()->encSize == 4) {
      assert(nShort);
      bb->getExit()->encSize = 8;
      bb->binSize += 4;

      if ((bb->getExit()->prev->encSize == 4) && !(nShort & 1)) {
         bb->binSize += 8;
         bb->getExit()->prev->encSize = 8;
      }
   }
   assert(!bb->getEntry() || (bb->getExit() && bb->getExit()->encSize == 8));

   func->binSize += bb->binSize;
}
345
346void
347Program::emitSymbolTable(struct nv50_ir_prog_info *info)
348{
349   unsigned int n = 0, nMax = allFuncs.getSize();
350
351   info->bin.syms =
352      (struct nv50_ir_prog_symbol *)MALLOC(nMax * sizeof(*info->bin.syms));
353
354   for (ArrayList::Iterator fi = allFuncs.iterator();
355        !fi.end();
356        fi.next(), ++n) {
357      Function *f = (Function *)fi.get();
358      assert(n < nMax);
359
360      info->bin.syms[n].label = f->getLabel();
361      info->bin.syms[n].offset = f->binPos;
362   }
363
364   info->bin.numSyms = n;
365}
366
367bool
368Program::emitBinary(struct nv50_ir_prog_info *info)
369{
370   CodeEmitter *emit = target->getCodeEmitter(progType);
371
372   emit->prepareEmission(this);
373
374   if (dbgFlags & NV50_IR_DEBUG_BASIC)
375      this->print();
376
377   if (!binSize) {
378      code = NULL;
379      return false;
380   }
381   code = reinterpret_cast<uint32_t *>(MALLOC(binSize));
382   if (!code)
383      return false;
384   emit->setCodeLocation(code, binSize);
385   info->bin.instructions = 0;
386
387   for (ArrayList::Iterator fi = allFuncs.iterator(); !fi.end(); fi.next()) {
388      Function *fn = reinterpret_cast<Function *>(fi.get());
389
390      assert(emit->getCodeSize() == fn->binPos);
391
392      for (int b = 0; b < fn->bbCount; ++b) {
393         for (Instruction *i = fn->bbArray[b]->getEntry(); i; i = i->next) {
394            emit->emitInstruction(i);
395            info->bin.instructions++;
396            if ((typeSizeof(i->sType) == 8 || typeSizeof(i->dType) == 8) &&
397                (isFloatType(i->sType) || isFloatType(i->dType)))
398               info->io.fp64 = true;
399         }
400      }
401   }
402   info->io.fp64 |= fp64;
403   info->bin.relocData = emit->getRelocInfo();
404   info->bin.fixupData = emit->getFixupInfo();
405
406   emitSymbolTable(info);
407
408   // the nvc0 driver will print the binary iself together with the header
409   if ((dbgFlags & NV50_IR_DEBUG_BASIC) && getTarget()->getChipset() < 0xc0)
410      emit->printBinary();
411
412   delete emit;
413   return true;
414}
415
416#define RELOC_ALLOC_INCREMENT 8
417
418bool
419CodeEmitter::addReloc(RelocEntry::Type ty, int w, uint32_t data, uint32_t m,
420                      int s)
421{
422   unsigned int n = relocInfo ? relocInfo->count : 0;
423
424   if (!(n % RELOC_ALLOC_INCREMENT)) {
425      size_t size = sizeof(RelocInfo) + n * sizeof(RelocEntry);
426      relocInfo = reinterpret_cast<RelocInfo *>(
427         REALLOC(relocInfo, n ? size : 0,
428                 size + RELOC_ALLOC_INCREMENT * sizeof(RelocEntry)));
429      if (!relocInfo)
430         return false;
431      if (n == 0)
432         memset(relocInfo, 0, sizeof(RelocInfo));
433   }
434   ++relocInfo->count;
435
436   relocInfo->entry[n].data = data;
437   relocInfo->entry[n].mask = m;
438   relocInfo->entry[n].offset = codeSize + w * 4;
439   relocInfo->entry[n].bitPos = s;
440   relocInfo->entry[n].type = ty;
441
442   return true;
443}
444
445bool
446CodeEmitter::addInterp(int ipa, int reg, FixupApply apply)
447{
448   unsigned int n = fixupInfo ? fixupInfo->count : 0;
449
450   if (!(n % RELOC_ALLOC_INCREMENT)) {
451      size_t size = sizeof(FixupInfo) + n * sizeof(FixupEntry);
452      fixupInfo = reinterpret_cast<FixupInfo *>(
453         REALLOC(fixupInfo, n ? size : 0,
454                 size + RELOC_ALLOC_INCREMENT * sizeof(FixupEntry)));
455      if (!fixupInfo)
456         return false;
457      if (n == 0)
458         memset(fixupInfo, 0, sizeof(FixupInfo));
459   }
460   ++fixupInfo->count;
461
462   fixupInfo->entry[n] = FixupEntry(apply, ipa, reg, codeSize >> 2);
463
464   return true;
465}
466
467void
468RelocEntry::apply(uint32_t *binary, const RelocInfo *info) const
469{
470   uint32_t value = 0;
471
472   switch (type) {
473   case TYPE_CODE: value = info->codePos; break;
474   case TYPE_BUILTIN: value = info->libPos; break;
475   case TYPE_DATA: value = info->dataPos; break;
476   default:
477      assert(0);
478      break;
479   }
480   value += data;
481   value = (bitPos < 0) ? (value >> -bitPos) : (value << bitPos);
482
483   binary[offset / 4] &= ~mask;
484   binary[offset / 4] |= value & mask;
485}
486
487} // namespace nv50_ir
488
489
490#include "codegen/nv50_ir_driver.h"
491
492extern "C" {
493
494void
495nv50_ir_relocate_code(void *relocData, uint32_t *code,
496                      uint32_t codePos,
497                      uint32_t libPos,
498                      uint32_t dataPos)
499{
500   nv50_ir::RelocInfo *info = reinterpret_cast<nv50_ir::RelocInfo *>(relocData);
501
502   info->codePos = codePos;
503   info->libPos = libPos;
504   info->dataPos = dataPos;
505
506   for (unsigned int i = 0; i < info->count; ++i)
507      info->entry[i].apply(code, info);
508}
509
510void
511nv50_ir_apply_fixups(void *fixupData, uint32_t *code,
512                     bool force_persample_interp, bool flatshade,
513                     uint8_t alphatest)
514{
515   nv50_ir::FixupInfo *info = reinterpret_cast<nv50_ir::FixupInfo *>(
516      fixupData);
517
518   // force_persample_interp: all non-flat -> per-sample
519   // flatshade: all color -> flat
520   // alphatest: PIPE_FUNC_* to use with alphatest
521   nv50_ir::FixupData data(force_persample_interp, flatshade, alphatest);
522   for (unsigned i = 0; i < info->count; ++i)
523      info->entry[i].apply(&info->entry[i], code, data);
524}
525
526void
527nv50_ir_get_target_library(uint32_t chipset,
528                           const uint32_t **code, uint32_t *size)
529{
530   nv50_ir::Target *targ = nv50_ir::Target::create(chipset);
531   targ->getBuiltinCode(code, size);
532   nv50_ir::Target::destroy(targ);
533}
534
535}
536