1/* 2 * Copyright 2011 Christoph Bumiller 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 */ 22 23#include "codegen/nv50_ir.h" 24#include "codegen/nv50_ir_target.h" 25 26namespace nv50_ir { 27 28const uint8_t Target::operationSrcNr[] = 29{ 30 0, 0, // NOP, PHI 31 0, 0, 0, 0, // UNION, SPLIT, MERGE, CONSTRAINT 32 1, 1, 2, // MOV, LOAD, STORE 33 2, 2, 2, 2, 2, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD 34 3, 3, // SHLADD, XMAD 35 1, 1, 1, // ABS, NEG, NOT 36 2, 2, 2, 2, 2, // AND, OR, XOR, SHL, SHR 37 2, 2, 1, // MAX, MIN, SAT 38 1, 1, 1, 1, // CEIL, FLOOR, TRUNC, CVT 39 3, 3, 3, 2, 3, 3, // SET_AND,OR,XOR, SET, SELP, SLCT 40 1, 1, 1, 1, 1, 1, // RCP, RSQ, LG2, SIN, COS, EX2 41 1, 1, 1, 1, 1, 2, // EXP, LOG, PRESIN, PREEX2, SQRT, POW 42 0, 0, 0, 0, 0, // BRA, CALL, RET, CONT, BREAK, 43 0, 0, 0, // PRERET,CONT,BREAK 44 0, 0, 0, 0, 0, 0, // BRKPT, JOINAT, JOIN, DISCARD, EXIT, MEMBAR 45 1, 1, 1, 2, 1, 2, // VFETCH, PFETCH, AFETCH, EXPORT, LINTERP, PINTERP 46 1, 1, // EMIT, RESTART 47 1, 1, 1, // TEX, TXB, TXL, 48 1, 1, 1, 1, 1, 1, 2, // TXF, TXQ, TXD, TXG, TXLQ, TEXCSAA, TEXPREP 49 1, 1, 2, 2, 2, 2, 2, // SULDB, SULDP, SUSTB, SUSTP, SUREDB, SUREDP, SULEA 50 3, 3, 3, 1, 3, // SUBFM, SUCLAMP, SUEAU, SUQ, MADSP 51 0, // TEXBAR 52 1, 1, // DFDX, DFDY 53 1, 2, 1, 2, 0, 0, // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP 54 2, 3, 2, 1, 3, // POPCNT, INSBF, EXTBF, BFIND, PERMT 55 2, 2, // ATOM, BAR 56 2, 2, 2, 2, 3, 2, // VADD, VAVG, VMIN, VMAX, VSAD, VSET, 57 2, 2, 2, 1, // VSHR, VSHL, VSEL, CCTL 58 3, // SHFL 59 1, // VOTE 60 1, // BUFQ 61 0 62}; 63 64const OpClass Target::operationClass[] = 65{ 66 // NOP; PHI; UNION, SPLIT, MERGE, CONSTRAINT 67 OPCLASS_OTHER, 68 OPCLASS_PSEUDO, 69 OPCLASS_PSEUDO, OPCLASS_PSEUDO, OPCLASS_PSEUDO, OPCLASS_PSEUDO, 70 // MOV; LOAD; STORE 71 OPCLASS_MOVE, 72 OPCLASS_LOAD, 73 OPCLASS_STORE, 74 // ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD, SHLADD, XMAD 75 OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, 76 OPCLASS_ARITH, OPCLASS_ARITH, 77 OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, 78 // ABS, NEG; NOT, AND, OR, XOR; SHL, SHR 79 OPCLASS_CONVERT, OPCLASS_CONVERT, 80 OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, 81 OPCLASS_SHIFT, OPCLASS_SHIFT, 82 // MAX, MIN 83 OPCLASS_COMPARE, OPCLASS_COMPARE, 84 // SAT, CEIL, FLOOR, TRUNC; CVT 85 OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_CONVERT, 86 OPCLASS_CONVERT, 87 // SET(AND,OR,XOR); SELP, SLCT 88 OPCLASS_COMPARE, OPCLASS_COMPARE, OPCLASS_COMPARE, OPCLASS_COMPARE, 89 OPCLASS_COMPARE, OPCLASS_COMPARE, 90 // RCP, RSQ, LG2, SIN, COS; EX2, EXP, LOG, PRESIN, PREEX2; SQRT, POW 91 OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, 92 OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, 93 OPCLASS_SFU, OPCLASS_SFU, 94 // BRA, CALL, RET; CONT, BREAK, PRE(RET,CONT,BREAK); BRKPT, JOINAT, JOIN 95 OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, 96 OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, 97 OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, 98 // DISCARD, EXIT 99 OPCLASS_FLOW, OPCLASS_FLOW, 100 // MEMBAR 101 OPCLASS_CONTROL, 102 // VFETCH, PFETCH, AFETCH, EXPORT 103 OPCLASS_LOAD, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_STORE, 104 // LINTERP, PINTERP 105 OPCLASS_SFU, OPCLASS_SFU, 106 // EMIT, RESTART 107 OPCLASS_CONTROL, OPCLASS_CONTROL, 108 // TEX, TXB, TXL, TXF; TXQ, TXD, TXG, TXLQ; TEXCSAA, TEXPREP 109 OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, 110 OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, 111 OPCLASS_TEXTURE, OPCLASS_TEXTURE, 112 // SULDB, SULDP, SUSTB, SUSTP; SUREDB, SUREDP, SULEA 113 OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_ATOMIC, OPCLASS_SURFACE, 114 OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_SURFACE, 115 // SUBFM, SUCLAMP, SUEAU, SUQ, MADSP 116 OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_ARITH, 117 // TEXBAR 118 OPCLASS_OTHER, 119 // DFDX, DFDY, RDSV, WRSV; PIXLD, QUADOP, QUADON, QUADPOP 120 OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, 121 OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_CONTROL, OPCLASS_CONTROL, 122 // POPCNT, INSBF, EXTBF, BFIND; PERMT 123 OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, 124 OPCLASS_BITFIELD, 125 // ATOM, BAR 126 OPCLASS_ATOMIC, OPCLASS_CONTROL, 127 // VADD, VAVG, VMIN, VMAX 128 OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, 129 // VSAD, VSET, VSHR, VSHL 130 OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, 131 // VSEL, CCTL 132 OPCLASS_VECTOR, OPCLASS_CONTROL, 133 // SHFL 134 OPCLASS_OTHER, 135 // VOTE 136 OPCLASS_OTHER, 137 // BUFQ 138 OPCLASS_OTHER, 139 OPCLASS_PSEUDO // LAST 140}; 141 142 143extern Target *getTargetGM107(unsigned int chipset); 144extern Target *getTargetNVC0(unsigned int chipset); 145extern Target *getTargetNV50(unsigned int chipset); 146 147Target *Target::create(unsigned int chipset) 148{ 149 STATIC_ASSERT(ARRAY_SIZE(operationSrcNr) == OP_LAST + 1); 150 STATIC_ASSERT(ARRAY_SIZE(operationClass) == OP_LAST + 1); 151 switch (chipset & ~0xf) { 152 case 0x110: 153 case 0x120: 154 case 0x130: 155 return getTargetGM107(chipset); 156 case 0xc0: 157 case 0xd0: 158 case 0xe0: 159 case 0xf0: 160 case 0x100: 161 return getTargetNVC0(chipset); 162 case 0x50: 163 case 0x80: 164 case 0x90: 165 case 0xa0: 166 return getTargetNV50(chipset); 167 default: 168 ERROR("unsupported target: NV%x\n", chipset); 169 return 0; 170 } 171} 172 173void Target::destroy(Target *targ) 174{ 175 delete targ; 176} 177 178CodeEmitter::CodeEmitter(const Target *target) : targ(target), fixupInfo(NULL) 179{ 180} 181 182void 183CodeEmitter::setCodeLocation(void *ptr, uint32_t size) 184{ 185 code = reinterpret_cast<uint32_t *>(ptr); 186 codeSize = 0; 187 codeSizeLimit = size; 188} 189 190void 191CodeEmitter::printBinary() const 192{ 193 uint32_t *bin = code - codeSize / 4; 194 INFO("program binary (%u bytes)", codeSize); 195 for (unsigned int pos = 0; pos < codeSize / 4; ++pos) { 196 if ((pos % 8) == 0) 197 INFO("\n"); 198 INFO("%08x ", bin[pos]); 199 } 200 INFO("\n"); 201} 202 203static inline uint32_t sizeToBundlesNVE4(uint32_t size) 204{ 205 return (size + 55) / 56; 206} 207 208void 209CodeEmitter::prepareEmission(Program *prog) 210{ 211 for (ArrayList::Iterator fi = prog->allFuncs.iterator(); 212 !fi.end(); fi.next()) { 213 Function *func = reinterpret_cast<Function *>(fi.get()); 214 func->binPos = prog->binSize; 215 prepareEmission(func); 216 217 // adjust sizes & positions for schedulding info: 218 if (prog->getTarget()->hasSWSched) { 219 uint32_t adjPos = func->binPos; 220 BasicBlock *bb = NULL; 221 for (int i = 0; i < func->bbCount; ++i) { 222 bb = func->bbArray[i]; 223 int32_t adjSize = bb->binSize; 224 if (adjPos % 64) { 225 adjSize -= 64 - adjPos % 64; 226 if (adjSize < 0) 227 adjSize = 0; 228 } 229 adjSize = bb->binSize + sizeToBundlesNVE4(adjSize) * 8; 230 bb->binPos = adjPos; 231 bb->binSize = adjSize; 232 adjPos += adjSize; 233 } 234 if (bb) 235 func->binSize = adjPos - func->binPos; 236 } 237 238 prog->binSize += func->binSize; 239 } 240} 241 242void 243CodeEmitter::prepareEmission(Function *func) 244{ 245 func->bbCount = 0; 246 func->bbArray = new BasicBlock * [func->cfg.getSize()]; 247 248 BasicBlock::get(func->cfg.getRoot())->binPos = func->binPos; 249 250 for (IteratorRef it = func->cfg.iteratorCFG(); !it->end(); it->next()) 251 prepareEmission(BasicBlock::get(*it)); 252} 253 254void 255CodeEmitter::prepareEmission(BasicBlock *bb) 256{ 257 Instruction *i, *next; 258 Function *func = bb->getFunction(); 259 int j; 260 unsigned int nShort; 261 262 for (j = func->bbCount - 1; j >= 0 && !func->bbArray[j]->binSize; --j); 263 264 for (; j >= 0; --j) { 265 BasicBlock *in = func->bbArray[j]; 266 Instruction *exit = in->getExit(); 267 268 if (exit && exit->op == OP_BRA && exit->asFlow()->target.bb == bb) { 269 in->binSize -= 8; 270 func->binSize -= 8; 271 272 for (++j; j < func->bbCount; ++j) 273 func->bbArray[j]->binPos -= 8; 274 275 in->remove(exit); 276 } 277 bb->binPos = in->binPos + in->binSize; 278 if (in->binSize) // no more no-op branches to bb 279 break; 280 } 281 func->bbArray[func->bbCount++] = bb; 282 283 if (!bb->getExit()) 284 return; 285 286 // determine encoding size, try to group short instructions 287 nShort = 0; 288 for (i = bb->getEntry(); i; i = next) { 289 next = i->next; 290 291 if (i->op == OP_MEMBAR && !targ->isOpSupported(OP_MEMBAR, TYPE_NONE)) { 292 bb->remove(i); 293 continue; 294 } 295 296 i->encSize = getMinEncodingSize(i); 297 if (next && i->encSize < 8) 298 ++nShort; 299 else 300 if ((nShort & 1) && next && getMinEncodingSize(next) == 4) { 301 if (i->isCommutationLegal(i->next)) { 302 bb->permuteAdjacent(i, next); 303 next->encSize = 4; 304 next = i; 305 i = i->prev; 306 ++nShort; 307 } else 308 if (i->isCommutationLegal(i->prev) && next->next) { 309 bb->permuteAdjacent(i->prev, i); 310 next->encSize = 4; 311 next = next->next; 312 bb->binSize += 4; 313 ++nShort; 314 } else { 315 i->encSize = 8; 316 i->prev->encSize = 8; 317 bb->binSize += 4; 318 nShort = 0; 319 } 320 } else { 321 i->encSize = 8; 322 if (nShort & 1) { 323 i->prev->encSize = 8; 324 bb->binSize += 4; 325 } 326 nShort = 0; 327 } 328 bb->binSize += i->encSize; 329 } 330 331 if (bb->getExit()->encSize == 4) { 332 assert(nShort); 333 bb->getExit()->encSize = 8; 334 bb->binSize += 4; 335 336 if ((bb->getExit()->prev->encSize == 4) && !(nShort & 1)) { 337 bb->binSize += 8; 338 bb->getExit()->prev->encSize = 8; 339 } 340 } 341 assert(!bb->getEntry() || (bb->getExit() && bb->getExit()->encSize == 8)); 342 343 func->binSize += bb->binSize; 344} 345 346void 347Program::emitSymbolTable(struct nv50_ir_prog_info *info) 348{ 349 unsigned int n = 0, nMax = allFuncs.getSize(); 350 351 info->bin.syms = 352 (struct nv50_ir_prog_symbol *)MALLOC(nMax * sizeof(*info->bin.syms)); 353 354 for (ArrayList::Iterator fi = allFuncs.iterator(); 355 !fi.end(); 356 fi.next(), ++n) { 357 Function *f = (Function *)fi.get(); 358 assert(n < nMax); 359 360 info->bin.syms[n].label = f->getLabel(); 361 info->bin.syms[n].offset = f->binPos; 362 } 363 364 info->bin.numSyms = n; 365} 366 367bool 368Program::emitBinary(struct nv50_ir_prog_info *info) 369{ 370 CodeEmitter *emit = target->getCodeEmitter(progType); 371 372 emit->prepareEmission(this); 373 374 if (dbgFlags & NV50_IR_DEBUG_BASIC) 375 this->print(); 376 377 if (!binSize) { 378 code = NULL; 379 return false; 380 } 381 code = reinterpret_cast<uint32_t *>(MALLOC(binSize)); 382 if (!code) 383 return false; 384 emit->setCodeLocation(code, binSize); 385 info->bin.instructions = 0; 386 387 for (ArrayList::Iterator fi = allFuncs.iterator(); !fi.end(); fi.next()) { 388 Function *fn = reinterpret_cast<Function *>(fi.get()); 389 390 assert(emit->getCodeSize() == fn->binPos); 391 392 for (int b = 0; b < fn->bbCount; ++b) { 393 for (Instruction *i = fn->bbArray[b]->getEntry(); i; i = i->next) { 394 emit->emitInstruction(i); 395 info->bin.instructions++; 396 if ((typeSizeof(i->sType) == 8 || typeSizeof(i->dType) == 8) && 397 (isFloatType(i->sType) || isFloatType(i->dType))) 398 info->io.fp64 = true; 399 } 400 } 401 } 402 info->io.fp64 |= fp64; 403 info->bin.relocData = emit->getRelocInfo(); 404 info->bin.fixupData = emit->getFixupInfo(); 405 406 emitSymbolTable(info); 407 408 // the nvc0 driver will print the binary iself together with the header 409 if ((dbgFlags & NV50_IR_DEBUG_BASIC) && getTarget()->getChipset() < 0xc0) 410 emit->printBinary(); 411 412 delete emit; 413 return true; 414} 415 416#define RELOC_ALLOC_INCREMENT 8 417 418bool 419CodeEmitter::addReloc(RelocEntry::Type ty, int w, uint32_t data, uint32_t m, 420 int s) 421{ 422 unsigned int n = relocInfo ? relocInfo->count : 0; 423 424 if (!(n % RELOC_ALLOC_INCREMENT)) { 425 size_t size = sizeof(RelocInfo) + n * sizeof(RelocEntry); 426 relocInfo = reinterpret_cast<RelocInfo *>( 427 REALLOC(relocInfo, n ? size : 0, 428 size + RELOC_ALLOC_INCREMENT * sizeof(RelocEntry))); 429 if (!relocInfo) 430 return false; 431 if (n == 0) 432 memset(relocInfo, 0, sizeof(RelocInfo)); 433 } 434 ++relocInfo->count; 435 436 relocInfo->entry[n].data = data; 437 relocInfo->entry[n].mask = m; 438 relocInfo->entry[n].offset = codeSize + w * 4; 439 relocInfo->entry[n].bitPos = s; 440 relocInfo->entry[n].type = ty; 441 442 return true; 443} 444 445bool 446CodeEmitter::addInterp(int ipa, int reg, FixupApply apply) 447{ 448 unsigned int n = fixupInfo ? fixupInfo->count : 0; 449 450 if (!(n % RELOC_ALLOC_INCREMENT)) { 451 size_t size = sizeof(FixupInfo) + n * sizeof(FixupEntry); 452 fixupInfo = reinterpret_cast<FixupInfo *>( 453 REALLOC(fixupInfo, n ? size : 0, 454 size + RELOC_ALLOC_INCREMENT * sizeof(FixupEntry))); 455 if (!fixupInfo) 456 return false; 457 if (n == 0) 458 memset(fixupInfo, 0, sizeof(FixupInfo)); 459 } 460 ++fixupInfo->count; 461 462 fixupInfo->entry[n] = FixupEntry(apply, ipa, reg, codeSize >> 2); 463 464 return true; 465} 466 467void 468RelocEntry::apply(uint32_t *binary, const RelocInfo *info) const 469{ 470 uint32_t value = 0; 471 472 switch (type) { 473 case TYPE_CODE: value = info->codePos; break; 474 case TYPE_BUILTIN: value = info->libPos; break; 475 case TYPE_DATA: value = info->dataPos; break; 476 default: 477 assert(0); 478 break; 479 } 480 value += data; 481 value = (bitPos < 0) ? (value >> -bitPos) : (value << bitPos); 482 483 binary[offset / 4] &= ~mask; 484 binary[offset / 4] |= value & mask; 485} 486 487} // namespace nv50_ir 488 489 490#include "codegen/nv50_ir_driver.h" 491 492extern "C" { 493 494void 495nv50_ir_relocate_code(void *relocData, uint32_t *code, 496 uint32_t codePos, 497 uint32_t libPos, 498 uint32_t dataPos) 499{ 500 nv50_ir::RelocInfo *info = reinterpret_cast<nv50_ir::RelocInfo *>(relocData); 501 502 info->codePos = codePos; 503 info->libPos = libPos; 504 info->dataPos = dataPos; 505 506 for (unsigned int i = 0; i < info->count; ++i) 507 info->entry[i].apply(code, info); 508} 509 510void 511nv50_ir_apply_fixups(void *fixupData, uint32_t *code, 512 bool force_persample_interp, bool flatshade, 513 uint8_t alphatest) 514{ 515 nv50_ir::FixupInfo *info = reinterpret_cast<nv50_ir::FixupInfo *>( 516 fixupData); 517 518 // force_persample_interp: all non-flat -> per-sample 519 // flatshade: all color -> flat 520 // alphatest: PIPE_FUNC_* to use with alphatest 521 nv50_ir::FixupData data(force_persample_interp, flatshade, alphatest); 522 for (unsigned i = 0; i < info->count; ++i) 523 info->entry[i].apply(&info->entry[i], code, data); 524} 525 526void 527nv50_ir_get_target_library(uint32_t chipset, 528 const uint32_t **code, uint32_t *size) 529{ 530 nv50_ir::Target *targ = nv50_ir::Target::create(chipset); 531 targ->getBuiltinCode(code, size); 532 nv50_ir::Target::destroy(targ); 533} 534 535} 536