1/* 2 * Copyright 2011 Christoph Bumiller 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 */ 22 23#include "codegen/nv50_ir.h" 24#include "codegen/nv50_ir_target.h" 25 26#include <algorithm> 27#include <stack> 28#include <limits> 29#if __cplusplus >= 201103L 30#include <unordered_map> 31#else 32#include <tr1/unordered_map> 33#endif 34 35namespace nv50_ir { 36 37#if __cplusplus >= 201103L 38using std::hash; 39using std::unordered_map; 40#else 41using std::tr1::hash; 42using std::tr1::unordered_map; 43#endif 44 45#define MAX_REGISTER_FILE_SIZE 256 46 47class RegisterSet 48{ 49public: 50 RegisterSet(const Target *); 51 52 void init(const Target *); 53 void reset(DataFile, bool resetMax = false); 54 55 void periodicMask(DataFile f, uint32_t lock, uint32_t unlock); 56 void intersect(DataFile f, const RegisterSet *); 57 58 bool assign(int32_t& reg, DataFile f, unsigned int size, unsigned int maxReg); 59 void release(DataFile f, int32_t reg, unsigned int size); 60 void occupy(DataFile f, int32_t reg, unsigned int size); 61 void occupy(const Value *); 62 void occupyMask(DataFile f, int32_t reg, uint8_t mask); 63 bool isOccupied(DataFile f, int32_t reg, unsigned int size) const; 64 bool testOccupy(const Value *); 65 bool testOccupy(DataFile f, int32_t reg, unsigned int size); 66 67 inline int getMaxAssigned(DataFile f) const { return fill[f]; } 68 69 inline unsigned int getFileSize(DataFile f) const 70 { 71 return last[f] + 1; 72 } 73 74 inline unsigned int units(DataFile f, unsigned int size) const 75 { 76 return size >> unit[f]; 77 } 78 // for regs of size >= 4, id is counted in 4-byte words (like nv50/c0 binary) 79 inline unsigned int idToBytes(const Value *v) const 80 { 81 return v->reg.data.id * MIN2(v->reg.size, 4); 82 } 83 inline unsigned int idToUnits(const Value *v) const 84 { 85 return units(v->reg.file, idToBytes(v)); 86 } 87 inline int bytesToId(Value *v, unsigned int bytes) const 88 { 89 if (v->reg.size < 4) 90 return units(v->reg.file, bytes); 91 return bytes / 4; 92 } 93 inline int unitsToId(DataFile f, int u, uint8_t size) const 94 { 95 if (u < 0) 96 return -1; 97 return (size < 4) ? u : ((u << unit[f]) / 4); 98 } 99 100 void print(DataFile f) const; 101 102 const bool restrictedGPR16Range; 103 104private: 105 BitSet bits[LAST_REGISTER_FILE + 1]; 106 107 int unit[LAST_REGISTER_FILE + 1]; // log2 of allocation granularity 108 109 int last[LAST_REGISTER_FILE + 1]; 110 int fill[LAST_REGISTER_FILE + 1]; 111}; 112 113void 114RegisterSet::reset(DataFile f, bool resetMax) 115{ 116 bits[f].fill(0); 117 if (resetMax) 118 fill[f] = -1; 119} 120 121void 122RegisterSet::init(const Target *targ) 123{ 124 for (unsigned int rf = 0; rf <= LAST_REGISTER_FILE; ++rf) { 125 DataFile f = static_cast<DataFile>(rf); 126 last[rf] = targ->getFileSize(f) - 1; 127 unit[rf] = targ->getFileUnit(f); 128 fill[rf] = -1; 129 assert(last[rf] < MAX_REGISTER_FILE_SIZE); 130 bits[rf].allocate(last[rf] + 1, true); 131 } 132} 133 134RegisterSet::RegisterSet(const Target *targ) 135 : restrictedGPR16Range(targ->getChipset() < 0xc0) 136{ 137 init(targ); 138 for (unsigned int i = 0; i <= LAST_REGISTER_FILE; ++i) 139 reset(static_cast<DataFile>(i)); 140} 141 142void 143RegisterSet::periodicMask(DataFile f, uint32_t lock, uint32_t unlock) 144{ 145 bits[f].periodicMask32(lock, unlock); 146} 147 148void 149RegisterSet::intersect(DataFile f, const RegisterSet *set) 150{ 151 bits[f] |= set->bits[f]; 152} 153 154void 155RegisterSet::print(DataFile f) const 156{ 157 INFO("GPR:"); 158 bits[f].print(); 159 INFO("\n"); 160} 161 162bool 163RegisterSet::assign(int32_t& reg, DataFile f, unsigned int size, unsigned int maxReg) 164{ 165 reg = bits[f].findFreeRange(size, maxReg); 166 if (reg < 0) 167 return false; 168 fill[f] = MAX2(fill[f], (int32_t)(reg + size - 1)); 169 return true; 170} 171 172bool 173RegisterSet::isOccupied(DataFile f, int32_t reg, unsigned int size) const 174{ 175 return bits[f].testRange(reg, size); 176} 177 178void 179RegisterSet::occupy(const Value *v) 180{ 181 occupy(v->reg.file, idToUnits(v), v->reg.size >> unit[v->reg.file]); 182} 183 184void 185RegisterSet::occupyMask(DataFile f, int32_t reg, uint8_t mask) 186{ 187 bits[f].setMask(reg & ~31, static_cast<uint32_t>(mask) << (reg % 32)); 188} 189 190void 191RegisterSet::occupy(DataFile f, int32_t reg, unsigned int size) 192{ 193 bits[f].setRange(reg, size); 194 195 INFO_DBG(0, REG_ALLOC, "reg occupy: %u[%i] %u\n", f, reg, size); 196 197 fill[f] = MAX2(fill[f], (int32_t)(reg + size - 1)); 198} 199 200bool 201RegisterSet::testOccupy(const Value *v) 202{ 203 return testOccupy(v->reg.file, 204 idToUnits(v), v->reg.size >> unit[v->reg.file]); 205} 206 207bool 208RegisterSet::testOccupy(DataFile f, int32_t reg, unsigned int size) 209{ 210 if (isOccupied(f, reg, size)) 211 return false; 212 occupy(f, reg, size); 213 return true; 214} 215 216void 217RegisterSet::release(DataFile f, int32_t reg, unsigned int size) 218{ 219 bits[f].clrRange(reg, size); 220 221 INFO_DBG(0, REG_ALLOC, "reg release: %u[%i] %u\n", f, reg, size); 222} 223 224class RegAlloc 225{ 226public: 227 RegAlloc(Program *program) : prog(program), func(NULL), sequence(0) { } 228 229 bool exec(); 230 bool execFunc(); 231 232private: 233 class PhiMovesPass : public Pass { 234 private: 235 virtual bool visit(BasicBlock *); 236 inline bool needNewElseBlock(BasicBlock *b, BasicBlock *p); 237 inline void splitEdges(BasicBlock *b); 238 }; 239 240 class ArgumentMovesPass : public Pass { 241 private: 242 virtual bool visit(BasicBlock *); 243 }; 244 245 class BuildIntervalsPass : public Pass { 246 private: 247 virtual bool visit(BasicBlock *); 248 void collectLiveValues(BasicBlock *); 249 void addLiveRange(Value *, const BasicBlock *, int end); 250 }; 251 252 class InsertConstraintsPass : public Pass { 253 public: 254 InsertConstraintsPass() : targ(NULL) { } 255 bool exec(Function *func); 256 private: 257 virtual bool visit(BasicBlock *); 258 259 void insertConstraintMove(Instruction *, int s); 260 bool insertConstraintMoves(); 261 262 void condenseDefs(Instruction *); 263 void condenseDefs(Instruction *, const int first, const int last); 264 void condenseSrcs(Instruction *, const int first, const int last); 265 266 void addHazard(Instruction *i, const ValueRef *src); 267 void textureMask(TexInstruction *); 268 void addConstraint(Instruction *, int s, int n); 269 bool detectConflict(Instruction *, int s); 270 271 // target specific functions, TODO: put in subclass or Target 272 void texConstraintNV50(TexInstruction *); 273 void texConstraintNVC0(TexInstruction *); 274 void texConstraintNVE0(TexInstruction *); 275 void texConstraintGM107(TexInstruction *); 276 277 bool isScalarTexGM107(TexInstruction *); 278 void handleScalarTexGM107(TexInstruction *); 279 280 std::list<Instruction *> constrList; 281 282 const Target *targ; 283 }; 284 285 bool buildLiveSets(BasicBlock *); 286 287private: 288 Program *prog; 289 Function *func; 290 291 // instructions in control flow / chronological order 292 ArrayList insns; 293 294 int sequence; // for manual passes through CFG 295}; 296 297typedef std::pair<Value *, Value *> ValuePair; 298 299class MergedDefs 300{ 301private: 302 std::list<ValueDef *>& entry(Value *val) { 303 auto it = defs.find(val); 304 305 if (it == defs.end()) { 306 std::list<ValueDef *> &res = defs[val]; 307 res = val->defs; 308 return res; 309 } else { 310 return (*it).second; 311 } 312 } 313 314 std::unordered_map<Value *, std::list<ValueDef *> > defs; 315 316public: 317 std::list<ValueDef *>& operator()(Value *val) { 318 return entry(val); 319 } 320 321 void add(Value *val, const std::list<ValueDef *> &vals) { 322 assert(val); 323 std::list<ValueDef *> &valdefs = entry(val); 324 valdefs.insert(valdefs.end(), vals.begin(), vals.end()); 325 } 326 327 void removeDefsOfInstruction(Instruction *insn) { 328 for (int d = 0; insn->defExists(d); ++d) { 329 ValueDef *def = &insn->def(d); 330 defs.erase(def->get()); 331 for (auto &p : defs) 332 p.second.remove(def); 333 } 334 } 335 336 void merge() { 337 for (auto &p : defs) 338 p.first->defs = p.second; 339 } 340}; 341 342class SpillCodeInserter 343{ 344public: 345 SpillCodeInserter(Function *fn, MergedDefs &mergedDefs) : func(fn), mergedDefs(mergedDefs), stackSize(0), stackBase(0) { } 346 347 bool run(const std::list<ValuePair>&); 348 349 Symbol *assignSlot(const Interval&, const unsigned int size); 350 Value *offsetSlot(Value *, const LValue *); 351 inline int32_t getStackSize() const { return stackSize; } 352 353private: 354 Function *func; 355 MergedDefs &mergedDefs; 356 357 struct SpillSlot 358 { 359 Interval occup; 360 std::list<Value *> residents; // needed to recalculate occup 361 Symbol *sym; 362 int32_t offset; 363 inline uint8_t size() const { return sym->reg.size; } 364 }; 365 std::list<SpillSlot> slots; 366 int32_t stackSize; 367 int32_t stackBase; 368 369 LValue *unspill(Instruction *usei, LValue *, Value *slot); 370 void spill(Instruction *defi, Value *slot, LValue *); 371}; 372 373void 374RegAlloc::BuildIntervalsPass::addLiveRange(Value *val, 375 const BasicBlock *bb, 376 int end) 377{ 378 Instruction *insn = val->getUniqueInsn(); 379 380 if (!insn) 381 insn = bb->getFirst(); 382 383 assert(bb->getFirst()->serial <= bb->getExit()->serial); 384 assert(bb->getExit()->serial + 1 >= end); 385 386 int begin = insn->serial; 387 if (begin < bb->getEntry()->serial || begin > bb->getExit()->serial) 388 begin = bb->getEntry()->serial; 389 390 INFO_DBG(prog->dbgFlags, REG_ALLOC, "%%%i <- live range [%i(%i), %i)\n", 391 val->id, begin, insn->serial, end); 392 393 if (begin != end) // empty ranges are only added as hazards for fixed regs 394 val->livei.extend(begin, end); 395} 396 397bool 398RegAlloc::PhiMovesPass::needNewElseBlock(BasicBlock *b, BasicBlock *p) 399{ 400 if (b->cfg.incidentCount() <= 1) 401 return false; 402 403 int n = 0; 404 for (Graph::EdgeIterator ei = p->cfg.outgoing(); !ei.end(); ei.next()) 405 if (ei.getType() == Graph::Edge::TREE || 406 ei.getType() == Graph::Edge::FORWARD) 407 ++n; 408 return (n == 2); 409} 410 411struct PhiMapHash { 412 size_t operator()(const std::pair<Instruction *, BasicBlock *>& val) const { 413 return hash<Instruction*>()(val.first) * 31 + 414 hash<BasicBlock*>()(val.second); 415 } 416}; 417 418typedef unordered_map< 419 std::pair<Instruction *, BasicBlock *>, Value *, PhiMapHash> PhiMap; 420 421// Critical edges need to be split up so that work can be inserted along 422// specific edge transitions. Unfortunately manipulating incident edges into a 423// BB invalidates all the PHI nodes since their sources are implicitly ordered 424// by incident edge order. 425// 426// TODO: Make it so that that is not the case, and PHI nodes store pointers to 427// the original BBs. 428void 429RegAlloc::PhiMovesPass::splitEdges(BasicBlock *bb) 430{ 431 BasicBlock *pb, *pn; 432 Instruction *phi; 433 Graph::EdgeIterator ei; 434 std::stack<BasicBlock *> stack; 435 int j = 0; 436 437 for (ei = bb->cfg.incident(); !ei.end(); ei.next()) { 438 pb = BasicBlock::get(ei.getNode()); 439 assert(pb); 440 if (needNewElseBlock(bb, pb)) 441 stack.push(pb); 442 } 443 444 // No critical edges were found, no need to perform any work. 445 if (stack.empty()) 446 return; 447 448 // We're about to, potentially, reorder the inbound edges. This means that 449 // we need to hold on to the (phi, bb) -> src mapping, and fix up the phi 450 // nodes after the graph has been modified. 451 PhiMap phis; 452 453 j = 0; 454 for (ei = bb->cfg.incident(); !ei.end(); ei.next(), j++) { 455 pb = BasicBlock::get(ei.getNode()); 456 for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = phi->next) 457 phis.insert(std::make_pair(std::make_pair(phi, pb), phi->getSrc(j))); 458 } 459 460 while (!stack.empty()) { 461 pb = stack.top(); 462 pn = new BasicBlock(func); 463 stack.pop(); 464 465 pb->cfg.detach(&bb->cfg); 466 pb->cfg.attach(&pn->cfg, Graph::Edge::TREE); 467 pn->cfg.attach(&bb->cfg, Graph::Edge::FORWARD); 468 469 assert(pb->getExit()->op != OP_CALL); 470 if (pb->getExit()->asFlow()->target.bb == bb) 471 pb->getExit()->asFlow()->target.bb = pn; 472 473 for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = phi->next) { 474 PhiMap::iterator it = phis.find(std::make_pair(phi, pb)); 475 assert(it != phis.end()); 476 phis.insert(std::make_pair(std::make_pair(phi, pn), it->second)); 477 phis.erase(it); 478 } 479 } 480 481 // Now go through and fix up all of the phi node sources. 482 j = 0; 483 for (ei = bb->cfg.incident(); !ei.end(); ei.next(), j++) { 484 pb = BasicBlock::get(ei.getNode()); 485 for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = phi->next) { 486 PhiMap::const_iterator it = phis.find(std::make_pair(phi, pb)); 487 assert(it != phis.end()); 488 489 phi->setSrc(j, it->second); 490 } 491 } 492} 493 494// For each operand of each PHI in b, generate a new value by inserting a MOV 495// at the end of the block it is coming from and replace the operand with its 496// result. This eliminates liveness conflicts and enables us to let values be 497// copied to the right register if such a conflict exists nonetheless. 498// 499// These MOVs are also crucial in making sure the live intervals of phi srces 500// are extended until the end of the loop, since they are not included in the 501// live-in sets. 502bool 503RegAlloc::PhiMovesPass::visit(BasicBlock *bb) 504{ 505 Instruction *phi, *mov; 506 507 splitEdges(bb); 508 509 // insert MOVs (phi->src(j) should stem from j-th in-BB) 510 int j = 0; 511 for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) { 512 BasicBlock *pb = BasicBlock::get(ei.getNode()); 513 if (!pb->isTerminated()) 514 pb->insertTail(new_FlowInstruction(func, OP_BRA, bb)); 515 516 for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = phi->next) { 517 LValue *tmp = new_LValue(func, phi->getDef(0)->asLValue()); 518 mov = new_Instruction(func, OP_MOV, typeOfSize(tmp->reg.size)); 519 520 mov->setSrc(0, phi->getSrc(j)); 521 mov->setDef(0, tmp); 522 phi->setSrc(j, tmp); 523 524 pb->insertBefore(pb->getExit(), mov); 525 } 526 ++j; 527 } 528 529 return true; 530} 531 532bool 533RegAlloc::ArgumentMovesPass::visit(BasicBlock *bb) 534{ 535 // Bind function call inputs/outputs to the same physical register 536 // the callee uses, inserting moves as appropriate for the case a 537 // conflict arises. 538 for (Instruction *i = bb->getEntry(); i; i = i->next) { 539 FlowInstruction *cal = i->asFlow(); 540 // TODO: Handle indirect calls. 541 // Right now they should only be generated for builtins. 542 if (!cal || cal->op != OP_CALL || cal->builtin || cal->indirect) 543 continue; 544 RegisterSet clobberSet(prog->getTarget()); 545 546 // Bind input values. 547 for (int s = cal->indirect ? 1 : 0; cal->srcExists(s); ++s) { 548 const int t = cal->indirect ? (s - 1) : s; 549 LValue *tmp = new_LValue(func, cal->getSrc(s)->asLValue()); 550 tmp->reg.data.id = cal->target.fn->ins[t].rep()->reg.data.id; 551 552 Instruction *mov = 553 new_Instruction(func, OP_MOV, typeOfSize(tmp->reg.size)); 554 mov->setDef(0, tmp); 555 mov->setSrc(0, cal->getSrc(s)); 556 cal->setSrc(s, tmp); 557 558 bb->insertBefore(cal, mov); 559 } 560 561 // Bind output values. 562 for (int d = 0; cal->defExists(d); ++d) { 563 LValue *tmp = new_LValue(func, cal->getDef(d)->asLValue()); 564 tmp->reg.data.id = cal->target.fn->outs[d].rep()->reg.data.id; 565 566 Instruction *mov = 567 new_Instruction(func, OP_MOV, typeOfSize(tmp->reg.size)); 568 mov->setSrc(0, tmp); 569 mov->setDef(0, cal->getDef(d)); 570 cal->setDef(d, tmp); 571 572 bb->insertAfter(cal, mov); 573 clobberSet.occupy(tmp); 574 } 575 576 // Bind clobbered values. 577 for (std::deque<Value *>::iterator it = cal->target.fn->clobbers.begin(); 578 it != cal->target.fn->clobbers.end(); 579 ++it) { 580 if (clobberSet.testOccupy(*it)) { 581 Value *tmp = new_LValue(func, (*it)->asLValue()); 582 tmp->reg.data.id = (*it)->reg.data.id; 583 cal->setDef(cal->defCount(), tmp); 584 } 585 } 586 } 587 588 // Update the clobber set of the function. 589 if (BasicBlock::get(func->cfgExit) == bb) { 590 func->buildDefSets(); 591 for (unsigned int i = 0; i < bb->defSet.getSize(); ++i) 592 if (bb->defSet.test(i)) 593 func->clobbers.push_back(func->getLValue(i)); 594 } 595 596 return true; 597} 598 599// Build the set of live-in variables of bb. 600bool 601RegAlloc::buildLiveSets(BasicBlock *bb) 602{ 603 Function *f = bb->getFunction(); 604 BasicBlock *bn; 605 Instruction *i; 606 unsigned int s, d; 607 608 INFO_DBG(prog->dbgFlags, REG_ALLOC, "buildLiveSets(BB:%i)\n", bb->getId()); 609 610 bb->liveSet.allocate(func->allLValues.getSize(), false); 611 612 int n = 0; 613 for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) { 614 bn = BasicBlock::get(ei.getNode()); 615 if (bn == bb) 616 continue; 617 if (bn->cfg.visit(sequence)) 618 if (!buildLiveSets(bn)) 619 return false; 620 if (n++ || bb->liveSet.marker) 621 bb->liveSet |= bn->liveSet; 622 else 623 bb->liveSet = bn->liveSet; 624 } 625 if (!n && !bb->liveSet.marker) 626 bb->liveSet.fill(0); 627 bb->liveSet.marker = true; 628 629 if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) { 630 INFO("BB:%i live set of out blocks:\n", bb->getId()); 631 bb->liveSet.print(); 632 } 633 634 // if (!bb->getEntry()) 635 // return true; 636 637 if (bb == BasicBlock::get(f->cfgExit)) { 638 for (std::deque<ValueRef>::iterator it = f->outs.begin(); 639 it != f->outs.end(); ++it) { 640 assert(it->get()->asLValue()); 641 bb->liveSet.set(it->get()->id); 642 } 643 } 644 645 for (i = bb->getExit(); i && i != bb->getEntry()->prev; i = i->prev) { 646 for (d = 0; i->defExists(d); ++d) 647 bb->liveSet.clr(i->getDef(d)->id); 648 for (s = 0; i->srcExists(s); ++s) 649 if (i->getSrc(s)->asLValue()) 650 bb->liveSet.set(i->getSrc(s)->id); 651 } 652 for (i = bb->getPhi(); i && i->op == OP_PHI; i = i->next) 653 bb->liveSet.clr(i->getDef(0)->id); 654 655 if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) { 656 INFO("BB:%i live set after propagation:\n", bb->getId()); 657 bb->liveSet.print(); 658 } 659 660 return true; 661} 662 663void 664RegAlloc::BuildIntervalsPass::collectLiveValues(BasicBlock *bb) 665{ 666 BasicBlock *bbA = NULL, *bbB = NULL; 667 668 if (bb->cfg.outgoingCount()) { 669 // trickery to save a loop of OR'ing liveSets 670 // aliasing works fine with BitSet::setOr 671 for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) { 672 if (bbA) { 673 bb->liveSet.setOr(&bbA->liveSet, &bbB->liveSet); 674 bbA = bb; 675 } else { 676 bbA = bbB; 677 } 678 bbB = BasicBlock::get(ei.getNode()); 679 } 680 bb->liveSet.setOr(&bbB->liveSet, bbA ? &bbA->liveSet : NULL); 681 } else 682 if (bb->cfg.incidentCount()) { 683 bb->liveSet.fill(0); 684 } 685} 686 687bool 688RegAlloc::BuildIntervalsPass::visit(BasicBlock *bb) 689{ 690 collectLiveValues(bb); 691 692 INFO_DBG(prog->dbgFlags, REG_ALLOC, "BuildIntervals(BB:%i)\n", bb->getId()); 693 694 // go through out blocks and delete phi sources that do not originate from 695 // the current block from the live set 696 for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) { 697 BasicBlock *out = BasicBlock::get(ei.getNode()); 698 699 for (Instruction *i = out->getPhi(); i && i->op == OP_PHI; i = i->next) { 700 bb->liveSet.clr(i->getDef(0)->id); 701 702 for (int s = 0; i->srcExists(s); ++s) { 703 assert(i->src(s).getInsn()); 704 if (i->getSrc(s)->getUniqueInsn()->bb == bb) // XXX: reachableBy ? 705 bb->liveSet.set(i->getSrc(s)->id); 706 else 707 bb->liveSet.clr(i->getSrc(s)->id); 708 } 709 } 710 } 711 712 // remaining live-outs are live until end 713 if (bb->getExit()) { 714 for (unsigned int j = 0; j < bb->liveSet.getSize(); ++j) 715 if (bb->liveSet.test(j)) 716 addLiveRange(func->getLValue(j), bb, bb->getExit()->serial + 1); 717 } 718 719 for (Instruction *i = bb->getExit(); i && i->op != OP_PHI; i = i->prev) { 720 for (int d = 0; i->defExists(d); ++d) { 721 bb->liveSet.clr(i->getDef(d)->id); 722 if (i->getDef(d)->reg.data.id >= 0) // add hazard for fixed regs 723 i->getDef(d)->livei.extend(i->serial, i->serial); 724 } 725 726 for (int s = 0; i->srcExists(s); ++s) { 727 if (!i->getSrc(s)->asLValue()) 728 continue; 729 if (!bb->liveSet.test(i->getSrc(s)->id)) { 730 bb->liveSet.set(i->getSrc(s)->id); 731 addLiveRange(i->getSrc(s), bb, i->serial); 732 } 733 } 734 } 735 736 if (bb == BasicBlock::get(func->cfg.getRoot())) { 737 for (std::deque<ValueDef>::iterator it = func->ins.begin(); 738 it != func->ins.end(); ++it) { 739 if (it->get()->reg.data.id >= 0) // add hazard for fixed regs 740 it->get()->livei.extend(0, 1); 741 } 742 } 743 744 return true; 745} 746 747 748#define JOIN_MASK_PHI (1 << 0) 749#define JOIN_MASK_UNION (1 << 1) 750#define JOIN_MASK_MOV (1 << 2) 751#define JOIN_MASK_TEX (1 << 3) 752 753class GCRA 754{ 755public: 756 GCRA(Function *, SpillCodeInserter&, MergedDefs&); 757 ~GCRA(); 758 759 bool allocateRegisters(ArrayList& insns); 760 761 void printNodeInfo() const; 762 763private: 764 class RIG_Node : public Graph::Node 765 { 766 public: 767 RIG_Node(); 768 769 void init(const RegisterSet&, LValue *); 770 771 void addInterference(RIG_Node *); 772 void addRegPreference(RIG_Node *); 773 774 inline LValue *getValue() const 775 { 776 return reinterpret_cast<LValue *>(data); 777 } 778 inline void setValue(LValue *lval) { data = lval; } 779 780 inline uint8_t getCompMask() const 781 { 782 return ((1 << colors) - 1) << (reg & 7); 783 } 784 785 static inline RIG_Node *get(const Graph::EdgeIterator& ei) 786 { 787 return static_cast<RIG_Node *>(ei.getNode()); 788 } 789 790 public: 791 uint32_t degree; 792 uint16_t degreeLimit; // if deg < degLimit, node is trivially colourable 793 uint16_t maxReg; 794 uint16_t colors; 795 796 DataFile f; 797 int32_t reg; 798 799 float weight; 800 801 // list pointers for simplify() phase 802 RIG_Node *next; 803 RIG_Node *prev; 804 805 // union of the live intervals of all coalesced values (we want to retain 806 // the separate intervals for testing interference of compound values) 807 Interval livei; 808 809 std::list<RIG_Node *> prefRegs; 810 }; 811 812private: 813 inline RIG_Node *getNode(const LValue *v) const { return &nodes[v->id]; } 814 815 void buildRIG(ArrayList&); 816 bool coalesce(ArrayList&); 817 bool doCoalesce(ArrayList&, unsigned int mask); 818 void calculateSpillWeights(); 819 bool simplify(); 820 bool selectRegisters(); 821 void cleanup(const bool success); 822 823 void simplifyEdge(RIG_Node *, RIG_Node *); 824 void simplifyNode(RIG_Node *); 825 826 bool coalesceValues(Value *, Value *, bool force); 827 void resolveSplitsAndMerges(); 828 void makeCompound(Instruction *, bool isSplit); 829 830 inline void checkInterference(const RIG_Node *, Graph::EdgeIterator&); 831 832 inline void insertOrderedTail(std::list<RIG_Node *>&, RIG_Node *); 833 void checkList(std::list<RIG_Node *>&); 834 835private: 836 std::stack<uint32_t> stack; 837 838 // list headers for simplify() phase 839 RIG_Node lo[2]; 840 RIG_Node hi; 841 842 Graph RIG; 843 RIG_Node *nodes; 844 unsigned int nodeCount; 845 846 Function *func; 847 Program *prog; 848 849 struct RelDegree { 850 uint8_t data[17][17]; 851 852 RelDegree() { 853 for (int i = 1; i <= 16; ++i) 854 for (int j = 1; j <= 16; ++j) 855 data[i][j] = j * ((i + j - 1) / j); 856 } 857 858 const uint8_t* operator[](std::size_t i) const { 859 return data[i]; 860 } 861 }; 862 863 static const RelDegree relDegree; 864 865 RegisterSet regs; 866 867 // need to fixup register id for participants of OP_MERGE/SPLIT 868 std::list<Instruction *> merges; 869 std::list<Instruction *> splits; 870 871 SpillCodeInserter& spill; 872 std::list<ValuePair> mustSpill; 873 874 MergedDefs &mergedDefs; 875}; 876 877const GCRA::RelDegree GCRA::relDegree; 878 879GCRA::RIG_Node::RIG_Node() : Node(NULL), degree(0), degreeLimit(0), maxReg(0), 880 colors(0), f(FILE_NULL), reg(0), weight(0), next(this), prev(this) 881{ 882} 883 884void 885GCRA::printNodeInfo() const 886{ 887 for (unsigned int i = 0; i < nodeCount; ++i) { 888 if (!nodes[i].colors) 889 continue; 890 INFO("RIG_Node[%%%i]($[%u]%i): %u colors, weight %f, deg %u/%u\n X", 891 i, 892 nodes[i].f,nodes[i].reg,nodes[i].colors, 893 nodes[i].weight, 894 nodes[i].degree, nodes[i].degreeLimit); 895 896 for (Graph::EdgeIterator ei = nodes[i].outgoing(); !ei.end(); ei.next()) 897 INFO(" %%%i", RIG_Node::get(ei)->getValue()->id); 898 for (Graph::EdgeIterator ei = nodes[i].incident(); !ei.end(); ei.next()) 899 INFO(" %%%i", RIG_Node::get(ei)->getValue()->id); 900 INFO("\n"); 901 } 902} 903 904static bool 905isShortRegOp(Instruction *insn) 906{ 907 // Immediates are always in src1 (except zeroes, which end up getting 908 // replaced with a zero reg). Every other situation can be resolved by 909 // using a long encoding. 910 return insn->srcExists(1) && insn->src(1).getFile() == FILE_IMMEDIATE && 911 insn->getSrc(1)->reg.data.u64; 912} 913 914// Check if this LValue is ever used in an instruction that can't be encoded 915// with long registers (i.e. > r63) 916static bool 917isShortRegVal(LValue *lval) 918{ 919 if (lval->getInsn() == NULL) 920 return false; 921 for (Value::DefCIterator def = lval->defs.begin(); 922 def != lval->defs.end(); ++def) 923 if (isShortRegOp((*def)->getInsn())) 924 return true; 925 for (Value::UseCIterator use = lval->uses.begin(); 926 use != lval->uses.end(); ++use) 927 if (isShortRegOp((*use)->getInsn())) 928 return true; 929 return false; 930} 931 932void 933GCRA::RIG_Node::init(const RegisterSet& regs, LValue *lval) 934{ 935 setValue(lval); 936 if (lval->reg.data.id >= 0) 937 lval->noSpill = lval->fixedReg = 1; 938 939 colors = regs.units(lval->reg.file, lval->reg.size); 940 f = lval->reg.file; 941 reg = -1; 942 if (lval->reg.data.id >= 0) 943 reg = regs.idToUnits(lval); 944 945 weight = std::numeric_limits<float>::infinity(); 946 degree = 0; 947 maxReg = regs.getFileSize(f); 948 // On nv50, we lose a bit of gpr encoding when there's an embedded 949 // immediate. 950 if (regs.restrictedGPR16Range && f == FILE_GPR && (lval->reg.size == 2 || isShortRegVal(lval))) 951 maxReg /= 2; 952 degreeLimit = maxReg; 953 degreeLimit -= relDegree[1][colors] - 1; 954 955 livei.insert(lval->livei); 956} 957 958bool 959GCRA::coalesceValues(Value *dst, Value *src, bool force) 960{ 961 LValue *rep = dst->join->asLValue(); 962 LValue *val = src->join->asLValue(); 963 964 if (!force && val->reg.data.id >= 0) { 965 rep = src->join->asLValue(); 966 val = dst->join->asLValue(); 967 } 968 RIG_Node *nRep = &nodes[rep->id]; 969 RIG_Node *nVal = &nodes[val->id]; 970 971 if (src->reg.file != dst->reg.file) { 972 if (!force) 973 return false; 974 WARN("forced coalescing of values in different files !\n"); 975 } 976 if (!force && dst->reg.size != src->reg.size) 977 return false; 978 979 if ((rep->reg.data.id >= 0) && (rep->reg.data.id != val->reg.data.id)) { 980 if (force) { 981 if (val->reg.data.id >= 0) 982 WARN("forced coalescing of values in different fixed regs !\n"); 983 } else { 984 if (val->reg.data.id >= 0) 985 return false; 986 // make sure that there is no overlap with the fixed register of rep 987 for (ArrayList::Iterator it = func->allLValues.iterator(); 988 !it.end(); it.next()) { 989 Value *reg = reinterpret_cast<Value *>(it.get())->asLValue(); 990 assert(reg); 991 if (reg->interfers(rep) && reg->livei.overlaps(nVal->livei)) 992 return false; 993 } 994 } 995 } 996 997 if (!force && nRep->livei.overlaps(nVal->livei)) 998 return false; 999 1000 INFO_DBG(prog->dbgFlags, REG_ALLOC, "joining %%%i($%i) <- %%%i\n", 1001 rep->id, rep->reg.data.id, val->id); 1002 1003 // set join pointer of all values joined with val 1004 const std::list<ValueDef *> &defs = mergedDefs(val); 1005 for (ValueDef *def : defs) 1006 def->get()->join = rep; 1007 assert(rep->join == rep && val->join == rep); 1008 1009 // add val's definitions to rep and extend the live interval of its RIG node 1010 mergedDefs.add(rep, defs); 1011 nRep->livei.unify(nVal->livei); 1012 nRep->degreeLimit = MIN2(nRep->degreeLimit, nVal->degreeLimit); 1013 nRep->maxReg = MIN2(nRep->maxReg, nVal->maxReg); 1014 return true; 1015} 1016 1017bool 1018GCRA::coalesce(ArrayList& insns) 1019{ 1020 bool ret = doCoalesce(insns, JOIN_MASK_PHI); 1021 if (!ret) 1022 return false; 1023 switch (func->getProgram()->getTarget()->getChipset() & ~0xf) { 1024 case 0x50: 1025 case 0x80: 1026 case 0x90: 1027 case 0xa0: 1028 ret = doCoalesce(insns, JOIN_MASK_UNION | JOIN_MASK_TEX); 1029 break; 1030 case 0xc0: 1031 case 0xd0: 1032 case 0xe0: 1033 case 0xf0: 1034 case 0x100: 1035 case 0x110: 1036 case 0x120: 1037 case 0x130: 1038 case 0x140: 1039 case 0x160: 1040 ret = doCoalesce(insns, JOIN_MASK_UNION); 1041 break; 1042 default: 1043 break; 1044 } 1045 if (!ret) 1046 return false; 1047 return doCoalesce(insns, JOIN_MASK_MOV); 1048} 1049 1050static inline uint8_t makeCompMask(int compSize, int base, int size) 1051{ 1052 uint8_t m = ((1 << size) - 1) << base; 1053 1054 switch (compSize) { 1055 case 1: 1056 return 0xff; 1057 case 2: 1058 m |= (m << 2); 1059 return (m << 4) | m; 1060 case 3: 1061 case 4: 1062 return (m << 4) | m; 1063 default: 1064 assert(compSize <= 8); 1065 return m; 1066 } 1067} 1068 1069// Used when coalescing moves. The non-compound value will become one, e.g.: 1070// mov b32 $r0 $r2 / merge b64 $r0d { $r0 $r1 } 1071// split b64 { $r0 $r1 } $r0d / mov b64 $r0d f64 $r2d 1072static inline void copyCompound(Value *dst, Value *src) 1073{ 1074 LValue *ldst = dst->asLValue(); 1075 LValue *lsrc = src->asLValue(); 1076 1077 if (ldst->compound && !lsrc->compound) { 1078 LValue *swap = lsrc; 1079 lsrc = ldst; 1080 ldst = swap; 1081 } 1082 1083 ldst->compound = lsrc->compound; 1084 ldst->compMask = lsrc->compMask; 1085} 1086 1087void 1088GCRA::makeCompound(Instruction *insn, bool split) 1089{ 1090 LValue *rep = (split ? insn->getSrc(0) : insn->getDef(0))->asLValue(); 1091 1092 if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) { 1093 INFO("makeCompound(split = %i): ", split); 1094 insn->print(); 1095 } 1096 1097 const unsigned int size = getNode(rep)->colors; 1098 unsigned int base = 0; 1099 1100 if (!rep->compound) 1101 rep->compMask = 0xff; 1102 rep->compound = 1; 1103 1104 for (int c = 0; split ? insn->defExists(c) : insn->srcExists(c); ++c) { 1105 LValue *val = (split ? insn->getDef(c) : insn->getSrc(c))->asLValue(); 1106 1107 val->compound = 1; 1108 if (!val->compMask) 1109 val->compMask = 0xff; 1110 val->compMask &= makeCompMask(size, base, getNode(val)->colors); 1111 assert(val->compMask); 1112 1113 INFO_DBG(prog->dbgFlags, REG_ALLOC, "compound: %%%i:%02x <- %%%i:%02x\n", 1114 rep->id, rep->compMask, val->id, val->compMask); 1115 1116 base += getNode(val)->colors; 1117 } 1118 assert(base == size); 1119} 1120 1121bool 1122GCRA::doCoalesce(ArrayList& insns, unsigned int mask) 1123{ 1124 int c, n; 1125 1126 for (n = 0; n < insns.getSize(); ++n) { 1127 Instruction *i; 1128 Instruction *insn = reinterpret_cast<Instruction *>(insns.get(n)); 1129 1130 switch (insn->op) { 1131 case OP_PHI: 1132 if (!(mask & JOIN_MASK_PHI)) 1133 break; 1134 for (c = 0; insn->srcExists(c); ++c) 1135 if (!coalesceValues(insn->getDef(0), insn->getSrc(c), false)) { 1136 // this is bad 1137 ERROR("failed to coalesce phi operands\n"); 1138 return false; 1139 } 1140 break; 1141 case OP_UNION: 1142 case OP_MERGE: 1143 if (!(mask & JOIN_MASK_UNION)) 1144 break; 1145 for (c = 0; insn->srcExists(c); ++c) 1146 coalesceValues(insn->getDef(0), insn->getSrc(c), true); 1147 if (insn->op == OP_MERGE) { 1148 merges.push_back(insn); 1149 if (insn->srcExists(1)) 1150 makeCompound(insn, false); 1151 } 1152 break; 1153 case OP_SPLIT: 1154 if (!(mask & JOIN_MASK_UNION)) 1155 break; 1156 splits.push_back(insn); 1157 for (c = 0; insn->defExists(c); ++c) 1158 coalesceValues(insn->getSrc(0), insn->getDef(c), true); 1159 makeCompound(insn, true); 1160 break; 1161 case OP_MOV: 1162 if (!(mask & JOIN_MASK_MOV)) 1163 break; 1164 i = NULL; 1165 if (!insn->getDef(0)->uses.empty()) 1166 i = (*insn->getDef(0)->uses.begin())->getInsn(); 1167 // if this is a contraint-move there will only be a single use 1168 if (i && i->op == OP_MERGE) // do we really still need this ? 1169 break; 1170 i = insn->getSrc(0)->getUniqueInsn(); 1171 if (i && !i->constrainedDefs()) { 1172 if (coalesceValues(insn->getDef(0), insn->getSrc(0), false)) 1173 copyCompound(insn->getSrc(0), insn->getDef(0)); 1174 } 1175 break; 1176 case OP_TEX: 1177 case OP_TXB: 1178 case OP_TXL: 1179 case OP_TXF: 1180 case OP_TXQ: 1181 case OP_TXD: 1182 case OP_TXG: 1183 case OP_TXLQ: 1184 case OP_TEXCSAA: 1185 case OP_TEXPREP: 1186 if (!(mask & JOIN_MASK_TEX)) 1187 break; 1188 for (c = 0; insn->srcExists(c) && c != insn->predSrc; ++c) 1189 coalesceValues(insn->getDef(c), insn->getSrc(c), true); 1190 break; 1191 default: 1192 break; 1193 } 1194 } 1195 return true; 1196} 1197 1198void 1199GCRA::RIG_Node::addInterference(RIG_Node *node) 1200{ 1201 this->degree += relDegree[node->colors][colors]; 1202 node->degree += relDegree[colors][node->colors]; 1203 1204 this->attach(node, Graph::Edge::CROSS); 1205} 1206 1207void 1208GCRA::RIG_Node::addRegPreference(RIG_Node *node) 1209{ 1210 prefRegs.push_back(node); 1211} 1212 1213GCRA::GCRA(Function *fn, SpillCodeInserter& spill, MergedDefs& mergedDefs) : 1214 nodes(NULL), 1215 nodeCount(0), 1216 func(fn), 1217 regs(fn->getProgram()->getTarget()), 1218 spill(spill), 1219 mergedDefs(mergedDefs) 1220{ 1221 prog = func->getProgram(); 1222} 1223 1224GCRA::~GCRA() 1225{ 1226 if (nodes) 1227 delete[] nodes; 1228} 1229 1230void 1231GCRA::checkList(std::list<RIG_Node *>& lst) 1232{ 1233 GCRA::RIG_Node *prev = NULL; 1234 1235 for (std::list<RIG_Node *>::iterator it = lst.begin(); 1236 it != lst.end(); 1237 ++it) { 1238 assert((*it)->getValue()->join == (*it)->getValue()); 1239 if (prev) 1240 assert(prev->livei.begin() <= (*it)->livei.begin()); 1241 prev = *it; 1242 } 1243} 1244 1245void 1246GCRA::insertOrderedTail(std::list<RIG_Node *>& list, RIG_Node *node) 1247{ 1248 if (node->livei.isEmpty()) 1249 return; 1250 // only the intervals of joined values don't necessarily arrive in order 1251 std::list<RIG_Node *>::iterator prev, it; 1252 for (it = list.end(); it != list.begin(); it = prev) { 1253 prev = it; 1254 --prev; 1255 if ((*prev)->livei.begin() <= node->livei.begin()) 1256 break; 1257 } 1258 list.insert(it, node); 1259} 1260 1261void 1262GCRA::buildRIG(ArrayList& insns) 1263{ 1264 std::list<RIG_Node *> values, active; 1265 1266 for (std::deque<ValueDef>::iterator it = func->ins.begin(); 1267 it != func->ins.end(); ++it) 1268 insertOrderedTail(values, getNode(it->get()->asLValue())); 1269 1270 for (int i = 0; i < insns.getSize(); ++i) { 1271 Instruction *insn = reinterpret_cast<Instruction *>(insns.get(i)); 1272 for (int d = 0; insn->defExists(d); ++d) 1273 if (insn->getDef(d)->reg.file <= LAST_REGISTER_FILE && 1274 insn->getDef(d)->rep() == insn->getDef(d)) 1275 insertOrderedTail(values, getNode(insn->getDef(d)->asLValue())); 1276 } 1277 checkList(values); 1278 1279 while (!values.empty()) { 1280 RIG_Node *cur = values.front(); 1281 1282 for (std::list<RIG_Node *>::iterator it = active.begin(); 1283 it != active.end();) { 1284 RIG_Node *node = *it; 1285 1286 if (node->livei.end() <= cur->livei.begin()) { 1287 it = active.erase(it); 1288 } else { 1289 if (node->f == cur->f && node->livei.overlaps(cur->livei)) 1290 cur->addInterference(node); 1291 ++it; 1292 } 1293 } 1294 values.pop_front(); 1295 active.push_back(cur); 1296 } 1297} 1298 1299void 1300GCRA::calculateSpillWeights() 1301{ 1302 for (unsigned int i = 0; i < nodeCount; ++i) { 1303 RIG_Node *const n = &nodes[i]; 1304 if (!nodes[i].colors || nodes[i].livei.isEmpty()) 1305 continue; 1306 if (nodes[i].reg >= 0) { 1307 // update max reg 1308 regs.occupy(n->f, n->reg, n->colors); 1309 continue; 1310 } 1311 LValue *val = nodes[i].getValue(); 1312 1313 if (!val->noSpill) { 1314 int rc = 0; 1315 for (ValueDef *def : mergedDefs(val)) 1316 rc += def->get()->refCount(); 1317 1318 nodes[i].weight = 1319 (float)rc * (float)rc / (float)nodes[i].livei.extent(); 1320 } 1321 1322 if (nodes[i].degree < nodes[i].degreeLimit) { 1323 int l = 0; 1324 if (val->reg.size > 4) 1325 l = 1; 1326 DLLIST_ADDHEAD(&lo[l], &nodes[i]); 1327 } else { 1328 DLLIST_ADDHEAD(&hi, &nodes[i]); 1329 } 1330 } 1331 if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) 1332 printNodeInfo(); 1333} 1334 1335void 1336GCRA::simplifyEdge(RIG_Node *a, RIG_Node *b) 1337{ 1338 bool move = b->degree >= b->degreeLimit; 1339 1340 INFO_DBG(prog->dbgFlags, REG_ALLOC, 1341 "edge: (%%%i, deg %u/%u) >-< (%%%i, deg %u/%u)\n", 1342 a->getValue()->id, a->degree, a->degreeLimit, 1343 b->getValue()->id, b->degree, b->degreeLimit); 1344 1345 b->degree -= relDegree[a->colors][b->colors]; 1346 1347 move = move && b->degree < b->degreeLimit; 1348 if (move && !DLLIST_EMPTY(b)) { 1349 int l = (b->getValue()->reg.size > 4) ? 1 : 0; 1350 DLLIST_DEL(b); 1351 DLLIST_ADDTAIL(&lo[l], b); 1352 } 1353} 1354 1355void 1356GCRA::simplifyNode(RIG_Node *node) 1357{ 1358 for (Graph::EdgeIterator ei = node->outgoing(); !ei.end(); ei.next()) 1359 simplifyEdge(node, RIG_Node::get(ei)); 1360 1361 for (Graph::EdgeIterator ei = node->incident(); !ei.end(); ei.next()) 1362 simplifyEdge(node, RIG_Node::get(ei)); 1363 1364 DLLIST_DEL(node); 1365 stack.push(node->getValue()->id); 1366 1367 INFO_DBG(prog->dbgFlags, REG_ALLOC, "SIMPLIFY: pushed %%%i%s\n", 1368 node->getValue()->id, 1369 (node->degree < node->degreeLimit) ? "" : "(spill)"); 1370} 1371 1372bool 1373GCRA::simplify() 1374{ 1375 for (;;) { 1376 if (!DLLIST_EMPTY(&lo[0])) { 1377 do { 1378 simplifyNode(lo[0].next); 1379 } while (!DLLIST_EMPTY(&lo[0])); 1380 } else 1381 if (!DLLIST_EMPTY(&lo[1])) { 1382 simplifyNode(lo[1].next); 1383 } else 1384 if (!DLLIST_EMPTY(&hi)) { 1385 RIG_Node *best = hi.next; 1386 unsigned bestMaxReg = best->maxReg; 1387 float bestScore = best->weight / (float)best->degree; 1388 // Spill candidate. First go through the ones with the highest max 1389 // register, then the ones with lower. That way the ones with the 1390 // lowest requirement will be allocated first, since it's a stack. 1391 for (RIG_Node *it = best->next; it != &hi; it = it->next) { 1392 float score = it->weight / (float)it->degree; 1393 if (score < bestScore || it->maxReg > bestMaxReg) { 1394 best = it; 1395 bestScore = score; 1396 bestMaxReg = it->maxReg; 1397 } 1398 } 1399 if (isinf(bestScore)) { 1400 ERROR("no viable spill candidates left\n"); 1401 return false; 1402 } 1403 simplifyNode(best); 1404 } else { 1405 return true; 1406 } 1407 } 1408} 1409 1410void 1411GCRA::checkInterference(const RIG_Node *node, Graph::EdgeIterator& ei) 1412{ 1413 const RIG_Node *intf = RIG_Node::get(ei); 1414 1415 if (intf->reg < 0) 1416 return; 1417 LValue *vA = node->getValue(); 1418 LValue *vB = intf->getValue(); 1419 1420 const uint8_t intfMask = ((1 << intf->colors) - 1) << (intf->reg & 7); 1421 1422 if (vA->compound | vB->compound) { 1423 // NOTE: this only works for >aligned< register tuples ! 1424 for (const ValueDef *D : mergedDefs(vA)) { 1425 for (const ValueDef *d : mergedDefs(vB)) { 1426 const LValue *vD = D->get()->asLValue(); 1427 const LValue *vd = d->get()->asLValue(); 1428 1429 if (!vD->livei.overlaps(vd->livei)) { 1430 INFO_DBG(prog->dbgFlags, REG_ALLOC, "(%%%i) X (%%%i): no overlap\n", 1431 vD->id, vd->id); 1432 continue; 1433 } 1434 1435 uint8_t mask = vD->compound ? vD->compMask : ~0; 1436 if (vd->compound) { 1437 assert(vB->compound); 1438 mask &= vd->compMask & vB->compMask; 1439 } else { 1440 mask &= intfMask; 1441 } 1442 1443 INFO_DBG(prog->dbgFlags, REG_ALLOC, 1444 "(%%%i)%02x X (%%%i)%02x & %02x: $r%i.%02x\n", 1445 vD->id, 1446 vD->compound ? vD->compMask : 0xff, 1447 vd->id, 1448 vd->compound ? vd->compMask : intfMask, 1449 vB->compMask, intf->reg & ~7, mask); 1450 if (mask) 1451 regs.occupyMask(node->f, intf->reg & ~7, mask); 1452 } 1453 } 1454 } else { 1455 INFO_DBG(prog->dbgFlags, REG_ALLOC, 1456 "(%%%i) X (%%%i): $r%i + %u\n", 1457 vA->id, vB->id, intf->reg, intf->colors); 1458 regs.occupy(node->f, intf->reg, intf->colors); 1459 } 1460} 1461 1462bool 1463GCRA::selectRegisters() 1464{ 1465 INFO_DBG(prog->dbgFlags, REG_ALLOC, "\nSELECT phase\n"); 1466 1467 while (!stack.empty()) { 1468 RIG_Node *node = &nodes[stack.top()]; 1469 stack.pop(); 1470 1471 regs.reset(node->f); 1472 1473 INFO_DBG(prog->dbgFlags, REG_ALLOC, "\nNODE[%%%i, %u colors]\n", 1474 node->getValue()->id, node->colors); 1475 1476 for (Graph::EdgeIterator ei = node->outgoing(); !ei.end(); ei.next()) 1477 checkInterference(node, ei); 1478 for (Graph::EdgeIterator ei = node->incident(); !ei.end(); ei.next()) 1479 checkInterference(node, ei); 1480 1481 if (!node->prefRegs.empty()) { 1482 for (std::list<RIG_Node *>::const_iterator it = node->prefRegs.begin(); 1483 it != node->prefRegs.end(); 1484 ++it) { 1485 if ((*it)->reg >= 0 && 1486 regs.testOccupy(node->f, (*it)->reg, node->colors)) { 1487 node->reg = (*it)->reg; 1488 break; 1489 } 1490 } 1491 } 1492 if (node->reg >= 0) 1493 continue; 1494 LValue *lval = node->getValue(); 1495 if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) 1496 regs.print(node->f); 1497 bool ret = regs.assign(node->reg, node->f, node->colors, node->maxReg); 1498 if (ret) { 1499 INFO_DBG(prog->dbgFlags, REG_ALLOC, "assigned reg %i\n", node->reg); 1500 lval->compMask = node->getCompMask(); 1501 } else { 1502 INFO_DBG(prog->dbgFlags, REG_ALLOC, "must spill: %%%i (size %u)\n", 1503 lval->id, lval->reg.size); 1504 Symbol *slot = NULL; 1505 if (lval->reg.file == FILE_GPR) 1506 slot = spill.assignSlot(node->livei, lval->reg.size); 1507 mustSpill.push_back(ValuePair(lval, slot)); 1508 } 1509 } 1510 if (!mustSpill.empty()) 1511 return false; 1512 for (unsigned int i = 0; i < nodeCount; ++i) { 1513 LValue *lval = nodes[i].getValue(); 1514 if (nodes[i].reg >= 0 && nodes[i].colors > 0) 1515 lval->reg.data.id = 1516 regs.unitsToId(nodes[i].f, nodes[i].reg, lval->reg.size); 1517 } 1518 return true; 1519} 1520 1521bool 1522GCRA::allocateRegisters(ArrayList& insns) 1523{ 1524 bool ret; 1525 1526 INFO_DBG(prog->dbgFlags, REG_ALLOC, 1527 "allocateRegisters to %u instructions\n", insns.getSize()); 1528 1529 nodeCount = func->allLValues.getSize(); 1530 nodes = new RIG_Node[nodeCount]; 1531 if (!nodes) 1532 return false; 1533 for (unsigned int i = 0; i < nodeCount; ++i) { 1534 LValue *lval = reinterpret_cast<LValue *>(func->allLValues.get(i)); 1535 if (lval) { 1536 nodes[i].init(regs, lval); 1537 RIG.insert(&nodes[i]); 1538 1539 if (lval->inFile(FILE_GPR) && lval->getInsn() != NULL) { 1540 Instruction *insn = lval->getInsn(); 1541 if (insn->op != OP_MAD && insn->op != OP_FMA && insn->op != OP_SAD) 1542 continue; 1543 // For both of the cases below, we only want to add the preference 1544 // if all arguments are in registers. 1545 if (insn->src(0).getFile() != FILE_GPR || 1546 insn->src(1).getFile() != FILE_GPR || 1547 insn->src(2).getFile() != FILE_GPR) 1548 continue; 1549 if (prog->getTarget()->getChipset() < 0xc0) { 1550 // Outputting a flag is not supported with short encodings nor 1551 // with immediate arguments. 1552 // See handleMADforNV50. 1553 if (insn->flagsDef >= 0) 1554 continue; 1555 } else { 1556 // We can only fold immediate arguments if dst == src2. This 1557 // only matters if one of the first two arguments is an 1558 // immediate. This form is also only supported for floats. 1559 // See handleMADforNVC0. 1560 ImmediateValue imm; 1561 if (insn->dType != TYPE_F32) 1562 continue; 1563 if (!insn->src(0).getImmediate(imm) && 1564 !insn->src(1).getImmediate(imm)) 1565 continue; 1566 } 1567 1568 nodes[i].addRegPreference(getNode(insn->getSrc(2)->asLValue())); 1569 } 1570 } 1571 } 1572 1573 // coalesce first, we use only 1 RIG node for a group of joined values 1574 ret = coalesce(insns); 1575 if (!ret) 1576 goto out; 1577 1578 if (func->getProgram()->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) 1579 func->printLiveIntervals(); 1580 1581 buildRIG(insns); 1582 calculateSpillWeights(); 1583 ret = simplify(); 1584 if (!ret) 1585 goto out; 1586 1587 ret = selectRegisters(); 1588 if (!ret) { 1589 INFO_DBG(prog->dbgFlags, REG_ALLOC, 1590 "selectRegisters failed, inserting spill code ...\n"); 1591 regs.reset(FILE_GPR, true); 1592 spill.run(mustSpill); 1593 if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) 1594 func->print(); 1595 } else { 1596 mergedDefs.merge(); 1597 prog->maxGPR = std::max(prog->maxGPR, regs.getMaxAssigned(FILE_GPR)); 1598 } 1599 1600out: 1601 cleanup(ret); 1602 return ret; 1603} 1604 1605void 1606GCRA::cleanup(const bool success) 1607{ 1608 mustSpill.clear(); 1609 1610 for (ArrayList::Iterator it = func->allLValues.iterator(); 1611 !it.end(); it.next()) { 1612 LValue *lval = reinterpret_cast<LValue *>(it.get()); 1613 1614 lval->livei.clear(); 1615 1616 lval->compound = 0; 1617 lval->compMask = 0; 1618 1619 if (lval->join == lval) 1620 continue; 1621 1622 if (success) 1623 lval->reg.data.id = lval->join->reg.data.id; 1624 else 1625 lval->join = lval; 1626 } 1627 1628 if (success) 1629 resolveSplitsAndMerges(); 1630 splits.clear(); // avoid duplicate entries on next coalesce pass 1631 merges.clear(); 1632 1633 delete[] nodes; 1634 nodes = NULL; 1635 hi.next = hi.prev = &hi; 1636 lo[0].next = lo[0].prev = &lo[0]; 1637 lo[1].next = lo[1].prev = &lo[1]; 1638} 1639 1640Symbol * 1641SpillCodeInserter::assignSlot(const Interval &livei, const unsigned int size) 1642{ 1643 SpillSlot slot; 1644 int32_t offsetBase = stackSize; 1645 int32_t offset; 1646 std::list<SpillSlot>::iterator pos = slots.end(), it = slots.begin(); 1647 1648 if (offsetBase % size) 1649 offsetBase += size - (offsetBase % size); 1650 1651 slot.sym = NULL; 1652 1653 for (offset = offsetBase; offset < stackSize; offset += size) { 1654 const int32_t entryEnd = offset + size; 1655 while (it != slots.end() && it->offset < offset) 1656 ++it; 1657 if (it == slots.end()) // no slots left 1658 break; 1659 std::list<SpillSlot>::iterator bgn = it; 1660 1661 while (it != slots.end() && it->offset < entryEnd) { 1662 it->occup.print(); 1663 if (it->occup.overlaps(livei)) 1664 break; 1665 ++it; 1666 } 1667 if (it == slots.end() || it->offset >= entryEnd) { 1668 // fits 1669 for (; bgn != slots.end() && bgn->offset < entryEnd; ++bgn) { 1670 bgn->occup.insert(livei); 1671 if (bgn->size() == size) 1672 slot.sym = bgn->sym; 1673 } 1674 break; 1675 } 1676 } 1677 if (!slot.sym) { 1678 stackSize = offset + size; 1679 slot.offset = offset; 1680 slot.sym = new_Symbol(func->getProgram(), FILE_MEMORY_LOCAL); 1681 if (!func->stackPtr) 1682 offset += func->tlsBase; 1683 slot.sym->setAddress(NULL, offset); 1684 slot.sym->reg.size = size; 1685 slots.insert(pos, slot)->occup.insert(livei); 1686 } 1687 return slot.sym; 1688} 1689 1690Value * 1691SpillCodeInserter::offsetSlot(Value *base, const LValue *lval) 1692{ 1693 if (!lval->compound || (lval->compMask & 0x1)) 1694 return base; 1695 Value *slot = cloneShallow(func, base); 1696 1697 slot->reg.data.offset += (ffs(lval->compMask) - 1) * lval->reg.size; 1698 slot->reg.size = lval->reg.size; 1699 1700 return slot; 1701} 1702 1703void 1704SpillCodeInserter::spill(Instruction *defi, Value *slot, LValue *lval) 1705{ 1706 const DataType ty = typeOfSize(lval->reg.size); 1707 1708 slot = offsetSlot(slot, lval); 1709 1710 Instruction *st; 1711 if (slot->reg.file == FILE_MEMORY_LOCAL) { 1712 lval->noSpill = 1; 1713 if (ty != TYPE_B96) { 1714 st = new_Instruction(func, OP_STORE, ty); 1715 st->setSrc(0, slot); 1716 st->setSrc(1, lval); 1717 } else { 1718 st = new_Instruction(func, OP_SPLIT, ty); 1719 st->setSrc(0, lval); 1720 for (int d = 0; d < lval->reg.size / 4; ++d) 1721 st->setDef(d, new_LValue(func, FILE_GPR)); 1722 1723 for (int d = lval->reg.size / 4 - 1; d >= 0; --d) { 1724 Value *tmp = cloneShallow(func, slot); 1725 tmp->reg.size = 4; 1726 tmp->reg.data.offset += 4 * d; 1727 1728 Instruction *s = new_Instruction(func, OP_STORE, TYPE_U32); 1729 s->setSrc(0, tmp); 1730 s->setSrc(1, st->getDef(d)); 1731 defi->bb->insertAfter(defi, s); 1732 } 1733 } 1734 } else { 1735 st = new_Instruction(func, OP_CVT, ty); 1736 st->setDef(0, slot); 1737 st->setSrc(0, lval); 1738 if (lval->reg.file == FILE_FLAGS) 1739 st->flagsSrc = 0; 1740 } 1741 defi->bb->insertAfter(defi, st); 1742} 1743 1744LValue * 1745SpillCodeInserter::unspill(Instruction *usei, LValue *lval, Value *slot) 1746{ 1747 const DataType ty = typeOfSize(lval->reg.size); 1748 1749 slot = offsetSlot(slot, lval); 1750 lval = cloneShallow(func, lval); 1751 1752 Instruction *ld; 1753 if (slot->reg.file == FILE_MEMORY_LOCAL) { 1754 lval->noSpill = 1; 1755 if (ty != TYPE_B96) { 1756 ld = new_Instruction(func, OP_LOAD, ty); 1757 } else { 1758 ld = new_Instruction(func, OP_MERGE, ty); 1759 for (int d = 0; d < lval->reg.size / 4; ++d) { 1760 Value *tmp = cloneShallow(func, slot); 1761 LValue *val; 1762 tmp->reg.size = 4; 1763 tmp->reg.data.offset += 4 * d; 1764 1765 Instruction *l = new_Instruction(func, OP_LOAD, TYPE_U32); 1766 l->setDef(0, (val = new_LValue(func, FILE_GPR))); 1767 l->setSrc(0, tmp); 1768 usei->bb->insertBefore(usei, l); 1769 ld->setSrc(d, val); 1770 val->noSpill = 1; 1771 } 1772 ld->setDef(0, lval); 1773 usei->bb->insertBefore(usei, ld); 1774 return lval; 1775 } 1776 } else { 1777 ld = new_Instruction(func, OP_CVT, ty); 1778 } 1779 ld->setDef(0, lval); 1780 ld->setSrc(0, slot); 1781 if (lval->reg.file == FILE_FLAGS) 1782 ld->flagsDef = 0; 1783 1784 usei->bb->insertBefore(usei, ld); 1785 return lval; 1786} 1787 1788static bool 1789value_cmp(ValueRef *a, ValueRef *b) { 1790 Instruction *ai = a->getInsn(), *bi = b->getInsn(); 1791 if (ai->bb != bi->bb) 1792 return ai->bb->getId() < bi->bb->getId(); 1793 return ai->serial < bi->serial; 1794} 1795 1796// For each value that is to be spilled, go through all its definitions. 1797// A value can have multiple definitions if it has been coalesced before. 1798// For each definition, first go through all its uses and insert an unspill 1799// instruction before it, then replace the use with the temporary register. 1800// Unspill can be either a load from memory or simply a move to another 1801// register file. 1802// For "Pseudo" instructions (like PHI, SPLIT, MERGE) we can erase the use 1803// if we have spilled to a memory location, or simply with the new register. 1804// No load or conversion instruction should be needed. 1805bool 1806SpillCodeInserter::run(const std::list<ValuePair>& lst) 1807{ 1808 for (std::list<ValuePair>::const_iterator it = lst.begin(); it != lst.end(); 1809 ++it) { 1810 LValue *lval = it->first->asLValue(); 1811 Symbol *mem = it->second ? it->second->asSym() : NULL; 1812 1813 // Keep track of which instructions to delete later. Deleting them 1814 // inside the loop is unsafe since a single instruction may have 1815 // multiple destinations that all need to be spilled (like OP_SPLIT). 1816 unordered_set<Instruction *> to_del; 1817 1818 std::list<ValueDef *> &defs = mergedDefs(lval); 1819 for (Value::DefIterator d = defs.begin(); d != defs.end(); 1820 ++d) { 1821 Value *slot = mem ? 1822 static_cast<Value *>(mem) : new_LValue(func, FILE_GPR); 1823 Value *tmp = NULL; 1824 Instruction *last = NULL; 1825 1826 LValue *dval = (*d)->get()->asLValue(); 1827 Instruction *defi = (*d)->getInsn(); 1828 1829 // Sort all the uses by BB/instruction so that we don't unspill 1830 // multiple times in a row, and also remove a source of 1831 // non-determinism. 1832 std::vector<ValueRef *> refs(dval->uses.begin(), dval->uses.end()); 1833 std::sort(refs.begin(), refs.end(), value_cmp); 1834 1835 // Unspill at each use *before* inserting spill instructions, 1836 // we don't want to have the spill instructions in the use list here. 1837 for (std::vector<ValueRef*>::const_iterator it = refs.begin(); 1838 it != refs.end(); ++it) { 1839 ValueRef *u = *it; 1840 Instruction *usei = u->getInsn(); 1841 assert(usei); 1842 if (usei->isPseudo()) { 1843 tmp = (slot->reg.file == FILE_MEMORY_LOCAL) ? NULL : slot; 1844 last = NULL; 1845 } else { 1846 if (!last || (usei != last->next && usei != last)) 1847 tmp = unspill(usei, dval, slot); 1848 last = usei; 1849 } 1850 u->set(tmp); 1851 } 1852 1853 assert(defi); 1854 if (defi->isPseudo()) { 1855 d = defs.erase(d); 1856 --d; 1857 if (slot->reg.file == FILE_MEMORY_LOCAL) 1858 to_del.insert(defi); 1859 else 1860 defi->setDef(0, slot); 1861 } else { 1862 spill(defi, slot, dval); 1863 } 1864 } 1865 1866 for (unordered_set<Instruction *>::const_iterator it = to_del.begin(); 1867 it != to_del.end(); ++it) { 1868 mergedDefs.removeDefsOfInstruction(*it); 1869 delete_Instruction(func->getProgram(), *it); 1870 } 1871 } 1872 1873 // TODO: We're not trying to reuse old slots in a potential next iteration. 1874 // We have to update the slots' livei intervals to be able to do that. 1875 stackBase = stackSize; 1876 slots.clear(); 1877 return true; 1878} 1879 1880bool 1881RegAlloc::exec() 1882{ 1883 for (IteratorRef it = prog->calls.iteratorDFS(false); 1884 !it->end(); it->next()) { 1885 func = Function::get(reinterpret_cast<Graph::Node *>(it->get())); 1886 1887 func->tlsBase = prog->tlsSize; 1888 if (!execFunc()) 1889 return false; 1890 prog->tlsSize += func->tlsSize; 1891 } 1892 return true; 1893} 1894 1895bool 1896RegAlloc::execFunc() 1897{ 1898 MergedDefs mergedDefs; 1899 InsertConstraintsPass insertConstr; 1900 PhiMovesPass insertPhiMoves; 1901 ArgumentMovesPass insertArgMoves; 1902 BuildIntervalsPass buildIntervals; 1903 SpillCodeInserter insertSpills(func, mergedDefs); 1904 1905 GCRA gcra(func, insertSpills, mergedDefs); 1906 1907 unsigned int i, retries; 1908 bool ret; 1909 1910 if (!func->ins.empty()) { 1911 // Insert a nop at the entry so inputs only used by the first instruction 1912 // don't count as having an empty live range. 1913 Instruction *nop = new_Instruction(func, OP_NOP, TYPE_NONE); 1914 BasicBlock::get(func->cfg.getRoot())->insertHead(nop); 1915 } 1916 1917 ret = insertConstr.exec(func); 1918 if (!ret) 1919 goto out; 1920 1921 ret = insertPhiMoves.run(func); 1922 if (!ret) 1923 goto out; 1924 1925 ret = insertArgMoves.run(func); 1926 if (!ret) 1927 goto out; 1928 1929 // TODO: need to fix up spill slot usage ranges to support > 1 retry 1930 for (retries = 0; retries < 3; ++retries) { 1931 if (retries && (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC)) 1932 INFO("Retry: %i\n", retries); 1933 if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) 1934 func->print(); 1935 1936 // spilling to registers may add live ranges, need to rebuild everything 1937 ret = true; 1938 for (sequence = func->cfg.nextSequence(), i = 0; 1939 ret && i <= func->loopNestingBound; 1940 sequence = func->cfg.nextSequence(), ++i) 1941 ret = buildLiveSets(BasicBlock::get(func->cfg.getRoot())); 1942 // reset marker 1943 for (ArrayList::Iterator bi = func->allBBlocks.iterator(); 1944 !bi.end(); bi.next()) 1945 BasicBlock::get(bi)->liveSet.marker = false; 1946 if (!ret) 1947 break; 1948 func->orderInstructions(this->insns); 1949 1950 ret = buildIntervals.run(func); 1951 if (!ret) 1952 break; 1953 ret = gcra.allocateRegisters(insns); 1954 if (ret) 1955 break; // success 1956 } 1957 INFO_DBG(prog->dbgFlags, REG_ALLOC, "RegAlloc done: %i\n", ret); 1958 1959 func->tlsSize = insertSpills.getStackSize(); 1960out: 1961 return ret; 1962} 1963 1964// TODO: check if modifying Instruction::join here breaks anything 1965void 1966GCRA::resolveSplitsAndMerges() 1967{ 1968 for (std::list<Instruction *>::iterator it = splits.begin(); 1969 it != splits.end(); 1970 ++it) { 1971 Instruction *split = *it; 1972 unsigned int reg = regs.idToBytes(split->getSrc(0)); 1973 for (int d = 0; split->defExists(d); ++d) { 1974 Value *v = split->getDef(d); 1975 v->reg.data.id = regs.bytesToId(v, reg); 1976 v->join = v; 1977 reg += v->reg.size; 1978 } 1979 } 1980 splits.clear(); 1981 1982 for (std::list<Instruction *>::iterator it = merges.begin(); 1983 it != merges.end(); 1984 ++it) { 1985 Instruction *merge = *it; 1986 unsigned int reg = regs.idToBytes(merge->getDef(0)); 1987 for (int s = 0; merge->srcExists(s); ++s) { 1988 Value *v = merge->getSrc(s); 1989 v->reg.data.id = regs.bytesToId(v, reg); 1990 v->join = v; 1991 // If the value is defined by a phi/union node, we also need to 1992 // perform the same fixup on that node's sources, since after RA 1993 // their registers should be identical. 1994 if (v->getInsn()->op == OP_PHI || v->getInsn()->op == OP_UNION) { 1995 Instruction *phi = v->getInsn(); 1996 for (int phis = 0; phi->srcExists(phis); ++phis) { 1997 phi->getSrc(phis)->join = v; 1998 phi->getSrc(phis)->reg.data.id = v->reg.data.id; 1999 } 2000 } 2001 reg += v->reg.size; 2002 } 2003 } 2004 merges.clear(); 2005} 2006 2007bool Program::registerAllocation() 2008{ 2009 RegAlloc ra(this); 2010 return ra.exec(); 2011} 2012 2013bool 2014RegAlloc::InsertConstraintsPass::exec(Function *ir) 2015{ 2016 constrList.clear(); 2017 2018 bool ret = run(ir, true, true); 2019 if (ret) 2020 ret = insertConstraintMoves(); 2021 return ret; 2022} 2023 2024// TODO: make part of texture insn 2025void 2026RegAlloc::InsertConstraintsPass::textureMask(TexInstruction *tex) 2027{ 2028 Value *def[4]; 2029 int c, k, d; 2030 uint8_t mask = 0; 2031 2032 for (d = 0, k = 0, c = 0; c < 4; ++c) { 2033 if (!(tex->tex.mask & (1 << c))) 2034 continue; 2035 if (tex->getDef(k)->refCount()) { 2036 mask |= 1 << c; 2037 def[d++] = tex->getDef(k); 2038 } 2039 ++k; 2040 } 2041 tex->tex.mask = mask; 2042 2043 for (c = 0; c < d; ++c) 2044 tex->setDef(c, def[c]); 2045 for (; c < 4; ++c) 2046 tex->setDef(c, NULL); 2047} 2048 2049bool 2050RegAlloc::InsertConstraintsPass::detectConflict(Instruction *cst, int s) 2051{ 2052 Value *v = cst->getSrc(s); 2053 2054 // current register allocation can't handle it if a value participates in 2055 // multiple constraints 2056 for (Value::UseIterator it = v->uses.begin(); it != v->uses.end(); ++it) { 2057 if (cst != (*it)->getInsn()) 2058 return true; 2059 } 2060 2061 // can start at s + 1 because detectConflict is called on all sources 2062 for (int c = s + 1; cst->srcExists(c); ++c) 2063 if (v == cst->getSrc(c)) 2064 return true; 2065 2066 Instruction *defi = v->getInsn(); 2067 2068 return (!defi || defi->constrainedDefs()); 2069} 2070 2071void 2072RegAlloc::InsertConstraintsPass::addConstraint(Instruction *i, int s, int n) 2073{ 2074 Instruction *cst; 2075 int d; 2076 2077 // first, look for an existing identical constraint op 2078 for (std::list<Instruction *>::iterator it = constrList.begin(); 2079 it != constrList.end(); 2080 ++it) { 2081 cst = (*it); 2082 if (!i->bb->dominatedBy(cst->bb)) 2083 break; 2084 for (d = 0; d < n; ++d) 2085 if (cst->getSrc(d) != i->getSrc(d + s)) 2086 break; 2087 if (d >= n) { 2088 for (d = 0; d < n; ++d, ++s) 2089 i->setSrc(s, cst->getDef(d)); 2090 return; 2091 } 2092 } 2093 cst = new_Instruction(func, OP_CONSTRAINT, i->dType); 2094 2095 for (d = 0; d < n; ++s, ++d) { 2096 cst->setDef(d, new_LValue(func, FILE_GPR)); 2097 cst->setSrc(d, i->getSrc(s)); 2098 i->setSrc(s, cst->getDef(d)); 2099 } 2100 i->bb->insertBefore(i, cst); 2101 2102 constrList.push_back(cst); 2103} 2104 2105// Add a dummy use of the pointer source of >= 8 byte loads after the load 2106// to prevent it from being assigned a register which overlapping the load's 2107// destination, which would produce random corruptions. 2108void 2109RegAlloc::InsertConstraintsPass::addHazard(Instruction *i, const ValueRef *src) 2110{ 2111 Instruction *hzd = new_Instruction(func, OP_NOP, TYPE_NONE); 2112 hzd->setSrc(0, src->get()); 2113 i->bb->insertAfter(i, hzd); 2114 2115} 2116 2117// b32 { %r0 %r1 %r2 %r3 } -> b128 %r0q 2118void 2119RegAlloc::InsertConstraintsPass::condenseDefs(Instruction *insn) 2120{ 2121 int n; 2122 for (n = 0; insn->defExists(n) && insn->def(n).getFile() == FILE_GPR; ++n); 2123 condenseDefs(insn, 0, n - 1); 2124} 2125 2126void 2127RegAlloc::InsertConstraintsPass::condenseDefs(Instruction *insn, 2128 const int a, const int b) 2129{ 2130 uint8_t size = 0; 2131 if (a >= b) 2132 return; 2133 for (int s = a; s <= b; ++s) 2134 size += insn->getDef(s)->reg.size; 2135 if (!size) 2136 return; 2137 2138 LValue *lval = new_LValue(func, FILE_GPR); 2139 lval->reg.size = size; 2140 2141 Instruction *split = new_Instruction(func, OP_SPLIT, typeOfSize(size)); 2142 split->setSrc(0, lval); 2143 for (int d = a; d <= b; ++d) { 2144 split->setDef(d - a, insn->getDef(d)); 2145 insn->setDef(d, NULL); 2146 } 2147 insn->setDef(a, lval); 2148 2149 for (int k = a + 1, d = b + 1; insn->defExists(d); ++d, ++k) { 2150 insn->setDef(k, insn->getDef(d)); 2151 insn->setDef(d, NULL); 2152 } 2153 // carry over predicate if any (mainly for OP_UNION uses) 2154 split->setPredicate(insn->cc, insn->getPredicate()); 2155 2156 insn->bb->insertAfter(insn, split); 2157 constrList.push_back(split); 2158} 2159 2160void 2161RegAlloc::InsertConstraintsPass::condenseSrcs(Instruction *insn, 2162 const int a, const int b) 2163{ 2164 uint8_t size = 0; 2165 if (a >= b) 2166 return; 2167 for (int s = a; s <= b; ++s) 2168 size += insn->getSrc(s)->reg.size; 2169 if (!size) 2170 return; 2171 LValue *lval = new_LValue(func, FILE_GPR); 2172 lval->reg.size = size; 2173 2174 Value *save[3]; 2175 insn->takeExtraSources(0, save); 2176 2177 Instruction *merge = new_Instruction(func, OP_MERGE, typeOfSize(size)); 2178 merge->setDef(0, lval); 2179 for (int s = a, i = 0; s <= b; ++s, ++i) { 2180 merge->setSrc(i, insn->getSrc(s)); 2181 } 2182 insn->moveSources(b + 1, a - b); 2183 insn->setSrc(a, lval); 2184 insn->bb->insertBefore(insn, merge); 2185 2186 insn->putExtraSources(0, save); 2187 2188 constrList.push_back(merge); 2189} 2190 2191bool 2192RegAlloc::InsertConstraintsPass::isScalarTexGM107(TexInstruction *tex) 2193{ 2194 if (tex->tex.sIndirectSrc >= 0 || 2195 tex->tex.rIndirectSrc >= 0 || 2196 tex->tex.derivAll) 2197 return false; 2198 2199 if (tex->tex.mask == 5 || tex->tex.mask == 6) 2200 return false; 2201 2202 switch (tex->op) { 2203 case OP_TEX: 2204 case OP_TXF: 2205 case OP_TXG: 2206 case OP_TXL: 2207 break; 2208 default: 2209 return false; 2210 } 2211 2212 // legal variants: 2213 // TEXS.1D.LZ 2214 // TEXS.2D 2215 // TEXS.2D.LZ 2216 // TEXS.2D.LL 2217 // TEXS.2D.DC 2218 // TEXS.2D.LL.DC 2219 // TEXS.2D.LZ.DC 2220 // TEXS.A2D 2221 // TEXS.A2D.LZ 2222 // TEXS.A2D.LZ.DC 2223 // TEXS.3D 2224 // TEXS.3D.LZ 2225 // TEXS.CUBE 2226 // TEXS.CUBE.LL 2227 2228 // TLDS.1D.LZ 2229 // TLDS.1D.LL 2230 // TLDS.2D.LZ 2231 // TLSD.2D.LZ.AOFFI 2232 // TLDS.2D.LZ.MZ 2233 // TLDS.2D.LL 2234 // TLDS.2D.LL.AOFFI 2235 // TLDS.A2D.LZ 2236 // TLDS.3D.LZ 2237 2238 // TLD4S: all 2D/RECT variants and only offset 2239 2240 switch (tex->op) { 2241 case OP_TEX: 2242 if (tex->tex.useOffsets) 2243 return false; 2244 2245 switch (tex->tex.target.getEnum()) { 2246 case TEX_TARGET_1D: 2247 case TEX_TARGET_2D_ARRAY_SHADOW: 2248 return tex->tex.levelZero; 2249 case TEX_TARGET_CUBE: 2250 return !tex->tex.levelZero; 2251 case TEX_TARGET_2D: 2252 case TEX_TARGET_2D_ARRAY: 2253 case TEX_TARGET_2D_SHADOW: 2254 case TEX_TARGET_3D: 2255 case TEX_TARGET_RECT: 2256 case TEX_TARGET_RECT_SHADOW: 2257 return true; 2258 default: 2259 return false; 2260 } 2261 2262 case OP_TXL: 2263 if (tex->tex.useOffsets) 2264 return false; 2265 2266 switch (tex->tex.target.getEnum()) { 2267 case TEX_TARGET_2D: 2268 case TEX_TARGET_2D_SHADOW: 2269 case TEX_TARGET_RECT: 2270 case TEX_TARGET_RECT_SHADOW: 2271 case TEX_TARGET_CUBE: 2272 return true; 2273 default: 2274 return false; 2275 } 2276 2277 case OP_TXF: 2278 switch (tex->tex.target.getEnum()) { 2279 case TEX_TARGET_1D: 2280 return !tex->tex.useOffsets; 2281 case TEX_TARGET_2D: 2282 case TEX_TARGET_RECT: 2283 return true; 2284 case TEX_TARGET_2D_ARRAY: 2285 case TEX_TARGET_2D_MS: 2286 case TEX_TARGET_3D: 2287 return !tex->tex.useOffsets && tex->tex.levelZero; 2288 default: 2289 return false; 2290 } 2291 2292 case OP_TXG: 2293 if (tex->tex.useOffsets > 1) 2294 return false; 2295 if (tex->tex.mask != 0x3 && tex->tex.mask != 0xf) 2296 return false; 2297 2298 switch (tex->tex.target.getEnum()) { 2299 case TEX_TARGET_2D: 2300 case TEX_TARGET_2D_MS: 2301 case TEX_TARGET_2D_SHADOW: 2302 case TEX_TARGET_RECT: 2303 case TEX_TARGET_RECT_SHADOW: 2304 return true; 2305 default: 2306 return false; 2307 } 2308 2309 default: 2310 return false; 2311 } 2312} 2313 2314void 2315RegAlloc::InsertConstraintsPass::handleScalarTexGM107(TexInstruction *tex) 2316{ 2317 int defCount = tex->defCount(0xff); 2318 int srcCount = tex->srcCount(0xff); 2319 2320 tex->tex.scalar = true; 2321 2322 // 1. handle defs 2323 if (defCount > 3) 2324 condenseDefs(tex, 2, 3); 2325 if (defCount > 1) 2326 condenseDefs(tex, 0, 1); 2327 2328 // 2. handle srcs 2329 // special case for TXF.A2D 2330 if (tex->op == OP_TXF && tex->tex.target == TEX_TARGET_2D_ARRAY) { 2331 assert(srcCount >= 3); 2332 condenseSrcs(tex, 1, 2); 2333 } else { 2334 if (srcCount > 3) 2335 condenseSrcs(tex, 2, 3); 2336 // only if we have more than 2 sources 2337 if (srcCount > 2) 2338 condenseSrcs(tex, 0, 1); 2339 } 2340 2341 assert(!tex->defExists(2) && !tex->srcExists(2)); 2342} 2343 2344void 2345RegAlloc::InsertConstraintsPass::texConstraintGM107(TexInstruction *tex) 2346{ 2347 int n, s; 2348 2349 if (isTextureOp(tex->op)) 2350 textureMask(tex); 2351 2352 if (targ->getChipset() < NVISA_GV100_CHIPSET) { 2353 if (isScalarTexGM107(tex)) { 2354 handleScalarTexGM107(tex); 2355 return; 2356 } 2357 2358 assert(!tex->tex.scalar); 2359 condenseDefs(tex); 2360 } else { 2361 if (isTextureOp(tex->op)) { 2362 int defCount = tex->defCount(0xff); 2363 if (defCount > 3) 2364 condenseDefs(tex, 2, 3); 2365 if (defCount > 1) 2366 condenseDefs(tex, 0, 1); 2367 } else { 2368 condenseDefs(tex); 2369 } 2370 } 2371 2372 if (isSurfaceOp(tex->op)) { 2373 int s = tex->tex.target.getDim() + 2374 (tex->tex.target.isArray() || tex->tex.target.isCube()); 2375 int n = 0; 2376 2377 switch (tex->op) { 2378 case OP_SUSTB: 2379 case OP_SUSTP: 2380 n = 4; 2381 break; 2382 case OP_SUREDB: 2383 case OP_SUREDP: 2384 if (tex->subOp == NV50_IR_SUBOP_ATOM_CAS) 2385 n = 2; 2386 break; 2387 default: 2388 break; 2389 } 2390 2391 if (s > 1) 2392 condenseSrcs(tex, 0, s - 1); 2393 if (n > 1) 2394 condenseSrcs(tex, 1, n); // do not condense the tex handle 2395 } else 2396 if (isTextureOp(tex->op)) { 2397 if (tex->op != OP_TXQ) { 2398 s = tex->tex.target.getArgCount() - tex->tex.target.isMS(); 2399 if (tex->op == OP_TXD) { 2400 // Indirect handle belongs in the first arg 2401 if (tex->tex.rIndirectSrc >= 0) 2402 s++; 2403 if (!tex->tex.target.isArray() && tex->tex.useOffsets) 2404 s++; 2405 } 2406 n = tex->srcCount(0xff, true) - s; 2407 // TODO: Is this necessary? Perhaps just has to be aligned to the 2408 // level that the first arg is, not necessarily to 4. This 2409 // requirement has not been rigorously verified, as it has been on 2410 // Kepler. 2411 if (n > 0 && n < 3) { 2412 if (tex->srcExists(n + s)) // move potential predicate out of the way 2413 tex->moveSources(n + s, 3 - n); 2414 while (n < 3) 2415 tex->setSrc(s + n++, new_LValue(func, FILE_GPR)); 2416 } 2417 } else { 2418 s = tex->srcCount(0xff, true); 2419 n = 0; 2420 } 2421 2422 if (s > 1) 2423 condenseSrcs(tex, 0, s - 1); 2424 if (n > 1) // NOTE: first call modified positions already 2425 condenseSrcs(tex, 1, n); 2426 } 2427} 2428 2429void 2430RegAlloc::InsertConstraintsPass::texConstraintNVE0(TexInstruction *tex) 2431{ 2432 if (isTextureOp(tex->op)) 2433 textureMask(tex); 2434 condenseDefs(tex); 2435 2436 if (tex->op == OP_SUSTB || tex->op == OP_SUSTP) { 2437 condenseSrcs(tex, 3, 6); 2438 } else 2439 if (isTextureOp(tex->op)) { 2440 int n = tex->srcCount(0xff, true); 2441 int s = n > 4 ? 4 : n; 2442 if (n > 4 && n < 7) { 2443 if (tex->srcExists(n)) // move potential predicate out of the way 2444 tex->moveSources(n, 7 - n); 2445 2446 while (n < 7) 2447 tex->setSrc(n++, new_LValue(func, FILE_GPR)); 2448 } 2449 if (s > 1) 2450 condenseSrcs(tex, 0, s - 1); 2451 if (n > 4) 2452 condenseSrcs(tex, 1, n - s); 2453 } 2454} 2455 2456void 2457RegAlloc::InsertConstraintsPass::texConstraintNVC0(TexInstruction *tex) 2458{ 2459 int n, s; 2460 2461 if (isTextureOp(tex->op)) 2462 textureMask(tex); 2463 2464 if (tex->op == OP_TXQ) { 2465 s = tex->srcCount(0xff); 2466 n = 0; 2467 } else if (isSurfaceOp(tex->op)) { 2468 s = tex->tex.target.getDim() + (tex->tex.target.isArray() || tex->tex.target.isCube()); 2469 if (tex->op == OP_SUSTB || tex->op == OP_SUSTP) 2470 n = 4; 2471 else 2472 n = 0; 2473 } else { 2474 s = tex->tex.target.getArgCount() - tex->tex.target.isMS(); 2475 if (!tex->tex.target.isArray() && 2476 (tex->tex.rIndirectSrc >= 0 || tex->tex.sIndirectSrc >= 0)) 2477 ++s; 2478 if (tex->op == OP_TXD && tex->tex.useOffsets) 2479 ++s; 2480 n = tex->srcCount(0xff) - s; 2481 assert(n <= 4); 2482 } 2483 2484 if (s > 1) 2485 condenseSrcs(tex, 0, s - 1); 2486 if (n > 1) // NOTE: first call modified positions already 2487 condenseSrcs(tex, 1, n); 2488 2489 condenseDefs(tex); 2490} 2491 2492void 2493RegAlloc::InsertConstraintsPass::texConstraintNV50(TexInstruction *tex) 2494{ 2495 Value *pred = tex->getPredicate(); 2496 if (pred) 2497 tex->setPredicate(tex->cc, NULL); 2498 2499 textureMask(tex); 2500 2501 assert(tex->defExists(0) && tex->srcExists(0)); 2502 // make src and def count match 2503 int c; 2504 for (c = 0; tex->srcExists(c) || tex->defExists(c); ++c) { 2505 if (!tex->srcExists(c)) 2506 tex->setSrc(c, new_LValue(func, tex->getSrc(0)->asLValue())); 2507 else 2508 insertConstraintMove(tex, c); 2509 if (!tex->defExists(c)) 2510 tex->setDef(c, new_LValue(func, tex->getDef(0)->asLValue())); 2511 } 2512 if (pred) 2513 tex->setPredicate(tex->cc, pred); 2514 condenseDefs(tex); 2515 condenseSrcs(tex, 0, c - 1); 2516} 2517 2518// Insert constraint markers for instructions whose multiple sources must be 2519// located in consecutive registers. 2520bool 2521RegAlloc::InsertConstraintsPass::visit(BasicBlock *bb) 2522{ 2523 TexInstruction *tex; 2524 Instruction *next; 2525 int s, size; 2526 2527 targ = bb->getProgram()->getTarget(); 2528 2529 for (Instruction *i = bb->getEntry(); i; i = next) { 2530 next = i->next; 2531 2532 if ((tex = i->asTex())) { 2533 switch (targ->getChipset() & ~0xf) { 2534 case 0x50: 2535 case 0x80: 2536 case 0x90: 2537 case 0xa0: 2538 texConstraintNV50(tex); 2539 break; 2540 case 0xc0: 2541 case 0xd0: 2542 texConstraintNVC0(tex); 2543 break; 2544 case 0xe0: 2545 case 0xf0: 2546 case 0x100: 2547 texConstraintNVE0(tex); 2548 break; 2549 case 0x110: 2550 case 0x120: 2551 case 0x130: 2552 case 0x140: 2553 case 0x160: 2554 texConstraintGM107(tex); 2555 break; 2556 default: 2557 break; 2558 } 2559 } else 2560 if (i->op == OP_EXPORT || i->op == OP_STORE) { 2561 for (size = typeSizeof(i->dType), s = 1; size > 0; ++s) { 2562 assert(i->srcExists(s)); 2563 size -= i->getSrc(s)->reg.size; 2564 } 2565 condenseSrcs(i, 1, s - 1); 2566 } else 2567 if (i->op == OP_LOAD || i->op == OP_VFETCH) { 2568 condenseDefs(i); 2569 if (i->src(0).isIndirect(0) && typeSizeof(i->dType) >= 8) 2570 addHazard(i, i->src(0).getIndirect(0)); 2571 if (i->src(0).isIndirect(1) && typeSizeof(i->dType) >= 8) 2572 addHazard(i, i->src(0).getIndirect(1)); 2573 if (i->op == OP_LOAD && i->fixed && targ->getChipset() < 0xc0) { 2574 // Add a hazard to make sure we keep the op around. These are used 2575 // for membars. 2576 Instruction *nop = new_Instruction(func, OP_NOP, i->dType); 2577 nop->setSrc(0, i->getDef(0)); 2578 i->bb->insertAfter(i, nop); 2579 } 2580 } else 2581 if (i->op == OP_UNION || 2582 i->op == OP_MERGE || 2583 i->op == OP_SPLIT) { 2584 constrList.push_back(i); 2585 } else 2586 if (i->op == OP_ATOM && i->subOp == NV50_IR_SUBOP_ATOM_CAS && 2587 targ->getChipset() < 0xc0) { 2588 // Like a hazard, but for a def. 2589 Instruction *nop = new_Instruction(func, OP_NOP, i->dType); 2590 nop->setSrc(0, i->getDef(0)); 2591 i->bb->insertAfter(i, nop); 2592 } 2593 } 2594 return true; 2595} 2596 2597void 2598RegAlloc::InsertConstraintsPass::insertConstraintMove(Instruction *cst, int s) 2599{ 2600 const uint8_t size = cst->src(s).getSize(); 2601 2602 assert(cst->getSrc(s)->defs.size() == 1); // still SSA 2603 2604 Instruction *defi = cst->getSrc(s)->defs.front()->getInsn(); 2605 2606 bool imm = defi->op == OP_MOV && 2607 defi->src(0).getFile() == FILE_IMMEDIATE; 2608 bool load = defi->op == OP_LOAD && 2609 defi->src(0).getFile() == FILE_MEMORY_CONST && 2610 !defi->src(0).isIndirect(0); 2611 // catch some cases where don't really need MOVs 2612 if (cst->getSrc(s)->refCount() == 1 && !defi->constrainedDefs()) { 2613 if (imm || load) { 2614 // Move the defi right before the cst. No point in expanding 2615 // the range. 2616 defi->bb->remove(defi); 2617 cst->bb->insertBefore(cst, defi); 2618 } 2619 return; 2620 } 2621 2622 LValue *lval = new_LValue(func, cst->src(s).getFile()); 2623 lval->reg.size = size; 2624 2625 Instruction *mov = new_Instruction(func, OP_MOV, typeOfSize(size)); 2626 mov->setDef(0, lval); 2627 mov->setSrc(0, cst->getSrc(s)); 2628 2629 if (load) { 2630 mov->op = OP_LOAD; 2631 mov->setSrc(0, defi->getSrc(0)); 2632 } else if (imm) { 2633 mov->setSrc(0, defi->getSrc(0)); 2634 } 2635 2636 if (defi->getPredicate()) 2637 mov->setPredicate(defi->cc, defi->getPredicate()); 2638 2639 cst->setSrc(s, mov->getDef(0)); 2640 cst->bb->insertBefore(cst, mov); 2641 2642 cst->getDef(0)->asLValue()->noSpill = 1; // doesn't help 2643} 2644 2645// Insert extra moves so that, if multiple register constraints on a value are 2646// in conflict, these conflicts can be resolved. 2647bool 2648RegAlloc::InsertConstraintsPass::insertConstraintMoves() 2649{ 2650 for (std::list<Instruction *>::iterator it = constrList.begin(); 2651 it != constrList.end(); 2652 ++it) { 2653 Instruction *cst = *it; 2654 Instruction *mov; 2655 2656 if (cst->op == OP_SPLIT && false) { 2657 // spilling splits is annoying, just make sure they're separate 2658 for (int d = 0; cst->defExists(d); ++d) { 2659 if (!cst->getDef(d)->refCount()) 2660 continue; 2661 LValue *lval = new_LValue(func, cst->def(d).getFile()); 2662 const uint8_t size = cst->def(d).getSize(); 2663 lval->reg.size = size; 2664 2665 mov = new_Instruction(func, OP_MOV, typeOfSize(size)); 2666 mov->setSrc(0, lval); 2667 mov->setDef(0, cst->getDef(d)); 2668 cst->setDef(d, mov->getSrc(0)); 2669 cst->bb->insertAfter(cst, mov); 2670 2671 cst->getSrc(0)->asLValue()->noSpill = 1; 2672 mov->getSrc(0)->asLValue()->noSpill = 1; 2673 } 2674 } else 2675 if (cst->op == OP_MERGE || cst->op == OP_UNION) { 2676 for (int s = 0; cst->srcExists(s); ++s) { 2677 const uint8_t size = cst->src(s).getSize(); 2678 2679 if (!cst->getSrc(s)->defs.size()) { 2680 mov = new_Instruction(func, OP_NOP, typeOfSize(size)); 2681 mov->setDef(0, cst->getSrc(s)); 2682 cst->bb->insertBefore(cst, mov); 2683 continue; 2684 } 2685 2686 insertConstraintMove(cst, s); 2687 } 2688 } 2689 } 2690 2691 return true; 2692} 2693 2694} // namespace nv50_ir 2695