1/* 2 * Copyright 2011 Christoph Bumiller 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 */ 22 23#include "codegen/nv50_ir.h" 24#include "codegen/nv50_ir_target.h" 25#include "codegen/nv50_ir_driver.h" 26 27extern "C" { 28#include "nouveau_debug.h" 29} 30 31namespace nv50_ir { 32 33Modifier::Modifier(operation op) 34{ 35 switch (op) { 36 case OP_NEG: bits = NV50_IR_MOD_NEG; break; 37 case OP_ABS: bits = NV50_IR_MOD_ABS; break; 38 case OP_SAT: bits = NV50_IR_MOD_SAT; break; 39 case OP_NOT: bits = NV50_IR_MOD_NOT; break; 40 default: 41 bits = 0; 42 break; 43 } 44} 45 46Modifier Modifier::operator*(const Modifier m) const 47{ 48 unsigned int a, b, c; 49 50 b = m.bits; 51 if (this->bits & NV50_IR_MOD_ABS) 52 b &= ~NV50_IR_MOD_NEG; 53 54 a = (this->bits ^ b) & (NV50_IR_MOD_NOT | NV50_IR_MOD_NEG); 55 c = (this->bits | m.bits) & (NV50_IR_MOD_ABS | NV50_IR_MOD_SAT); 56 57 return Modifier(a | c); 58} 59 60ValueRef::ValueRef(Value *v) : value(NULL), insn(NULL) 61{ 62 indirect[0] = -1; 63 indirect[1] = -1; 64 usedAsPtr = false; 65 set(v); 66} 67 68ValueRef::ValueRef(const ValueRef& ref) : value(NULL), insn(ref.insn) 69{ 70 set(ref); 71 usedAsPtr = ref.usedAsPtr; 72} 73 74ValueRef::~ValueRef() 75{ 76 this->set(NULL); 77} 78 79bool ValueRef::getImmediate(ImmediateValue &imm) const 80{ 81 const ValueRef *src = this; 82 Modifier m; 83 DataType type = src->insn->sType; 84 85 while (src) { 86 if (src->mod) { 87 if (src->insn->sType != type) 88 break; 89 m *= src->mod; 90 } 91 if (src->getFile() == FILE_IMMEDIATE) { 92 imm = *(src->value->asImm()); 93 // The immediate's type isn't required to match its use, it's 94 // more of a hint; applying a modifier makes use of that hint. 95 imm.reg.type = type; 96 m.applyTo(imm); 97 return true; 98 } 99 100 Instruction *insn = src->value->getUniqueInsn(); 101 102 if (insn && insn->op == OP_MOV) { 103 src = &insn->src(0); 104 if (src->mod) 105 WARN("OP_MOV with modifier encountered !\n"); 106 } else { 107 src = NULL; 108 } 109 } 110 return false; 111} 112 113ValueDef::ValueDef(Value *v) : value(NULL), origin(NULL), insn(NULL) 114{ 115 set(v); 116} 117 118ValueDef::ValueDef(const ValueDef& def) : value(NULL), origin(NULL), insn(NULL) 119{ 120 set(def.get()); 121} 122 123ValueDef::~ValueDef() 124{ 125 this->set(NULL); 126} 127 128void 129ValueRef::set(const ValueRef &ref) 130{ 131 this->set(ref.get()); 132 mod = ref.mod; 133 indirect[0] = ref.indirect[0]; 134 indirect[1] = ref.indirect[1]; 135} 136 137void 138ValueRef::set(Value *refVal) 139{ 140 if (value == refVal) 141 return; 142 if (value) 143 value->uses.erase(this); 144 if (refVal) 145 refVal->uses.insert(this); 146 147 value = refVal; 148} 149 150void 151ValueDef::set(Value *defVal) 152{ 153 if (value == defVal) 154 return; 155 if (value) 156 value->defs.remove(this); 157 if (defVal) 158 defVal->defs.push_back(this); 159 160 value = defVal; 161} 162 163// Check if we can replace this definition's value by the value in @rep, 164// including the source modifiers, i.e. make sure that all uses support 165// @rep.mod. 166bool 167ValueDef::mayReplace(const ValueRef &rep) 168{ 169 if (!rep.mod) 170 return true; 171 172 if (!insn || !insn->bb) // Unbound instruction ? 173 return false; 174 175 const Target *target = insn->bb->getProgram()->getTarget(); 176 177 for (Value::UseIterator it = value->uses.begin(); it != value->uses.end(); 178 ++it) { 179 Instruction *insn = (*it)->getInsn(); 180 int s = -1; 181 182 for (int i = 0; insn->srcExists(i); ++i) { 183 if (insn->src(i).get() == value) { 184 // If there are multiple references to us we'd have to check if the 185 // combination of mods is still supported, but just bail for now. 186 if (&insn->src(i) != (*it)) 187 return false; 188 s = i; 189 } 190 } 191 assert(s >= 0); // integrity of uses list 192 193 if (!target->isModSupported(insn, s, rep.mod)) 194 return false; 195 } 196 return true; 197} 198 199void 200ValueDef::replace(const ValueRef &repVal, bool doSet) 201{ 202 assert(mayReplace(repVal)); 203 204 if (value == repVal.get()) 205 return; 206 207 while (!value->uses.empty()) { 208 ValueRef *ref = *value->uses.begin(); 209 ref->set(repVal.get()); 210 ref->mod *= repVal.mod; 211 } 212 213 if (doSet) 214 set(repVal.get()); 215} 216 217Value::Value() : id(-1) 218{ 219 join = this; 220 memset(®, 0, sizeof(reg)); 221 reg.size = 4; 222} 223 224LValue::LValue(Function *fn, DataFile file) 225{ 226 reg.file = file; 227 reg.size = (file != FILE_PREDICATE) ? 4 : 1; 228 reg.data.id = -1; 229 230 compMask = 0; 231 compound = 0; 232 ssa = 0; 233 fixedReg = 0; 234 noSpill = 0; 235 236 fn->add(this, this->id); 237} 238 239LValue::LValue(Function *fn, LValue *lval) 240{ 241 assert(lval); 242 243 reg.file = lval->reg.file; 244 reg.size = lval->reg.size; 245 reg.data.id = -1; 246 247 compMask = 0; 248 compound = 0; 249 ssa = 0; 250 fixedReg = 0; 251 noSpill = 0; 252 253 fn->add(this, this->id); 254} 255 256LValue * 257LValue::clone(ClonePolicy<Function>& pol) const 258{ 259 LValue *that = new_LValue(pol.context(), reg.file); 260 261 pol.set<Value>(this, that); 262 263 that->reg.size = this->reg.size; 264 that->reg.type = this->reg.type; 265 that->reg.data = this->reg.data; 266 267 return that; 268} 269 270bool 271LValue::isUniform() const 272{ 273 if (defs.size() > 1) 274 return false; 275 Instruction *insn = getInsn(); 276 if (!insn) 277 return false; 278 // let's not try too hard here for now ... 279 return !insn->srcExists(1) && insn->getSrc(0)->isUniform(); 280} 281 282Symbol::Symbol(Program *prog, DataFile f, ubyte fidx) 283{ 284 baseSym = NULL; 285 286 reg.file = f; 287 reg.fileIndex = fidx; 288 reg.data.offset = 0; 289 290 prog->add(this, this->id); 291} 292 293Symbol * 294Symbol::clone(ClonePolicy<Function>& pol) const 295{ 296 Program *prog = pol.context()->getProgram(); 297 298 Symbol *that = new_Symbol(prog, reg.file, reg.fileIndex); 299 300 pol.set<Value>(this, that); 301 302 that->reg.size = this->reg.size; 303 that->reg.type = this->reg.type; 304 that->reg.data = this->reg.data; 305 306 that->baseSym = this->baseSym; 307 308 return that; 309} 310 311bool 312Symbol::isUniform() const 313{ 314 return 315 reg.file != FILE_SYSTEM_VALUE && 316 reg.file != FILE_MEMORY_LOCAL && 317 reg.file != FILE_SHADER_INPUT; 318} 319 320ImmediateValue::ImmediateValue(Program *prog, uint32_t uval) 321{ 322 memset(®, 0, sizeof(reg)); 323 324 reg.file = FILE_IMMEDIATE; 325 reg.size = 4; 326 reg.type = TYPE_U32; 327 328 reg.data.u32 = uval; 329 330 prog->add(this, this->id); 331} 332 333ImmediateValue::ImmediateValue(Program *prog, float fval) 334{ 335 memset(®, 0, sizeof(reg)); 336 337 reg.file = FILE_IMMEDIATE; 338 reg.size = 4; 339 reg.type = TYPE_F32; 340 341 reg.data.f32 = fval; 342 343 prog->add(this, this->id); 344} 345 346ImmediateValue::ImmediateValue(Program *prog, double dval) 347{ 348 memset(®, 0, sizeof(reg)); 349 350 reg.file = FILE_IMMEDIATE; 351 reg.size = 8; 352 reg.type = TYPE_F64; 353 354 reg.data.f64 = dval; 355 356 prog->add(this, this->id); 357} 358 359ImmediateValue::ImmediateValue(const ImmediateValue *proto, DataType ty) 360{ 361 reg = proto->reg; 362 363 reg.type = ty; 364 reg.size = typeSizeof(ty); 365} 366 367ImmediateValue * 368ImmediateValue::clone(ClonePolicy<Function>& pol) const 369{ 370 Program *prog = pol.context()->getProgram(); 371 ImmediateValue *that = new_ImmediateValue(prog, 0u); 372 373 pol.set<Value>(this, that); 374 375 that->reg.size = this->reg.size; 376 that->reg.type = this->reg.type; 377 that->reg.data = this->reg.data; 378 379 return that; 380} 381 382bool 383ImmediateValue::isInteger(const int i) const 384{ 385 switch (reg.type) { 386 case TYPE_S8: 387 return reg.data.s8 == i; 388 case TYPE_U8: 389 return reg.data.u8 == i; 390 case TYPE_S16: 391 return reg.data.s16 == i; 392 case TYPE_U16: 393 return reg.data.u16 == i; 394 case TYPE_S32: 395 case TYPE_U32: 396 return reg.data.s32 == i; // as if ... 397 case TYPE_S64: 398 case TYPE_U64: 399 return reg.data.s64 == i; // as if ... 400 case TYPE_F32: 401 return reg.data.f32 == static_cast<float>(i); 402 case TYPE_F64: 403 return reg.data.f64 == static_cast<double>(i); 404 default: 405 return false; 406 } 407} 408 409bool 410ImmediateValue::isNegative() const 411{ 412 switch (reg.type) { 413 case TYPE_S8: return reg.data.s8 < 0; 414 case TYPE_S16: return reg.data.s16 < 0; 415 case TYPE_S32: 416 case TYPE_U32: return reg.data.s32 < 0; 417 case TYPE_F32: return reg.data.u32 & (1 << 31); 418 case TYPE_F64: return reg.data.u64 & (1ULL << 63); 419 default: 420 return false; 421 } 422} 423 424bool 425ImmediateValue::isPow2() const 426{ 427 if (reg.type == TYPE_U64 || reg.type == TYPE_S64) 428 return util_is_power_of_two_or_zero64(reg.data.u64); 429 else 430 return util_is_power_of_two_or_zero(reg.data.u32); 431} 432 433void 434ImmediateValue::applyLog2() 435{ 436 switch (reg.type) { 437 case TYPE_S8: 438 case TYPE_S16: 439 case TYPE_S32: 440 assert(!this->isNegative()); 441 FALLTHROUGH; 442 case TYPE_U8: 443 case TYPE_U16: 444 case TYPE_U32: 445 reg.data.u32 = util_logbase2(reg.data.u32); 446 break; 447 case TYPE_S64: 448 assert(!this->isNegative()); 449 FALLTHROUGH; 450 case TYPE_U64: 451 reg.data.u64 = util_logbase2_64(reg.data.u64); 452 break; 453 case TYPE_F32: 454 reg.data.f32 = log2f(reg.data.f32); 455 break; 456 case TYPE_F64: 457 reg.data.f64 = log2(reg.data.f64); 458 break; 459 default: 460 assert(0); 461 break; 462 } 463} 464 465bool 466ImmediateValue::compare(CondCode cc, float fval) const 467{ 468 if (reg.type != TYPE_F32) 469 ERROR("immediate value is not of type f32"); 470 471 switch (static_cast<CondCode>(cc & 7)) { 472 case CC_TR: return true; 473 case CC_FL: return false; 474 case CC_LT: return reg.data.f32 < fval; 475 case CC_LE: return reg.data.f32 <= fval; 476 case CC_GT: return reg.data.f32 > fval; 477 case CC_GE: return reg.data.f32 >= fval; 478 case CC_EQ: return reg.data.f32 == fval; 479 case CC_NE: return reg.data.f32 != fval; 480 default: 481 assert(0); 482 return false; 483 } 484} 485 486ImmediateValue& 487ImmediateValue::operator=(const ImmediateValue &that) 488{ 489 this->reg = that.reg; 490 return (*this); 491} 492 493bool 494Value::interfers(const Value *that) const 495{ 496 uint32_t idA, idB; 497 498 if (that->reg.file != reg.file || that->reg.fileIndex != reg.fileIndex) 499 return false; 500 if (this->asImm()) 501 return false; 502 503 if (this->asSym()) { 504 idA = this->join->reg.data.offset; 505 idB = that->join->reg.data.offset; 506 } else { 507 idA = this->join->reg.data.id * MIN2(this->reg.size, 4); 508 idB = that->join->reg.data.id * MIN2(that->reg.size, 4); 509 } 510 511 if (idA < idB) 512 return (idA + this->reg.size > idB); 513 else 514 if (idA > idB) 515 return (idB + that->reg.size > idA); 516 else 517 return (idA == idB); 518} 519 520bool 521Value::equals(const Value *that, bool strict) const 522{ 523 if (strict) 524 return this == that; 525 526 if (that->reg.file != reg.file || that->reg.fileIndex != reg.fileIndex) 527 return false; 528 if (that->reg.size != this->reg.size) 529 return false; 530 531 if (that->reg.data.id != this->reg.data.id) 532 return false; 533 534 return true; 535} 536 537bool 538ImmediateValue::equals(const Value *that, bool strict) const 539{ 540 const ImmediateValue *imm = that->asImm(); 541 if (!imm) 542 return false; 543 return reg.data.u64 == imm->reg.data.u64; 544} 545 546bool 547Symbol::equals(const Value *that, bool strict) const 548{ 549 if (reg.file != that->reg.file || reg.fileIndex != that->reg.fileIndex) 550 return false; 551 assert(that->asSym()); 552 553 if (this->baseSym != that->asSym()->baseSym) 554 return false; 555 556 if (reg.file == FILE_SYSTEM_VALUE) 557 return (this->reg.data.sv.sv == that->reg.data.sv.sv && 558 this->reg.data.sv.index == that->reg.data.sv.index); 559 return this->reg.data.offset == that->reg.data.offset; 560} 561 562void Instruction::init() 563{ 564 next = prev = 0; 565 serial = 0; 566 567 cc = CC_ALWAYS; 568 rnd = ROUND_N; 569 cache = CACHE_CA; 570 subOp = 0; 571 572 saturate = 0; 573 join = 0; 574 exit = 0; 575 terminator = 0; 576 ftz = 0; 577 dnz = 0; 578 perPatch = 0; 579 fixed = 0; 580 encSize = 0; 581 ipa = 0; 582 mask = 0; 583 precise = 0; 584 585 lanes = 0xf; 586 587 postFactor = 0; 588 589 predSrc = -1; 590 flagsDef = -1; 591 flagsSrc = -1; 592 593 sched = 0; 594 bb = NULL; 595} 596 597Instruction::Instruction() 598{ 599 init(); 600 601 op = OP_NOP; 602 dType = sType = TYPE_F32; 603 604 id = -1; 605} 606 607Instruction::Instruction(Function *fn, operation opr, DataType ty) 608{ 609 init(); 610 611 op = opr; 612 dType = sType = ty; 613 614 fn->add(this, id); 615} 616 617Instruction::~Instruction() 618{ 619 if (bb) { 620 Function *fn = bb->getFunction(); 621 bb->remove(this); 622 fn->allInsns.remove(id); 623 } 624 625 for (int s = 0; srcExists(s); ++s) 626 setSrc(s, NULL); 627 // must unlink defs too since the list pointers will get deallocated 628 for (int d = 0; defExists(d); ++d) 629 setDef(d, NULL); 630} 631 632void 633Instruction::setDef(int i, Value *val) 634{ 635 int size = defs.size(); 636 if (i >= size) { 637 defs.resize(i + 1); 638 while (size <= i) 639 defs[size++].setInsn(this); 640 } 641 defs[i].set(val); 642} 643 644void 645Instruction::setSrc(int s, Value *val) 646{ 647 int size = srcs.size(); 648 if (s >= size) { 649 srcs.resize(s + 1); 650 while (size <= s) 651 srcs[size++].setInsn(this); 652 } 653 srcs[s].set(val); 654} 655 656void 657Instruction::setSrc(int s, const ValueRef& ref) 658{ 659 setSrc(s, ref.get()); 660 srcs[s].mod = ref.mod; 661} 662 663void 664Instruction::swapSources(int a, int b) 665{ 666 Value *value = srcs[a].get(); 667 Modifier m = srcs[a].mod; 668 669 setSrc(a, srcs[b]); 670 671 srcs[b].set(value); 672 srcs[b].mod = m; 673} 674 675static inline void moveSourcesAdjustIndex(int8_t &index, int s, int delta) 676{ 677 if (index >= s) 678 index += delta; 679 else 680 if ((delta < 0) && (index >= (s + delta))) 681 index = -1; 682} 683 684// Moves sources [@s,last_source] by @delta. 685// If @delta < 0, sources [@s - abs(@delta), @s) are erased. 686void 687Instruction::moveSources(const int s, const int delta) 688{ 689 if (delta == 0) 690 return; 691 assert(s + delta >= 0); 692 693 int k; 694 695 for (k = 0; srcExists(k); ++k) { 696 for (int i = 0; i < 2; ++i) 697 moveSourcesAdjustIndex(src(k).indirect[i], s, delta); 698 } 699 moveSourcesAdjustIndex(predSrc, s, delta); 700 moveSourcesAdjustIndex(flagsSrc, s, delta); 701 if (asTex()) { 702 TexInstruction *tex = asTex(); 703 moveSourcesAdjustIndex(tex->tex.rIndirectSrc, s, delta); 704 moveSourcesAdjustIndex(tex->tex.sIndirectSrc, s, delta); 705 } 706 707 if (delta > 0) { 708 --k; 709 for (int p = k + delta; k >= s; --k, --p) 710 setSrc(p, src(k)); 711 } else { 712 int p; 713 for (p = s; p < k; ++p) 714 setSrc(p + delta, src(p)); 715 for (; (p + delta) < k; ++p) 716 setSrc(p + delta, NULL); 717 } 718} 719 720void 721Instruction::takeExtraSources(int s, Value *values[3]) 722{ 723 values[0] = getIndirect(s, 0); 724 if (values[0]) 725 setIndirect(s, 0, NULL); 726 727 values[1] = getIndirect(s, 1); 728 if (values[1]) 729 setIndirect(s, 1, NULL); 730 731 values[2] = getPredicate(); 732 if (values[2]) 733 setPredicate(cc, NULL); 734} 735 736void 737Instruction::putExtraSources(int s, Value *values[3]) 738{ 739 if (values[0]) 740 setIndirect(s, 0, values[0]); 741 if (values[1]) 742 setIndirect(s, 1, values[1]); 743 if (values[2]) 744 setPredicate(cc, values[2]); 745} 746 747Instruction * 748Instruction::clone(ClonePolicy<Function>& pol, Instruction *i) const 749{ 750 if (!i) 751 i = new_Instruction(pol.context(), op, dType); 752#if !defined(NDEBUG) && defined(__cpp_rtti) 753 assert(typeid(*i) == typeid(*this)); 754#endif 755 756 pol.set<Instruction>(this, i); 757 758 i->sType = sType; 759 760 i->rnd = rnd; 761 i->cache = cache; 762 i->subOp = subOp; 763 764 i->saturate = saturate; 765 i->join = join; 766 i->exit = exit; 767 i->mask = mask; 768 i->ftz = ftz; 769 i->dnz = dnz; 770 i->ipa = ipa; 771 i->lanes = lanes; 772 i->perPatch = perPatch; 773 774 i->postFactor = postFactor; 775 776 for (int d = 0; defExists(d); ++d) 777 i->setDef(d, pol.get(getDef(d))); 778 779 for (int s = 0; srcExists(s); ++s) { 780 i->setSrc(s, pol.get(getSrc(s))); 781 i->src(s).mod = src(s).mod; 782 } 783 784 i->cc = cc; 785 i->predSrc = predSrc; 786 i->flagsDef = flagsDef; 787 i->flagsSrc = flagsSrc; 788 789 return i; 790} 791 792unsigned int 793Instruction::defCount(unsigned int mask, bool singleFile) const 794{ 795 unsigned int i, n; 796 797 if (singleFile) { 798 unsigned int d = ffs(mask); 799 if (!d) 800 return 0; 801 for (i = d--; defExists(i); ++i) 802 if (getDef(i)->reg.file != getDef(d)->reg.file) 803 mask &= ~(1 << i); 804 } 805 806 for (n = 0, i = 0; this->defExists(i); ++i, mask >>= 1) 807 n += mask & 1; 808 return n; 809} 810 811unsigned int 812Instruction::srcCount(unsigned int mask, bool singleFile) const 813{ 814 unsigned int i, n; 815 816 if (singleFile) { 817 unsigned int s = ffs(mask); 818 if (!s) 819 return 0; 820 for (i = s--; srcExists(i); ++i) 821 if (getSrc(i)->reg.file != getSrc(s)->reg.file) 822 mask &= ~(1 << i); 823 } 824 825 for (n = 0, i = 0; this->srcExists(i); ++i, mask >>= 1) 826 n += mask & 1; 827 return n; 828} 829 830bool 831Instruction::setIndirect(int s, int dim, Value *value) 832{ 833 assert(this->srcExists(s)); 834 835 int p = srcs[s].indirect[dim]; 836 if (p < 0) { 837 if (!value) 838 return true; 839 p = srcs.size(); 840 while (p > 0 && !srcExists(p - 1)) 841 --p; 842 } 843 setSrc(p, value); 844 srcs[p].usedAsPtr = (value != 0); 845 srcs[s].indirect[dim] = value ? p : -1; 846 return true; 847} 848 849bool 850Instruction::setPredicate(CondCode ccode, Value *value) 851{ 852 cc = ccode; 853 854 if (!value) { 855 if (predSrc >= 0) { 856 srcs[predSrc].set(NULL); 857 predSrc = -1; 858 } 859 return true; 860 } 861 862 if (predSrc < 0) { 863 predSrc = srcs.size(); 864 while (predSrc > 0 && !srcExists(predSrc - 1)) 865 --predSrc; 866 } 867 868 setSrc(predSrc, value); 869 return true; 870} 871 872bool 873Instruction::writesPredicate() const 874{ 875 for (int d = 0; defExists(d); ++d) 876 if (getDef(d)->inFile(FILE_PREDICATE) || getDef(d)->inFile(FILE_FLAGS)) 877 return true; 878 return false; 879} 880 881bool 882Instruction::canCommuteDefSrc(const Instruction *i) const 883{ 884 for (int d = 0; defExists(d); ++d) 885 for (int s = 0; i->srcExists(s); ++s) 886 if (getDef(d)->interfers(i->getSrc(s))) 887 return false; 888 return true; 889} 890 891bool 892Instruction::canCommuteDefDef(const Instruction *i) const 893{ 894 for (int d = 0; defExists(d); ++d) 895 for (int c = 0; i->defExists(c); ++c) 896 if (getDef(d)->interfers(i->getDef(c))) 897 return false; 898 return true; 899} 900 901bool 902Instruction::isCommutationLegal(const Instruction *i) const 903{ 904 return canCommuteDefDef(i) && 905 canCommuteDefSrc(i) && 906 i->canCommuteDefSrc(this); 907} 908 909TexInstruction::TexInstruction(Function *fn, operation op) 910 : Instruction(fn, op, TYPE_F32), tex() 911{ 912 tex.rIndirectSrc = -1; 913 tex.sIndirectSrc = -1; 914 915 if (op == OP_TXF) 916 sType = TYPE_U32; 917} 918 919TexInstruction::~TexInstruction() 920{ 921 for (int c = 0; c < 3; ++c) { 922 dPdx[c].set(NULL); 923 dPdy[c].set(NULL); 924 } 925 for (int n = 0; n < 4; ++n) 926 for (int c = 0; c < 3; ++c) 927 offset[n][c].set(NULL); 928} 929 930TexInstruction * 931TexInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const 932{ 933 TexInstruction *tex = (i ? static_cast<TexInstruction *>(i) : 934 new_TexInstruction(pol.context(), op)); 935 936 Instruction::clone(pol, tex); 937 938 tex->tex = this->tex; 939 940 if (op == OP_TXD) { 941 for (unsigned int c = 0; c < tex->tex.target.getDim(); ++c) { 942 tex->dPdx[c].set(dPdx[c]); 943 tex->dPdy[c].set(dPdy[c]); 944 } 945 } 946 947 for (int n = 0; n < tex->tex.useOffsets; ++n) 948 for (int c = 0; c < 3; ++c) 949 tex->offset[n][c].set(offset[n][c]); 950 951 return tex; 952} 953 954const struct TexInstruction::Target::Desc TexInstruction::Target::descTable[] = 955{ 956 { "1D", 1, 1, false, false, false }, 957 { "2D", 2, 2, false, false, false }, 958 { "2D_MS", 2, 3, false, false, false }, 959 { "3D", 3, 3, false, false, false }, 960 { "CUBE", 2, 3, false, true, false }, 961 { "1D_SHADOW", 1, 1, false, false, true }, 962 { "2D_SHADOW", 2, 2, false, false, true }, 963 { "CUBE_SHADOW", 2, 3, false, true, true }, 964 { "1D_ARRAY", 1, 2, true, false, false }, 965 { "2D_ARRAY", 2, 3, true, false, false }, 966 { "2D_MS_ARRAY", 2, 4, true, false, false }, 967 { "CUBE_ARRAY", 2, 4, true, true, false }, 968 { "1D_ARRAY_SHADOW", 1, 2, true, false, true }, 969 { "2D_ARRAY_SHADOW", 2, 3, true, false, true }, 970 { "RECT", 2, 2, false, false, false }, 971 { "RECT_SHADOW", 2, 2, false, false, true }, 972 { "CUBE_ARRAY_SHADOW", 2, 4, true, true, true }, 973 { "BUFFER", 1, 1, false, false, false }, 974}; 975 976const struct TexInstruction::ImgFormatDesc TexInstruction::formatTable[] = 977{ 978 { "NONE", 0, { 0, 0, 0, 0 }, UINT }, 979 980 { "RGBA32F", 4, { 32, 32, 32, 32 }, FLOAT }, 981 { "RGBA16F", 4, { 16, 16, 16, 16 }, FLOAT }, 982 { "RG32F", 2, { 32, 32, 0, 0 }, FLOAT }, 983 { "RG16F", 2, { 16, 16, 0, 0 }, FLOAT }, 984 { "R11G11B10F", 3, { 11, 11, 10, 0 }, FLOAT }, 985 { "R32F", 1, { 32, 0, 0, 0 }, FLOAT }, 986 { "R16F", 1, { 16, 0, 0, 0 }, FLOAT }, 987 988 { "RGBA32UI", 4, { 32, 32, 32, 32 }, UINT }, 989 { "RGBA16UI", 4, { 16, 16, 16, 16 }, UINT }, 990 { "RGB10A2UI", 4, { 10, 10, 10, 2 }, UINT }, 991 { "RGBA8UI", 4, { 8, 8, 8, 8 }, UINT }, 992 { "RG32UI", 2, { 32, 32, 0, 0 }, UINT }, 993 { "RG16UI", 2, { 16, 16, 0, 0 }, UINT }, 994 { "RG8UI", 2, { 8, 8, 0, 0 }, UINT }, 995 { "R32UI", 1, { 32, 0, 0, 0 }, UINT }, 996 { "R16UI", 1, { 16, 0, 0, 0 }, UINT }, 997 { "R8UI", 1, { 8, 0, 0, 0 }, UINT }, 998 999 { "RGBA32I", 4, { 32, 32, 32, 32 }, SINT }, 1000 { "RGBA16I", 4, { 16, 16, 16, 16 }, SINT }, 1001 { "RGBA8I", 4, { 8, 8, 8, 8 }, SINT }, 1002 { "RG32I", 2, { 32, 32, 0, 0 }, SINT }, 1003 { "RG16I", 2, { 16, 16, 0, 0 }, SINT }, 1004 { "RG8I", 2, { 8, 8, 0, 0 }, SINT }, 1005 { "R32I", 1, { 32, 0, 0, 0 }, SINT }, 1006 { "R16I", 1, { 16, 0, 0, 0 }, SINT }, 1007 { "R8I", 1, { 8, 0, 0, 0 }, SINT }, 1008 1009 { "RGBA16", 4, { 16, 16, 16, 16 }, UNORM }, 1010 { "RGB10A2", 4, { 10, 10, 10, 2 }, UNORM }, 1011 { "RGBA8", 4, { 8, 8, 8, 8 }, UNORM }, 1012 { "RG16", 2, { 16, 16, 0, 0 }, UNORM }, 1013 { "RG8", 2, { 8, 8, 0, 0 }, UNORM }, 1014 { "R16", 1, { 16, 0, 0, 0 }, UNORM }, 1015 { "R8", 1, { 8, 0, 0, 0 }, UNORM }, 1016 1017 { "RGBA16_SNORM", 4, { 16, 16, 16, 16 }, SNORM }, 1018 { "RGBA8_SNORM", 4, { 8, 8, 8, 8 }, SNORM }, 1019 { "RG16_SNORM", 2, { 16, 16, 0, 0 }, SNORM }, 1020 { "RG8_SNORM", 2, { 8, 8, 0, 0 }, SNORM }, 1021 { "R16_SNORM", 1, { 16, 0, 0, 0 }, SNORM }, 1022 { "R8_SNORM", 1, { 8, 0, 0, 0 }, SNORM }, 1023 1024 { "BGRA8", 4, { 8, 8, 8, 8 }, UNORM, true }, 1025}; 1026 1027const struct TexInstruction::ImgFormatDesc * 1028TexInstruction::translateImgFormat(enum pipe_format format) 1029{ 1030 1031#define FMT_CASE(a, b) \ 1032 case PIPE_FORMAT_ ## a: return &formatTable[nv50_ir::FMT_ ## b] 1033 1034 switch (format) { 1035 FMT_CASE(NONE, NONE); 1036 1037 FMT_CASE(R32G32B32A32_FLOAT, RGBA32F); 1038 FMT_CASE(R16G16B16A16_FLOAT, RGBA16F); 1039 FMT_CASE(R32G32_FLOAT, RG32F); 1040 FMT_CASE(R16G16_FLOAT, RG16F); 1041 FMT_CASE(R11G11B10_FLOAT, R11G11B10F); 1042 FMT_CASE(R32_FLOAT, R32F); 1043 FMT_CASE(R16_FLOAT, R16F); 1044 1045 FMT_CASE(R32G32B32A32_UINT, RGBA32UI); 1046 FMT_CASE(R16G16B16A16_UINT, RGBA16UI); 1047 FMT_CASE(R10G10B10A2_UINT, RGB10A2UI); 1048 FMT_CASE(R8G8B8A8_UINT, RGBA8UI); 1049 FMT_CASE(R32G32_UINT, RG32UI); 1050 FMT_CASE(R16G16_UINT, RG16UI); 1051 FMT_CASE(R8G8_UINT, RG8UI); 1052 FMT_CASE(R32_UINT, R32UI); 1053 FMT_CASE(R16_UINT, R16UI); 1054 FMT_CASE(R8_UINT, R8UI); 1055 1056 FMT_CASE(R32G32B32A32_SINT, RGBA32I); 1057 FMT_CASE(R16G16B16A16_SINT, RGBA16I); 1058 FMT_CASE(R8G8B8A8_SINT, RGBA8I); 1059 FMT_CASE(R32G32_SINT, RG32I); 1060 FMT_CASE(R16G16_SINT, RG16I); 1061 FMT_CASE(R8G8_SINT, RG8I); 1062 FMT_CASE(R32_SINT, R32I); 1063 FMT_CASE(R16_SINT, R16I); 1064 FMT_CASE(R8_SINT, R8I); 1065 1066 FMT_CASE(R16G16B16A16_UNORM, RGBA16); 1067 FMT_CASE(R10G10B10A2_UNORM, RGB10A2); 1068 FMT_CASE(R8G8B8A8_UNORM, RGBA8); 1069 FMT_CASE(R16G16_UNORM, RG16); 1070 FMT_CASE(R8G8_UNORM, RG8); 1071 FMT_CASE(R16_UNORM, R16); 1072 FMT_CASE(R8_UNORM, R8); 1073 1074 FMT_CASE(R16G16B16A16_SNORM, RGBA16_SNORM); 1075 FMT_CASE(R8G8B8A8_SNORM, RGBA8_SNORM); 1076 FMT_CASE(R16G16_SNORM, RG16_SNORM); 1077 FMT_CASE(R8G8_SNORM, RG8_SNORM); 1078 FMT_CASE(R16_SNORM, R16_SNORM); 1079 FMT_CASE(R8_SNORM, R8_SNORM); 1080 1081 FMT_CASE(B8G8R8A8_UNORM, BGRA8); 1082 1083 default: 1084 assert(!"Unexpected format"); 1085 return &formatTable[nv50_ir::FMT_NONE]; 1086 } 1087} 1088 1089void 1090TexInstruction::setIndirectR(Value *v) 1091{ 1092 int p = ((tex.rIndirectSrc < 0) && v) ? srcs.size() : tex.rIndirectSrc; 1093 if (p >= 0) { 1094 tex.rIndirectSrc = p; 1095 setSrc(p, v); 1096 srcs[p].usedAsPtr = !!v; 1097 } 1098} 1099 1100void 1101TexInstruction::setIndirectS(Value *v) 1102{ 1103 int p = ((tex.sIndirectSrc < 0) && v) ? srcs.size() : tex.sIndirectSrc; 1104 if (p >= 0) { 1105 tex.sIndirectSrc = p; 1106 setSrc(p, v); 1107 srcs[p].usedAsPtr = !!v; 1108 } 1109} 1110 1111CmpInstruction::CmpInstruction(Function *fn, operation op) 1112 : Instruction(fn, op, TYPE_F32) 1113{ 1114 setCond = CC_ALWAYS; 1115} 1116 1117CmpInstruction * 1118CmpInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const 1119{ 1120 CmpInstruction *cmp = (i ? static_cast<CmpInstruction *>(i) : 1121 new_CmpInstruction(pol.context(), op)); 1122 cmp->dType = dType; 1123 Instruction::clone(pol, cmp); 1124 cmp->setCond = setCond; 1125 return cmp; 1126} 1127 1128FlowInstruction::FlowInstruction(Function *fn, operation op, void *targ) 1129 : Instruction(fn, op, TYPE_NONE) 1130{ 1131 if (op == OP_CALL) 1132 target.fn = reinterpret_cast<Function *>(targ); 1133 else 1134 target.bb = reinterpret_cast<BasicBlock *>(targ); 1135 1136 if (op == OP_BRA || 1137 op == OP_CONT || op == OP_BREAK || 1138 op == OP_RET || op == OP_EXIT) 1139 terminator = 1; 1140 else 1141 if (op == OP_JOIN) 1142 terminator = targ ? 1 : 0; 1143 1144 allWarp = absolute = limit = builtin = indirect = 0; 1145} 1146 1147FlowInstruction * 1148FlowInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const 1149{ 1150 FlowInstruction *flow = (i ? static_cast<FlowInstruction *>(i) : 1151 new_FlowInstruction(pol.context(), op, NULL)); 1152 1153 Instruction::clone(pol, flow); 1154 flow->allWarp = allWarp; 1155 flow->absolute = absolute; 1156 flow->limit = limit; 1157 flow->builtin = builtin; 1158 1159 if (builtin) 1160 flow->target.builtin = target.builtin; 1161 else 1162 if (op == OP_CALL) 1163 flow->target.fn = target.fn; 1164 else 1165 if (target.bb) 1166 flow->target.bb = pol.get<BasicBlock>(target.bb); 1167 1168 return flow; 1169} 1170 1171Program::Program(Type type, Target *arch) 1172 : progType(type), 1173 target(arch), 1174 tlsSize(0), 1175 mem_Instruction(sizeof(Instruction), 6), 1176 mem_CmpInstruction(sizeof(CmpInstruction), 4), 1177 mem_TexInstruction(sizeof(TexInstruction), 4), 1178 mem_FlowInstruction(sizeof(FlowInstruction), 4), 1179 mem_LValue(sizeof(LValue), 8), 1180 mem_Symbol(sizeof(Symbol), 7), 1181 mem_ImmediateValue(sizeof(ImmediateValue), 7), 1182 driver(NULL), 1183 driver_out(NULL) 1184{ 1185 code = NULL; 1186 binSize = 0; 1187 1188 maxGPR = -1; 1189 fp64 = false; 1190 persampleInvocation = false; 1191 1192 main = new Function(this, "MAIN", ~0); 1193 calls.insert(&main->call); 1194 1195 dbgFlags = 0; 1196 optLevel = 0; 1197 1198 targetPriv = NULL; 1199} 1200 1201Program::~Program() 1202{ 1203 for (ArrayList::Iterator it = allFuncs.iterator(); !it.end(); it.next()) 1204 delete reinterpret_cast<Function *>(it.get()); 1205 1206 for (ArrayList::Iterator it = allRValues.iterator(); !it.end(); it.next()) 1207 releaseValue(reinterpret_cast<Value *>(it.get())); 1208} 1209 1210void Program::releaseInstruction(Instruction *insn) 1211{ 1212 // TODO: make this not suck so much 1213 1214 insn->~Instruction(); 1215 1216 if (insn->asCmp()) 1217 mem_CmpInstruction.release(insn); 1218 else 1219 if (insn->asTex()) 1220 mem_TexInstruction.release(insn); 1221 else 1222 if (insn->asFlow()) 1223 mem_FlowInstruction.release(insn); 1224 else 1225 mem_Instruction.release(insn); 1226} 1227 1228void Program::releaseValue(Value *value) 1229{ 1230 value->~Value(); 1231 1232 if (value->asLValue()) 1233 mem_LValue.release(value); 1234 else 1235 if (value->asImm()) 1236 mem_ImmediateValue.release(value); 1237 else 1238 if (value->asSym()) 1239 mem_Symbol.release(value); 1240} 1241 1242 1243} // namespace nv50_ir 1244 1245extern "C" { 1246 1247static void 1248nv50_ir_init_prog_info(struct nv50_ir_prog_info *info, 1249 struct nv50_ir_prog_info_out *info_out) 1250{ 1251 info_out->target = info->target; 1252 info_out->type = info->type; 1253 if (info->type == PIPE_SHADER_TESS_CTRL || info->type == PIPE_SHADER_TESS_EVAL) { 1254 info_out->prop.tp.domain = PIPE_PRIM_MAX; 1255 info_out->prop.tp.outputPrim = PIPE_PRIM_MAX; 1256 } 1257 if (info->type == PIPE_SHADER_GEOMETRY) { 1258 info_out->prop.gp.instanceCount = 1; 1259 info_out->prop.gp.maxVertices = 1; 1260 } 1261 if (info->type == PIPE_SHADER_COMPUTE) { 1262 info->prop.cp.numThreads[0] = 1263 info->prop.cp.numThreads[1] = 1264 info->prop.cp.numThreads[2] = 1; 1265 } 1266 info_out->bin.smemSize = info->bin.smemSize; 1267 info_out->io.genUserClip = info->io.genUserClip; 1268 info_out->io.instanceId = 0xff; 1269 info_out->io.vertexId = 0xff; 1270 info_out->io.edgeFlagIn = 0xff; 1271 info_out->io.edgeFlagOut = 0xff; 1272 info_out->io.fragDepth = 0xff; 1273 info_out->io.sampleMask = 0xff; 1274} 1275 1276int 1277nv50_ir_generate_code(struct nv50_ir_prog_info *info, 1278 struct nv50_ir_prog_info_out *info_out) 1279{ 1280 int ret = 0; 1281 1282 nv50_ir::Program::Type type; 1283 1284 nv50_ir_init_prog_info(info, info_out); 1285 1286#define PROG_TYPE_CASE(a, b) \ 1287 case PIPE_SHADER_##a: type = nv50_ir::Program::TYPE_##b; break 1288 1289 switch (info->type) { 1290 PROG_TYPE_CASE(VERTEX, VERTEX); 1291 PROG_TYPE_CASE(TESS_CTRL, TESSELLATION_CONTROL); 1292 PROG_TYPE_CASE(TESS_EVAL, TESSELLATION_EVAL); 1293 PROG_TYPE_CASE(GEOMETRY, GEOMETRY); 1294 PROG_TYPE_CASE(FRAGMENT, FRAGMENT); 1295 PROG_TYPE_CASE(COMPUTE, COMPUTE); 1296 default: 1297 INFO_DBG(info->dbgFlags, VERBOSE, "unsupported program type %u\n", info->type); 1298 return -1; 1299 } 1300 INFO_DBG(info->dbgFlags, VERBOSE, "translating program of type %u\n", type); 1301 1302 nv50_ir::Target *targ = nv50_ir::Target::create(info->target); 1303 if (!targ) 1304 return -1; 1305 1306 nv50_ir::Program *prog = new nv50_ir::Program(type, targ); 1307 if (!prog) { 1308 nv50_ir::Target::destroy(targ); 1309 return -1; 1310 } 1311 prog->driver = info; 1312 prog->driver_out = info_out; 1313 prog->dbgFlags = info->dbgFlags; 1314 prog->optLevel = info->optLevel; 1315 1316 switch (info->bin.sourceRep) { 1317 case PIPE_SHADER_IR_NIR: 1318 ret = prog->makeFromNIR(info, info_out) ? 0 : -2; 1319 break; 1320 case PIPE_SHADER_IR_TGSI: 1321 ret = prog->makeFromTGSI(info, info_out) ? 0 : -2; 1322 break; 1323 default: 1324 ret = -1; 1325 break; 1326 } 1327 if (ret < 0) 1328 goto out; 1329 if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE) 1330 prog->print(); 1331 1332 targ->parseDriverInfo(info, info_out); 1333 prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_PRE_SSA); 1334 1335 prog->convertToSSA(); 1336 1337 if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE) 1338 prog->print(); 1339 1340 prog->optimizeSSA(info->optLevel); 1341 prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_SSA); 1342 1343 if (prog->dbgFlags & NV50_IR_DEBUG_BASIC) 1344 prog->print(); 1345 1346 if (!prog->registerAllocation()) { 1347 ret = -4; 1348 goto out; 1349 } 1350 prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_POST_RA); 1351 1352 prog->optimizePostRA(info->optLevel); 1353 1354 if (!prog->emitBinary(info_out)) { 1355 ret = -5; 1356 goto out; 1357 } 1358 1359out: 1360 INFO_DBG(prog->dbgFlags, VERBOSE, "nv50_ir_generate_code: ret = %i\n", ret); 1361 1362 info_out->bin.maxGPR = prog->maxGPR; 1363 info_out->bin.code = prog->code; 1364 info_out->bin.codeSize = prog->binSize; 1365 info_out->bin.tlsSpace = prog->tlsSize; 1366 1367 delete prog; 1368 nv50_ir::Target::destroy(targ); 1369 1370 return ret; 1371} 1372 1373} // extern "C" 1374