1/* 2 * Copyright 2011 Christoph Bumiller 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 */ 22 23#include "codegen/nv50_ir.h" 24#include "codegen/nv50_ir_target.h" 25#include "codegen/nv50_ir_driver.h" 26 27extern "C" { 28#include "nouveau_debug.h" 29#include "nv50/nv50_program.h" 30} 31 32namespace nv50_ir { 33 34Modifier::Modifier(operation op) 35{ 36 switch (op) { 37 case OP_NEG: bits = NV50_IR_MOD_NEG; break; 38 case OP_ABS: bits = NV50_IR_MOD_ABS; break; 39 case OP_SAT: bits = NV50_IR_MOD_SAT; break; 40 case OP_NOT: bits = NV50_IR_MOD_NOT; break; 41 default: 42 bits = 0; 43 break; 44 } 45} 46 47Modifier Modifier::operator*(const Modifier m) const 48{ 49 unsigned int a, b, c; 50 51 b = m.bits; 52 if (this->bits & NV50_IR_MOD_ABS) 53 b &= ~NV50_IR_MOD_NEG; 54 55 a = (this->bits ^ b) & (NV50_IR_MOD_NOT | NV50_IR_MOD_NEG); 56 c = (this->bits | m.bits) & (NV50_IR_MOD_ABS | NV50_IR_MOD_SAT); 57 58 return Modifier(a | c); 59} 60 61ValueRef::ValueRef(Value *v) : value(NULL), insn(NULL) 62{ 63 indirect[0] = -1; 64 indirect[1] = -1; 65 usedAsPtr = false; 66 set(v); 67} 68 69ValueRef::ValueRef(const ValueRef& ref) : value(NULL), insn(ref.insn) 70{ 71 set(ref); 72 usedAsPtr = ref.usedAsPtr; 73} 74 75ValueRef::~ValueRef() 76{ 77 this->set(NULL); 78} 79 80bool ValueRef::getImmediate(ImmediateValue &imm) const 81{ 82 const ValueRef *src = this; 83 Modifier m; 84 DataType type = src->insn->sType; 85 86 while (src) { 87 if (src->mod) { 88 if (src->insn->sType != type) 89 break; 90 m *= src->mod; 91 } 92 if (src->getFile() == FILE_IMMEDIATE) { 93 imm = *(src->value->asImm()); 94 // The immediate's type isn't required to match its use, it's 95 // more of a hint; applying a modifier makes use of that hint. 96 imm.reg.type = type; 97 m.applyTo(imm); 98 return true; 99 } 100 101 Instruction *insn = src->value->getUniqueInsn(); 102 103 if (insn && insn->op == OP_MOV) { 104 src = &insn->src(0); 105 if (src->mod) 106 WARN("OP_MOV with modifier encountered !\n"); 107 } else { 108 src = NULL; 109 } 110 } 111 return false; 112} 113 114ValueDef::ValueDef(Value *v) : value(NULL), insn(NULL) 115{ 116 set(v); 117} 118 119ValueDef::ValueDef(const ValueDef& def) : value(NULL), insn(NULL) 120{ 121 set(def.get()); 122} 123 124ValueDef::~ValueDef() 125{ 126 this->set(NULL); 127} 128 129void 130ValueRef::set(const ValueRef &ref) 131{ 132 this->set(ref.get()); 133 mod = ref.mod; 134 indirect[0] = ref.indirect[0]; 135 indirect[1] = ref.indirect[1]; 136} 137 138void 139ValueRef::set(Value *refVal) 140{ 141 if (value == refVal) 142 return; 143 if (value) 144 value->uses.erase(this); 145 if (refVal) 146 refVal->uses.insert(this); 147 148 value = refVal; 149} 150 151void 152ValueDef::set(Value *defVal) 153{ 154 if (value == defVal) 155 return; 156 if (value) 157 value->defs.remove(this); 158 if (defVal) 159 defVal->defs.push_back(this); 160 161 value = defVal; 162} 163 164// Check if we can replace this definition's value by the value in @rep, 165// including the source modifiers, i.e. make sure that all uses support 166// @rep.mod. 167bool 168ValueDef::mayReplace(const ValueRef &rep) 169{ 170 if (!rep.mod) 171 return true; 172 173 if (!insn || !insn->bb) // Unbound instruction ? 174 return false; 175 176 const Target *target = insn->bb->getProgram()->getTarget(); 177 178 for (Value::UseIterator it = value->uses.begin(); it != value->uses.end(); 179 ++it) { 180 Instruction *insn = (*it)->getInsn(); 181 int s = -1; 182 183 for (int i = 0; insn->srcExists(i); ++i) { 184 if (insn->src(i).get() == value) { 185 // If there are multiple references to us we'd have to check if the 186 // combination of mods is still supported, but just bail for now. 187 if (&insn->src(i) != (*it)) 188 return false; 189 s = i; 190 } 191 } 192 assert(s >= 0); // integrity of uses list 193 194 if (!target->isModSupported(insn, s, rep.mod)) 195 return false; 196 } 197 return true; 198} 199 200void 201ValueDef::replace(const ValueRef &repVal, bool doSet) 202{ 203 assert(mayReplace(repVal)); 204 205 if (value == repVal.get()) 206 return; 207 208 while (!value->uses.empty()) { 209 ValueRef *ref = *value->uses.begin(); 210 ref->set(repVal.get()); 211 ref->mod *= repVal.mod; 212 } 213 214 if (doSet) 215 set(repVal.get()); 216} 217 218Value::Value() 219{ 220 join = this; 221 memset(®, 0, sizeof(reg)); 222 reg.size = 4; 223} 224 225LValue::LValue(Function *fn, DataFile file) 226{ 227 reg.file = file; 228 reg.size = (file != FILE_PREDICATE) ? 4 : 1; 229 reg.data.id = -1; 230 231 compMask = 0; 232 compound = 0; 233 ssa = 0; 234 fixedReg = 0; 235 noSpill = 0; 236 237 fn->add(this, this->id); 238} 239 240LValue::LValue(Function *fn, LValue *lval) 241{ 242 assert(lval); 243 244 reg.file = lval->reg.file; 245 reg.size = lval->reg.size; 246 reg.data.id = -1; 247 248 compMask = 0; 249 compound = 0; 250 ssa = 0; 251 fixedReg = 0; 252 noSpill = 0; 253 254 fn->add(this, this->id); 255} 256 257LValue * 258LValue::clone(ClonePolicy<Function>& pol) const 259{ 260 LValue *that = new_LValue(pol.context(), reg.file); 261 262 pol.set<Value>(this, that); 263 264 that->reg.size = this->reg.size; 265 that->reg.type = this->reg.type; 266 that->reg.data = this->reg.data; 267 268 return that; 269} 270 271bool 272LValue::isUniform() const 273{ 274 if (defs.size() > 1) 275 return false; 276 Instruction *insn = getInsn(); 277 // let's not try too hard here for now ... 278 return !insn->srcExists(1) && insn->getSrc(0)->isUniform(); 279} 280 281Symbol::Symbol(Program *prog, DataFile f, ubyte fidx) 282{ 283 baseSym = NULL; 284 285 reg.file = f; 286 reg.fileIndex = fidx; 287 reg.data.offset = 0; 288 289 prog->add(this, this->id); 290} 291 292Symbol * 293Symbol::clone(ClonePolicy<Function>& pol) const 294{ 295 Program *prog = pol.context()->getProgram(); 296 297 Symbol *that = new_Symbol(prog, reg.file, reg.fileIndex); 298 299 pol.set<Value>(this, that); 300 301 that->reg.size = this->reg.size; 302 that->reg.type = this->reg.type; 303 that->reg.data = this->reg.data; 304 305 that->baseSym = this->baseSym; 306 307 return that; 308} 309 310bool 311Symbol::isUniform() const 312{ 313 return 314 reg.file != FILE_SYSTEM_VALUE && 315 reg.file != FILE_MEMORY_LOCAL && 316 reg.file != FILE_SHADER_INPUT; 317} 318 319ImmediateValue::ImmediateValue(Program *prog, uint32_t uval) 320{ 321 memset(®, 0, sizeof(reg)); 322 323 reg.file = FILE_IMMEDIATE; 324 reg.size = 4; 325 reg.type = TYPE_U32; 326 327 reg.data.u32 = uval; 328 329 prog->add(this, this->id); 330} 331 332ImmediateValue::ImmediateValue(Program *prog, float fval) 333{ 334 memset(®, 0, sizeof(reg)); 335 336 reg.file = FILE_IMMEDIATE; 337 reg.size = 4; 338 reg.type = TYPE_F32; 339 340 reg.data.f32 = fval; 341 342 prog->add(this, this->id); 343} 344 345ImmediateValue::ImmediateValue(Program *prog, double dval) 346{ 347 memset(®, 0, sizeof(reg)); 348 349 reg.file = FILE_IMMEDIATE; 350 reg.size = 8; 351 reg.type = TYPE_F64; 352 353 reg.data.f64 = dval; 354 355 prog->add(this, this->id); 356} 357 358ImmediateValue::ImmediateValue(const ImmediateValue *proto, DataType ty) 359{ 360 reg = proto->reg; 361 362 reg.type = ty; 363 reg.size = typeSizeof(ty); 364} 365 366ImmediateValue * 367ImmediateValue::clone(ClonePolicy<Function>& pol) const 368{ 369 Program *prog = pol.context()->getProgram(); 370 ImmediateValue *that = new_ImmediateValue(prog, 0u); 371 372 pol.set<Value>(this, that); 373 374 that->reg.size = this->reg.size; 375 that->reg.type = this->reg.type; 376 that->reg.data = this->reg.data; 377 378 return that; 379} 380 381bool 382ImmediateValue::isInteger(const int i) const 383{ 384 switch (reg.type) { 385 case TYPE_S8: 386 return reg.data.s8 == i; 387 case TYPE_U8: 388 return reg.data.u8 == i; 389 case TYPE_S16: 390 return reg.data.s16 == i; 391 case TYPE_U16: 392 return reg.data.u16 == i; 393 case TYPE_S32: 394 case TYPE_U32: 395 return reg.data.s32 == i; // as if ... 396 case TYPE_S64: 397 case TYPE_U64: 398 return reg.data.s64 == i; // as if ... 399 case TYPE_F32: 400 return reg.data.f32 == static_cast<float>(i); 401 case TYPE_F64: 402 return reg.data.f64 == static_cast<double>(i); 403 default: 404 return false; 405 } 406} 407 408bool 409ImmediateValue::isNegative() const 410{ 411 switch (reg.type) { 412 case TYPE_S8: return reg.data.s8 < 0; 413 case TYPE_S16: return reg.data.s16 < 0; 414 case TYPE_S32: 415 case TYPE_U32: return reg.data.s32 < 0; 416 case TYPE_F32: return reg.data.u32 & (1 << 31); 417 case TYPE_F64: return reg.data.u64 & (1ULL << 63); 418 default: 419 return false; 420 } 421} 422 423bool 424ImmediateValue::isPow2() const 425{ 426 if (reg.type == TYPE_U64 || reg.type == TYPE_S64) 427 return util_is_power_of_two_or_zero64(reg.data.u64); 428 else 429 return util_is_power_of_two_or_zero(reg.data.u32); 430} 431 432void 433ImmediateValue::applyLog2() 434{ 435 switch (reg.type) { 436 case TYPE_S8: 437 case TYPE_S16: 438 case TYPE_S32: 439 assert(!this->isNegative()); 440 // fall through 441 case TYPE_U8: 442 case TYPE_U16: 443 case TYPE_U32: 444 reg.data.u32 = util_logbase2(reg.data.u32); 445 break; 446 case TYPE_S64: 447 assert(!this->isNegative()); 448 // fall through 449 case TYPE_U64: 450 reg.data.u64 = util_logbase2_64(reg.data.u64); 451 break; 452 case TYPE_F32: 453 reg.data.f32 = log2f(reg.data.f32); 454 break; 455 case TYPE_F64: 456 reg.data.f64 = log2(reg.data.f64); 457 break; 458 default: 459 assert(0); 460 break; 461 } 462} 463 464bool 465ImmediateValue::compare(CondCode cc, float fval) const 466{ 467 if (reg.type != TYPE_F32) 468 ERROR("immediate value is not of type f32"); 469 470 switch (static_cast<CondCode>(cc & 7)) { 471 case CC_TR: return true; 472 case CC_FL: return false; 473 case CC_LT: return reg.data.f32 < fval; 474 case CC_LE: return reg.data.f32 <= fval; 475 case CC_GT: return reg.data.f32 > fval; 476 case CC_GE: return reg.data.f32 >= fval; 477 case CC_EQ: return reg.data.f32 == fval; 478 case CC_NE: return reg.data.f32 != fval; 479 default: 480 assert(0); 481 return false; 482 } 483} 484 485ImmediateValue& 486ImmediateValue::operator=(const ImmediateValue &that) 487{ 488 this->reg = that.reg; 489 return (*this); 490} 491 492bool 493Value::interfers(const Value *that) const 494{ 495 uint32_t idA, idB; 496 497 if (that->reg.file != reg.file || that->reg.fileIndex != reg.fileIndex) 498 return false; 499 if (this->asImm()) 500 return false; 501 502 if (this->asSym()) { 503 idA = this->join->reg.data.offset; 504 idB = that->join->reg.data.offset; 505 } else { 506 idA = this->join->reg.data.id * MIN2(this->reg.size, 4); 507 idB = that->join->reg.data.id * MIN2(that->reg.size, 4); 508 } 509 510 if (idA < idB) 511 return (idA + this->reg.size > idB); 512 else 513 if (idA > idB) 514 return (idB + that->reg.size > idA); 515 else 516 return (idA == idB); 517} 518 519bool 520Value::equals(const Value *that, bool strict) const 521{ 522 if (strict) 523 return this == that; 524 525 if (that->reg.file != reg.file || that->reg.fileIndex != reg.fileIndex) 526 return false; 527 if (that->reg.size != this->reg.size) 528 return false; 529 530 if (that->reg.data.id != this->reg.data.id) 531 return false; 532 533 return true; 534} 535 536bool 537ImmediateValue::equals(const Value *that, bool strict) const 538{ 539 const ImmediateValue *imm = that->asImm(); 540 if (!imm) 541 return false; 542 return reg.data.u64 == imm->reg.data.u64; 543} 544 545bool 546Symbol::equals(const Value *that, bool strict) const 547{ 548 if (reg.file != that->reg.file || reg.fileIndex != that->reg.fileIndex) 549 return false; 550 assert(that->asSym()); 551 552 if (this->baseSym != that->asSym()->baseSym) 553 return false; 554 555 if (reg.file == FILE_SYSTEM_VALUE) 556 return (this->reg.data.sv.sv == that->reg.data.sv.sv && 557 this->reg.data.sv.index == that->reg.data.sv.index); 558 return this->reg.data.offset == that->reg.data.offset; 559} 560 561void Instruction::init() 562{ 563 next = prev = 0; 564 565 cc = CC_ALWAYS; 566 rnd = ROUND_N; 567 cache = CACHE_CA; 568 subOp = 0; 569 570 saturate = 0; 571 join = 0; 572 exit = 0; 573 terminator = 0; 574 ftz = 0; 575 dnz = 0; 576 perPatch = 0; 577 fixed = 0; 578 encSize = 0; 579 ipa = 0; 580 mask = 0; 581 precise = 0; 582 583 lanes = 0xf; 584 585 postFactor = 0; 586 587 predSrc = -1; 588 flagsDef = -1; 589 flagsSrc = -1; 590} 591 592Instruction::Instruction() 593{ 594 init(); 595 596 op = OP_NOP; 597 dType = sType = TYPE_F32; 598 599 id = -1; 600 bb = 0; 601} 602 603Instruction::Instruction(Function *fn, operation opr, DataType ty) 604{ 605 init(); 606 607 op = opr; 608 dType = sType = ty; 609 610 fn->add(this, id); 611} 612 613Instruction::~Instruction() 614{ 615 if (bb) { 616 Function *fn = bb->getFunction(); 617 bb->remove(this); 618 fn->allInsns.remove(id); 619 } 620 621 for (int s = 0; srcExists(s); ++s) 622 setSrc(s, NULL); 623 // must unlink defs too since the list pointers will get deallocated 624 for (int d = 0; defExists(d); ++d) 625 setDef(d, NULL); 626} 627 628void 629Instruction::setDef(int i, Value *val) 630{ 631 int size = defs.size(); 632 if (i >= size) { 633 defs.resize(i + 1); 634 while (size <= i) 635 defs[size++].setInsn(this); 636 } 637 defs[i].set(val); 638} 639 640void 641Instruction::setSrc(int s, Value *val) 642{ 643 int size = srcs.size(); 644 if (s >= size) { 645 srcs.resize(s + 1); 646 while (size <= s) 647 srcs[size++].setInsn(this); 648 } 649 srcs[s].set(val); 650} 651 652void 653Instruction::setSrc(int s, const ValueRef& ref) 654{ 655 setSrc(s, ref.get()); 656 srcs[s].mod = ref.mod; 657} 658 659void 660Instruction::swapSources(int a, int b) 661{ 662 Value *value = srcs[a].get(); 663 Modifier m = srcs[a].mod; 664 665 setSrc(a, srcs[b]); 666 667 srcs[b].set(value); 668 srcs[b].mod = m; 669} 670 671static inline void moveSourcesAdjustIndex(int8_t &index, int s, int delta) 672{ 673 if (index >= s) 674 index += delta; 675 else 676 if ((delta < 0) && (index >= (s + delta))) 677 index = -1; 678} 679 680// Moves sources [@s,last_source] by @delta. 681// If @delta < 0, sources [@s - abs(@delta), @s) are erased. 682void 683Instruction::moveSources(const int s, const int delta) 684{ 685 if (delta == 0) 686 return; 687 assert(s + delta >= 0); 688 689 int k; 690 691 for (k = 0; srcExists(k); ++k) { 692 for (int i = 0; i < 2; ++i) 693 moveSourcesAdjustIndex(src(k).indirect[i], s, delta); 694 } 695 moveSourcesAdjustIndex(predSrc, s, delta); 696 moveSourcesAdjustIndex(flagsSrc, s, delta); 697 if (asTex()) { 698 TexInstruction *tex = asTex(); 699 moveSourcesAdjustIndex(tex->tex.rIndirectSrc, s, delta); 700 moveSourcesAdjustIndex(tex->tex.sIndirectSrc, s, delta); 701 } 702 703 if (delta > 0) { 704 --k; 705 for (int p = k + delta; k >= s; --k, --p) 706 setSrc(p, src(k)); 707 } else { 708 int p; 709 for (p = s; p < k; ++p) 710 setSrc(p + delta, src(p)); 711 for (; (p + delta) < k; ++p) 712 setSrc(p + delta, NULL); 713 } 714} 715 716void 717Instruction::takeExtraSources(int s, Value *values[3]) 718{ 719 values[0] = getIndirect(s, 0); 720 if (values[0]) 721 setIndirect(s, 0, NULL); 722 723 values[1] = getIndirect(s, 1); 724 if (values[1]) 725 setIndirect(s, 1, NULL); 726 727 values[2] = getPredicate(); 728 if (values[2]) 729 setPredicate(cc, NULL); 730} 731 732void 733Instruction::putExtraSources(int s, Value *values[3]) 734{ 735 if (values[0]) 736 setIndirect(s, 0, values[0]); 737 if (values[1]) 738 setIndirect(s, 1, values[1]); 739 if (values[2]) 740 setPredicate(cc, values[2]); 741} 742 743Instruction * 744Instruction::clone(ClonePolicy<Function>& pol, Instruction *i) const 745{ 746 if (!i) 747 i = new_Instruction(pol.context(), op, dType); 748#ifndef NDEBUG // non-conformant assert, so this is required 749 assert(typeid(*i) == typeid(*this)); 750#endif 751 752 pol.set<Instruction>(this, i); 753 754 i->sType = sType; 755 756 i->rnd = rnd; 757 i->cache = cache; 758 i->subOp = subOp; 759 760 i->saturate = saturate; 761 i->join = join; 762 i->exit = exit; 763 i->mask = mask; 764 i->ftz = ftz; 765 i->dnz = dnz; 766 i->ipa = ipa; 767 i->lanes = lanes; 768 i->perPatch = perPatch; 769 770 i->postFactor = postFactor; 771 772 for (int d = 0; defExists(d); ++d) 773 i->setDef(d, pol.get(getDef(d))); 774 775 for (int s = 0; srcExists(s); ++s) { 776 i->setSrc(s, pol.get(getSrc(s))); 777 i->src(s).mod = src(s).mod; 778 } 779 780 i->cc = cc; 781 i->predSrc = predSrc; 782 i->flagsDef = flagsDef; 783 i->flagsSrc = flagsSrc; 784 785 return i; 786} 787 788unsigned int 789Instruction::defCount(unsigned int mask, bool singleFile) const 790{ 791 unsigned int i, n; 792 793 if (singleFile) { 794 unsigned int d = ffs(mask); 795 if (!d) 796 return 0; 797 for (i = d--; defExists(i); ++i) 798 if (getDef(i)->reg.file != getDef(d)->reg.file) 799 mask &= ~(1 << i); 800 } 801 802 for (n = 0, i = 0; this->defExists(i); ++i, mask >>= 1) 803 n += mask & 1; 804 return n; 805} 806 807unsigned int 808Instruction::srcCount(unsigned int mask, bool singleFile) const 809{ 810 unsigned int i, n; 811 812 if (singleFile) { 813 unsigned int s = ffs(mask); 814 if (!s) 815 return 0; 816 for (i = s--; srcExists(i); ++i) 817 if (getSrc(i)->reg.file != getSrc(s)->reg.file) 818 mask &= ~(1 << i); 819 } 820 821 for (n = 0, i = 0; this->srcExists(i); ++i, mask >>= 1) 822 n += mask & 1; 823 return n; 824} 825 826bool 827Instruction::setIndirect(int s, int dim, Value *value) 828{ 829 assert(this->srcExists(s)); 830 831 int p = srcs[s].indirect[dim]; 832 if (p < 0) { 833 if (!value) 834 return true; 835 p = srcs.size(); 836 while (p > 0 && !srcExists(p - 1)) 837 --p; 838 } 839 setSrc(p, value); 840 srcs[p].usedAsPtr = (value != 0); 841 srcs[s].indirect[dim] = value ? p : -1; 842 return true; 843} 844 845bool 846Instruction::setPredicate(CondCode ccode, Value *value) 847{ 848 cc = ccode; 849 850 if (!value) { 851 if (predSrc >= 0) { 852 srcs[predSrc].set(NULL); 853 predSrc = -1; 854 } 855 return true; 856 } 857 858 if (predSrc < 0) { 859 predSrc = srcs.size(); 860 while (predSrc > 0 && !srcExists(predSrc - 1)) 861 --predSrc; 862 } 863 864 setSrc(predSrc, value); 865 return true; 866} 867 868bool 869Instruction::writesPredicate() const 870{ 871 for (int d = 0; defExists(d); ++d) 872 if (getDef(d)->inFile(FILE_PREDICATE) || getDef(d)->inFile(FILE_FLAGS)) 873 return true; 874 return false; 875} 876 877bool 878Instruction::canCommuteDefSrc(const Instruction *i) const 879{ 880 for (int d = 0; defExists(d); ++d) 881 for (int s = 0; i->srcExists(s); ++s) 882 if (getDef(d)->interfers(i->getSrc(s))) 883 return false; 884 return true; 885} 886 887bool 888Instruction::canCommuteDefDef(const Instruction *i) const 889{ 890 for (int d = 0; defExists(d); ++d) 891 for (int c = 0; i->defExists(c); ++c) 892 if (getDef(d)->interfers(i->getDef(c))) 893 return false; 894 return true; 895} 896 897bool 898Instruction::isCommutationLegal(const Instruction *i) const 899{ 900 return canCommuteDefDef(i) && 901 canCommuteDefSrc(i) && 902 i->canCommuteDefSrc(this); 903} 904 905TexInstruction::TexInstruction(Function *fn, operation op) 906 : Instruction(fn, op, TYPE_F32) 907{ 908 memset(&tex, 0, sizeof(tex)); 909 910 tex.rIndirectSrc = -1; 911 tex.sIndirectSrc = -1; 912 913 if (op == OP_TXF) 914 sType = TYPE_U32; 915} 916 917TexInstruction::~TexInstruction() 918{ 919 for (int c = 0; c < 3; ++c) { 920 dPdx[c].set(NULL); 921 dPdy[c].set(NULL); 922 } 923 for (int n = 0; n < 4; ++n) 924 for (int c = 0; c < 3; ++c) 925 offset[n][c].set(NULL); 926} 927 928TexInstruction * 929TexInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const 930{ 931 TexInstruction *tex = (i ? static_cast<TexInstruction *>(i) : 932 new_TexInstruction(pol.context(), op)); 933 934 Instruction::clone(pol, tex); 935 936 tex->tex = this->tex; 937 938 if (op == OP_TXD) { 939 for (unsigned int c = 0; c < tex->tex.target.getDim(); ++c) { 940 tex->dPdx[c].set(dPdx[c]); 941 tex->dPdy[c].set(dPdy[c]); 942 } 943 } 944 945 for (int n = 0; n < tex->tex.useOffsets; ++n) 946 for (int c = 0; c < 3; ++c) 947 tex->offset[n][c].set(offset[n][c]); 948 949 return tex; 950} 951 952const struct TexInstruction::Target::Desc TexInstruction::Target::descTable[] = 953{ 954 { "1D", 1, 1, false, false, false }, 955 { "2D", 2, 2, false, false, false }, 956 { "2D_MS", 2, 3, false, false, false }, 957 { "3D", 3, 3, false, false, false }, 958 { "CUBE", 2, 3, false, true, false }, 959 { "1D_SHADOW", 1, 1, false, false, true }, 960 { "2D_SHADOW", 2, 2, false, false, true }, 961 { "CUBE_SHADOW", 2, 3, false, true, true }, 962 { "1D_ARRAY", 1, 2, true, false, false }, 963 { "2D_ARRAY", 2, 3, true, false, false }, 964 { "2D_MS_ARRAY", 2, 4, true, false, false }, 965 { "CUBE_ARRAY", 2, 4, true, true, false }, 966 { "1D_ARRAY_SHADOW", 1, 2, true, false, true }, 967 { "2D_ARRAY_SHADOW", 2, 3, true, false, true }, 968 { "RECT", 2, 2, false, false, false }, 969 { "RECT_SHADOW", 2, 2, false, false, true }, 970 { "CUBE_ARRAY_SHADOW", 2, 4, true, true, true }, 971 { "BUFFER", 1, 1, false, false, false }, 972}; 973 974const struct TexInstruction::ImgFormatDesc TexInstruction::formatTable[] = 975{ 976 { "NONE", 0, { 0, 0, 0, 0 }, UINT }, 977 978 { "RGBA32F", 4, { 32, 32, 32, 32 }, FLOAT }, 979 { "RGBA16F", 4, { 16, 16, 16, 16 }, FLOAT }, 980 { "RG32F", 2, { 32, 32, 0, 0 }, FLOAT }, 981 { "RG16F", 2, { 16, 16, 0, 0 }, FLOAT }, 982 { "R11G11B10F", 3, { 11, 11, 10, 0 }, FLOAT }, 983 { "R32F", 1, { 32, 0, 0, 0 }, FLOAT }, 984 { "R16F", 1, { 16, 0, 0, 0 }, FLOAT }, 985 986 { "RGBA32UI", 4, { 32, 32, 32, 32 }, UINT }, 987 { "RGBA16UI", 4, { 16, 16, 16, 16 }, UINT }, 988 { "RGB10A2UI", 4, { 10, 10, 10, 2 }, UINT }, 989 { "RGBA8UI", 4, { 8, 8, 8, 8 }, UINT }, 990 { "RG32UI", 2, { 32, 32, 0, 0 }, UINT }, 991 { "RG16UI", 2, { 16, 16, 0, 0 }, UINT }, 992 { "RG8UI", 2, { 8, 8, 0, 0 }, UINT }, 993 { "R32UI", 1, { 32, 0, 0, 0 }, UINT }, 994 { "R16UI", 1, { 16, 0, 0, 0 }, UINT }, 995 { "R8UI", 1, { 8, 0, 0, 0 }, UINT }, 996 997 { "RGBA32I", 4, { 32, 32, 32, 32 }, SINT }, 998 { "RGBA16I", 4, { 16, 16, 16, 16 }, SINT }, 999 { "RGBA8I", 4, { 8, 8, 8, 8 }, SINT }, 1000 { "RG32I", 2, { 32, 32, 0, 0 }, SINT }, 1001 { "RG16I", 2, { 16, 16, 0, 0 }, SINT }, 1002 { "RG8I", 2, { 8, 8, 0, 0 }, SINT }, 1003 { "R32I", 1, { 32, 0, 0, 0 }, SINT }, 1004 { "R16I", 1, { 16, 0, 0, 0 }, SINT }, 1005 { "R8I", 1, { 8, 0, 0, 0 }, SINT }, 1006 1007 { "RGBA16", 4, { 16, 16, 16, 16 }, UNORM }, 1008 { "RGB10A2", 4, { 10, 10, 10, 2 }, UNORM }, 1009 { "RGBA8", 4, { 8, 8, 8, 8 }, UNORM }, 1010 { "RG16", 2, { 16, 16, 0, 0 }, UNORM }, 1011 { "RG8", 2, { 8, 8, 0, 0 }, UNORM }, 1012 { "R16", 1, { 16, 0, 0, 0 }, UNORM }, 1013 { "R8", 1, { 8, 0, 0, 0 }, UNORM }, 1014 1015 { "RGBA16_SNORM", 4, { 16, 16, 16, 16 }, SNORM }, 1016 { "RGBA8_SNORM", 4, { 8, 8, 8, 8 }, SNORM }, 1017 { "RG16_SNORM", 2, { 16, 16, 0, 0 }, SNORM }, 1018 { "RG8_SNORM", 2, { 8, 8, 0, 0 }, SNORM }, 1019 { "R16_SNORM", 1, { 16, 0, 0, 0 }, SNORM }, 1020 { "R8_SNORM", 1, { 8, 0, 0, 0 }, SNORM }, 1021 1022 { "BGRA8", 4, { 8, 8, 8, 8 }, UNORM, true }, 1023}; 1024 1025void 1026TexInstruction::setIndirectR(Value *v) 1027{ 1028 int p = ((tex.rIndirectSrc < 0) && v) ? srcs.size() : tex.rIndirectSrc; 1029 if (p >= 0) { 1030 tex.rIndirectSrc = p; 1031 setSrc(p, v); 1032 srcs[p].usedAsPtr = !!v; 1033 } 1034} 1035 1036void 1037TexInstruction::setIndirectS(Value *v) 1038{ 1039 int p = ((tex.sIndirectSrc < 0) && v) ? srcs.size() : tex.sIndirectSrc; 1040 if (p >= 0) { 1041 tex.sIndirectSrc = p; 1042 setSrc(p, v); 1043 srcs[p].usedAsPtr = !!v; 1044 } 1045} 1046 1047CmpInstruction::CmpInstruction(Function *fn, operation op) 1048 : Instruction(fn, op, TYPE_F32) 1049{ 1050 setCond = CC_ALWAYS; 1051} 1052 1053CmpInstruction * 1054CmpInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const 1055{ 1056 CmpInstruction *cmp = (i ? static_cast<CmpInstruction *>(i) : 1057 new_CmpInstruction(pol.context(), op)); 1058 cmp->dType = dType; 1059 Instruction::clone(pol, cmp); 1060 cmp->setCond = setCond; 1061 return cmp; 1062} 1063 1064FlowInstruction::FlowInstruction(Function *fn, operation op, void *targ) 1065 : Instruction(fn, op, TYPE_NONE) 1066{ 1067 if (op == OP_CALL) 1068 target.fn = reinterpret_cast<Function *>(targ); 1069 else 1070 target.bb = reinterpret_cast<BasicBlock *>(targ); 1071 1072 if (op == OP_BRA || 1073 op == OP_CONT || op == OP_BREAK || 1074 op == OP_RET || op == OP_EXIT) 1075 terminator = 1; 1076 else 1077 if (op == OP_JOIN) 1078 terminator = targ ? 1 : 0; 1079 1080 allWarp = absolute = limit = builtin = indirect = 0; 1081} 1082 1083FlowInstruction * 1084FlowInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const 1085{ 1086 FlowInstruction *flow = (i ? static_cast<FlowInstruction *>(i) : 1087 new_FlowInstruction(pol.context(), op, NULL)); 1088 1089 Instruction::clone(pol, flow); 1090 flow->allWarp = allWarp; 1091 flow->absolute = absolute; 1092 flow->limit = limit; 1093 flow->builtin = builtin; 1094 1095 if (builtin) 1096 flow->target.builtin = target.builtin; 1097 else 1098 if (op == OP_CALL) 1099 flow->target.fn = target.fn; 1100 else 1101 if (target.bb) 1102 flow->target.bb = pol.get<BasicBlock>(target.bb); 1103 1104 return flow; 1105} 1106 1107Program::Program(Type type, Target *arch) 1108 : progType(type), 1109 target(arch), 1110 mem_Instruction(sizeof(Instruction), 6), 1111 mem_CmpInstruction(sizeof(CmpInstruction), 4), 1112 mem_TexInstruction(sizeof(TexInstruction), 4), 1113 mem_FlowInstruction(sizeof(FlowInstruction), 4), 1114 mem_LValue(sizeof(LValue), 8), 1115 mem_Symbol(sizeof(Symbol), 7), 1116 mem_ImmediateValue(sizeof(ImmediateValue), 7) 1117{ 1118 code = NULL; 1119 binSize = 0; 1120 1121 maxGPR = -1; 1122 fp64 = false; 1123 1124 main = new Function(this, "MAIN", ~0); 1125 calls.insert(&main->call); 1126 1127 dbgFlags = 0; 1128 optLevel = 0; 1129 1130 targetPriv = NULL; 1131} 1132 1133Program::~Program() 1134{ 1135 for (ArrayList::Iterator it = allFuncs.iterator(); !it.end(); it.next()) 1136 delete reinterpret_cast<Function *>(it.get()); 1137 1138 for (ArrayList::Iterator it = allRValues.iterator(); !it.end(); it.next()) 1139 releaseValue(reinterpret_cast<Value *>(it.get())); 1140} 1141 1142void Program::releaseInstruction(Instruction *insn) 1143{ 1144 // TODO: make this not suck so much 1145 1146 insn->~Instruction(); 1147 1148 if (insn->asCmp()) 1149 mem_CmpInstruction.release(insn); 1150 else 1151 if (insn->asTex()) 1152 mem_TexInstruction.release(insn); 1153 else 1154 if (insn->asFlow()) 1155 mem_FlowInstruction.release(insn); 1156 else 1157 mem_Instruction.release(insn); 1158} 1159 1160void Program::releaseValue(Value *value) 1161{ 1162 value->~Value(); 1163 1164 if (value->asLValue()) 1165 mem_LValue.release(value); 1166 else 1167 if (value->asImm()) 1168 mem_ImmediateValue.release(value); 1169 else 1170 if (value->asSym()) 1171 mem_Symbol.release(value); 1172} 1173 1174 1175} // namespace nv50_ir 1176 1177extern "C" { 1178 1179static void 1180nv50_ir_init_prog_info(struct nv50_ir_prog_info *info) 1181{ 1182 if (info->type == PIPE_SHADER_TESS_CTRL || info->type == PIPE_SHADER_TESS_EVAL) { 1183 info->prop.tp.domain = PIPE_PRIM_MAX; 1184 info->prop.tp.outputPrim = PIPE_PRIM_MAX; 1185 } 1186 if (info->type == PIPE_SHADER_GEOMETRY) { 1187 info->prop.gp.instanceCount = 1; 1188 info->prop.gp.maxVertices = 1; 1189 } 1190 if (info->type == PIPE_SHADER_COMPUTE) { 1191 info->prop.cp.numThreads[0] = 1192 info->prop.cp.numThreads[1] = 1193 info->prop.cp.numThreads[2] = 1; 1194 } 1195 info->io.pointSize = 0xff; 1196 info->io.instanceId = 0xff; 1197 info->io.vertexId = 0xff; 1198 info->io.edgeFlagIn = 0xff; 1199 info->io.edgeFlagOut = 0xff; 1200 info->io.fragDepth = 0xff; 1201 info->io.sampleMask = 0xff; 1202 info->io.backFaceColor[0] = info->io.backFaceColor[1] = 0xff; 1203} 1204 1205int 1206nv50_ir_generate_code(struct nv50_ir_prog_info *info) 1207{ 1208 int ret = 0; 1209 1210 nv50_ir::Program::Type type; 1211 1212 nv50_ir_init_prog_info(info); 1213 1214#define PROG_TYPE_CASE(a, b) \ 1215 case PIPE_SHADER_##a: type = nv50_ir::Program::TYPE_##b; break 1216 1217 switch (info->type) { 1218 PROG_TYPE_CASE(VERTEX, VERTEX); 1219 PROG_TYPE_CASE(TESS_CTRL, TESSELLATION_CONTROL); 1220 PROG_TYPE_CASE(TESS_EVAL, TESSELLATION_EVAL); 1221 PROG_TYPE_CASE(GEOMETRY, GEOMETRY); 1222 PROG_TYPE_CASE(FRAGMENT, FRAGMENT); 1223 PROG_TYPE_CASE(COMPUTE, COMPUTE); 1224 default: 1225 INFO_DBG(info->dbgFlags, VERBOSE, "unsupported program type %u\n", info->type); 1226 return -1; 1227 } 1228 INFO_DBG(info->dbgFlags, VERBOSE, "translating program of type %u\n", type); 1229 1230 nv50_ir::Target *targ = nv50_ir::Target::create(info->target); 1231 if (!targ) 1232 return -1; 1233 1234 nv50_ir::Program *prog = new nv50_ir::Program(type, targ); 1235 if (!prog) { 1236 nv50_ir::Target::destroy(targ); 1237 return -1; 1238 } 1239 prog->driver = info; 1240 prog->dbgFlags = info->dbgFlags; 1241 prog->optLevel = info->optLevel; 1242 1243 switch (info->bin.sourceRep) { 1244 case PIPE_SHADER_IR_NIR: 1245 ret = prog->makeFromNIR(info) ? 0 : -2; 1246 break; 1247 case PIPE_SHADER_IR_TGSI: 1248 ret = prog->makeFromTGSI(info) ? 0 : -2; 1249 break; 1250 default: 1251 ret = -1; 1252 break; 1253 } 1254 if (ret < 0) 1255 goto out; 1256 if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE) 1257 prog->print(); 1258 1259 targ->parseDriverInfo(info); 1260 prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_PRE_SSA); 1261 1262 prog->convertToSSA(); 1263 1264 if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE) 1265 prog->print(); 1266 1267 prog->optimizeSSA(info->optLevel); 1268 prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_SSA); 1269 1270 if (prog->dbgFlags & NV50_IR_DEBUG_BASIC) 1271 prog->print(); 1272 1273 if (!prog->registerAllocation()) { 1274 ret = -4; 1275 goto out; 1276 } 1277 prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_POST_RA); 1278 1279 prog->optimizePostRA(info->optLevel); 1280 1281 if (!prog->emitBinary(info)) { 1282 ret = -5; 1283 goto out; 1284 } 1285 1286out: 1287 INFO_DBG(prog->dbgFlags, VERBOSE, "nv50_ir_generate_code: ret = %i\n", ret); 1288 1289 info->bin.maxGPR = prog->maxGPR; 1290 info->bin.code = prog->code; 1291 info->bin.codeSize = prog->binSize; 1292 info->bin.tlsSpace = prog->tlsSize; 1293 1294 delete prog; 1295 nv50_ir::Target::destroy(targ); 1296 1297 return ret; 1298} 1299 1300} // extern "C" 1301