/*
 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Vadim Girlin
 */

#define VT_DEBUG 0

#if VT_DEBUG
#define VT_DUMP(q) do { q } while (0)
#else
#define VT_DUMP(q)
#endif

#include <cstring>

#include "sb_shader.h"
#include "sb_pass.h"

namespace r600_sb {

static const char * chans = "xyzw01?_";

sb_ostream& operator << (sb_ostream &o, value &v) {

	bool dead = v.flags & VLF_DEAD;

	if (dead)
		o << "{";

	switch (v.kind) {
	case VLK_SPECIAL_REG: {
		switch (v.select.sel()) {
		case SV_AR_INDEX: o << "AR"; break;
		case SV_ALU_PRED: o << "PR"; break;
		case SV_EXEC_MASK: o << "EM"; break;
		case SV_VALID_MASK: o << "VM"; break;
		case SV_GEOMETRY_EMIT: o << "GEOMETRY_EMIT"; break;
		case SV_LDS_RW: o << "LDS_RW"; break;
		case SV_LDS_OQA: o << "LDS_OQA"; break;
		case SV_LDS_OQB: o << "LDS_OQB"; break;
		case SV_SCRATCH: o << "SCRATCH"; break;
		default: o << "???specialreg"; break;
		}
		break;
	}

	case VLK_REG:
		o << "R" << v.select.sel() << "."
			<< chans[v.select.chan()];
		break;
	case VLK_KCACHE:
		o << "C" << v.select.sel() << "." << chans[v.select.chan()];
		break;
	case VLK_CONST:
		o << v.literal_value.f << "|";
		o.print_zw_hex(v.literal_value.u, 8);
		break;
	case VLK_PARAM:
		o << "Param" << (v.select.sel() - ALU_SRC_PARAM_OFFSET)
			<< chans[v.select.chan()];
		break;
	case VLK_TEMP:
		o << "t" << v.select.sel() - shader::temp_regid_offset;
		break;
	case VLK_REL_REG:
		o << "A" << v.select;
		o << "[" << *v.rel << "]";
		o << "_" << v.uid;
		break;
	case VLK_UNDEF:
		o << "undef";
		break;
	default:
		o << v.kind << "?????";
		break;
	}

	if (v.version)
		o << "." << v.version;

	if (dead)
		o << "}";

	if (v.is_global())
		o << "||";
	if (v.is_fixed())
		o << "F";
	if (v.is_prealloc())
		o << "P";

	sel_chan g;

	if (v.is_rel()) {
		g = v.array->gpr;
	} else {
		g = v.gpr;
	}

	if (g) {
		o << "@R" << g.sel() << "." << chans[g.chan()];
	}

	return o;
}
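
/* Global value numbering: gvn_source points to the representative value of
 * an equivalence class. add_value() hashes the value into a bucket, gives
 * constant folding a chance first, then scans the bucket for an
 * expression-equal entry; if none is found, the value becomes its own
 * gvn_source.
 */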
void value_table::add_value(value* v) {

	if (v->gvn_source) {
		return;
	}

	VT_DUMP(
		sblog << "gvn add_value ";
		dump::dump_val(v);
	);

	value_hash hash = v->hash();
	vt_item & vti = hashtable[hash & size_mask];
	vti.push_back(v);
	++cnt;

	if (v->def && ex.try_fold(v)) {
		VT_DUMP(
			sblog << " folded: ";
			dump::dump_val(v->gvn_source);
			sblog << "\n";
		);
		return;
	}

	int n = 0;
	for (vt_item::iterator I = vti.begin(), E = vti.end(); I != E; ++I, ++n) {
		value *c = *I;

		if (c == v)
			break;

		if (expr_equal(c, v)) {
			v->gvn_source = c->gvn_source;

			VT_DUMP(
				sblog << " found : equal to ";
				dump::dump_val(v->gvn_source);
				sblog << "\n";
			);
			return;
		}
	}

	v->gvn_source = v;
	VT_DUMP(
		sblog << " added new\n";
	);
}
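
/* Hashes are computed lazily and cached in ghash; zero means "not yet
 * computed", which is presumably why the pointer-based fallback ORs in the
 * low bit to guarantee a nonzero result.
 */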
value_hash value::hash() {
	if (ghash)
		return ghash;
	if (is_rel())
		ghash = rel_hash();
	else if (def)
		ghash = def->hash();
	else
		ghash = ((uintptr_t)this) | 1;

	return ghash;
}

value_hash value::rel_hash() {
	value_hash h = rel ? rel->hash() : 0;
	h |= select << 10;
	h |= array->hash();
	return h;
}

bool value_table::expr_equal(value* l, value* r) {
	return ex.equal(l, r);
}

void value_table::get_values(vvec& v) {
	v.resize(cnt);

	vvec::iterator T = v.begin();

	for (vt_table::iterator I = hashtable.begin(), E = hashtable.end();
			I != E; ++I) {
		T = std::copy(I->begin(), I->end(), T);
	}
}

void value::add_use(node* n) {
	if (0) {
		sblog << "add_use ";
		dump::dump_val(this);
		sblog << " => ";
		dump::dump_op(n);
	}
	uses.push_back(n);
}

struct use_node_comp {
	explicit use_node_comp(const node *n) : n(n) {}
	bool operator() (const node *o) {
		return o->hash() == n->hash();
	}

private:
	const node *n;
};

void value::remove_use(const node *n) {
	uselist::iterator it =
		std::find_if(uses.begin(), uses.end(), use_node_comp(n));

	if (it != uses.end()) {
		// We only ever had a pointer, so don't delete it here
		uses.erase(it);
	}
}

unsigned value::use_count() {
	return uses.size();
}

bool value::is_global() {
	if (chunk)
		return chunk->is_global();
	return flags & VLF_GLOBAL;
}

void value::set_global() {
	assert(is_sgpr());
	flags |= VLF_GLOBAL;
	if (chunk)
		chunk->set_global();
}

void value::set_prealloc() {
	assert(is_sgpr());
	flags |= VLF_PREALLOC;
	if (chunk)
		chunk->set_prealloc();
}

bool value::is_fixed() {
	if (array && array->gpr)
		return true;
	if (chunk && chunk->is_fixed())
		return true;
	return flags & VLF_FIXED;
}

void value::fix() {
	if (chunk)
		chunk->fix();
	flags |= VLF_FIXED;
}

bool value::is_prealloc() {
	if (chunk)
		return chunk->is_prealloc();
	return flags & VLF_PREALLOC;
}

void value::delete_uses() {
	// We only ever had pointers, so don't delete them here
	uses.erase(uses.begin(), uses.end());
}

bool value::no_reladdr_conflict_with(value *src)
{
	/* If the register is not relative, it can't create a relative access
	 * conflict. */
	if (!src->is_rel())
		return true;

	/* If the destination is AR then we accept the copy propagation: the
	 * scheduler re-creates the address loading operation and will signal
	 * interference if there is a conflicting address register load, so the
	 * copy fails at that point.
	 */
	if (gvalue()->is_AR())
		return true;

	/* For all nodes that use this value, test whether the operation uses
	 * another relative access with a different address value. If one is
	 * found, signal a conflict.
	 */
	for (uselist::const_iterator N = uses.begin(); N != uses.end(); ++N) {
		for (vvec::const_iterator V = (*N)->src.begin(); V != (*N)->src.end(); ++V) {
			if (*V) {
				value *v = (*V)->gvalue();
				if (v != src && v->is_rel() && v->rel != src->rel)
					return false;
			}
		}
		for (vvec::const_iterator V = (*N)->dst.begin(); V != (*N)->dst.end(); ++V) {
			if (*V) {
				value *v = (*V)->gvalue();
				if (v && v != src && v->is_rel() && v->rel != src->rel)
					return false;
			}
		}
	}
	return true;
}

void ra_constraint::update_values() {
	for (vvec::iterator I = values.begin(), E = values.end(); I != E; ++I) {
		assert(!(*I)->constraint);
		(*I)->constraint = this;
	}
}
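
/* sb_pool is a bump allocator: allocations are rounded up to SB_POOL_ALIGN
 * and carved sequentially from malloc'ed blocks of block_size bytes.
 * Individual allocations are never freed; free_all() releases the blocks
 * wholesale.
 */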
void* sb_pool::allocate(unsigned sz) {
	sz = (sz + SB_POOL_ALIGN - 1) & ~(SB_POOL_ALIGN - 1);
	assert(sz < (block_size >> 6) && "too big allocation size for sb_pool");

	unsigned offset = total_size % block_size;
	unsigned capacity = block_size * blocks.size();

	if (total_size + sz > capacity) {
		total_size = capacity;
		void * nb = malloc(block_size);
		blocks.push_back(nb);
		offset = 0;
	}

	total_size += sz;
	return ((char*)blocks.back() + offset);
}

void sb_pool::free_all() {
	for (block_vector::iterator I = blocks.begin(), E = blocks.end(); I != E;
			++I) {
		free(*I);
	}
}

value* sb_value_pool::create(value_kind k, sel_chan regid,
                             unsigned ver) {
	void* np = allocate(aligned_elt_size);
	value *v = new (np) value(size(), k, regid, ver);
	return v;
}

void sb_value_pool::delete_all() {
	unsigned bcnt = blocks.size();
	unsigned toffset = 0;
	for (unsigned b = 0; b < bcnt; ++b) {
		char *bstart = (char*)blocks[b];
		for (unsigned offset = 0; offset < block_size;
				offset += aligned_elt_size) {
			((value*)(bstart + offset))->~value();
			toffset += aligned_elt_size;
			if (toffset >= total_size)
				return;
		}
	}
}

bool sb_bitset::get(unsigned id) {
	assert(id < bit_size);
	unsigned w = id / bt_bits;
	unsigned b = id % bt_bits;
	return (data[w] >> b) & 1;
}

void sb_bitset::set(unsigned id, bool bit) {
	assert(id < bit_size);
	unsigned w = id / bt_bits;
	unsigned b = id % bt_bits;
	if (w >= data.size())
		data.resize(w + 1);

	if (bit)
		data[w] |= (1 << b);
	else
		data[w] &= ~(1 << b);
}

// Set the bit and report whether the stored word actually changed.
inline bool sb_bitset::set_chk(unsigned id, bool bit) {
	assert(id < bit_size);
	unsigned w = id / bt_bits;
	unsigned b = id % bt_bits;
	basetype d = data[w];
	basetype dn = (d & ~(1 << b)) | (bit << b);
	bool r = (d != dn);
	data[w] = dn;
	return r;
}

void sb_bitset::clear() {
	std::fill(data.begin(), data.end(), 0);
}

void sb_bitset::resize(unsigned size) {
	unsigned cur_data_size = data.size();
	unsigned new_data_size = (size + bt_bits - 1) / bt_bits;

	if (new_data_size != cur_data_size)
		data.resize(new_data_size);

	// make sure that new bits in the existing word are cleared
	if (cur_data_size && size > bit_size && bit_size % bt_bits) {
		basetype clear_mask = (~(basetype)0u) << (bit_size % bt_bits);
		data[cur_data_size - 1] &= ~clear_mask;
	}

	bit_size = size;
}

unsigned sb_bitset::find_bit(unsigned start) {
	assert(start < bit_size);
	unsigned w = start / bt_bits;
	unsigned b = start % bt_bits;
	unsigned sz = data.size();

	while (w < sz) {
		basetype d = data[w] >> b;
		if (d != 0) {
			unsigned pos = __builtin_ctz(d) + b + w * bt_bits;
			return pos;
		}

		b = 0;
		++w;
	}

	return bit_size;
}
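
/* sb_value_set is a set of values backed by sb_bitset, indexed by uid - 1
 * (value uids start at 1). add_val() grows the bitset with some headroom,
 * presumably to amortize resizes.
 */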
sb_value_set::iterator::iterator(shader& sh, sb_value_set* s, unsigned nb)
	: vp(sh.get_value_pool()), s(s), nb(nb) {}

bool sb_value_set::add_set_checked(sb_value_set& s2) {
	if (bs.size() < s2.bs.size())
		bs.resize(s2.bs.size());
	sb_bitset nbs = bs | s2.bs;
	if (bs != nbs) {
		bs.swap(nbs);
		return true;
	}
	return false;
}

void r600_sb::sb_value_set::remove_set(sb_value_set& s2) {
	bs.mask(s2.bs);
}

bool sb_value_set::add_val(value* v) {
	assert(v);
	if (bs.size() < v->uid)
		bs.resize(v->uid + 32);

	return bs.set_chk(v->uid - 1, 1);
}

bool sb_value_set::remove_vec(vvec& vv) {
	bool modified = false;
	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
		if (*I)
			modified |= remove_val(*I);
	}
	return modified;
}

void sb_value_set::clear() {
	bs.clear();
}

bool sb_value_set::remove_val(value* v) {
	assert(v);
	if (bs.size() < v->uid)
		return false;
	return bs.set_chk(v->uid - 1, 0);
}

bool r600_sb::sb_value_set::add_vec(vvec& vv) {
	bool modified = false;
	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
		value *v = *I;
		if (v)
			modified |= add_val(v);
	}
	return modified;
}

bool r600_sb::sb_value_set::contains(value* v) {
	unsigned b = v->uid - 1;
	if (b < bs.size())
		return bs.get(b);
	else
		return false;
}

bool sb_value_set::empty() {
	return bs.size() == 0 || bs.find_bit(0) == bs.size();
}

void sb_bitset::swap(sb_bitset& bs2) {
	std::swap(data, bs2.data);
	std::swap(bit_size, bs2.bit_size);
}

bool sb_bitset::operator ==(const sb_bitset& bs2) {
	if (bit_size != bs2.bit_size)
		return false;

	for (unsigned i = 0, c = data.size(); i < c; ++i) {
		if (data[i] != bs2.data[i])
			return false;
	}
	return true;
}

sb_bitset& sb_bitset::operator &=(const sb_bitset& bs2) {
	if (bit_size > bs2.bit_size) {
		resize(bs2.bit_size);
	}

	for (unsigned i = 0, c = std::min(data.size(), bs2.data.size()); i < c;
			++i) {
		data[i] &= bs2.data[i];
	}
	return *this;
}

sb_bitset& sb_bitset::mask(const sb_bitset& bs2) {
	if (bit_size < bs2.bit_size) {
		resize(bs2.bit_size);
	}

	for (unsigned i = 0, c = data.size(); i < c; ++i) {
		data[i] &= ~bs2.data[i];
	}
	return *this;
}

// Check that all values of a CK_SAME_REG constraint were allocated to the
// same GPR, and that channel-pinned values kept their pinned channel.
bool ra_constraint::check() {
	assert(kind == CK_SAME_REG);

	unsigned reg = 0;

	for (vvec::iterator I = values.begin(), E = values.end(); I != E; ++I) {
		value *v = *I;
		if (!v)
			continue;

		if (!v->gpr)
			return false;

		if (reg == 0)
			reg = v->gpr.sel() + 1;
		else if (reg != v->gpr.sel() + 1)
			return false;

		if (v->is_chan_pinned()) {
			if (v->pin_gpr.chan() != v->gpr.chan())
				return false;
		}
	}
	return true;
}

bool gpr_array::is_dead() {
	return false;
}

} // namespace r600_sb