1/* 2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 * 23 * Authors: 24 * Vadim Girlin 25 */ 26 27#include <cmath> 28 29#include "sb_shader.h" 30 31namespace r600_sb { 32 33value* get_select_value_for_em(shader& sh, value* em) { 34 if (!em->def) 35 return NULL; 36 37 node *predset = em->def; 38 if (!predset->is_pred_set()) 39 return NULL; 40 41 alu_node *s = sh.clone(static_cast<alu_node*>(predset)); 42 convert_predset_to_set(sh, s); 43 44 predset->insert_after(s); 45 46 value* &d0 = s->dst[0]; 47 d0 = sh.create_temp_value(); 48 d0->def = s; 49 return d0; 50} 51 52void convert_to_mov(alu_node &n, value *src, bool neg, bool abs) { 53 n.src.resize(1); 54 n.src[0] = src; 55 n.bc.src[0].abs = abs; 56 n.bc.src[0].neg = neg; 57 n.bc.set_op(ALU_OP1_MOV); 58} 59 60expr_handler::expr_handler(shader& sh) : sh(sh), vt(sh.vt) {} 61 62value * expr_handler::get_const(const literal &l) { 63 value *v = sh.get_const_value(l); 64 if (!v->gvn_source) 65 vt.add_value(v); 66 return v; 67} 68 69void expr_handler::assign_source(value *dst, value *src) { 70 dst->gvn_source = src->gvn_source; 71} 72 73bool expr_handler::equal(value *l, value *r) { 74 75 assert(l != r); 76 77 if (l->is_lds_access() || r->is_lds_access()) 78 return false; 79 if (l->gvalue() == r->gvalue()) 80 return true; 81 82 if (l->def && r->def) 83 return defs_equal(l, r); 84 85 if (l->is_rel() && r->is_rel()) 86 return ivars_equal(l, r); 87 88 return false; 89} 90 91bool expr_handler::ivars_equal(value* l, value* r) { 92 if (l->rel->gvalue() == r->rel->gvalue() 93 && l->select == r->select) { 94 95 vvec &lv = l->mdef.empty() ? l->muse : l->mdef; 96 vvec &rv = r->mdef.empty() ? r->muse : r->mdef; 97 98 // FIXME: replace this with more precise aliasing test 99 return lv == rv; 100 } 101 return false; 102} 103 104bool expr_handler::defs_equal(value* l, value* r) { 105 106 node *d1 = l->def; 107 node *d2 = r->def; 108 109 if (d1->type != d2->type || d1->subtype != d2->subtype) 110 return false; 111 112 if (d1->is_pred_set() || d2->is_pred_set()) 113 return false; 114 115 if (d1->type == NT_OP) { 116 switch (d1->subtype) { 117 case NST_ALU_INST: 118 return ops_equal( 119 static_cast<alu_node*>(d1), 120 static_cast<alu_node*>(d2)); 121// case NST_FETCH_INST: return ops_equal(static_cast<fetch_node*>(d1), 122// static_cast<fetch_node*>(d2); 123// case NST_CF_INST: return ops_equal(static_cast<cf_node*>(d1), 124// static_cast<cf_node*>(d2); 125 default: 126 break; 127 } 128 } 129 return false; 130} 131 132bool expr_handler::try_fold(value* v) { 133 assert(!v->gvn_source); 134 135 if (v->def) 136 try_fold(v->def); 137 138 if (v->gvn_source) 139 return true; 140 141 return false; 142} 143 144bool expr_handler::try_fold(node* n) { 145 return n->fold_dispatch(this); 146} 147 148bool expr_handler::fold(node& n) { 149 if (n.subtype == NST_PHI) { 150 151 value *s = n.src[0]; 152 153 // FIXME disabling phi folding for registers for now, otherwise we lose 154 // control flow information in some cases 155 // (GCM fails on tests/shaders/glsl-fs-if-nested-loop.shader_test) 156 // probably control flow transformation is required to enable it 157 if (s->is_sgpr()) 158 return false; 159 160 for(vvec::iterator I = n.src.begin() + 1, E = n.src.end(); I != E; ++I) { 161 value *v = *I; 162 if (!s->v_equal(v)) 163 return false; 164 } 165 166 assign_source(n.dst[0], s); 167 } else { 168 assert(n.subtype == NST_PSI); 169 assert(n.src.size() >= 6); 170 171 value *s = n.src[2]; 172 assert(s->gvn_source); 173 174 for(vvec::iterator I = n.src.begin() + 3, E = n.src.end(); I != E; I += 3) { 175 value *v = *(I+2); 176 if (!s->v_equal(v)) 177 return false; 178 } 179 assign_source(n.dst[0], s); 180 } 181 return true; 182} 183 184bool expr_handler::fold(container_node& n) { 185 return false; 186} 187 188bool expr_handler::fold_setcc(alu_node &n) { 189 190 value* v0 = n.src[0]->gvalue(); 191 value* v1 = n.src[1]->gvalue(); 192 193 assert(v0 && v1 && n.dst[0]); 194 195 unsigned flags = n.bc.op_ptr->flags; 196 unsigned cc = flags & AF_CC_MASK; 197 unsigned cmp_type = flags & AF_CMP_TYPE_MASK; 198 unsigned dst_type = flags & AF_DST_TYPE_MASK; 199 200 bool cond_result; 201 bool have_result = false; 202 203 bool isc0 = v0->is_const(); 204 bool isc1 = v1->is_const(); 205 206 literal dv, cv0, cv1; 207 208 if (isc0) { 209 cv0 = v0->get_const_value(); 210 apply_alu_src_mod(n.bc, 0, cv0); 211 } 212 213 if (isc1) { 214 cv1 = v1->get_const_value(); 215 apply_alu_src_mod(n.bc, 1, cv1); 216 } 217 218 if (isc0 && isc1) { 219 cond_result = evaluate_condition(flags, cv0, cv1); 220 have_result = true; 221 } else if (isc1) { 222 if (cmp_type == AF_FLOAT_CMP) { 223 if (n.bc.src[0].abs && !n.bc.src[0].neg) { 224 if (cv1.f < 0.0f && (cc == AF_CC_GT || cc == AF_CC_NE)) { 225 cond_result = true; 226 have_result = true; 227 } else if (cv1.f <= 0.0f && cc == AF_CC_GE) { 228 cond_result = true; 229 have_result = true; 230 } 231 } else if (n.bc.src[0].abs && n.bc.src[0].neg) { 232 if (cv1.f > 0.0f && (cc == AF_CC_GE || cc == AF_CC_E)) { 233 cond_result = false; 234 have_result = true; 235 } else if (cv1.f >= 0.0f && cc == AF_CC_GT) { 236 cond_result = false; 237 have_result = true; 238 } 239 } 240 } else if (cmp_type == AF_UINT_CMP && cv1.u == 0 && cc == AF_CC_GE) { 241 cond_result = true; 242 have_result = true; 243 } 244 } else if (isc0) { 245 if (cmp_type == AF_FLOAT_CMP) { 246 if (n.bc.src[1].abs && !n.bc.src[1].neg) { 247 if (cv0.f <= 0.0f && cc == AF_CC_GT) { 248 cond_result = false; 249 have_result = true; 250 } else if (cv0.f < 0.0f && (cc == AF_CC_GE || cc == AF_CC_E)) { 251 cond_result = false; 252 have_result = true; 253 } 254 } else if (n.bc.src[1].abs && n.bc.src[1].neg) { 255 if (cv0.f >= 0.0f && cc == AF_CC_GE) { 256 cond_result = true; 257 have_result = true; 258 } else if (cv0.f > 0.0f && (cc == AF_CC_GT || cc == AF_CC_NE)) { 259 cond_result = true; 260 have_result = true; 261 } 262 } 263 } else if (cmp_type == AF_UINT_CMP && cv0.u == 0 && cc == AF_CC_GT) { 264 cond_result = false; 265 have_result = true; 266 } 267 } else if (v0 == v1) { 268 bc_alu_src &s0 = n.bc.src[0], &s1 = n.bc.src[1]; 269 if (s0.abs == s1.abs && s0.neg == s1.neg && cmp_type != AF_FLOAT_CMP) { 270 // NOTE can't handle float comparisons here because of NaNs 271 cond_result = (cc == AF_CC_E || cc == AF_CC_GE); 272 have_result = true; 273 } 274 } 275 276 if (have_result) { 277 literal result; 278 279 if (cond_result) 280 result = dst_type != AF_FLOAT_DST ? 281 literal(0xFFFFFFFFu) : literal(1.0f); 282 else 283 result = literal(0); 284 285 convert_to_mov(n, sh.get_const_value(result)); 286 return fold_alu_op1(n); 287 } 288 289 return false; 290} 291 292bool expr_handler::fold(alu_node& n) { 293 294 switch (n.bc.op_ptr->src_count) { 295 case 1: return fold_alu_op1(n); 296 case 2: return fold_alu_op2(n); 297 case 3: return fold_alu_op3(n); 298 default: 299 assert(0); 300 } 301 return false; 302} 303 304bool expr_handler::fold(fetch_node& n) { 305 306 unsigned chan = 0; 307 for (vvec::iterator I = n.dst.begin(), E = n.dst.end(); I != E; ++I) { 308 value* &v = *I; 309 if (v) { 310 if (n.bc.dst_sel[chan] == SEL_0) 311 assign_source(*I, get_const(0.0f)); 312 else if (n.bc.dst_sel[chan] == SEL_1) 313 assign_source(*I, get_const(1.0f)); 314 } 315 ++chan; 316 } 317 return false; 318} 319 320bool expr_handler::fold(cf_node& n) { 321 return false; 322} 323 324void expr_handler::apply_alu_src_mod(const bc_alu &bc, unsigned src, 325 literal &v) { 326 const bc_alu_src &s = bc.src[src]; 327 328 if (s.abs) 329 v = fabsf(v.f); 330 if (s.neg) 331 v = -v.f; 332} 333 334void expr_handler::apply_alu_dst_mod(const bc_alu &bc, literal &v) { 335 const float omod_coeff[] = {2.0f, 4.0, 0.5f}; 336 337 if (bc.omod) 338 v = v.f * omod_coeff[bc.omod - 1]; 339 if (bc.clamp) 340 v = float_clamp(v.f); 341} 342 343bool expr_handler::args_equal(const vvec &l, const vvec &r) { 344 345 assert(l.size() == r.size()); 346 347 int s = l.size(); 348 349 for (int k = 0; k < s; ++k) { 350 if (!l[k]->v_equal(r[k])) 351 return false; 352 } 353 354 return true; 355} 356 357bool expr_handler::ops_equal(const alu_node *l, const alu_node* r) { 358 const bc_alu &b0 = l->bc; 359 const bc_alu &b1 = r->bc; 360 361 if (b0.op != b1.op) 362 return false; 363 364 unsigned src_count = b0.op_ptr->src_count; 365 366 if (b0.index_mode != b1.index_mode) 367 return false; 368 369 if (b0.clamp != b1.clamp || b0.omod != b1.omod) 370 return false; 371 372 for (unsigned s = 0; s < src_count; ++s) { 373 const bc_alu_src &s0 = b0.src[s]; 374 const bc_alu_src &s1 = b1.src[s]; 375 376 if (s0.abs != s1.abs || s0.neg != s1.neg) 377 return false; 378 } 379 return args_equal(l->src, r->src); 380} 381 382bool expr_handler::fold_alu_op1(alu_node& n) { 383 384 assert(!n.src.empty()); 385 if (n.src.empty()) 386 return false; 387 388 /* don't fold LDS instructions */ 389 if (n.bc.op_ptr->flags & AF_LDS) 390 return false; 391 392 value* v0 = n.src[0]->gvalue(); 393 394 if (v0->is_lds_oq() || v0->is_lds_access()) 395 return false; 396 assert(v0 && n.dst[0]); 397 398 if (!v0->is_const()) { 399 // handle (MOV -(MOV -x)) => (MOV x) 400 if (n.bc.op == ALU_OP1_MOV && n.bc.src[0].neg && !n.bc.src[1].abs 401 && v0->def && v0->def->is_alu_op(ALU_OP1_MOV)) { 402 alu_node *sd = static_cast<alu_node*>(v0->def); 403 if (!sd->bc.clamp && !sd->bc.omod && !sd->bc.src[0].abs && 404 sd->bc.src[0].neg) { 405 n.src[0] = sd->src[0]; 406 n.bc.src[0].neg = 0; 407 v0 = n.src[0]->gvalue(); 408 } 409 } 410 411 if ((n.bc.op == ALU_OP1_MOV || n.bc.op == ALU_OP1_MOVA_INT || 412 n.bc.op == ALU_OP1_MOVA_GPR_INT) 413 && n.bc.clamp == 0 && n.bc.omod == 0 414 && n.bc.src[0].abs == 0 && n.bc.src[0].neg == 0 && 415 n.src.size() == 1 /* RIM/SIM can be appended as additional values */ 416 && n.dst[0]->no_reladdr_conflict_with(v0)) { 417 assign_source(n.dst[0], v0); 418 return true; 419 } 420 return false; 421 } 422 423 literal dv, cv = v0->get_const_value(); 424 apply_alu_src_mod(n.bc, 0, cv); 425 426 switch (n.bc.op) { 427 case ALU_OP1_CEIL: dv = ceilf(cv.f); break; 428 case ALU_OP1_COS: dv = cos(cv.f * 2.0f * M_PI); break; 429 case ALU_OP1_EXP_IEEE: dv = exp2f(cv.f); break; 430 case ALU_OP1_FLOOR: dv = floorf(cv.f); break; 431 case ALU_OP1_FLT_TO_INT: dv = (int)cv.f; break; // FIXME: round modes ???? 432 case ALU_OP1_FLT_TO_INT_FLOOR: dv = (int32_t)floorf(cv.f); break; 433 case ALU_OP1_FLT_TO_INT_RPI: dv = (int32_t)floorf(cv.f + 0.5f); break; 434 case ALU_OP1_FLT_TO_INT_TRUNC: dv = (int32_t)truncf(cv.f); break; 435 case ALU_OP1_FLT_TO_UINT: dv = (uint32_t)cv.f; break; 436 case ALU_OP1_FRACT: dv = cv.f - floorf(cv.f); break; 437 case ALU_OP1_INT_TO_FLT: dv = (float)cv.i; break; 438 case ALU_OP1_LOG_CLAMPED: 439 case ALU_OP1_LOG_IEEE: 440 if (cv.f != 0.0f) 441 dv = log2f(cv.f); 442 else 443 // don't fold to NAN, let the GPU handle it for now 444 // (prevents degenerate LIT tests from failing) 445 return false; 446 break; 447 case ALU_OP1_MOV: dv = cv; break; 448 case ALU_OP1_MOVA_INT: dv = cv; break; // FIXME ??? 449// case ALU_OP1_MOVA_FLOOR: dv = (int32_t)floor(cv.f); break; 450// case ALU_OP1_MOVA_GPR_INT: 451 case ALU_OP1_NOT_INT: dv = ~cv.i; break; 452 case ALU_OP1_PRED_SET_INV: 453 dv = cv.f == 0.0f ? 1.0f : (cv.f == 1.0f ? 0.0f : cv.f); break; 454 case ALU_OP1_PRED_SET_RESTORE: dv = cv; break; 455 case ALU_OP1_RECIPSQRT_CLAMPED: 456 case ALU_OP1_RECIPSQRT_FF: 457 case ALU_OP1_RECIPSQRT_IEEE: dv = 1.0f / sqrtf(cv.f); break; 458 case ALU_OP1_RECIP_CLAMPED: 459 case ALU_OP1_RECIP_FF: 460 case ALU_OP1_RECIP_IEEE: dv = 1.0f / cv.f; break; 461// case ALU_OP1_RECIP_INT: 462 case ALU_OP1_RECIP_UINT: dv.u = (1ull << 32) / cv.u; break; 463// case ALU_OP1_RNDNE: dv = floor(cv.f + 0.5f); break; 464 case ALU_OP1_SIN: dv = sin(cv.f * 2.0f * M_PI); break; 465 case ALU_OP1_SQRT_IEEE: dv = sqrtf(cv.f); break; 466 case ALU_OP1_TRUNC: dv = truncf(cv.f); break; 467 468 default: 469 return false; 470 } 471 472 apply_alu_dst_mod(n.bc, dv); 473 assign_source(n.dst[0], get_const(dv)); 474 return true; 475} 476 477bool expr_handler::fold_mul_add(alu_node *n) { 478 479 bool ieee; 480 value* v0 = n->src[0]->gvalue(); 481 482 alu_node *d0 = (v0->def && v0->def->is_alu_inst()) ? 483 static_cast<alu_node*>(v0->def) : NULL; 484 485 if (d0) { 486 if (d0->is_alu_op(ALU_OP2_MUL_IEEE)) 487 ieee = true; 488 else if (d0->is_alu_op(ALU_OP2_MUL)) 489 ieee = false; 490 else 491 return false; 492 493 if (!d0->bc.src[0].abs && !d0->bc.src[1].abs && 494 !n->bc.src[1].abs && !n->bc.src[0].abs && !d0->bc.omod && 495 !d0->bc.clamp && !n->bc.omod && 496 (!d0->src[0]->is_kcache() || !d0->src[1]->is_kcache() || 497 !n->src[1]->is_kcache())) { 498 499 bool mul_neg = n->bc.src[0].neg; 500 501 n->src.resize(3); 502 n->bc.set_op(ieee ? ALU_OP3_MULADD_IEEE : ALU_OP3_MULADD); 503 n->src[2] = n->src[1]; 504 n->bc.src[2] = n->bc.src[1]; 505 n->src[0] = d0->src[0]; 506 n->bc.src[0] = d0->bc.src[0]; 507 n->src[1] = d0->src[1]; 508 n->bc.src[1] = d0->bc.src[1]; 509 510 n->bc.src[0].neg ^= mul_neg; 511 512 fold_alu_op3(*n); 513 return true; 514 } 515 } 516 517 value* v1 = n->src[1]->gvalue(); 518 519 alu_node *d1 = (v1->def && v1->def->is_alu_inst()) ? 520 static_cast<alu_node*>(v1->def) : NULL; 521 522 if (d1) { 523 if (d1->is_alu_op(ALU_OP2_MUL_IEEE)) 524 ieee = true; 525 else if (d1->is_alu_op(ALU_OP2_MUL)) 526 ieee = false; 527 else 528 return false; 529 530 if (!d1->bc.src[1].abs && !d1->bc.src[0].abs && 531 !n->bc.src[0].abs && !n->bc.src[1].abs && !d1->bc.omod && 532 !d1->bc.clamp && !n->bc.omod && 533 (!d1->src[0]->is_kcache() || !d1->src[1]->is_kcache() || 534 !n->src[0]->is_kcache())) { 535 536 bool mul_neg = n->bc.src[1].neg; 537 538 n->src.resize(3); 539 n->bc.set_op(ieee ? ALU_OP3_MULADD_IEEE : ALU_OP3_MULADD); 540 n->src[2] = n->src[0]; 541 n->bc.src[2] = n->bc.src[0]; 542 n->src[1] = d1->src[1]; 543 n->bc.src[1] = d1->bc.src[1]; 544 n->src[0] = d1->src[0]; 545 n->bc.src[0] = d1->bc.src[0]; 546 547 n->bc.src[1].neg ^= mul_neg; 548 549 fold_alu_op3(*n); 550 return true; 551 } 552 } 553 554 return false; 555} 556 557bool expr_handler::eval_const_op(unsigned op, literal &r, 558 literal cv0, literal cv1) { 559 560 switch (op) { 561 case ALU_OP2_ADD: r = cv0.f + cv1.f; break; 562 case ALU_OP2_ADDC_UINT: 563 r = (uint32_t)(((uint64_t)cv0.u + cv1.u)>>32); break; 564 case ALU_OP2_ADD_INT: r = cv0.i + cv1.i; break; 565 case ALU_OP2_AND_INT: r = cv0.i & cv1.i; break; 566 case ALU_OP2_ASHR_INT: r = cv0.i >> (cv1.i & 0x1F); break; 567 case ALU_OP2_BFM_INT: 568 r = (((1 << (cv0.i & 0x1F)) - 1) << (cv1.i & 0x1F)); break; 569 case ALU_OP2_LSHL_INT: r = cv0.i << cv1.i; break; 570 case ALU_OP2_LSHR_INT: r = cv0.u >> cv1.u; break; 571 case ALU_OP2_MAX: 572 case ALU_OP2_MAX_DX10: r = cv0.f > cv1.f ? cv0.f : cv1.f; break; 573 case ALU_OP2_MAX_INT: r = cv0.i > cv1.i ? cv0.i : cv1.i; break; 574 case ALU_OP2_MAX_UINT: r = cv0.u > cv1.u ? cv0.u : cv1.u; break; 575 case ALU_OP2_MIN: 576 case ALU_OP2_MIN_DX10: r = cv0.f < cv1.f ? cv0.f : cv1.f; break; 577 case ALU_OP2_MIN_INT: r = cv0.i < cv1.i ? cv0.i : cv1.i; break; 578 case ALU_OP2_MIN_UINT: r = cv0.u < cv1.u ? cv0.u : cv1.u; break; 579 case ALU_OP2_MUL: 580 case ALU_OP2_MUL_IEEE: r = cv0.f * cv1.f; break; 581 case ALU_OP2_MULHI_INT: 582 r = (int32_t)(((int64_t)cv0.u * cv1.u)>>32); break; 583 case ALU_OP2_MULHI_UINT: 584 r = (uint32_t)(((uint64_t)cv0.u * cv1.u)>>32); break; 585 case ALU_OP2_MULLO_INT: 586 r = (int32_t)(((int64_t)cv0.u * cv1.u) & 0xFFFFFFFF); break; 587 case ALU_OP2_MULLO_UINT: 588 r = (uint32_t)(((uint64_t)cv0.u * cv1.u) & 0xFFFFFFFF); break; 589 case ALU_OP2_OR_INT: r = cv0.i | cv1.i; break; 590 case ALU_OP2_SUB_INT: r = cv0.i - cv1.i; break; 591 case ALU_OP2_XOR_INT: r = cv0.i ^ cv1.i; break; 592 593 default: 594 return false; 595 } 596 597 return true; 598} 599 600// fold the chain of associative ops, e.g. (ADD 2, (ADD x, 3)) => (ADD x, 5) 601bool expr_handler::fold_assoc(alu_node *n) { 602 603 alu_node *a = n; 604 literal cr; 605 606 int last_arg = -3; 607 608 unsigned op = n->bc.op; 609 bool allow_neg = false, cur_neg = false; 610 bool distribute_neg = false; 611 612 switch(op) { 613 case ALU_OP2_ADD: 614 distribute_neg = true; 615 allow_neg = true; 616 break; 617 case ALU_OP2_MUL: 618 case ALU_OP2_MUL_IEEE: 619 allow_neg = true; 620 break; 621 case ALU_OP3_MULADD: 622 allow_neg = true; 623 op = ALU_OP2_MUL; 624 break; 625 case ALU_OP3_MULADD_IEEE: 626 allow_neg = true; 627 op = ALU_OP2_MUL_IEEE; 628 break; 629 default: 630 if (n->bc.op_ptr->src_count != 2) 631 return false; 632 } 633 634 // check if we can evaluate the op 635 if (!eval_const_op(op, cr, literal(0), literal(0))) 636 return false; 637 638 while (true) { 639 640 value *v0 = a->src[0]->gvalue(); 641 value *v1 = a->src[1]->gvalue(); 642 643 last_arg = -2; 644 645 if (v1->is_const()) { 646 literal arg = v1->get_const_value(); 647 apply_alu_src_mod(a->bc, 1, arg); 648 if (cur_neg && distribute_neg) 649 arg.f = -arg.f; 650 651 if (a == n) 652 cr = arg; 653 else 654 eval_const_op(op, cr, cr, arg); 655 656 if (v0->def) { 657 alu_node *d0 = static_cast<alu_node*>(v0->def); 658 if ((d0->is_alu_op(op) || 659 (op == ALU_OP2_MUL_IEEE && 660 d0->is_alu_op(ALU_OP2_MUL))) && 661 !d0->bc.omod && !d0->bc.clamp && 662 !a->bc.src[0].abs && 663 (!a->bc.src[0].neg || allow_neg)) { 664 cur_neg ^= a->bc.src[0].neg; 665 a = d0; 666 continue; 667 } 668 } 669 last_arg = 0; 670 671 } 672 673 if (v0->is_const()) { 674 literal arg = v0->get_const_value(); 675 apply_alu_src_mod(a->bc, 0, arg); 676 if (cur_neg && distribute_neg) 677 arg.f = -arg.f; 678 679 if (last_arg == 0) { 680 eval_const_op(op, cr, cr, arg); 681 last_arg = -1; 682 break; 683 } 684 685 if (a == n) 686 cr = arg; 687 else 688 eval_const_op(op, cr, cr, arg); 689 690 if (v1->def) { 691 alu_node *d1 = static_cast<alu_node*>(v1->def); 692 if ((d1->is_alu_op(op) || 693 (op == ALU_OP2_MUL_IEEE && 694 d1->is_alu_op(ALU_OP2_MUL))) && 695 !d1->bc.omod && !d1->bc.clamp && 696 !a->bc.src[1].abs && 697 (!a->bc.src[1].neg || allow_neg)) { 698 cur_neg ^= a->bc.src[1].neg; 699 a = d1; 700 continue; 701 } 702 } 703 704 last_arg = 1; 705 } 706 707 break; 708 }; 709 710 if (last_arg == -1) { 711 // result is const 712 apply_alu_dst_mod(n->bc, cr); 713 714 if (n->bc.op == op) { 715 convert_to_mov(*n, sh.get_const_value(cr)); 716 fold_alu_op1(*n); 717 return true; 718 } else { // MULADD => ADD 719 n->src[0] = n->src[2]; 720 n->bc.src[0] = n->bc.src[2]; 721 n->src[1] = sh.get_const_value(cr); 722 n->bc.src[1].clear(); 723 724 n->src.resize(2); 725 n->bc.set_op(ALU_OP2_ADD); 726 } 727 } else if (last_arg >= 0) { 728 n->src[0] = a->src[last_arg]; 729 n->bc.src[0] = a->bc.src[last_arg]; 730 n->bc.src[0].neg ^= cur_neg; 731 n->src[1] = sh.get_const_value(cr); 732 n->bc.src[1].clear(); 733 } 734 735 return false; 736} 737 738bool expr_handler::fold_alu_op2(alu_node& n) { 739 740 if (n.src.size() < 2) 741 return false; 742 743 unsigned flags = n.bc.op_ptr->flags; 744 745 if (flags & AF_SET) { 746 return fold_setcc(n); 747 } 748 749 if (!sh.safe_math && (flags & AF_M_ASSOC)) { 750 if (fold_assoc(&n)) 751 return true; 752 } 753 754 value* v0 = n.src[0]->gvalue(); 755 value* v1 = n.src[1]->gvalue(); 756 757 assert(v0 && v1); 758 759 // handle some operations with equal args, e.g. x + x => x * 2 760 if (v0 == v1) { 761 if (n.bc.src[0].neg == n.bc.src[1].neg && 762 n.bc.src[0].abs == n.bc.src[1].abs) { 763 switch (n.bc.op) { 764 case ALU_OP2_MIN: // (MIN x, x) => (MOV x) 765 case ALU_OP2_MIN_DX10: 766 case ALU_OP2_MAX: 767 case ALU_OP2_MAX_DX10: 768 convert_to_mov(n, v0, n.bc.src[0].neg, n.bc.src[0].abs); 769 return fold_alu_op1(n); 770 case ALU_OP2_ADD: // (ADD x, x) => (MUL x, 2) 771 if (!sh.safe_math) { 772 n.src[1] = sh.get_const_value(2.0f); 773 n.bc.src[1].clear(); 774 n.bc.set_op(ALU_OP2_MUL); 775 return fold_alu_op2(n); 776 } 777 break; 778 } 779 } 780 if (n.bc.src[0].neg != n.bc.src[1].neg && 781 n.bc.src[0].abs == n.bc.src[1].abs) { 782 switch (n.bc.op) { 783 case ALU_OP2_ADD: // (ADD x, -x) => (MOV 0) 784 if (!sh.safe_math) { 785 convert_to_mov(n, sh.get_const_value(literal(0))); 786 return fold_alu_op1(n); 787 } 788 break; 789 } 790 } 791 } 792 793 if (n.bc.op == ALU_OP2_ADD) { 794 if (fold_mul_add(&n)) 795 return true; 796 } 797 798 bool isc0 = v0->is_const(); 799 bool isc1 = v1->is_const(); 800 801 if (!isc0 && !isc1) 802 return false; 803 804 literal dv, cv0, cv1; 805 806 if (isc0) { 807 cv0 = v0->get_const_value(); 808 apply_alu_src_mod(n.bc, 0, cv0); 809 } 810 811 if (isc1) { 812 cv1 = v1->get_const_value(); 813 apply_alu_src_mod(n.bc, 1, cv1); 814 } 815 816 if (isc0 && isc1) { 817 818 if (!eval_const_op(n.bc.op, dv, cv0, cv1)) 819 return false; 820 821 } else { // one source is const 822 823 if (isc0 && cv0 == literal(0)) { 824 switch (n.bc.op) { 825 case ALU_OP2_ADD: 826 case ALU_OP2_ADD_INT: 827 case ALU_OP2_MAX_UINT: 828 case ALU_OP2_OR_INT: 829 case ALU_OP2_XOR_INT: 830 convert_to_mov(n, n.src[1], n.bc.src[1].neg, n.bc.src[1].abs); 831 return fold_alu_op1(n); 832 case ALU_OP2_AND_INT: 833 case ALU_OP2_ASHR_INT: 834 case ALU_OP2_LSHL_INT: 835 case ALU_OP2_LSHR_INT: 836 case ALU_OP2_MIN_UINT: 837 case ALU_OP2_MUL: 838 case ALU_OP2_MULHI_UINT: 839 case ALU_OP2_MULLO_UINT: 840 convert_to_mov(n, sh.get_const_value(literal(0))); 841 return fold_alu_op1(n); 842 } 843 } else if (isc1 && cv1 == literal(0)) { 844 switch (n.bc.op) { 845 case ALU_OP2_ADD: 846 case ALU_OP2_ADD_INT: 847 case ALU_OP2_ASHR_INT: 848 case ALU_OP2_LSHL_INT: 849 case ALU_OP2_LSHR_INT: 850 case ALU_OP2_MAX_UINT: 851 case ALU_OP2_OR_INT: 852 case ALU_OP2_SUB_INT: 853 case ALU_OP2_XOR_INT: 854 convert_to_mov(n, n.src[0], n.bc.src[0].neg, n.bc.src[0].abs); 855 return fold_alu_op1(n); 856 case ALU_OP2_AND_INT: 857 case ALU_OP2_MIN_UINT: 858 case ALU_OP2_MUL: 859 case ALU_OP2_MULHI_UINT: 860 case ALU_OP2_MULLO_UINT: 861 convert_to_mov(n, sh.get_const_value(literal(0))); 862 return fold_alu_op1(n); 863 } 864 } else if (isc0 && cv0 == literal(1.0f)) { 865 switch (n.bc.op) { 866 case ALU_OP2_MUL: 867 case ALU_OP2_MUL_IEEE: 868 convert_to_mov(n, n.src[1], n.bc.src[1].neg, n.bc.src[1].abs); 869 return fold_alu_op1(n); 870 } 871 } else if (isc1 && cv1 == literal(1.0f)) { 872 switch (n.bc.op) { 873 case ALU_OP2_MUL: 874 case ALU_OP2_MUL_IEEE: 875 convert_to_mov(n, n.src[0], n.bc.src[0].neg, n.bc.src[0].abs); 876 return fold_alu_op1(n); 877 } 878 } 879 880 return false; 881 } 882 883 apply_alu_dst_mod(n.bc, dv); 884 assign_source(n.dst[0], get_const(dv)); 885 return true; 886} 887 888bool expr_handler::evaluate_condition(unsigned alu_cnd_flags, 889 literal s1, literal s2) { 890 891 unsigned cmp_type = alu_cnd_flags & AF_CMP_TYPE_MASK; 892 unsigned cc = alu_cnd_flags & AF_CC_MASK; 893 894 switch (cmp_type) { 895 case AF_FLOAT_CMP: { 896 switch (cc) { 897 case AF_CC_E : return s1.f == s2.f; 898 case AF_CC_GT: return s1.f > s2.f; 899 case AF_CC_GE: return s1.f >= s2.f; 900 case AF_CC_NE: return s1.f != s2.f; 901 case AF_CC_LT: return s1.f < s2.f; 902 case AF_CC_LE: return s1.f <= s2.f; 903 default: 904 assert(!"invalid condition code"); 905 return false; 906 } 907 } 908 case AF_INT_CMP: { 909 switch (cc) { 910 case AF_CC_E : return s1.i == s2.i; 911 case AF_CC_GT: return s1.i > s2.i; 912 case AF_CC_GE: return s1.i >= s2.i; 913 case AF_CC_NE: return s1.i != s2.i; 914 case AF_CC_LT: return s1.i < s2.i; 915 case AF_CC_LE: return s1.i <= s2.i; 916 default: 917 assert(!"invalid condition code"); 918 return false; 919 } 920 } 921 case AF_UINT_CMP: { 922 switch (cc) { 923 case AF_CC_E : return s1.u == s2.u; 924 case AF_CC_GT: return s1.u > s2.u; 925 case AF_CC_GE: return s1.u >= s2.u; 926 case AF_CC_NE: return s1.u != s2.u; 927 case AF_CC_LT: return s1.u < s2.u; 928 case AF_CC_LE: return s1.u <= s2.u; 929 default: 930 assert(!"invalid condition code"); 931 return false; 932 } 933 } 934 default: 935 assert(!"invalid cmp_type"); 936 return false; 937 } 938} 939 940bool expr_handler::fold_alu_op3(alu_node& n) { 941 942 if (n.src.size() < 3) 943 return false; 944 945 if (!sh.safe_math && (n.bc.op_ptr->flags & AF_M_ASSOC)) { 946 if (fold_assoc(&n)) 947 return true; 948 if (n.src.size() < 3) 949 return fold_alu_op2(n); 950 } 951 952 value* v0 = n.src[0]->gvalue(); 953 value* v1 = n.src[1]->gvalue(); 954 value* v2 = n.src[2]->gvalue(); 955 956 /* LDS instructions look like op3 with no dst - don't fold. */ 957 if (!n.dst[0]) 958 return false; 959 assert(v0 && v1 && v2 && n.dst[0]); 960 961 bool isc0 = v0->is_const(); 962 bool isc1 = v1->is_const(); 963 bool isc2 = v2->is_const(); 964 965 literal dv, cv0, cv1, cv2; 966 967 if (isc0) { 968 cv0 = v0->get_const_value(); 969 apply_alu_src_mod(n.bc, 0, cv0); 970 } 971 972 if (isc1) { 973 cv1 = v1->get_const_value(); 974 apply_alu_src_mod(n.bc, 1, cv1); 975 } 976 977 if (isc2) { 978 cv2 = v2->get_const_value(); 979 apply_alu_src_mod(n.bc, 2, cv2); 980 } 981 982 unsigned flags = n.bc.op_ptr->flags; 983 984 if (flags & AF_CMOV) { 985 int src = 0; 986 987 if (v1 == v2 && n.bc.src[1].neg == n.bc.src[2].neg) { 988 // result doesn't depend on condition, convert to MOV 989 src = 1; 990 } else if (isc0) { 991 // src0 is const, condition can be evaluated, convert to MOV 992 bool cond = evaluate_condition(n.bc.op_ptr->flags & (AF_CC_MASK | 993 AF_CMP_TYPE_MASK), cv0, literal(0)); 994 src = cond ? 1 : 2; 995 } 996 997 if (src) { 998 // if src is selected, convert to MOV 999 convert_to_mov(n, n.src[src], n.bc.src[src].neg); 1000 return fold_alu_op1(n); 1001 } 1002 } 1003 1004 // handle (MULADD a, x, MUL (x, b)) => (MUL x, ADD (a, b)) 1005 if (!sh.safe_math && (n.bc.op == ALU_OP3_MULADD || 1006 n.bc.op == ALU_OP3_MULADD_IEEE)) { 1007 1008 unsigned op = n.bc.op == ALU_OP3_MULADD_IEEE ? 1009 ALU_OP2_MUL_IEEE : ALU_OP2_MUL; 1010 1011 if (!isc2 && v2->def && v2->def->is_alu_op(op)) { 1012 1013 alu_node *md = static_cast<alu_node*>(v2->def); 1014 value *mv0 = md->src[0]->gvalue(); 1015 value *mv1 = md->src[1]->gvalue(); 1016 1017 int es0 = -1, es1; 1018 1019 if (v0 == mv0) { 1020 es0 = 0; 1021 es1 = 0; 1022 } else if (v0 == mv1) { 1023 es0 = 0; 1024 es1 = 1; 1025 } else if (v1 == mv0) { 1026 es0 = 1; 1027 es1 = 0; 1028 } else if (v1 == mv1) { 1029 es0 = 1; 1030 es1 = 1; 1031 } 1032 1033 value *va0 = es0 == 0 ? v1 : v0; 1034 value *va1 = es1 == 0 ? mv1 : mv0; 1035 1036 /* Don't fold if no equal multipliers were found. 1037 * Also don#t fold if the operands of the to be created ADD are both 1038 * relatively accessed with different AR values because that would 1039 * create impossible code. 1040 */ 1041 if (es0 != -1 && 1042 (!va0->is_rel() || !va1->is_rel() || 1043 (va0->rel == va1->rel))) { 1044 1045 alu_node *add = sh.create_alu(); 1046 add->bc.set_op(ALU_OP2_ADD); 1047 1048 add->dst.resize(1); 1049 add->src.resize(2); 1050 1051 value *t = sh.create_temp_value(); 1052 t->def = add; 1053 add->dst[0] = t; 1054 add->src[0] = va0; 1055 add->src[1] = va1; 1056 add->bc.src[0] = n.bc.src[!es0]; 1057 add->bc.src[1] = md->bc.src[!es1]; 1058 1059 add->bc.src[1].neg ^= n.bc.src[2].neg ^ 1060 (n.bc.src[es0].neg != md->bc.src[es1].neg); 1061 1062 n.insert_before(add); 1063 vt.add_value(t); 1064 1065 t = t->gvalue(); 1066 1067 if (es0 == 1) { 1068 n.src[0] = n.src[1]; 1069 n.bc.src[0] = n.bc.src[1]; 1070 } 1071 1072 n.src[1] = t; 1073 n.bc.src[1].clear(); 1074 1075 n.src.resize(2); 1076 1077 n.bc.set_op(op); 1078 return fold_alu_op2(n); 1079 } 1080 } 1081 } 1082 1083 if (!isc0 && !isc1 && !isc2) 1084 return false; 1085 1086 if (isc0 && isc1 && isc2) { 1087 switch (n.bc.op) { 1088 case ALU_OP3_MULADD_IEEE: 1089 case ALU_OP3_MULADD: dv = cv0.f * cv1.f + cv2.f; break; 1090 1091 // TODO 1092 1093 default: 1094 return false; 1095 } 1096 } else { 1097 if (isc0 && isc1) { 1098 switch (n.bc.op) { 1099 case ALU_OP3_MULADD: 1100 case ALU_OP3_MULADD_IEEE: 1101 dv = cv0.f * cv1.f; 1102 n.bc.set_op(ALU_OP2_ADD); 1103 n.src[0] = sh.get_const_value(dv); 1104 n.bc.src[0].clear(); 1105 n.src[1] = n.src[2]; 1106 n.bc.src[1] = n.bc.src[2]; 1107 n.src.resize(2); 1108 return fold_alu_op2(n); 1109 } 1110 } 1111 1112 if (n.bc.op == ALU_OP3_MULADD) { 1113 if ((isc0 && cv0 == literal(0)) || (isc1 && cv1 == literal(0))) { 1114 convert_to_mov(n, n.src[2], n.bc.src[2].neg, n.bc.src[2].abs); 1115 return fold_alu_op1(n); 1116 } 1117 } 1118 1119 if (n.bc.op == ALU_OP3_MULADD || n.bc.op == ALU_OP3_MULADD_IEEE) { 1120 unsigned op = n.bc.op == ALU_OP3_MULADD_IEEE ? 1121 ALU_OP2_MUL_IEEE : ALU_OP2_MUL; 1122 1123 if (isc1 && v0 == v2) { 1124 cv1.f += (n.bc.src[2].neg != n.bc.src[0].neg ? -1.0f : 1.0f); 1125 n.src[1] = sh.get_const_value(cv1); 1126 n.bc.src[1].neg = 0; 1127 n.bc.src[1].abs = 0; 1128 n.bc.set_op(op); 1129 n.src.resize(2); 1130 return fold_alu_op2(n); 1131 } else if (isc0 && v1 == v2) { 1132 cv0.f += (n.bc.src[2].neg != n.bc.src[1].neg ? -1.0f : 1.0f); 1133 n.src[0] = sh.get_const_value(cv0); 1134 n.bc.src[0].neg = 0; 1135 n.bc.src[0].abs = 0; 1136 n.bc.set_op(op); 1137 n.src.resize(2); 1138 return fold_alu_op2(n); 1139 } 1140 } 1141 1142 return false; 1143 } 1144 1145 apply_alu_dst_mod(n.bc, dv); 1146 assign_source(n.dst[0], get_const(dv)); 1147 return true; 1148} 1149 1150unsigned invert_setcc_condition(unsigned cc, bool &swap_args) { 1151 unsigned ncc = 0; 1152 1153 switch (cc) { 1154 case AF_CC_E: ncc = AF_CC_NE; break; 1155 case AF_CC_NE: ncc = AF_CC_E; break; 1156 case AF_CC_GE: ncc = AF_CC_GT; swap_args = true; break; 1157 case AF_CC_GT: ncc = AF_CC_GE; swap_args = true; break; 1158 default: 1159 assert(!"unexpected condition code"); 1160 break; 1161 } 1162 return ncc; 1163} 1164 1165unsigned get_setcc_op(unsigned cc, unsigned cmp_type, bool int_dst) { 1166 1167 if (int_dst && cmp_type == AF_FLOAT_CMP) { 1168 switch (cc) { 1169 case AF_CC_E: return ALU_OP2_SETE_DX10; 1170 case AF_CC_NE: return ALU_OP2_SETNE_DX10; 1171 case AF_CC_GT: return ALU_OP2_SETGT_DX10; 1172 case AF_CC_GE: return ALU_OP2_SETGE_DX10; 1173 } 1174 } else { 1175 1176 switch(cmp_type) { 1177 case AF_FLOAT_CMP: { 1178 switch (cc) { 1179 case AF_CC_E: return ALU_OP2_SETE; 1180 case AF_CC_NE: return ALU_OP2_SETNE; 1181 case AF_CC_GT: return ALU_OP2_SETGT; 1182 case AF_CC_GE: return ALU_OP2_SETGE; 1183 } 1184 break; 1185 } 1186 case AF_INT_CMP: { 1187 switch (cc) { 1188 case AF_CC_E: return ALU_OP2_SETE_INT; 1189 case AF_CC_NE: return ALU_OP2_SETNE_INT; 1190 case AF_CC_GT: return ALU_OP2_SETGT_INT; 1191 case AF_CC_GE: return ALU_OP2_SETGE_INT; 1192 } 1193 break; 1194 } 1195 case AF_UINT_CMP: { 1196 switch (cc) { 1197 case AF_CC_E: return ALU_OP2_SETE_INT; 1198 case AF_CC_NE: return ALU_OP2_SETNE_INT; 1199 case AF_CC_GT: return ALU_OP2_SETGT_UINT; 1200 case AF_CC_GE: return ALU_OP2_SETGE_UINT; 1201 } 1202 break; 1203 } 1204 } 1205 } 1206 1207 assert(!"unexpected cc&cmp_type combination"); 1208 return ~0u; 1209} 1210 1211unsigned get_predsetcc_op(unsigned cc, unsigned cmp_type) { 1212 1213 switch(cmp_type) { 1214 case AF_FLOAT_CMP: { 1215 switch (cc) { 1216 case AF_CC_E: return ALU_OP2_PRED_SETE; 1217 case AF_CC_NE: return ALU_OP2_PRED_SETNE; 1218 case AF_CC_GT: return ALU_OP2_PRED_SETGT; 1219 case AF_CC_GE: return ALU_OP2_PRED_SETGE; 1220 } 1221 break; 1222 } 1223 case AF_INT_CMP: { 1224 switch (cc) { 1225 case AF_CC_E: return ALU_OP2_PRED_SETE_INT; 1226 case AF_CC_NE: return ALU_OP2_PRED_SETNE_INT; 1227 case AF_CC_GT: return ALU_OP2_PRED_SETGT_INT; 1228 case AF_CC_GE: return ALU_OP2_PRED_SETGE_INT; 1229 } 1230 break; 1231 } 1232 case AF_UINT_CMP: { 1233 switch (cc) { 1234 case AF_CC_E: return ALU_OP2_PRED_SETE_INT; 1235 case AF_CC_NE: return ALU_OP2_PRED_SETNE_INT; 1236 case AF_CC_GT: return ALU_OP2_PRED_SETGT_UINT; 1237 case AF_CC_GE: return ALU_OP2_PRED_SETGE_UINT; 1238 } 1239 break; 1240 } 1241 } 1242 1243 assert(!"unexpected cc&cmp_type combination"); 1244 return ~0u; 1245} 1246 1247unsigned get_killcc_op(unsigned cc, unsigned cmp_type) { 1248 1249 switch(cmp_type) { 1250 case AF_FLOAT_CMP: { 1251 switch (cc) { 1252 case AF_CC_E: return ALU_OP2_KILLE; 1253 case AF_CC_NE: return ALU_OP2_KILLNE; 1254 case AF_CC_GT: return ALU_OP2_KILLGT; 1255 case AF_CC_GE: return ALU_OP2_KILLGE; 1256 } 1257 break; 1258 } 1259 case AF_INT_CMP: { 1260 switch (cc) { 1261 case AF_CC_E: return ALU_OP2_KILLE_INT; 1262 case AF_CC_NE: return ALU_OP2_KILLNE_INT; 1263 case AF_CC_GT: return ALU_OP2_KILLGT_INT; 1264 case AF_CC_GE: return ALU_OP2_KILLGE_INT; 1265 } 1266 break; 1267 } 1268 case AF_UINT_CMP: { 1269 switch (cc) { 1270 case AF_CC_E: return ALU_OP2_KILLE_INT; 1271 case AF_CC_NE: return ALU_OP2_KILLNE_INT; 1272 case AF_CC_GT: return ALU_OP2_KILLGT_UINT; 1273 case AF_CC_GE: return ALU_OP2_KILLGE_UINT; 1274 } 1275 break; 1276 } 1277 } 1278 1279 assert(!"unexpected cc&cmp_type combination"); 1280 return ~0u; 1281} 1282 1283unsigned get_cndcc_op(unsigned cc, unsigned cmp_type) { 1284 1285 switch(cmp_type) { 1286 case AF_FLOAT_CMP: { 1287 switch (cc) { 1288 case AF_CC_E: return ALU_OP3_CNDE; 1289 case AF_CC_GT: return ALU_OP3_CNDGT; 1290 case AF_CC_GE: return ALU_OP3_CNDGE; 1291 } 1292 break; 1293 } 1294 case AF_INT_CMP: { 1295 switch (cc) { 1296 case AF_CC_E: return ALU_OP3_CNDE_INT; 1297 case AF_CC_GT: return ALU_OP3_CNDGT_INT; 1298 case AF_CC_GE: return ALU_OP3_CNDGE_INT; 1299 } 1300 break; 1301 } 1302 } 1303 1304 assert(!"unexpected cc&cmp_type combination"); 1305 return ~0u; 1306} 1307 1308 1309void convert_predset_to_set(shader& sh, alu_node* a) { 1310 1311 unsigned flags = a->bc.op_ptr->flags; 1312 unsigned cc = flags & AF_CC_MASK; 1313 unsigned cmp_type = flags & AF_CMP_TYPE_MASK; 1314 1315 bool swap_args = false; 1316 1317 cc = invert_setcc_condition(cc, swap_args); 1318 1319 unsigned newop = get_setcc_op(cc, cmp_type, true); 1320 1321 a->dst.resize(1); 1322 a->bc.set_op(newop); 1323 1324 if (swap_args) { 1325 std::swap(a->src[0], a->src[1]); 1326 std::swap(a->bc.src[0], a->bc.src[1]); 1327 } 1328 1329 a->bc.update_exec_mask = 0; 1330 a->bc.update_pred = 0; 1331} 1332 1333} // namespace r600_sb 1334