1/*
2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 *      Vadim Girlin
25 */
26
27#include <cmath>
28
29#include "sb_shader.h"
30
31namespace r600_sb {
32
33value* get_select_value_for_em(shader& sh, value* em) {
34	if (!em->def)
35		return NULL;
36
37	node *predset = em->def;
38	if (!predset->is_pred_set())
39		return NULL;
40
41	alu_node *s = sh.clone(static_cast<alu_node*>(predset));
42	convert_predset_to_set(sh, s);
43
44	predset->insert_after(s);
45
46	value* &d0 = s->dst[0];
47	d0 = sh.create_temp_value();
48	d0->def = s;
49	return d0;
50}
51
52void convert_to_mov(alu_node &n, value *src, bool neg, bool abs) {
53	n.src.resize(1);
54	n.src[0] = src;
55	n.bc.src[0].abs = abs;
56	n.bc.src[0].neg = neg;
57	n.bc.set_op(ALU_OP1_MOV);
58}
59
60expr_handler::expr_handler(shader& sh) : sh(sh), vt(sh.vt) {}
61
62value * expr_handler::get_const(const literal &l) {
63	value *v = sh.get_const_value(l);
64	if (!v->gvn_source)
65		vt.add_value(v);
66	return v;
67}
68
69void expr_handler::assign_source(value *dst, value *src) {
70	dst->gvn_source = src->gvn_source;
71}
72
73bool expr_handler::equal(value *l, value *r) {
74
75	assert(l != r);
76
77	if (l->is_lds_access() || r->is_lds_access())
78		return false;
79	if (l->gvalue() == r->gvalue())
80		return true;
81
82	if (l->def && r->def)
83		return defs_equal(l, r);
84
85	if (l->is_rel() && r->is_rel())
86		return ivars_equal(l, r);
87
88	return false;
89}
90
91bool expr_handler::ivars_equal(value* l, value* r) {
92	if (l->rel->gvalue() == r->rel->gvalue()
93			&& l->select == r->select) {
94
95		vvec &lv = l->mdef.empty() ? l->muse : l->mdef;
96		vvec &rv = r->mdef.empty() ? r->muse : r->mdef;
97
98		// FIXME: replace this with more precise aliasing test
99		return lv == rv;
100	}
101	return false;
102}
103
104bool expr_handler::defs_equal(value* l, value* r) {
105
106	node *d1 = l->def;
107	node *d2 = r->def;
108
109	if (d1->type != d2->type || d1->subtype != d2->subtype)
110		return false;
111
112	if (d1->is_pred_set() || d2->is_pred_set())
113		return false;
114
115	if (d1->type == NT_OP) {
116		switch (d1->subtype) {
117		case NST_ALU_INST:
118			return ops_equal(
119					static_cast<alu_node*>(d1),
120					static_cast<alu_node*>(d2));
121//		case NST_FETCH_INST: return ops_equal(static_cast<fetch_node*>(d1),
122//			static_cast<fetch_node*>(d2);
123//		case NST_CF_INST: return ops_equal(static_cast<cf_node*>(d1),
124//			static_cast<cf_node*>(d2);
125		default:
126			break;
127		}
128	}
129	return false;
130}
131
132bool expr_handler::try_fold(value* v) {
133	assert(!v->gvn_source);
134
135	if (v->def)
136		try_fold(v->def);
137
138	if (v->gvn_source)
139		return true;
140
141	return false;
142}
143
144bool expr_handler::try_fold(node* n) {
145	return n->fold_dispatch(this);
146}
147
148bool expr_handler::fold(node& n) {
149	if (n.subtype == NST_PHI) {
150
151		value *s = n.src[0];
152
153		// FIXME disabling phi folding for registers for now, otherwise we lose
154		// control flow information in some cases
155		// (GCM fails on tests/shaders/glsl-fs-if-nested-loop.shader_test)
156		// probably control flow transformation is required to enable it
157		if (s->is_sgpr())
158			return false;
159
160		for(vvec::iterator I = n.src.begin() + 1, E = n.src.end(); I != E; ++I) {
161			value *v = *I;
162			if (!s->v_equal(v))
163				return false;
164		}
165
166		assign_source(n.dst[0], s);
167	} else {
168		assert(n.subtype == NST_PSI);
169		assert(n.src.size() >= 6);
170
171		value *s = n.src[2];
172		assert(s->gvn_source);
173
174		for(vvec::iterator I = n.src.begin() + 3, E = n.src.end(); I != E; I += 3) {
175			value *v = *(I+2);
176			if (!s->v_equal(v))
177				return false;
178		}
179		assign_source(n.dst[0], s);
180	}
181	return true;
182}
183
184bool expr_handler::fold(container_node& n) {
185	return false;
186}
187
188bool expr_handler::fold_setcc(alu_node &n) {
189
190	value* v0 = n.src[0]->gvalue();
191	value* v1 = n.src[1]->gvalue();
192
193	assert(v0 && v1 && n.dst[0]);
194
195	unsigned flags = n.bc.op_ptr->flags;
196	unsigned cc = flags & AF_CC_MASK;
197	unsigned cmp_type = flags & AF_CMP_TYPE_MASK;
198	unsigned dst_type = flags & AF_DST_TYPE_MASK;
199
200	bool cond_result;
201	bool have_result = false;
202
203	bool isc0 = v0->is_const();
204	bool isc1 = v1->is_const();
205
206	literal dv, cv0, cv1;
207
208	if (isc0) {
209		cv0 = v0->get_const_value();
210		apply_alu_src_mod(n.bc, 0, cv0);
211	}
212
213	if (isc1) {
214		cv1 = v1->get_const_value();
215		apply_alu_src_mod(n.bc, 1, cv1);
216	}
217
218	if (isc0 && isc1) {
219		cond_result = evaluate_condition(flags, cv0, cv1);
220		have_result = true;
221	} else if (isc1) {
222		if (cmp_type == AF_FLOAT_CMP) {
223			if (n.bc.src[0].abs && !n.bc.src[0].neg) {
224				if (cv1.f < 0.0f && (cc == AF_CC_GT || cc == AF_CC_NE)) {
225					cond_result = true;
226					have_result = true;
227				} else if (cv1.f <= 0.0f && cc == AF_CC_GE) {
228					cond_result = true;
229					have_result = true;
230				}
231			} else if (n.bc.src[0].abs && n.bc.src[0].neg) {
232				if (cv1.f > 0.0f && (cc == AF_CC_GE || cc == AF_CC_E)) {
233					cond_result = false;
234					have_result = true;
235				} else if (cv1.f >= 0.0f && cc == AF_CC_GT) {
236					cond_result = false;
237					have_result = true;
238				}
239			}
240		} else if (cmp_type == AF_UINT_CMP && cv1.u == 0 && cc == AF_CC_GE) {
241			cond_result = true;
242			have_result = true;
243		}
244	} else if (isc0) {
245		if (cmp_type == AF_FLOAT_CMP) {
246			if (n.bc.src[1].abs && !n.bc.src[1].neg) {
247				if (cv0.f <= 0.0f && cc == AF_CC_GT) {
248					cond_result = false;
249					have_result = true;
250				} else if (cv0.f < 0.0f && (cc == AF_CC_GE || cc == AF_CC_E)) {
251					cond_result = false;
252					have_result = true;
253				}
254			} else if (n.bc.src[1].abs && n.bc.src[1].neg) {
255				if (cv0.f >= 0.0f && cc == AF_CC_GE) {
256					cond_result = true;
257					have_result = true;
258				} else if (cv0.f > 0.0f && (cc == AF_CC_GT || cc == AF_CC_NE)) {
259					cond_result = true;
260					have_result = true;
261				}
262			}
263		} else if (cmp_type == AF_UINT_CMP && cv0.u == 0 && cc == AF_CC_GT) {
264			cond_result = false;
265			have_result = true;
266		}
267	} else if (v0 == v1) {
268		bc_alu_src &s0 = n.bc.src[0], &s1 = n.bc.src[1];
269		if (s0.abs == s1.abs && s0.neg == s1.neg && cmp_type != AF_FLOAT_CMP) {
270			// NOTE can't handle float comparisons here because of NaNs
271			cond_result = (cc == AF_CC_E || cc == AF_CC_GE);
272			have_result = true;
273		}
274	}
275
276	if (have_result) {
277		literal result;
278
279		if (cond_result)
280			result = dst_type != AF_FLOAT_DST ?
281					literal(0xFFFFFFFFu) : literal(1.0f);
282		else
283			result = literal(0);
284
285		convert_to_mov(n, sh.get_const_value(result));
286		return fold_alu_op1(n);
287	}
288
289	return false;
290}
291
292bool expr_handler::fold(alu_node& n) {
293
294	switch (n.bc.op_ptr->src_count) {
295	case 1: return fold_alu_op1(n);
296	case 2: return fold_alu_op2(n);
297	case 3: return fold_alu_op3(n);
298	default:
299		assert(0);
300	}
301	return false;
302}
303
304bool expr_handler::fold(fetch_node& n) {
305
306	unsigned chan = 0;
307	for (vvec::iterator I = n.dst.begin(), E = n.dst.end(); I != E; ++I) {
308		value* &v = *I;
309		if (v) {
310			if (n.bc.dst_sel[chan] == SEL_0)
311				assign_source(*I, get_const(0.0f));
312			else if (n.bc.dst_sel[chan] == SEL_1)
313				assign_source(*I, get_const(1.0f));
314		}
315		++chan;
316	}
317	return false;
318}
319
320bool expr_handler::fold(cf_node& n) {
321	return false;
322}
323
324void expr_handler::apply_alu_src_mod(const bc_alu &bc, unsigned src,
325                                     literal &v) {
326	const bc_alu_src &s = bc.src[src];
327
328	if (s.abs)
329		v = fabsf(v.f);
330	if (s.neg)
331		v = -v.f;
332}
333
334void expr_handler::apply_alu_dst_mod(const bc_alu &bc, literal &v) {
335	const float omod_coeff[] = {2.0f, 4.0, 0.5f};
336
337	if (bc.omod)
338		v = v.f * omod_coeff[bc.omod - 1];
339	if (bc.clamp)
340		v = float_clamp(v.f);
341}
342
343bool expr_handler::args_equal(const vvec &l, const vvec &r) {
344
345	assert(l.size() == r.size());
346
347	int s = l.size();
348
349	for (int k = 0; k < s; ++k) {
350		if (!l[k]->v_equal(r[k]))
351			return false;
352	}
353
354	return true;
355}
356
357bool expr_handler::ops_equal(const alu_node *l, const alu_node* r) {
358	const bc_alu &b0 = l->bc;
359	const bc_alu &b1 = r->bc;
360
361	if (b0.op != b1.op)
362		return false;
363
364	unsigned src_count = b0.op_ptr->src_count;
365
366	if (b0.index_mode != b1.index_mode)
367		return false;
368
369	if (b0.clamp != b1.clamp || b0.omod != b1.omod)
370			return false;
371
372	for (unsigned s = 0; s < src_count; ++s) {
373		const bc_alu_src &s0 = b0.src[s];
374		const bc_alu_src &s1 = b1.src[s];
375
376		if (s0.abs != s1.abs || s0.neg != s1.neg)
377			return false;
378	}
379	return args_equal(l->src, r->src);
380}
381
382bool expr_handler::fold_alu_op1(alu_node& n) {
383
384	assert(!n.src.empty());
385	if (n.src.empty())
386		return false;
387
388	/* don't fold LDS instructions */
389	if (n.bc.op_ptr->flags & AF_LDS)
390		return false;
391
392	value* v0 = n.src[0]->gvalue();
393
394	if (v0->is_lds_oq() || v0->is_lds_access())
395		return false;
396	assert(v0 && n.dst[0]);
397
398	if (!v0->is_const()) {
399		// handle (MOV -(MOV -x)) => (MOV x)
400		if (n.bc.op == ALU_OP1_MOV && n.bc.src[0].neg && !n.bc.src[1].abs
401				&& v0->def && v0->def->is_alu_op(ALU_OP1_MOV)) {
402			alu_node *sd = static_cast<alu_node*>(v0->def);
403			if (!sd->bc.clamp && !sd->bc.omod && !sd->bc.src[0].abs &&
404					sd->bc.src[0].neg) {
405				n.src[0] = sd->src[0];
406				n.bc.src[0].neg = 0;
407				v0 = n.src[0]->gvalue();
408			}
409		}
410
411		if ((n.bc.op == ALU_OP1_MOV || n.bc.op == ALU_OP1_MOVA_INT ||
412				n.bc.op == ALU_OP1_MOVA_GPR_INT)
413				&& n.bc.clamp == 0 && n.bc.omod == 0
414				&& n.bc.src[0].abs == 0 && n.bc.src[0].neg == 0 &&
415				n.src.size() == 1 /* RIM/SIM can be appended as additional values */
416				&& n.dst[0]->no_reladdr_conflict_with(v0)) {
417			assign_source(n.dst[0], v0);
418			return true;
419		}
420		return false;
421	}
422
423	literal dv, cv = v0->get_const_value();
424	apply_alu_src_mod(n.bc, 0, cv);
425
426	switch (n.bc.op) {
427	case ALU_OP1_CEIL: dv = ceilf(cv.f); break;
428	case ALU_OP1_COS: dv = cos(cv.f * 2.0f * M_PI); break;
429	case ALU_OP1_EXP_IEEE: dv = exp2f(cv.f); break;
430	case ALU_OP1_FLOOR: dv = floorf(cv.f); break;
431	case ALU_OP1_FLT_TO_INT: dv = (int)cv.f; break; // FIXME: round modes ????
432	case ALU_OP1_FLT_TO_INT_FLOOR: dv = (int32_t)floorf(cv.f); break;
433	case ALU_OP1_FLT_TO_INT_RPI: dv = (int32_t)floorf(cv.f + 0.5f); break;
434	case ALU_OP1_FLT_TO_INT_TRUNC: dv = (int32_t)truncf(cv.f); break;
435	case ALU_OP1_FLT_TO_UINT: dv = (uint32_t)cv.f; break;
436	case ALU_OP1_FRACT: dv = cv.f - floorf(cv.f); break;
437	case ALU_OP1_INT_TO_FLT: dv = (float)cv.i; break;
438	case ALU_OP1_LOG_CLAMPED:
439	case ALU_OP1_LOG_IEEE:
440		if (cv.f != 0.0f)
441			dv = log2f(cv.f);
442		else
443			// don't fold to NAN, let the GPU handle it for now
444			// (prevents degenerate LIT tests from failing)
445			return false;
446		break;
447	case ALU_OP1_MOV: dv = cv; break;
448	case ALU_OP1_MOVA_INT: dv = cv; break; // FIXME ???
449//	case ALU_OP1_MOVA_FLOOR: dv = (int32_t)floor(cv.f); break;
450//	case ALU_OP1_MOVA_GPR_INT:
451	case ALU_OP1_NOT_INT: dv = ~cv.i; break;
452	case ALU_OP1_PRED_SET_INV:
453		dv = cv.f == 0.0f ? 1.0f : (cv.f == 1.0f ? 0.0f : cv.f); break;
454	case ALU_OP1_PRED_SET_RESTORE: dv = cv; break;
455	case ALU_OP1_RECIPSQRT_CLAMPED:
456	case ALU_OP1_RECIPSQRT_FF:
457	case ALU_OP1_RECIPSQRT_IEEE: dv = 1.0f / sqrtf(cv.f); break;
458	case ALU_OP1_RECIP_CLAMPED:
459	case ALU_OP1_RECIP_FF:
460	case ALU_OP1_RECIP_IEEE: dv = 1.0f / cv.f; break;
461//	case ALU_OP1_RECIP_INT:
462	case ALU_OP1_RECIP_UINT: dv.u = (1ull << 32) / cv.u; break;
463//	case ALU_OP1_RNDNE: dv = floor(cv.f + 0.5f); break;
464	case ALU_OP1_SIN: dv = sin(cv.f * 2.0f * M_PI); break;
465	case ALU_OP1_SQRT_IEEE: dv = sqrtf(cv.f); break;
466	case ALU_OP1_TRUNC: dv = truncf(cv.f); break;
467
468	default:
469		return false;
470	}
471
472	apply_alu_dst_mod(n.bc, dv);
473	assign_source(n.dst[0], get_const(dv));
474	return true;
475}
476
477bool expr_handler::fold_mul_add(alu_node *n) {
478
479	bool ieee;
480	value* v0 = n->src[0]->gvalue();
481
482	alu_node *d0 = (v0->def && v0->def->is_alu_inst()) ?
483			static_cast<alu_node*>(v0->def) : NULL;
484
485	if (d0) {
486		if (d0->is_alu_op(ALU_OP2_MUL_IEEE))
487			ieee = true;
488		else if (d0->is_alu_op(ALU_OP2_MUL))
489			ieee = false;
490		else
491			return false;
492
493		if (!d0->bc.src[0].abs && !d0->bc.src[1].abs &&
494				!n->bc.src[1].abs && !n->bc.src[0].abs && !d0->bc.omod &&
495				!d0->bc.clamp && !n->bc.omod &&
496				(!d0->src[0]->is_kcache() || !d0->src[1]->is_kcache() ||
497						!n->src[1]->is_kcache())) {
498
499			bool mul_neg = n->bc.src[0].neg;
500
501			n->src.resize(3);
502			n->bc.set_op(ieee ? ALU_OP3_MULADD_IEEE : ALU_OP3_MULADD);
503			n->src[2] = n->src[1];
504			n->bc.src[2] = n->bc.src[1];
505			n->src[0] = d0->src[0];
506			n->bc.src[0] = d0->bc.src[0];
507			n->src[1] = d0->src[1];
508			n->bc.src[1] = d0->bc.src[1];
509
510			n->bc.src[0].neg ^= mul_neg;
511
512			fold_alu_op3(*n);
513			return true;
514		}
515	}
516
517	value* v1 = n->src[1]->gvalue();
518
519	alu_node *d1 = (v1->def && v1->def->is_alu_inst()) ?
520			static_cast<alu_node*>(v1->def) : NULL;
521
522	if (d1) {
523		if (d1->is_alu_op(ALU_OP2_MUL_IEEE))
524			ieee = true;
525		else if (d1->is_alu_op(ALU_OP2_MUL))
526			ieee = false;
527		else
528			return false;
529
530		if (!d1->bc.src[1].abs && !d1->bc.src[0].abs &&
531				!n->bc.src[0].abs && !n->bc.src[1].abs && !d1->bc.omod &&
532				!d1->bc.clamp && !n->bc.omod &&
533				(!d1->src[0]->is_kcache() || !d1->src[1]->is_kcache() ||
534						!n->src[0]->is_kcache())) {
535
536			bool mul_neg = n->bc.src[1].neg;
537
538			n->src.resize(3);
539			n->bc.set_op(ieee ? ALU_OP3_MULADD_IEEE : ALU_OP3_MULADD);
540			n->src[2] = n->src[0];
541			n->bc.src[2] = n->bc.src[0];
542			n->src[1] = d1->src[1];
543			n->bc.src[1] = d1->bc.src[1];
544			n->src[0] = d1->src[0];
545			n->bc.src[0] = d1->bc.src[0];
546
547			n->bc.src[1].neg ^= mul_neg;
548
549			fold_alu_op3(*n);
550			return true;
551		}
552	}
553
554	return false;
555}
556
557bool expr_handler::eval_const_op(unsigned op, literal &r,
558                                 literal cv0, literal cv1) {
559
560	switch (op) {
561	case ALU_OP2_ADD: r = cv0.f + cv1.f; break;
562	case ALU_OP2_ADDC_UINT:
563		r = (uint32_t)(((uint64_t)cv0.u + cv1.u)>>32); break;
564	case ALU_OP2_ADD_INT: r = cv0.i + cv1.i; break;
565	case ALU_OP2_AND_INT: r = cv0.i & cv1.i; break;
566	case ALU_OP2_ASHR_INT: r = cv0.i >> (cv1.i & 0x1F); break;
567	case ALU_OP2_BFM_INT:
568		r = (((1 << (cv0.i & 0x1F)) - 1) << (cv1.i & 0x1F)); break;
569	case ALU_OP2_LSHL_INT: r = cv0.i << cv1.i; break;
570	case ALU_OP2_LSHR_INT: r = cv0.u >> cv1.u; break;
571	case ALU_OP2_MAX:
572	case ALU_OP2_MAX_DX10: r = cv0.f > cv1.f ? cv0.f : cv1.f; break;
573	case ALU_OP2_MAX_INT: r = cv0.i > cv1.i ? cv0.i : cv1.i; break;
574	case ALU_OP2_MAX_UINT: r = cv0.u > cv1.u ? cv0.u : cv1.u; break;
575	case ALU_OP2_MIN:
576	case ALU_OP2_MIN_DX10: r = cv0.f < cv1.f ? cv0.f : cv1.f; break;
577	case ALU_OP2_MIN_INT: r = cv0.i < cv1.i ? cv0.i : cv1.i; break;
578	case ALU_OP2_MIN_UINT: r = cv0.u < cv1.u ? cv0.u : cv1.u; break;
579	case ALU_OP2_MUL:
580	case ALU_OP2_MUL_IEEE: r = cv0.f * cv1.f; break;
581	case ALU_OP2_MULHI_INT:
582		r = (int32_t)(((int64_t)cv0.u * cv1.u)>>32); break;
583	case ALU_OP2_MULHI_UINT:
584		r = (uint32_t)(((uint64_t)cv0.u * cv1.u)>>32); break;
585	case ALU_OP2_MULLO_INT:
586		r = (int32_t)(((int64_t)cv0.u * cv1.u) & 0xFFFFFFFF); break;
587	case ALU_OP2_MULLO_UINT:
588		r = (uint32_t)(((uint64_t)cv0.u * cv1.u) & 0xFFFFFFFF); break;
589	case ALU_OP2_OR_INT: r = cv0.i | cv1.i; break;
590	case ALU_OP2_SUB_INT: r = cv0.i - cv1.i; break;
591	case ALU_OP2_XOR_INT: r = cv0.i ^ cv1.i; break;
592
593	default:
594		return false;
595	}
596
597	return true;
598}
599
600// fold the chain of associative ops, e.g. (ADD 2, (ADD x, 3)) => (ADD x, 5)
601bool expr_handler::fold_assoc(alu_node *n) {
602
603	alu_node *a = n;
604	literal cr;
605
606	int last_arg = -3;
607
608	unsigned op = n->bc.op;
609	bool allow_neg = false, cur_neg = false;
610	bool distribute_neg = false;
611
612	switch(op) {
613	case ALU_OP2_ADD:
614		distribute_neg = true;
615		allow_neg = true;
616		break;
617	case ALU_OP2_MUL:
618	case ALU_OP2_MUL_IEEE:
619		allow_neg = true;
620		break;
621	case ALU_OP3_MULADD:
622		allow_neg = true;
623		op = ALU_OP2_MUL;
624		break;
625	case ALU_OP3_MULADD_IEEE:
626		allow_neg = true;
627		op = ALU_OP2_MUL_IEEE;
628		break;
629	default:
630		if (n->bc.op_ptr->src_count != 2)
631			return false;
632	}
633
634	// check if we can evaluate the op
635	if (!eval_const_op(op, cr, literal(0), literal(0)))
636		return false;
637
638	while (true) {
639
640		value *v0 = a->src[0]->gvalue();
641		value *v1 = a->src[1]->gvalue();
642
643		last_arg = -2;
644
645		if (v1->is_const()) {
646			literal arg = v1->get_const_value();
647			apply_alu_src_mod(a->bc, 1, arg);
648			if (cur_neg && distribute_neg)
649				arg.f = -arg.f;
650
651			if (a == n)
652				cr = arg;
653			else
654				eval_const_op(op, cr, cr, arg);
655
656			if (v0->def) {
657				alu_node *d0 = static_cast<alu_node*>(v0->def);
658				if ((d0->is_alu_op(op) ||
659						(op == ALU_OP2_MUL_IEEE &&
660								d0->is_alu_op(ALU_OP2_MUL))) &&
661						!d0->bc.omod && !d0->bc.clamp &&
662						!a->bc.src[0].abs &&
663						(!a->bc.src[0].neg || allow_neg)) {
664					cur_neg ^= a->bc.src[0].neg;
665					a = d0;
666					continue;
667				}
668			}
669			last_arg = 0;
670
671		}
672
673		if (v0->is_const()) {
674			literal arg = v0->get_const_value();
675			apply_alu_src_mod(a->bc, 0, arg);
676			if (cur_neg && distribute_neg)
677				arg.f = -arg.f;
678
679			if (last_arg == 0) {
680				eval_const_op(op, cr, cr, arg);
681				last_arg = -1;
682				break;
683			}
684
685			if (a == n)
686				cr = arg;
687			else
688				eval_const_op(op, cr, cr, arg);
689
690			if (v1->def) {
691				alu_node *d1 = static_cast<alu_node*>(v1->def);
692				if ((d1->is_alu_op(op) ||
693						(op == ALU_OP2_MUL_IEEE &&
694								d1->is_alu_op(ALU_OP2_MUL))) &&
695						!d1->bc.omod && !d1->bc.clamp &&
696						!a->bc.src[1].abs &&
697						(!a->bc.src[1].neg || allow_neg)) {
698					cur_neg ^= a->bc.src[1].neg;
699					a = d1;
700					continue;
701				}
702			}
703
704			last_arg = 1;
705		}
706
707		break;
708	};
709
710	if (last_arg == -1) {
711		// result is const
712		apply_alu_dst_mod(n->bc, cr);
713
714		if (n->bc.op == op) {
715			convert_to_mov(*n, sh.get_const_value(cr));
716			fold_alu_op1(*n);
717			return true;
718		} else { // MULADD => ADD
719			n->src[0] = n->src[2];
720			n->bc.src[0] = n->bc.src[2];
721			n->src[1] = sh.get_const_value(cr);
722			n->bc.src[1].clear();
723
724			n->src.resize(2);
725			n->bc.set_op(ALU_OP2_ADD);
726		}
727	} else if (last_arg >= 0) {
728		n->src[0] = a->src[last_arg];
729		n->bc.src[0] = a->bc.src[last_arg];
730		n->bc.src[0].neg ^= cur_neg;
731		n->src[1] = sh.get_const_value(cr);
732		n->bc.src[1].clear();
733	}
734
735	return false;
736}
737
738bool expr_handler::fold_alu_op2(alu_node& n) {
739
740	if (n.src.size() < 2)
741		return false;
742
743	unsigned flags = n.bc.op_ptr->flags;
744
745	if (flags & AF_SET) {
746		return fold_setcc(n);
747	}
748
749	if (!sh.safe_math && (flags & AF_M_ASSOC)) {
750		if (fold_assoc(&n))
751			return true;
752	}
753
754	value* v0 = n.src[0]->gvalue();
755	value* v1 = n.src[1]->gvalue();
756
757	assert(v0 && v1);
758
759	// handle some operations with equal args, e.g. x + x => x * 2
760	if (v0 == v1) {
761		if (n.bc.src[0].neg == n.bc.src[1].neg &&
762				n.bc.src[0].abs == n.bc.src[1].abs) {
763			switch (n.bc.op) {
764			case ALU_OP2_MIN: // (MIN x, x) => (MOV x)
765			case ALU_OP2_MIN_DX10:
766			case ALU_OP2_MAX:
767			case ALU_OP2_MAX_DX10:
768				convert_to_mov(n, v0, n.bc.src[0].neg, n.bc.src[0].abs);
769				return fold_alu_op1(n);
770			case ALU_OP2_ADD:  // (ADD x, x) => (MUL x, 2)
771				if (!sh.safe_math) {
772					n.src[1] = sh.get_const_value(2.0f);
773					n.bc.src[1].clear();
774					n.bc.set_op(ALU_OP2_MUL);
775					return fold_alu_op2(n);
776				}
777				break;
778			}
779		}
780		if (n.bc.src[0].neg != n.bc.src[1].neg &&
781				n.bc.src[0].abs == n.bc.src[1].abs) {
782			switch (n.bc.op) {
783			case ALU_OP2_ADD:  // (ADD x, -x) => (MOV 0)
784				if (!sh.safe_math) {
785					convert_to_mov(n, sh.get_const_value(literal(0)));
786					return fold_alu_op1(n);
787				}
788				break;
789			}
790		}
791	}
792
793	if (n.bc.op == ALU_OP2_ADD) {
794		if (fold_mul_add(&n))
795			return true;
796	}
797
798	bool isc0 = v0->is_const();
799	bool isc1 = v1->is_const();
800
801	if (!isc0 && !isc1)
802		return false;
803
804	literal dv, cv0, cv1;
805
806	if (isc0) {
807		cv0 = v0->get_const_value();
808		apply_alu_src_mod(n.bc, 0, cv0);
809	}
810
811	if (isc1) {
812		cv1 = v1->get_const_value();
813		apply_alu_src_mod(n.bc, 1, cv1);
814	}
815
816	if (isc0 && isc1) {
817
818		if (!eval_const_op(n.bc.op, dv, cv0, cv1))
819			return false;
820
821	} else { // one source is const
822
823		if (isc0 && cv0 == literal(0)) {
824			switch (n.bc.op) {
825			case ALU_OP2_ADD:
826			case ALU_OP2_ADD_INT:
827			case ALU_OP2_MAX_UINT:
828			case ALU_OP2_OR_INT:
829			case ALU_OP2_XOR_INT:
830				convert_to_mov(n, n.src[1], n.bc.src[1].neg,  n.bc.src[1].abs);
831				return fold_alu_op1(n);
832			case ALU_OP2_AND_INT:
833			case ALU_OP2_ASHR_INT:
834			case ALU_OP2_LSHL_INT:
835			case ALU_OP2_LSHR_INT:
836			case ALU_OP2_MIN_UINT:
837			case ALU_OP2_MUL:
838			case ALU_OP2_MULHI_UINT:
839			case ALU_OP2_MULLO_UINT:
840				convert_to_mov(n, sh.get_const_value(literal(0)));
841				return fold_alu_op1(n);
842			}
843		} else if (isc1 && cv1 == literal(0)) {
844			switch (n.bc.op) {
845			case ALU_OP2_ADD:
846			case ALU_OP2_ADD_INT:
847			case ALU_OP2_ASHR_INT:
848			case ALU_OP2_LSHL_INT:
849			case ALU_OP2_LSHR_INT:
850			case ALU_OP2_MAX_UINT:
851			case ALU_OP2_OR_INT:
852			case ALU_OP2_SUB_INT:
853			case ALU_OP2_XOR_INT:
854				convert_to_mov(n, n.src[0], n.bc.src[0].neg,  n.bc.src[0].abs);
855				return fold_alu_op1(n);
856			case ALU_OP2_AND_INT:
857			case ALU_OP2_MIN_UINT:
858			case ALU_OP2_MUL:
859			case ALU_OP2_MULHI_UINT:
860			case ALU_OP2_MULLO_UINT:
861				convert_to_mov(n, sh.get_const_value(literal(0)));
862				return fold_alu_op1(n);
863			}
864		} else if (isc0 && cv0 == literal(1.0f)) {
865			switch (n.bc.op) {
866			case ALU_OP2_MUL:
867			case ALU_OP2_MUL_IEEE:
868				convert_to_mov(n, n.src[1], n.bc.src[1].neg,  n.bc.src[1].abs);
869				return fold_alu_op1(n);
870			}
871		} else if (isc1 && cv1 == literal(1.0f)) {
872			switch (n.bc.op) {
873			case ALU_OP2_MUL:
874			case ALU_OP2_MUL_IEEE:
875				convert_to_mov(n, n.src[0], n.bc.src[0].neg,  n.bc.src[0].abs);
876				return fold_alu_op1(n);
877			}
878		}
879
880		return false;
881	}
882
883	apply_alu_dst_mod(n.bc, dv);
884	assign_source(n.dst[0], get_const(dv));
885	return true;
886}
887
888bool expr_handler::evaluate_condition(unsigned alu_cnd_flags,
889                                      literal s1, literal s2) {
890
891	unsigned cmp_type = alu_cnd_flags & AF_CMP_TYPE_MASK;
892	unsigned cc = alu_cnd_flags & AF_CC_MASK;
893
894	switch (cmp_type) {
895	case AF_FLOAT_CMP: {
896		switch (cc) {
897		case AF_CC_E : return s1.f == s2.f;
898		case AF_CC_GT: return s1.f >  s2.f;
899		case AF_CC_GE: return s1.f >= s2.f;
900		case AF_CC_NE: return s1.f != s2.f;
901		case AF_CC_LT: return s1.f <  s2.f;
902		case AF_CC_LE: return s1.f <= s2.f;
903		default:
904			assert(!"invalid condition code");
905			return false;
906		}
907	}
908	case AF_INT_CMP: {
909		switch (cc) {
910		case AF_CC_E : return s1.i == s2.i;
911		case AF_CC_GT: return s1.i >  s2.i;
912		case AF_CC_GE: return s1.i >= s2.i;
913		case AF_CC_NE: return s1.i != s2.i;
914		case AF_CC_LT: return s1.i <  s2.i;
915		case AF_CC_LE: return s1.i <= s2.i;
916		default:
917			assert(!"invalid condition code");
918			return false;
919		}
920	}
921	case AF_UINT_CMP: {
922		switch (cc) {
923		case AF_CC_E : return s1.u == s2.u;
924		case AF_CC_GT: return s1.u >  s2.u;
925		case AF_CC_GE: return s1.u >= s2.u;
926		case AF_CC_NE: return s1.u != s2.u;
927		case AF_CC_LT: return s1.u <  s2.u;
928		case AF_CC_LE: return s1.u <= s2.u;
929		default:
930			assert(!"invalid condition code");
931			return false;
932		}
933	}
934	default:
935		assert(!"invalid cmp_type");
936		return false;
937	}
938}
939
940bool expr_handler::fold_alu_op3(alu_node& n) {
941
942	if (n.src.size() < 3)
943		return false;
944
945	if (!sh.safe_math && (n.bc.op_ptr->flags & AF_M_ASSOC)) {
946		if (fold_assoc(&n))
947			return true;
948		if (n.src.size() < 3)
949			return fold_alu_op2(n);
950	}
951
952	value* v0 = n.src[0]->gvalue();
953	value* v1 = n.src[1]->gvalue();
954	value* v2 = n.src[2]->gvalue();
955
956	/* LDS instructions look like op3 with no dst - don't fold. */
957	if (!n.dst[0])
958		return false;
959	assert(v0 && v1 && v2 && n.dst[0]);
960
961	bool isc0 = v0->is_const();
962	bool isc1 = v1->is_const();
963	bool isc2 = v2->is_const();
964
965	literal dv, cv0, cv1, cv2;
966
967	if (isc0) {
968		cv0 = v0->get_const_value();
969		apply_alu_src_mod(n.bc, 0, cv0);
970	}
971
972	if (isc1) {
973		cv1 = v1->get_const_value();
974		apply_alu_src_mod(n.bc, 1, cv1);
975	}
976
977	if (isc2) {
978		cv2 = v2->get_const_value();
979		apply_alu_src_mod(n.bc, 2, cv2);
980	}
981
982	unsigned flags = n.bc.op_ptr->flags;
983
984	if (flags & AF_CMOV) {
985		int src = 0;
986
987		if (v1 == v2 && n.bc.src[1].neg == n.bc.src[2].neg) {
988			// result doesn't depend on condition, convert to MOV
989			src = 1;
990		} else if (isc0) {
991			// src0 is const, condition can be evaluated, convert to MOV
992			bool cond = evaluate_condition(n.bc.op_ptr->flags & (AF_CC_MASK |
993					AF_CMP_TYPE_MASK), cv0, literal(0));
994			src = cond ? 1 : 2;
995		}
996
997		if (src) {
998			// if src is selected, convert to MOV
999			convert_to_mov(n, n.src[src], n.bc.src[src].neg);
1000			return fold_alu_op1(n);
1001		}
1002	}
1003
1004	// handle (MULADD a, x, MUL (x, b)) => (MUL x, ADD (a, b))
1005	if (!sh.safe_math && (n.bc.op == ALU_OP3_MULADD ||
1006			n.bc.op == ALU_OP3_MULADD_IEEE)) {
1007
1008		unsigned op = n.bc.op == ALU_OP3_MULADD_IEEE ?
1009				ALU_OP2_MUL_IEEE : ALU_OP2_MUL;
1010
1011		if (!isc2 && v2->def && v2->def->is_alu_op(op)) {
1012
1013			alu_node *md = static_cast<alu_node*>(v2->def);
1014			value *mv0 = md->src[0]->gvalue();
1015			value *mv1 = md->src[1]->gvalue();
1016
1017			int es0 = -1, es1;
1018
1019			if (v0 == mv0) {
1020				es0 = 0;
1021				es1 = 0;
1022			} else if (v0 == mv1) {
1023				es0 = 0;
1024				es1 = 1;
1025			} else if (v1 == mv0) {
1026				es0 = 1;
1027				es1 = 0;
1028			} else if (v1 == mv1) {
1029				es0 = 1;
1030				es1 = 1;
1031			}
1032
1033			value *va0 = es0 == 0 ? v1 : v0;
1034			value *va1 = es1 == 0 ? mv1 : mv0;
1035
1036			/* Don't fold if no equal multipliers were found.
1037			 * Also don#t fold if the operands of the to be created ADD are both
1038			 * relatively accessed with different AR values because that would
1039			 * create impossible code.
1040			 */
1041			if (es0 != -1 &&
1042			    (!va0->is_rel() || !va1->is_rel() ||
1043			     (va0->rel == va1->rel))) {
1044
1045				alu_node *add = sh.create_alu();
1046				add->bc.set_op(ALU_OP2_ADD);
1047
1048				add->dst.resize(1);
1049				add->src.resize(2);
1050
1051				value *t = sh.create_temp_value();
1052				t->def = add;
1053				add->dst[0] = t;
1054				add->src[0] = va0;
1055				add->src[1] = va1;
1056				add->bc.src[0] = n.bc.src[!es0];
1057				add->bc.src[1] = md->bc.src[!es1];
1058
1059				add->bc.src[1].neg ^= n.bc.src[2].neg ^
1060						(n.bc.src[es0].neg != md->bc.src[es1].neg);
1061
1062				n.insert_before(add);
1063				vt.add_value(t);
1064
1065				t = t->gvalue();
1066
1067				if (es0 == 1) {
1068					n.src[0] = n.src[1];
1069					n.bc.src[0] = n.bc.src[1];
1070				}
1071
1072				n.src[1] = t;
1073				n.bc.src[1].clear();
1074
1075				n.src.resize(2);
1076
1077				n.bc.set_op(op);
1078				return fold_alu_op2(n);
1079			}
1080		}
1081	}
1082
1083	if (!isc0 && !isc1 && !isc2)
1084		return false;
1085
1086	if (isc0 && isc1 && isc2) {
1087		switch (n.bc.op) {
1088		case ALU_OP3_MULADD_IEEE:
1089		case ALU_OP3_MULADD: dv = cv0.f * cv1.f + cv2.f; break;
1090
1091		// TODO
1092
1093		default:
1094			return false;
1095		}
1096	} else {
1097		if (isc0 && isc1) {
1098			switch (n.bc.op) {
1099			case ALU_OP3_MULADD:
1100			case ALU_OP3_MULADD_IEEE:
1101				dv = cv0.f * cv1.f;
1102				n.bc.set_op(ALU_OP2_ADD);
1103				n.src[0] = sh.get_const_value(dv);
1104				n.bc.src[0].clear();
1105				n.src[1] = n.src[2];
1106				n.bc.src[1] = n.bc.src[2];
1107				n.src.resize(2);
1108				return fold_alu_op2(n);
1109			}
1110		}
1111
1112		if (n.bc.op == ALU_OP3_MULADD) {
1113			if ((isc0 && cv0 == literal(0)) || (isc1 && cv1 == literal(0))) {
1114				convert_to_mov(n, n.src[2], n.bc.src[2].neg,  n.bc.src[2].abs);
1115				return fold_alu_op1(n);
1116			}
1117		}
1118
1119		if (n.bc.op == ALU_OP3_MULADD || n.bc.op == ALU_OP3_MULADD_IEEE) {
1120			unsigned op = n.bc.op == ALU_OP3_MULADD_IEEE ?
1121					ALU_OP2_MUL_IEEE : ALU_OP2_MUL;
1122
1123			if (isc1 && v0 == v2) {
1124				cv1.f += (n.bc.src[2].neg != n.bc.src[0].neg ? -1.0f : 1.0f);
1125				n.src[1] = sh.get_const_value(cv1);
1126				n.bc.src[1].neg = 0;
1127				n.bc.src[1].abs = 0;
1128				n.bc.set_op(op);
1129				n.src.resize(2);
1130				return fold_alu_op2(n);
1131			} else if (isc0 && v1 == v2) {
1132				cv0.f += (n.bc.src[2].neg != n.bc.src[1].neg ? -1.0f : 1.0f);
1133				n.src[0] = sh.get_const_value(cv0);
1134				n.bc.src[0].neg = 0;
1135				n.bc.src[0].abs = 0;
1136				n.bc.set_op(op);
1137				n.src.resize(2);
1138				return fold_alu_op2(n);
1139			}
1140		}
1141
1142		return false;
1143	}
1144
1145	apply_alu_dst_mod(n.bc, dv);
1146	assign_source(n.dst[0], get_const(dv));
1147	return true;
1148}
1149
1150unsigned invert_setcc_condition(unsigned cc, bool &swap_args) {
1151	unsigned ncc = 0;
1152
1153	switch (cc) {
1154	case AF_CC_E: ncc = AF_CC_NE; break;
1155	case AF_CC_NE: ncc = AF_CC_E; break;
1156	case AF_CC_GE: ncc = AF_CC_GT; swap_args = true; break;
1157	case AF_CC_GT: ncc = AF_CC_GE; swap_args = true; break;
1158	default:
1159		assert(!"unexpected condition code");
1160		break;
1161	}
1162	return ncc;
1163}
1164
1165unsigned get_setcc_op(unsigned cc, unsigned cmp_type, bool int_dst) {
1166
1167	if (int_dst && cmp_type == AF_FLOAT_CMP) {
1168		switch (cc) {
1169		case AF_CC_E: return ALU_OP2_SETE_DX10;
1170		case AF_CC_NE: return ALU_OP2_SETNE_DX10;
1171		case AF_CC_GT: return ALU_OP2_SETGT_DX10;
1172		case AF_CC_GE: return ALU_OP2_SETGE_DX10;
1173		}
1174	} else {
1175
1176		switch(cmp_type) {
1177		case AF_FLOAT_CMP: {
1178			switch (cc) {
1179			case AF_CC_E: return ALU_OP2_SETE;
1180			case AF_CC_NE: return ALU_OP2_SETNE;
1181			case AF_CC_GT: return ALU_OP2_SETGT;
1182			case AF_CC_GE: return ALU_OP2_SETGE;
1183			}
1184			break;
1185		}
1186		case AF_INT_CMP: {
1187			switch (cc) {
1188			case AF_CC_E: return ALU_OP2_SETE_INT;
1189			case AF_CC_NE: return ALU_OP2_SETNE_INT;
1190			case AF_CC_GT: return ALU_OP2_SETGT_INT;
1191			case AF_CC_GE: return ALU_OP2_SETGE_INT;
1192			}
1193			break;
1194		}
1195		case AF_UINT_CMP: {
1196			switch (cc) {
1197			case AF_CC_E: return ALU_OP2_SETE_INT;
1198			case AF_CC_NE: return ALU_OP2_SETNE_INT;
1199			case AF_CC_GT: return ALU_OP2_SETGT_UINT;
1200			case AF_CC_GE: return ALU_OP2_SETGE_UINT;
1201			}
1202			break;
1203		}
1204		}
1205	}
1206
1207	assert(!"unexpected cc&cmp_type combination");
1208	return ~0u;
1209}
1210
1211unsigned get_predsetcc_op(unsigned cc, unsigned cmp_type) {
1212
1213	switch(cmp_type) {
1214	case AF_FLOAT_CMP: {
1215		switch (cc) {
1216		case AF_CC_E: return ALU_OP2_PRED_SETE;
1217		case AF_CC_NE: return ALU_OP2_PRED_SETNE;
1218		case AF_CC_GT: return ALU_OP2_PRED_SETGT;
1219		case AF_CC_GE: return ALU_OP2_PRED_SETGE;
1220		}
1221		break;
1222	}
1223	case AF_INT_CMP: {
1224		switch (cc) {
1225		case AF_CC_E: return ALU_OP2_PRED_SETE_INT;
1226		case AF_CC_NE: return ALU_OP2_PRED_SETNE_INT;
1227		case AF_CC_GT: return ALU_OP2_PRED_SETGT_INT;
1228		case AF_CC_GE: return ALU_OP2_PRED_SETGE_INT;
1229		}
1230		break;
1231	}
1232	case AF_UINT_CMP: {
1233		switch (cc) {
1234		case AF_CC_E: return ALU_OP2_PRED_SETE_INT;
1235		case AF_CC_NE: return ALU_OP2_PRED_SETNE_INT;
1236		case AF_CC_GT: return ALU_OP2_PRED_SETGT_UINT;
1237		case AF_CC_GE: return ALU_OP2_PRED_SETGE_UINT;
1238		}
1239		break;
1240	}
1241	}
1242
1243	assert(!"unexpected cc&cmp_type combination");
1244	return ~0u;
1245}
1246
1247unsigned get_killcc_op(unsigned cc, unsigned cmp_type) {
1248
1249	switch(cmp_type) {
1250	case AF_FLOAT_CMP: {
1251		switch (cc) {
1252		case AF_CC_E: return ALU_OP2_KILLE;
1253		case AF_CC_NE: return ALU_OP2_KILLNE;
1254		case AF_CC_GT: return ALU_OP2_KILLGT;
1255		case AF_CC_GE: return ALU_OP2_KILLGE;
1256		}
1257		break;
1258	}
1259	case AF_INT_CMP: {
1260		switch (cc) {
1261		case AF_CC_E: return ALU_OP2_KILLE_INT;
1262		case AF_CC_NE: return ALU_OP2_KILLNE_INT;
1263		case AF_CC_GT: return ALU_OP2_KILLGT_INT;
1264		case AF_CC_GE: return ALU_OP2_KILLGE_INT;
1265		}
1266		break;
1267	}
1268	case AF_UINT_CMP: {
1269		switch (cc) {
1270		case AF_CC_E: return ALU_OP2_KILLE_INT;
1271		case AF_CC_NE: return ALU_OP2_KILLNE_INT;
1272		case AF_CC_GT: return ALU_OP2_KILLGT_UINT;
1273		case AF_CC_GE: return ALU_OP2_KILLGE_UINT;
1274		}
1275		break;
1276	}
1277	}
1278
1279	assert(!"unexpected cc&cmp_type combination");
1280	return ~0u;
1281}
1282
1283unsigned get_cndcc_op(unsigned cc, unsigned cmp_type) {
1284
1285	switch(cmp_type) {
1286	case AF_FLOAT_CMP: {
1287		switch (cc) {
1288		case AF_CC_E: return ALU_OP3_CNDE;
1289		case AF_CC_GT: return ALU_OP3_CNDGT;
1290		case AF_CC_GE: return ALU_OP3_CNDGE;
1291		}
1292		break;
1293	}
1294	case AF_INT_CMP: {
1295		switch (cc) {
1296		case AF_CC_E: return ALU_OP3_CNDE_INT;
1297		case AF_CC_GT: return ALU_OP3_CNDGT_INT;
1298		case AF_CC_GE: return ALU_OP3_CNDGE_INT;
1299		}
1300		break;
1301	}
1302	}
1303
1304	assert(!"unexpected cc&cmp_type combination");
1305	return ~0u;
1306}
1307
1308
1309void convert_predset_to_set(shader& sh, alu_node* a) {
1310
1311	unsigned flags = a->bc.op_ptr->flags;
1312	unsigned cc = flags & AF_CC_MASK;
1313	unsigned cmp_type = flags & AF_CMP_TYPE_MASK;
1314
1315	bool swap_args = false;
1316
1317	cc = invert_setcc_condition(cc, swap_args);
1318
1319	unsigned newop = get_setcc_op(cc, cmp_type, true);
1320
1321	a->dst.resize(1);
1322	a->bc.set_op(newop);
1323
1324	if (swap_args) {
1325		std::swap(a->src[0], a->src[1]);
1326		std::swap(a->bc.src[0], a->bc.src[1]);
1327	}
1328
1329	a->bc.update_exec_mask = 0;
1330	a->bc.update_pred = 0;
1331}
1332
1333} // namespace r600_sb
1334