1/*
2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 *      Vadim Girlin
25 */
26
// Set to a non-zero value to compile in the debug dumps wrapped in PPH_DUMP().
#define PPH_DEBUG 0

// PPH_DUMP(q): when PPH_DEBUG is non-zero, expands to the statements `q`
// wrapped in a do { } while (0) block; otherwise expands to nothing, so the
// wrapped statements are not compiled at all.
#if PPH_DEBUG
#define PPH_DUMP(q) do { q } while (0)
#else
#define PPH_DUMP(q)
#endif
34
35#include "sb_shader.h"
36#include "sb_pass.h"
37
38namespace r600_sb {
39
40int peephole::run() {
41
42	run_on(sh.root);
43
44	return 0;
45}
46
47void peephole::run_on(container_node* c) {
48
49	for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
50		node *n = *I;
51
52		if (n->is_container())
53			run_on(static_cast<container_node*>(n));
54		else {
55			if (n->is_fetch_inst() && (n->fetch_op_flags() & FF_GDS)) {
56				fetch_node *f = static_cast<fetch_node*>(n);
57				bool has_dst = false;
58
59				for(vvec::iterator I = f->dst.begin(), E = f->dst.end(); I != E; ++I) {
60					value *v = *I;
61					if (v)
62						has_dst = true;
63				}
64				if (!has_dst)
65					if (f->bc.op >= FETCH_OP_GDS_ADD_RET && f->bc.op <= FETCH_OP_GDS_USHORT_READ_RET)
66						f->bc.set_op(f->bc.op - FETCH_OP_GDS_ADD_RET + FETCH_OP_GDS_ADD);
67			}
68			if (n->is_alu_inst()) {
69				alu_node *a = static_cast<alu_node*>(n);
70
71				if (a->bc.op_ptr->flags & AF_LDS) {
72					if (!a->dst[0]) {
73						if (a->bc.op >= LDS_OP2_LDS_ADD_RET && a->bc.op <= LDS_OP3_LDS_MSKOR_RET)
74							a->bc.set_op(a->bc.op - LDS_OP2_LDS_ADD_RET + LDS_OP2_LDS_ADD);
75						if (a->bc.op == LDS_OP1_LDS_READ_RET)
76							a->src[0] = sh.get_undef_value();
77					}
78				} else if (a->bc.op_ptr->flags &
79						(AF_PRED | AF_SET | AF_CMOV | AF_KILL)) {
80					optimize_cc_op(a);
81				} else if (a->bc.op == ALU_OP1_FLT_TO_INT) {
82
83					alu_node *s = a;
84					if (get_bool_flt_to_int_source(s)) {
85						convert_float_setcc(a, s);
86					}
87				}
88			}
89		}
90	}
91}
92
93void peephole::optimize_cc_op(alu_node* a) {
94	unsigned aflags = a->bc.op_ptr->flags;
95
96	if (aflags & (AF_PRED | AF_SET | AF_KILL)) {
97		optimize_cc_op2(a);
98	} else if (aflags & AF_CMOV) {
99		optimize_CNDcc_op(a);
100	}
101}
102
103void peephole::convert_float_setcc(alu_node *f2i, alu_node *s) {
104	alu_node *ns = sh.clone(s);
105
106	ns->dst[0] = f2i->dst[0];
107	ns->dst[0]->def = ns;
108	ns->bc.set_op(ns->bc.op + (ALU_OP2_SETE_DX10 - ALU_OP2_SETE));
109	f2i->insert_after(ns);
110	f2i->remove();
111}
112
113void peephole::optimize_cc_op2(alu_node* a) {
114
115	unsigned flags = a->bc.op_ptr->flags;
116	unsigned cc = flags & AF_CC_MASK;
117
118	if ((cc != AF_CC_E && cc != AF_CC_NE) || a->pred)
119		return;
120
121	unsigned cmp_type = flags & AF_CMP_TYPE_MASK;
122	unsigned dst_type = flags & AF_DST_TYPE_MASK;
123
124	int op_kind = (flags & AF_PRED) ? 1 :
125			(flags & AF_SET) ? 2 :
126			(flags & AF_KILL) ? 3 : 0;
127
128	bool swapped = false;
129
130	if (a->src[0]->is_const() && a->src[0]->literal_value == literal(0)) {
131		std::swap(a->src[0],a->src[1]);
132		swapped = true;
133		// clear modifiers
134		memset(&a->bc.src[0], 0, sizeof(bc_alu_src));
135		memset(&a->bc.src[1], 0, sizeof(bc_alu_src));
136	}
137
138	if (swapped || (a->src[1]->is_const() &&
139			a->src[1]->literal_value == literal(0))) {
140
141		value *s = a->src[0];
142
143		bool_op_info bop = {};
144
145		PPH_DUMP(
146			sblog << "cc_op2: ";
147			dump::dump_op(a);
148			sblog << "\n";
149		);
150
151		if (!get_bool_op_info(s, bop))
152			return;
153
154		if (cc == AF_CC_E)
155			bop.invert = !bop.invert;
156
157		bool swap_args = false;
158
159		cc = bop.n->bc.op_ptr->flags & AF_CC_MASK;
160
161		if (bop.invert)
162			cc = invert_setcc_condition(cc, swap_args);
163
164		if (bop.int_cvt) {
165			assert(cmp_type != AF_FLOAT_CMP);
166			cmp_type = AF_FLOAT_CMP;
167		}
168
169		PPH_DUMP(
170			sblog << "boi node: ";
171			dump::dump_op(bop.n);
172			sblog << " invert: " << bop.invert << "  int_cvt: " << bop.int_cvt;
173			sblog <<"\n";
174		);
175
176		unsigned newop;
177
178		switch(op_kind) {
179		case 1:
180			newop = get_predsetcc_op(cc, cmp_type);
181			break;
182		case 2:
183			newop = get_setcc_op(cc, cmp_type, dst_type != AF_FLOAT_DST);
184			break;
185		case 3:
186			newop = get_killcc_op(cc, cmp_type);
187			break;
188		default:
189			newop = ALU_OP0_NOP;
190			assert(!"invalid op kind");
191			break;
192		}
193
194		a->bc.set_op(newop);
195
196		if (swap_args) {
197			a->src[0] = bop.n->src[1];
198			a->src[1] = bop.n->src[0];
199			a->bc.src[0] = bop.n->bc.src[1];
200			a->bc.src[1] = bop.n->bc.src[0];
201
202		} else {
203			a->src[0] = bop.n->src[0];
204			a->src[1] = bop.n->src[1];
205			a->bc.src[0] = bop.n->bc.src[0];
206			a->bc.src[1] = bop.n->bc.src[1];
207		}
208	}
209}
210
211void peephole::optimize_CNDcc_op(alu_node* a) {
212	unsigned flags = a->bc.op_ptr->flags;
213	unsigned cc = flags & AF_CC_MASK;
214	unsigned cmp_type = flags & AF_CMP_TYPE_MASK;
215	bool swap = false;
216
217	if (cc == AF_CC_E) {
218		swap = !swap;
219		cc = AF_CC_NE;
220	} else if (cc != AF_CC_NE)
221		return;
222
223	value *s = a->src[0];
224
225	bool_op_info bop = {};
226
227	PPH_DUMP(
228		sblog << "cndcc: ";
229		dump::dump_op(a);
230		sblog << "\n";
231	);
232
233	if (!get_bool_op_info(s, bop))
234		return;
235
236	alu_node *d = bop.n;
237
238	if (d->bc.omod)
239		return;
240
241	PPH_DUMP(
242		sblog << "cndcc def: ";
243		dump::dump_op(d);
244		sblog << "\n";
245	);
246
247
248	unsigned dflags = d->bc.op_ptr->flags;
249	unsigned dcc = dflags & AF_CC_MASK;
250	unsigned dcmp_type = dflags & AF_CMP_TYPE_MASK;
251	unsigned ddst_type = dflags & AF_DST_TYPE_MASK;
252	int nds;
253
254	// TODO we can handle some of these cases,
255	// though probably this shouldn't happen
256	if (cmp_type != AF_FLOAT_CMP && ddst_type == AF_FLOAT_DST)
257		return;
258
259	if (d->src[0]->is_const() && d->src[0]->literal_value == literal(0))
260		nds = 1;
261	else if ((d->src[1]->is_const() &&
262			d->src[1]->literal_value == literal(0)))
263		nds = 0;
264	else
265		return;
266
267	// can't propagate ABS modifier to CNDcc because it's OP3
268	if (d->bc.src[nds].abs)
269		return;
270
271	// TODO we can handle some cases for uint comparison
272	if (dcmp_type == AF_UINT_CMP)
273		return;
274
275	if (dcc == AF_CC_NE) {
276		dcc = AF_CC_E;
277		swap = !swap;
278	}
279
280	if (nds == 1) {
281		switch (dcc) {
282		case AF_CC_GT: dcc = AF_CC_GE; swap = !swap; break;
283		case AF_CC_GE: dcc = AF_CC_GT; swap = !swap; break;
284		default: break;
285		}
286	}
287
288	a->src[0] = d->src[nds];
289	a->bc.src[0] = d->bc.src[nds];
290
291	if (swap) {
292		std::swap(a->src[1], a->src[2]);
293		std::swap(a->bc.src[1], a->bc.src[2]);
294	}
295
296	a->bc.set_op(get_cndcc_op(dcc, dcmp_type));
297
298}
299
300bool peephole::get_bool_flt_to_int_source(alu_node* &a) {
301
302	if (a->bc.op == ALU_OP1_FLT_TO_INT) {
303
304		if (a->bc.src[0].neg || a->bc.src[0].abs || a->bc.src[0].rel)
305			return false;
306
307		value *s = a->src[0];
308		if (!s || !s->def || !s->def->is_alu_inst())
309			return false;
310
311		alu_node *dn = static_cast<alu_node*>(s->def);
312
313		if (dn->is_alu_op(ALU_OP1_TRUNC)) {
314			s = dn->src[0];
315			if (!s || !s->def || !s->def->is_alu_inst())
316				return false;
317
318			if (dn->bc.src[0].neg != 1 || dn->bc.src[0].abs != 0 ||
319					dn->bc.src[0].rel != 0) {
320				return false;
321			}
322
323			dn = static_cast<alu_node*>(s->def);
324
325		}
326
327		if (dn->bc.op_ptr->flags & AF_SET) {
328			a = dn;
329			return true;
330		}
331	}
332	return false;
333}
334
335bool peephole::get_bool_op_info(value* b, bool_op_info& bop) {
336
337	node *d = b->def;
338
339	if (!d || !d->is_alu_inst())
340		return false;
341
342	alu_node *dn = static_cast<alu_node*>(d);
343
344	if (dn->bc.op_ptr->flags & AF_SET) {
345		bop.n = dn;
346
347		if (dn->bc.op_ptr->flags & AF_DX10)
348			bop.int_cvt = true;
349
350		return true;
351	}
352
353	if (get_bool_flt_to_int_source(dn)) {
354		bop.n = dn;
355		bop.int_cvt = true;
356		return true;
357	}
358
359	return false;
360}
361
362} // namespace r600_sb
363