/*
 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Vadim Girlin
 */

#define VT_DEBUG 0

#if VT_DEBUG
#define VT_DUMP(q) do { q } while (0)
#else
#define VT_DUMP(q)
#endif

#include <cstring>

#include "sb_shader.h"
#include "sb_pass.h"

namespace r600_sb {

static const char * chans = "xyzw01?_";

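/* Pretty-printer for values. Dead values are wrapped in "{}", the value's
 * SSA version is appended as ".N", and flags show up as suffixes:
 * "||" for global, "F" for fixed, "P" for preallocated. If a GPR was
 * allocated (via the parent array for relative values), it is printed
 * as "@Rn.c".
 */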
sb_ostream& operator << (sb_ostream &o, value &v) {

	bool dead = v.flags & VLF_DEAD;

	if (dead)
		o << "{";

	switch (v.kind) {
	case VLK_SPECIAL_REG: {
		switch (v.select.sel()) {
			case SV_AR_INDEX: o << "AR"; break;
			case SV_ALU_PRED: o << "PR"; break;
			case SV_EXEC_MASK: o << "EM"; break;
			case SV_VALID_MASK: o << "VM"; break;
			case SV_GEOMETRY_EMIT: o << "GEOMETRY_EMIT"; break;
			case SV_LDS_RW: o << "LDS_RW"; break;
			case SV_LDS_OQA: o << "LDS_OQA"; break;
			case SV_LDS_OQB: o << "LDS_OQB"; break;
			case SV_SCRATCH: o << "SCRATCH"; break;
			default: o << "???specialreg"; break;
		}
		break;
	}

	case VLK_REG:
		o << "R" << v.select.sel() << "."
			<< chans[v.select.chan()];

		break;
	case VLK_KCACHE: {
		o << "C" << v.select.sel() << "." << chans[v.select.chan()];
	}
		break;
	case VLK_CONST:
		o << v.literal_value.f << "|";
		o.print_zw_hex(v.literal_value.u, 8);
		break;
	case VLK_PARAM:
		o << "Param" << (v.select.sel() - ALU_SRC_PARAM_OFFSET)
			<< chans[v.select.chan()];
		break;
	case VLK_TEMP:
		o << "t" << v.select.sel() - shader::temp_regid_offset;
		break;
	case VLK_REL_REG:

		o << "A" << v.select;
		o << "[";
		o << *v.rel;
		o << "]";

		o << "_" << v.uid;

		break;
	case VLK_UNDEF:
		o << "undef";
		break;
	default:
		o << v.kind << "?????";
		break;
	}

	if (v.version)
		o << "." << v.version;

	if (dead)
		o << "}";

	if (v.is_global())
		o << "||";
	if (v.is_fixed())
		o << "F";
	if (v.is_prealloc())
		o << "P";

	sel_chan g;

	if (v.is_rel()) {
		g = v.array->gpr;
	} else {
		g = v.gpr;
	}

	if (g) {
		o << "@R" << g.sel() << "." << chans[g.chan()];
	}

	return o;
}

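/* Global value numbering: values are bucketed by expression hash. If an
 * equal expression is already present in the bucket (or the expression
 * folds to a known value), the new value reuses that value's gvn_source
 * instead of becoming a source itself, so later passes can treat values
 * with the same gvn_source as equivalent.
 */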
void value_table::add_value(value* v) {

	if (v->gvn_source) {
		return;
	}

	VT_DUMP(
		sblog << "gvn add_value ";
		dump::dump_val(v);
	);

	value_hash hash = v->hash();
	vt_item & vti = hashtable[hash & size_mask];
	vti.push_back(v);
	++cnt;

	if (v->def && ex.try_fold(v)) {
		VT_DUMP(
			sblog << " folded: ";
			dump::dump_val(v->gvn_source);
			sblog << "\n";
		);
		return;
	}

	int n = 0;
	for (vt_item::iterator I = vti.begin(), E = vti.end(); I != E; ++I, ++n) {
		value *c = *I;

		if (c == v)
			break;

		if (expr_equal(c, v)) {
			v->gvn_source = c->gvn_source;

			VT_DUMP(
				sblog << " found : equal to ";
				dump::dump_val(v->gvn_source);
				sblog << "\n";
			);
			return;
		}
	}

	v->gvn_source = v;
	VT_DUMP(
		sblog << " added new\n";
	);
}

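/* The expression hash is computed once and cached in ghash (0 means "not
 * computed yet"). Values that are neither relative nor produced by a
 * defining node fall back to an identity hash derived from the object's
 * address, with the low bit forced to 1 to keep the hash nonzero.
 */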
value_hash value::hash() {
	if (ghash)
		return ghash;
	if (is_rel())
		ghash = rel_hash();
	else if (def)
		ghash = def->hash();
	else
		ghash = ((uintptr_t)this) | 1;

	return ghash;
}

value_hash value::rel_hash() {
	value_hash h = rel ? rel->hash() : 0;
	h |= select << 10;
	h |= array->hash();
	return h;
}

bool value_table::expr_equal(value* l, value* r) {
	return ex.equal(l, r);
}

void value_table::get_values(vvec& v) {
	v.resize(cnt);

	vvec::iterator T = v.begin();

	for (vt_table::iterator I = hashtable.begin(), E = hashtable.end();
			I != E; ++I) {
		T = std::copy(I->begin(), I->end(), T);
	}
}

void value::add_use(node* n) {
	if (0) {
		sblog << "add_use ";
		dump::dump_val(this);
		sblog << "   =>  ";
		dump::dump_op(n);
	}
	uses.push_back(n);
}

struct use_node_comp {
	explicit use_node_comp(const node *n) : n(n) {}
	bool operator() (const node *o) {
		return o->hash() == n->hash();
	}

	private:
		const node *n;
};

void value::remove_use(const node *n) {
	uselist::iterator it =
		std::find_if(uses.begin(), uses.end(), use_node_comp(n));

	if (it != uses.end())
	{
		// We only ever had a pointer, so don't delete it here
		uses.erase(it);
	}
}

unsigned value::use_count() {
	return uses.size();
}

bool value::is_global() {
	if (chunk)
		return chunk->is_global();
	return flags & VLF_GLOBAL;
}

void value::set_global() {
	assert(is_sgpr());
	flags |= VLF_GLOBAL;
	if (chunk)
		chunk->set_global();
}

void value::set_prealloc() {
	assert(is_sgpr());
	flags |= VLF_PREALLOC;
	if (chunk)
		chunk->set_prealloc();
}

bool value::is_fixed() {
	if (array && array->gpr)
		return true;
	if (chunk && chunk->is_fixed())
		return true;
	return flags & VLF_FIXED;
}

void value::fix() {
	if (chunk)
		chunk->fix();
	flags |= VLF_FIXED;
}

bool value::is_prealloc() {
	if (chunk)
		return chunk->is_prealloc();
	return flags & VLF_PREALLOC;
}

void value::delete_uses() {
	// We only ever had pointers, so don't delete them here
	uses.erase(uses.begin(), uses.end());
}

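/* Check whether copy-propagating 'src' into the instructions that use this
 * value may combine two relative accesses with different address values in
 * a single operation; returns false if such a conflict is possible.
 */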
bool value::no_reladdr_conflict_with(value *src)
{
	/* If the register is not relative, it can't create a relative access
	 * conflict. */
	if (!src->is_rel())
		return true;

	/* If the destination is AR then we accept the copy propagation: the
	 * scheduler re-creates the address loading operation anyway, and it
	 * will signal interference if there is a conflicting address register
	 * load, so the propagation fails at that point.
	 */
	if (gvalue()->is_AR())
		return true;

	/* For all nodes that use this value, test whether the operation uses
	 * another relative access with a different address value. If one is
	 * found, signal a conflict.
	 */
	for (uselist::const_iterator N = uses.begin(); N != uses.end(); ++N) {
		for (vvec::const_iterator V = (*N)->src.begin(); V != (*N)->src.end(); ++V) {
			if (*V) {
				value *v = (*V)->gvalue();
				if (v != src && v->is_rel() && v->rel != src->rel)
					return false;
			}
		}
		for (vvec::const_iterator V = (*N)->dst.begin(); V != (*N)->dst.end(); ++V) {
			if (*V) {
				value *v = (*V)->gvalue();
				if (v && v != src && v->is_rel() && (v->rel != src->rel))
					return false;
			}
		}
	}
	return true;
}

void ra_constraint::update_values() {
	for (vvec::iterator I = values.begin(), E = values.end(); I != E; ++I) {
		assert(!(*I)->constraint);
		(*I)->constraint = this;
	}
}

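/* sb_pool is a simple bump allocator: requests are rounded up to
 * SB_POOL_ALIGN and carved sequentially out of malloc'ed blocks of
 * block_size bytes, starting a fresh block whenever the current one
 * can't fit the request. Individual allocations are never freed; the
 * whole pool is released at once with free_all().
 */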
void* sb_pool::allocate(unsigned sz) {
	sz = (sz + SB_POOL_ALIGN - 1) & ~(SB_POOL_ALIGN - 1);
	assert(sz < (block_size >> 6) && "allocation size is too big for sb_pool");

	unsigned offset = total_size % block_size;
	unsigned capacity = block_size * blocks.size();

	if (total_size + sz > capacity) {
		total_size = capacity;
		void * nb = malloc(block_size);
		blocks.push_back(nb);
		offset = 0;
	}

	total_size += sz;
	return ((char*)blocks.back() + offset);
}

void sb_pool::free_all() {
	for (block_vector::iterator I = blocks.begin(), E = blocks.end(); I != E;
			++I) {
		free(*I);
	}
	// drop the now-dangling block pointers so the pool is safe to reuse
	blocks.clear();
	total_size = 0;
}

value* sb_value_pool::create(value_kind k, sel_chan regid,
                             unsigned ver) {
	void* np = allocate(aligned_elt_size);
	value *v = new (np) value(size(), k, regid, ver);
	return v;
}

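/* Values are placement-new'ed into pool blocks by create(), so they have
 * to be destroyed explicitly here; total_size bounds the walk to elements
 * that were actually constructed.
 */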
void sb_value_pool::delete_all() {
	unsigned bcnt = blocks.size();
	unsigned toffset = 0;
	for (unsigned b = 0; b < bcnt; ++b) {
		char *bstart = (char*)blocks[b];
		for (unsigned offset = 0; offset < block_size;
				offset += aligned_elt_size) {
			((value*)(bstart + offset))->~value();
			toffset += aligned_elt_size;
			if (toffset >= total_size)
				return;
		}
	}
}

bool sb_bitset::get(unsigned id) {
	assert(id < bit_size);
	unsigned w = id / bt_bits;
	unsigned b = id % bt_bits;
	return (data[w] >> b) & 1;
}

void sb_bitset::set(unsigned id, bool bit) {
	assert(id < bit_size);
	unsigned w = id / bt_bits;
	unsigned b = id % bt_bits;
	if (w >= data.size())
		data.resize(w + 1);

	// cast before shifting so the topmost bit of a word is handled
	// without undefined behavior from shifting a plain int
	if (bit)
		data[w] |= (basetype)1 << b;
	else
		data[w] &= ~((basetype)1 << b);
}

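/* Like set(), but returns true iff the stored bit actually changed, which
 * lets callers (e.g. sb_value_set::add_val) detect whether a set was
 * modified.
 */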
inline bool sb_bitset::set_chk(unsigned id, bool bit) {
	assert(id < bit_size);
	unsigned w = id / bt_bits;
	unsigned b = id % bt_bits;
	basetype d = data[w];
	basetype dn = (d & ~((basetype)1 << b)) | ((basetype)bit << b);
	bool r = (d != dn);
	data[w] = r ? dn : data[w];
	return r;
}

void sb_bitset::clear() {
	std::fill(data.begin(), data.end(), 0);
}

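/* When growing, the unused high bits of the last existing word may still
 * hold stale data from before an earlier shrink, so they are cleared
 * before the new size takes effect.
 */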
void sb_bitset::resize(unsigned size) {
	unsigned cur_data_size = data.size();
	unsigned new_data_size = (size + bt_bits - 1) / bt_bits;

	if (new_data_size != cur_data_size)
		data.resize(new_data_size);

	// make sure that new bits in the existing word are cleared
	if (cur_data_size && size > bit_size && bit_size % bt_bits) {
		basetype clear_mask = (~(basetype)0u) << (bit_size % bt_bits);
		data[cur_data_size - 1] &= ~clear_mask;
	}

	bit_size = size;
}

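/* Return the index of the first set bit at position >= start, scanning
 * word by word with __builtin_ctz; returns bit_size when no set bit is
 * left. A hypothetical usage sketch iterating all set bits:
 *
 *   unsigned i = 0;
 *   while (i < bs.size() && (i = bs.find_bit(i)) < bs.size()) {
 *       // bit i is set
 *       ++i;
 *   }
 */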
unsigned sb_bitset::find_bit(unsigned start) {
	assert(start < bit_size);
	unsigned w = start / bt_bits;
	unsigned b = start % bt_bits;
	unsigned sz = data.size();

	while (w < sz) {
		basetype d = data[w] >> b;
		if (d != 0) {
			unsigned pos = __builtin_ctz(d) + b + w * bt_bits;
			return pos;
		}

		b = 0;
		++w;
	}

	return bit_size;
}

sb_value_set::iterator::iterator(shader& sh, sb_value_set* s, unsigned nb)
	: vp(sh.get_value_pool()), s(s), nb(nb) {}

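/* Set union: this |= s2, after padding this set to at least s2's size.
 * Returns true iff the union added at least one new value.
 */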
bool sb_value_set::add_set_checked(sb_value_set& s2) {
	if (bs.size() < s2.bs.size())
		bs.resize(s2.bs.size());
	sb_bitset nbs = bs | s2.bs;
	if (bs != nbs) {
		bs.swap(nbs);
		return true;
	}
	return false;
}

void r600_sb::sb_value_set::remove_set(sb_value_set& s2) {
	bs.mask(s2.bs);
}

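/* Value uids are 1-based, so value v occupies bit (uid - 1); the bitset is
 * grown with 32 bits of slack to reduce the number of reallocations.
 */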
bool sb_value_set::add_val(value* v) {
	assert(v);
	if (bs.size() < v->uid)
		bs.resize(v->uid + 32);

	return bs.set_chk(v->uid - 1, 1);
}

bool sb_value_set::remove_vec(vvec& vv) {
	bool modified = false;
	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
		if (*I)
			modified |= remove_val(*I);
	}
	return modified;
}

void sb_value_set::clear() {
	bs.clear();
}

bool sb_value_set::remove_val(value* v) {
	assert(v);
	if (bs.size() < v->uid)
		return false;
	return bs.set_chk(v->uid - 1, 0);
}

bool r600_sb::sb_value_set::add_vec(vvec& vv) {
	bool modified = false;
	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
		value *v = *I;
		if (v)
			modified |= add_val(v);
	}
	return modified;
}

bool r600_sb::sb_value_set::contains(value* v) {
	unsigned b = v->uid - 1;
	if (b < bs.size())
		return bs.get(b);
	else
		return false;
}

bool sb_value_set::empty() {
	return bs.size() == 0 || bs.find_bit(0) == bs.size();
}

void sb_bitset::swap(sb_bitset& bs2) {
	std::swap(data, bs2.data);
	std::swap(bit_size, bs2.bit_size);
}

bool sb_bitset::operator ==(const sb_bitset& bs2) {
	if (bit_size != bs2.bit_size)
		return false;

	for (unsigned i = 0, c = data.size(); i < c; ++i) {
		if (data[i] != bs2.data[i])
			return false;
	}
	return true;
}

sb_bitset& sb_bitset::operator &=(const sb_bitset& bs2) {
	if (bit_size > bs2.bit_size) {
		resize(bs2.bit_size);
	}

	for (unsigned i = 0, c = std::min(data.size(), bs2.data.size()); i < c;
			++i) {
		data[i] &= bs2.data[i];
	}
	return *this;
}

sb_bitset& sb_bitset::mask(const sb_bitset& bs2) {
	if (bit_size < bs2.bit_size) {
		resize(bs2.bit_size);
	}

	// iterate only over the common prefix to avoid reading past the end
	// of bs2.data; bits beyond bs2's size are conceptually zero in bs2
	// and would leave our bits unchanged anyway
	for (unsigned i = 0, c = std::min(data.size(), bs2.data.size()); i < c;
			++i) {
		data[i] &= ~bs2.data[i];
	}
	return *this;
}

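/* Verify that a CK_SAME_REG constraint holds after register allocation:
 * every non-null value must have a GPR assigned, all of them the same
 * register, and chan-pinned values must keep their pinned channel.
 */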
bool ra_constraint::check() {
	assert(kind == CK_SAME_REG);

	unsigned reg = 0;

	for (vvec::iterator I = values.begin(), E = values.end(); I != E; ++I) {
		value *v = *I;
		if (!v)
			continue;

		if (!v->gpr)
			return false;

		if (reg == 0)
			reg = v->gpr.sel() + 1;
		else if (reg != v->gpr.sel() + 1)
			return false;

		if (v->is_chan_pinned()) {
			if (v->pin_gpr.chan() != v->gpr.chan())
				return false;
		}
	}
	return true;
}

bool gpr_array::is_dead() {
	return false;
}

} // namespace r600_sb