1/*
2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 *      Vadim Girlin
25 */
26
27#define RA_DEBUG 0
28
29#if RA_DEBUG
30#define RA_DUMP(q) do { q } while (0)
31#else
32#define RA_DUMP(q)
33#endif
34
35#include "sb_shader.h"
36#include "sb_pass.h"
37
38namespace r600_sb {
39
40int ra_coalesce::run() {
41	return sh.coal.run();
42}
43
44void coalescer::add_edge(value* a, value* b, unsigned cost) {
45	assert(a->is_sgpr() && b->is_sgpr());
46	edges.insert(new ra_edge(a,b, cost));
47}
48
49void coalescer::create_chunk(value *v) {
50
51	assert(v->is_sgpr());
52
53	ra_chunk *c = new ra_chunk();
54
55	c->values.push_back(v);
56
57	if (v->is_chan_pinned())
58		c->flags |= RCF_PIN_CHAN;
59	if (v->is_reg_pinned()) {
60		c->flags |= RCF_PIN_REG;
61	}
62
63	c->pin = v->pin_gpr;
64
65	RA_DUMP(
66		sblog << "create_chunk: ";
67		dump_chunk(c);
68	);
69
70	all_chunks.push_back(c);
71	v->chunk = c;
72
73}
74
75void coalescer::unify_chunks(ra_edge *e) {
76	ra_chunk *c1 = e->a->chunk, *c2 = e->b->chunk;
77
78	RA_DUMP(
79		sblog << "unify_chunks: ";
80		dump_chunk(c1);
81		dump_chunk(c2);
82	);
83
84	if (c2->is_chan_pinned() && !c1->is_chan_pinned()) {
85		c1->flags |= RCF_PIN_CHAN;
86		c1->pin = sel_chan(c1->pin.sel(), c2->pin.chan());
87	}
88
89	if (c2->is_reg_pinned() && !c1->is_reg_pinned()) {
90		c1->flags |= RCF_PIN_REG;
91		c1->pin = sel_chan(c2->pin.sel(), c1->pin.chan());
92	}
93
94	c1->values.reserve(c1->values.size() + c2->values.size());
95
96	for (vvec::iterator I = c2->values.begin(), E = c2->values.end(); I != E;
97			++I) {
98		(*I)->chunk = c1;
99		c1->values.push_back(*I);
100	}
101
102	chunk_vec::iterator F = std::find(all_chunks.begin(), all_chunks.end(), c2);
103	assert(F != all_chunks.end());
104
105	all_chunks.erase(F);
106
107	c1->cost += c2->cost + e->cost;
108	delete c2;
109}
110
111bool coalescer::chunks_interference(ra_chunk *c1, ra_chunk *c2) {
112	unsigned pin_flags = (c1->flags & c2->flags) &
113			(RCF_PIN_CHAN | RCF_PIN_REG);
114
115	if ((pin_flags & RCF_PIN_CHAN) &&
116			c1->pin.chan() != c2->pin.chan())
117		return true;
118
119	if ((pin_flags & RCF_PIN_REG) &&
120			c1->pin.sel() != c2->pin.sel())
121		return true;
122
123	for (vvec::iterator I = c1->values.begin(), E = c1->values.end(); I != E;
124			++I) {
125		value *v1 = *I;
126
127		for (vvec::iterator I = c2->values.begin(), E = c2->values.end(); I != E;
128				++I) {
129			value *v2 = *I;
130
131			if (!v1->v_equal(v2) && v1->interferences.contains(v2))
132				return true;
133		}
134	}
135	return false;
136}
137
138void coalescer::build_chunks() {
139
140	for (edge_queue::iterator I = edges.begin(), E = edges.end();
141			I != E; ++I) {
142
143		ra_edge *e = *I;
144
145		if (!e->a->chunk)
146			create_chunk(e->a);
147
148		if (!e->b->chunk)
149			create_chunk(e->b);
150
151		ra_chunk *c1 = e->a->chunk, *c2 = e->b->chunk;
152
153		if (c1 == c2) {
154			c1->cost += e->cost;
155		} else if (!chunks_interference(c1, c2))
156			unify_chunks(e);
157	}
158}
159
160ra_constraint* coalescer::create_constraint(constraint_kind kind) {
161	ra_constraint *c = new ra_constraint(kind);
162	all_constraints.push_back(c);
163	return c;
164}
165
166void coalescer::dump_edges() {
167	sblog << "######## affinity edges\n";
168
169	for (edge_queue::iterator I = edges.begin(), E = edges.end();
170			I != E; ++I) {
171		ra_edge* e = *I;
172		sblog << "  ra_edge ";
173		dump::dump_val(e->a);
174		sblog << " <-> ";
175		dump::dump_val(e->b);
176		sblog << "   cost = " << e->cost << "\n";
177	}
178}
179
180void coalescer::dump_chunks() {
181	sblog << "######## chunks\n";
182
183	for (chunk_vec::iterator I = all_chunks.begin(), E = all_chunks.end();
184			I != E; ++I) {
185		ra_chunk* c = *I;
186		dump_chunk(c);
187	}
188}
189
190
191void coalescer::dump_constraint_queue() {
192	sblog << "######## constraints\n";
193
194	for (constraint_queue::iterator I = constraints.begin(),
195			E = constraints.end(); I != E; ++I) {
196		ra_constraint* c = *I;
197		dump_constraint(c);
198	}
199}
200
201void coalescer::dump_chunk(ra_chunk* c) {
202	sblog << "  ra_chunk cost = " << c->cost << "  :  ";
203	dump::dump_vec(c->values);
204
205	if (c->flags & RCF_PIN_REG)
206		sblog << "   REG = " << c->pin.sel();
207
208	if (c->flags & RCF_PIN_CHAN)
209		sblog << "   CHAN = " << c->pin.chan();
210
211	sblog << (c->flags & RCF_GLOBAL ? "  GLOBAL" : "");
212
213	sblog << "\n";
214}
215
216void coalescer::dump_constraint(ra_constraint* c) {
217	sblog << "  ra_constraint: ";
218	switch (c->kind) {
219		case CK_PACKED_BS: sblog << "PACKED_BS"; break;
220		case CK_PHI: sblog << "PHI"; break;
221		case CK_SAME_REG: sblog << "SAME_REG"; break;
222		default: sblog << "UNKNOWN_KIND"; assert(0); break;
223	}
224
225	sblog << "  cost = " << c->cost << "  : ";
226	dump::dump_vec(c->values);
227
228	sblog << "\n";
229}
230
231void coalescer::get_chunk_interferences(ra_chunk *c, val_set &s) {
232
233	for (vvec::iterator I = c->values.begin(), E = c->values.end(); I != E;
234			++I) {
235		value *v = *I;
236		s.add_set(v->interferences);
237	}
238	s.remove_vec(c->values);
239}
240
241void coalescer::build_chunk_queue() {
242	for (chunk_vec::iterator I = all_chunks.begin(),
243			E = all_chunks.end(); I != E; ++I) {
244		ra_chunk *c = *I;
245
246		if (!c->is_fixed())
247			chunks.insert(c);
248	}
249}
250
251void coalescer::build_constraint_queue() {
252	for (constraint_vec::iterator I = all_constraints.begin(),
253			E = all_constraints.end(); I != E; ++I) {
254		ra_constraint *c = *I;
255		unsigned cost = 0;
256
257		if (c->values.empty() || !c->values.front()->is_sgpr())
258			continue;
259
260		if (c->kind != CK_SAME_REG)
261			continue;
262
263		for (vvec::iterator I = c->values.begin(), E = c->values.end();
264				I != E; ++I) {
265			value *v = *I;
266			if (!v->chunk)
267				create_chunk(v);
268			else
269				cost += v->chunk->cost;
270		}
271		c->cost = cost;
272		constraints.insert(c);
273	}
274}
275
276void coalescer::color_chunks() {
277
278	for (chunk_queue::iterator I = chunks.begin(), E = chunks.end();
279			I != E; ++I) {
280		ra_chunk *c = *I;
281		if (c->is_fixed() || c->values.size() == 1)
282			continue;
283
284		sb_bitset rb;
285		val_set interf;
286
287		get_chunk_interferences(c, interf);
288
289		RA_DUMP(
290			sblog << "color_chunks: ";
291			dump_chunk(c);
292			sblog << "\n interferences: ";
293			dump::dump_set(sh,interf);
294			sblog << "\n";
295		);
296
297		init_reg_bitset(rb, interf);
298
299		unsigned pass = c->is_reg_pinned() ? 0 : 1;
300
301		unsigned cs = c->is_chan_pinned() ? c->pin.chan() : 0;
302		unsigned ce = c->is_chan_pinned() ? cs + 1 : 4;
303
304		unsigned color = 0;
305
306		while (pass < 2) {
307
308			unsigned rs, re;
309
310			if (pass == 0) {
311				rs = c->pin.sel();
312				re = rs + 1;
313			} else {
314				rs = 0;
315				re = sh.num_nontemp_gpr();
316			}
317
318			for (unsigned reg = rs; reg < re; ++reg) {
319				for (unsigned chan = cs; chan < ce; ++chan) {
320					unsigned bit = sel_chan(reg, chan);
321					if (bit >= rb.size() || !rb.get(bit)) {
322						color = bit;
323						break;
324					}
325				}
326				if (color)
327					break;
328			}
329
330			if (color)
331				break;
332
333			++pass;
334		}
335
336		assert(color);
337		color_chunk(c, color);
338	}
339}
340
341void coalescer::init_reg_bitset(sb_bitset &bs, val_set &vs) {
342
343	for (val_set::iterator I = vs.begin(sh), E = vs.end(sh); I != E; ++I) {
344		value *v = *I;
345
346		if (!v->is_any_gpr())
347			continue;
348
349		unsigned gpr = v->get_final_gpr();
350		if (!gpr)
351			continue;
352
353		if (gpr) {
354			if (gpr >= bs.size())
355				bs.resize(gpr + 64);
356			bs.set(gpr, 1);
357		}
358	}
359}
360
361void coalescer::color_chunk(ra_chunk *c, sel_chan color) {
362
363	vvec vv = c->values;
364
365	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E;
366			++I) {
367		value *v = *I;
368
369		if (v->is_reg_pinned() && v->pin_gpr.sel() != color.sel()) {
370			detach_value(v);
371			continue;
372		}
373
374		if (v->is_chan_pinned() && v->pin_gpr.chan() != color.chan()) {
375			detach_value(v);
376			continue;
377		}
378
379		v->gpr = color;
380
381		if (v->constraint && v->constraint->kind == CK_PHI)
382			v->fix();
383
384
385		RA_DUMP(
386			sblog << " assigned " << color << " to ";
387			dump::dump_val(v);
388			sblog << "\n";
389		);
390	}
391
392	c->pin = color;
393
394	if (c->is_reg_pinned()) {
395		c->fix();
396	}
397}
398
399coalescer::~coalescer() {
400
401	// FIXME use pool allocator ??
402
403	for (constraint_vec::iterator I = all_constraints.begin(),
404			E = all_constraints.end(); I != E; ++I) {
405		delete (*I);
406	}
407
408	for (chunk_vec::iterator I = all_chunks.begin(),
409			E = all_chunks.end(); I != E; ++I) {
410		delete (*I);
411	}
412
413	for (edge_queue::iterator I = edges.begin(), E = edges.end();
414			I != E; ++I) {
415		delete (*I);
416	}
417}
418
419int coalescer::run() {
420	int r;
421
422	RA_DUMP( dump_edges(); );
423
424	build_chunks();
425	RA_DUMP( dump_chunks(); );
426
427	build_constraint_queue();
428	RA_DUMP( dump_constraint_queue(); );
429
430	if ((r = color_constraints()))
431		return r;
432
433	build_chunk_queue();
434	color_chunks();
435
436	return 0;
437}
438
439void coalescer::color_phi_constraint(ra_constraint* c) {
440}
441
442ra_chunk* coalescer::detach_value(value *v) {
443
444	vvec::iterator F = std::find(v->chunk->values.begin(),
445	                             v->chunk->values.end(), v);
446
447	assert(F != v->chunk->values.end());
448	v->chunk->values.erase(F);
449	create_chunk(v);
450
451	if (v->is_reg_pinned()) {
452		v->chunk->fix();
453	}
454
455	RA_DUMP(
456		sblog << "           detached : ";
457		dump_chunk(v->chunk);
458	);
459
460	return v->chunk;
461
462}
463
464int coalescer::color_reg_constraint(ra_constraint *c) {
465	unsigned k, cnt = c->values.size();
466	vvec & cv = c->values;
467
468	ra_chunk *ch[4];
469	unsigned swz[4] = {0, 1, 2, 3};
470	val_set interf[4];
471	sb_bitset rb[4];
472
473	bool reg_pinned = false;
474	unsigned pin_reg = ~0;
475
476	unsigned chan_mask = 0;
477
478	k = 0;
479	for (vvec::iterator I = cv.begin(), E = cv.end(); I != E; ++I, ++k) {
480		value *v = *I;
481
482		if (!v->chunk)
483			create_chunk(v);
484
485		ch[k] = v->chunk;
486
487		if (v->chunk->is_chan_pinned()) {
488			unsigned chan = 1 << v->chunk->pin.chan();
489
490			if (chan & chan_mask) { // channel already in use
491				ch[k] = detach_value(v);
492				assert(!ch[k]->is_chan_pinned());
493			} else {
494				chan_mask |= chan;
495			}
496		}
497
498		if (v->chunk->is_reg_pinned()) {
499			if (!reg_pinned) {
500				reg_pinned = true;
501				pin_reg = v->chunk->pin.sel();
502			}
503		}
504
505		get_chunk_interferences(ch[k], interf[k]);
506		init_reg_bitset(rb[k], interf[k]);
507	}
508
509	unsigned start_reg, end_reg;
510
511	start_reg = 0;
512	end_reg = sh.num_nontemp_gpr();
513
514	unsigned min_reg = end_reg;
515	unsigned min_swz[4];
516	unsigned i, pass = reg_pinned ? 0 : 1;
517
518	bool done = false;
519
520	while (pass < 2) {
521
522		unsigned rs, re;
523
524		if (pass == 0) {
525			re = pin_reg + 1;
526			rs = pin_reg;
527		} else {
528			re = end_reg;
529			rs = start_reg;
530		}
531
532		min_reg = re;
533
534		// cycle on swizzle combinations
535		do {
536			for (i = 0; i < cnt; ++i) {
537				if (ch[i]->flags & RCF_PIN_CHAN)
538					if (ch[i]->pin.chan() != swz[i])
539						break;
540			}
541			if (i != cnt)
542				continue;
543
544			// looking for minimal reg number such that the constrained chunks
545			// may be colored with the current swizzle combination
546			for (unsigned reg = rs; reg < min_reg; ++reg) {
547				for (i = 0; i < cnt; ++i) {
548					unsigned bit = sel_chan(reg, swz[i]);
549					if (bit < rb[i].size() && rb[i].get(bit))
550						break;
551				}
552				if (i == cnt) {
553					done = true;
554					min_reg = reg;
555					std::copy(swz, swz + 4, min_swz);
556					break;
557				}
558			}
559
560			if (pass == 0 && done)
561				break;
562
563		} while (std::next_permutation(swz, swz + 4));
564
565		if (!done && pass) {
566			sblog << "sb: ra_coalesce - out of registers\n";
567			return -1;
568		}
569
570		if (pass == 0 && done)
571			break;
572
573		++pass;
574	};
575
576	assert(done);
577
578	RA_DUMP(
579	sblog << "min reg = " << min_reg << "   min_swz = "
580			<< min_swz[0] << min_swz[1] << min_swz[2] << min_swz[3] << "\n";
581	);
582
583	for (i = 0; i < cnt; ++i) {
584		sel_chan color(min_reg, min_swz[i]);
585		ra_chunk *cc = ch[i];
586
587		if (cc->is_fixed()) {
588			if (cc->pin != color)
589				cc = detach_value(cv[i]);
590			else
591				continue;
592		}
593
594		color_chunk(cc, color);
595		cc->fix();
596		cc->set_prealloc();
597	}
598
599	return 0;
600}
601
602int coalescer::color_constraints() {
603	int r;
604
605	for (constraint_queue::iterator I = constraints.begin(),
606			E = constraints.end(); I != E; ++I) {
607
608		ra_constraint *c = *I;
609
610		RA_DUMP(
611			sblog << "color_constraints: ";
612			dump_constraint(c);
613		);
614
615		if (c->kind == CK_SAME_REG) {
616			if ((r = color_reg_constraint(c)))
617				return r;
618		} else if (c->kind == CK_PHI)
619			color_phi_constraint(c);
620	}
621	return 0;
622}
623
624} // namespace r600_sb
625