1/*
2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 *      Vadim Girlin
25 */
26
/* Set to 1 to enable bc_finalizer debug output emitted via FBC_DUMP(). */
#define FBC_DEBUG 0

#if FBC_DEBUG
/* Executes the statement list 'q' only when FBC_DEBUG is enabled. */
#define FBC_DUMP(q) do { q } while (0)
#else
#define FBC_DUMP(q)
#endif
34
35#include "sb_bc.h"
36#include "sb_shader.h"
37#include "sb_pass.h"
38
39namespace r600_sb {
40
41void bc_finalizer::insert_rv6xx_load_ar_workaround(alu_group_node *b4) {
42
43	alu_group_node *g = sh.create_alu_group();
44	alu_node *a = sh.create_alu();
45
46	a->bc.set_op(ALU_OP0_NOP);
47	a->bc.last = 1;
48
49	g->push_back(a);
50	b4->insert_before(g);
51}
52
// Entry point of the finalizer pass: lowers the IR tree to final
// bytecode form, resolves loop/if regions (innermost-first), runs the
// CF peephole pass, and records resource usage (ngpr/nstack).
int bc_finalizer::run() {

	run_on(sh.root);

	// Regions are processed in reverse order so that inner regions are
	// finalized before the regions that enclose them.
	regions_vec &rv = sh.get_regions();
	for (regions_vec::reverse_iterator I = rv.rbegin(), E = rv.rend(); I != E;
			++I) {
		region_node *r = *I;

		assert(r);

		bool loop = r->is_loop();

		if (loop)
			finalize_loop(r);
		else
			finalize_if(r);

		r->expand();
	}

	cf_peephole();

	// workaround for some problems on r6xx/7xx
	// add ALU NOP to each vertex shader
	if (!ctx.is_egcm() && (sh.target == TARGET_VS || sh.target == TARGET_ES)) {
		cf_node *c = sh.create_clause(NST_ALU_CLAUSE);

		alu_group_node *g = sh.create_alu_group();

		alu_node *a = sh.create_alu();
		a->bc.set_op(ALU_OP0_NOP);
		a->bc.last = 1;

		g->push_back(a);
		c->push_back(g);

		sh.root->push_back(c);

		c = sh.create_cf(CF_OP_NOP);
		sh.root->push_back(c);

		last_cf = c;
	}

	// Ensure the final CF instruction is not an ALU clause before
	// setting end_of_program on it below (pre-cayman only).
	if (!ctx.is_cayman() && last_cf->bc.op_ptr->flags & CF_ALU) {
		last_cf = sh.create_cf(CF_OP_NOP);
		sh.root->push_back(last_cf);
	}

	// Cayman terminates the program with an explicit CF_END instruction
	// instead of the end_of_program bit.
	if (ctx.is_cayman()) {
		if (!last_cf) {
			cf_node *c = sh.create_cf(CF_OP_CF_END);
			sh.root->push_back(c);
		} else
			last_cf->insert_after(sh.create_cf(CF_OP_CF_END));
	} else
		last_cf->bc.end_of_program = 1;

	// The last export of each export type must use the *_DONE opcode.
	for (unsigned t = EXP_PIXEL; t < EXP_TYPE_COUNT; ++t) {
		cf_node *le = last_export[t];
		if (le)
			le->bc.set_op(CF_OP_EXPORT_DONE);
	}

	// Publish the computed register/stack usage back to the shader.
	sh.ngpr = ngpr;
	sh.nstack = nstack;
	return 0;
}
122
// Lower a loop region: wrap its body in LOOP_START/LOOP_END and turn
// the collected depart/repeat nodes into LOOP_BREAK/LOOP_CONTINUE
// instructions targeting the loop end.
void bc_finalizer::finalize_loop(region_node* r) {

	update_nstack(r);

	cf_node *loop_start = sh.create_cf(CF_OP_LOOP_START_DX10);
	cf_node *loop_end = sh.create_cf(CF_OP_LOOP_END);

	// Update last_cf, but don't overwrite it if it's outside the current loop nest since
	// it may point to a cf that is later in program order.
	// The single parent level check is sufficient since finalize_loop() is processed in
	// reverse order from innermost to outermost loop nest level.
	if (!last_cf || last_cf->get_parent_region() == r) {
		last_cf = loop_end;
	}

	// LOOP_START and LOOP_END reference each other's successor as their
	// jump target.
	loop_start->jump_after(loop_end);
	loop_end->jump_after(loop_start);

	// Every depart from this loop becomes a LOOP_BREAK to loop_end.
	for (depart_vec::iterator I = r->departs.begin(), E = r->departs.end();
			I != E; ++I) {
		depart_node *dep = *I;
		cf_node *loop_break = sh.create_cf(CF_OP_LOOP_BREAK);
		loop_break->jump(loop_end);
		dep->push_back(loop_break);
		dep->expand();
	}

	// FIXME produces unnecessary LOOP_CONTINUE
	// A repeat that is the first child of the region falls through to
	// loop_end anyway, so no LOOP_CONTINUE is emitted for it.
	for (repeat_vec::iterator I = r->repeats.begin(), E = r->repeats.end();
			I != E; ++I) {
		repeat_node *rep = *I;
		if (!(rep->parent == r && rep->prev == NULL)) {
			cf_node *loop_cont = sh.create_cf(CF_OP_LOOP_CONTINUE);
			loop_cont->jump(loop_end);
			rep->push_back(loop_cont);
		}
		rep->expand();
	}

	r->push_front(loop_start);
	r->push_back(loop_end);
}
165
// Lower an if region into JUMP / (optional) ELSE / POP control flow.
void bc_finalizer::finalize_if(region_node* r) {

	update_nstack(r);

	// expecting the following control flow structure here:
	//   - region
	//     {
	//       - depart/repeat 1 (it may be depart/repeat for some outer region)
	//         {
	//           - if
	//             {
	//               - depart/repeat 2 (possibly for outer region)
	//                 {
	//                   - some optional code
	//                 }
	//             }
	//           - optional <else> code> ...
	//         }
	//     }

	container_node *repdep1 = static_cast<container_node*>(r->first);
	assert(repdep1->is_depart() || repdep1->is_repeat());

	if_node *n_if = static_cast<if_node*>(repdep1->first);

	if (n_if) {


		assert(n_if->is_if());

		container_node *repdep2 = static_cast<container_node*>(n_if->first);
		assert(repdep2->is_depart() || repdep2->is_repeat());

		cf_node *if_jump = sh.create_cf(CF_OP_JUMP);
		cf_node *if_pop = sh.create_cf(CF_OP_POP);

		// Same reasoning as in finalize_loop(): don't overwrite last_cf
		// if it already points into an outer region.
		if (!last_cf || last_cf->get_parent_region() == r) {
			last_cf = if_pop;
		}
		if_pop->bc.pop_count = 1;
		// POP's jump target is the instruction right after itself.
		if_pop->jump_after(if_pop);

		r->push_front(if_jump);
		r->push_back(if_pop);

		/* the depart/repeat 1 is actually part of the "else" code.
		 * if it's a depart for an outer loop region it will want to
		 * insert a LOOP_BREAK or LOOP_CONTINUE in here, so we need
		 * to emit the else clause.
		 */
		bool has_else = n_if->next;

		if (repdep1->is_depart()) {
			depart_node *dep1 = static_cast<depart_node*>(repdep1);
			if (dep1->target != r && dep1->target->is_loop())
				has_else = true;
		}

		if (repdep1->is_repeat()) {
			repeat_node *rep1 = static_cast<repeat_node*>(repdep1);
			if (rep1->target != r && rep1->target->is_loop())
				has_else = true;
		}

		if (has_else) {
			cf_node *nelse = sh.create_cf(CF_OP_ELSE);
			n_if->insert_after(nelse);
			if_jump->jump(nelse);
			nelse->jump_after(if_pop);
			nelse->bc.pop_count = 1;

		} else {
			// No else branch: the JUMP itself skips past the POP and
			// performs the pop.
			if_jump->jump_after(if_pop);
			if_jump->bc.pop_count = 1;
		}

		n_if->expand();
	}

	for (depart_vec::iterator I = r->departs.begin(), E = r->departs.end();
			I != E; ++I) {
		(*I)->expand();
	}
	r->departs.clear();
	assert(r->repeats.empty());
}
252
// Recursively walk container 'c', finalizing every ALU group, fetch
// instruction and CF instruction it contains.  Also flags
// ALU_PUSH_BEFORE clauses that need the evergreen (8xx) or cayman (9xx)
// stack workaround so cf_peephole() can split them later.
void bc_finalizer::run_on(container_node* c) {
	node *prev_node = NULL;
	for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
		node *n = *I;

		if (n->is_alu_group()) {
			finalize_alu_group(static_cast<alu_group_node*>(n), prev_node);
		} else {
			if (n->is_alu_clause()) {
				cf_node *c = static_cast<cf_node*>(n);

				if (c->bc.op == CF_OP_ALU_PUSH_BEFORE && ctx.is_egcm()) {
					if (ctx.stack_workaround_8xx) {
						// Workaround needed when the push sits at (or one
						// below) a stack-entry boundary.
						region_node *r = c->get_parent_region();
						if (r) {
							unsigned ifs, loops;
							unsigned elems = get_stack_depth(r, loops, ifs);
							unsigned dmod1 = elems % ctx.stack_entry_size;
							unsigned dmod2 = (elems + 1) % ctx.stack_entry_size;

							if (elems && (!dmod1 || !dmod2))
								c->flags |= NF_ALU_STACK_WORKAROUND;
						}
					} else if (ctx.stack_workaround_9xx) {
						// Workaround needed at loop nesting depth >= 2.
						region_node *r = c->get_parent_region();
						if (r) {
							unsigned ifs, loops;
							get_stack_depth(r, loops, ifs);
							if (loops >= 2)
								c->flags |= NF_ALU_STACK_WORKAROUND;
						}
					}
				}
				last_cf = c;
			} else if (n->is_fetch_inst()) {
				finalize_fetch(static_cast<fetch_node*>(n));
			} else if (n->is_cf_inst()) {
				finalize_cf(static_cast<cf_node*>(n));
			}
			if (n->is_container())
				run_on(static_cast<container_node*>(n));
		}
		prev_node = n;
	}
}
298
// Finalize one ALU group: assign the final destination GPR/channel for
// each slot, set write mask and predication fields, finalize all
// sources, and mark the group's last instruction.  'prev_node' is the
// node preceding the group; if it's also an ALU group it is used to
// detect the rv6xx relative-addressing hazard.
void bc_finalizer::finalize_alu_group(alu_group_node* g, node *prev_node) {

	alu_node *last = NULL;
	alu_group_node *prev_g = NULL;
	bool add_nop = false;
	if (prev_node && prev_node->is_alu_group()) {
		prev_g = static_cast<alu_group_node*>(prev_node);
	}

	for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) {
		alu_node *n = static_cast<alu_node*>(*I);
		unsigned slot = n->bc.slot;
		value *d = n->dst.empty() ? NULL : n->dst[0];

		// Special-reg destinations (AR via MOVA, geometry emit, LDS
		// queues/accesses, scratch) don't occupy a GPR destination.
		if (d && d->is_special_reg()) {
			assert((n->bc.op_ptr->flags & AF_MOVA) || d->is_geometry_emit() || d->is_lds_oq() || d->is_lds_access() || d->is_scratch());
			d = NULL;
		}

		sel_chan fdst = d ? d->get_final_gpr() : sel_chan(0, 0);

		if (d) {
			// Vector slots write their own channel; the trans slot may
			// write any channel.
			assert(fdst.chan() == slot || slot == SLOT_TRANS);
		}

		if (!(n->bc.op_ptr->flags & AF_MOVA && ctx.is_cayman()))
			n->bc.dst_gpr = fdst.sel();
		n->bc.dst_chan = d ? fdst.chan() : slot < SLOT_TRANS ? slot : 0;


		if (d && d->is_rel() && d->rel && !d->rel->is_const()) {
			// Relative (indexed) destination: any element of the array
			// may be written, so count the whole array range.
			n->bc.dst_rel = 1;
			update_ngpr(d->array->gpr.sel() + d->array->array_size -1);
		} else {
			n->bc.dst_rel = 0;
		}

		n->bc.write_mask = d != NULL;
		n->bc.last = 0;

		if (n->bc.op_ptr->flags & AF_PRED) {
			n->bc.update_pred = (n->dst[1] != NULL);
			n->bc.update_exec_mask = (n->dst[2] != NULL);
		}

		// FIXME handle predication here
		n->bc.pred_sel = PRED_SEL_OFF;

		update_ngpr(n->bc.dst_gpr);

		add_nop |= finalize_alu_src(g, n, prev_g);

		last = n;
	}

	// Hazard detected against the previous group: insert the NOP group
	// on chips that need the workaround.
	if (add_nop) {
		if (sh.get_ctx().r6xx_gpr_index_workaround) {
			insert_rv6xx_load_ar_workaround(g);
		}
	}
	last->bc.last = 1;
}
361
// Encode the source operands of ALU instruction 'a' into its bytecode
// fields, translating IR values into GPRs, inline constants, literal
// slots, kcache selects or special selects.  Returns true if a NOP
// group must be inserted before 'g' (rv6xx hazard: the previous group
// 'prev' writes a GPR that this group reads via relative addressing,
// or wrote via a relative destination that may alias a source here).
bool bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a, alu_group_node *prev) {
	vvec &sv = a->src;
	bool add_nop = false;
	FBC_DUMP(
		sblog << "finalize_alu_src: ";
		dump::dump_op(a);
		sblog << "\n";
	);

	unsigned si = 0;

	for (vvec::iterator I = sv.begin(), E = sv.end(); I != E; ++I, ++si) {
		value *v = *I;
		assert(v);

		bc_alu_src &src = a->bc.src[si];
		sel_chan sc;
		src.rel = 0;

		sel_chan gpr;

		switch (v->kind) {
		case VLK_REL_REG:
			sc = v->get_final_gpr();
			src.sel = sc.sel();
			src.chan = sc.chan();
			if (!v->rel->is_const()) {
				src.rel = 1;
				// Any element of the array may be read via the index.
				update_ngpr(v->array->gpr.sel() + v->array->array_size -1);
				// Hazard check: previous group wrote the GPR we read
				// with relative addressing.
				if (prev && !add_nop) {
					for (node_iterator pI = prev->begin(), pE = prev->end(); pI != pE; ++pI) {
						alu_node *pn = static_cast<alu_node*>(*pI);
						if (pn->bc.dst_gpr == src.sel) {
							add_nop = true;
							break;
						}
					}
				}
			} else
				src.rel = 0;

			break;
		case VLK_REG:
			gpr = v->get_final_gpr();
			src.sel = gpr.sel();
			src.chan = gpr.chan();
			update_ngpr(src.sel);
			break;
		case VLK_TEMP:
			src.sel = v->gpr.sel();
			src.chan = v->gpr.chan();
			update_ngpr(src.sel);
			break;
		case VLK_UNDEF:
		case VLK_CONST: {
			// Prefer hardware inline constants; otherwise allocate a
			// literal slot shared within the group.
			literal lv = v->literal_value;
			src.chan = 0;

			if (lv == literal(0))
				src.sel = ALU_SRC_0;
			else if (lv == literal(0.5f))
				src.sel = ALU_SRC_0_5;
			else if (lv == literal(1.0f))
				src.sel = ALU_SRC_1;
			else if (lv == literal(1))
				src.sel = ALU_SRC_1_INT;
			else if (lv == literal(-1))
				src.sel = ALU_SRC_M_1_INT;
			else {
				src.sel = ALU_SRC_LITERAL;
				src.chan = g->literal_chan(lv);
				src.value = lv;
			}
			break;
		}
		case VLK_KCACHE: {
			cf_node *clause = static_cast<cf_node*>(g->parent);
			assert(clause->is_alu_clause());
			sel_chan k = translate_kcache(clause, v);

			assert(k && "kcache translation failed");

			src.sel = k.sel();
			src.chan = k.chan();
			break;
		}
		case VLK_SPECIAL_REG:
			if (v->select.sel() == SV_LDS_OQA) {
				src.sel = ALU_SRC_LDS_OQ_A_POP;
				src.chan = 0;
			} else if (v->select.sel() == SV_LDS_OQB) {
				src.sel = ALU_SRC_LDS_OQ_B_POP;
				src.chan = 0;
			} else {
				src.sel = ALU_SRC_0;
				src.chan = 0;
			}
			break;
		case VLK_PARAM:
		case VLK_SPECIAL_CONST:
			src.sel = v->select.sel();
			src.chan = v->select.chan();
			break;
		default:
			assert(!"unknown value kind");
			break;
		}
		// Hazard check: previous group used a relative destination that
		// may alias the GPR this source reads.
		if (prev && !add_nop) {
			for (node_iterator pI = prev->begin(), pE = prev->end(); pI != pE; ++pI) {
				alu_node *pn = static_cast<alu_node*>(*pI);
				if (pn->bc.dst_rel) {
					if (pn->bc.dst_gpr == src.sel) {
						add_nop = true;
						break;
					}
				}
			}
		}
	}

	// Zero out any unused source slots.
	while (si < 3) {
		a->bc.src[si++].sel = 0;
	}
	return add_nop;
}
487
488void bc_finalizer::copy_fetch_src(fetch_node &dst, fetch_node &src, unsigned arg_start)
489{
490	int reg = -1;
491
492	for (unsigned chan = 0; chan < 4; ++chan) {
493
494		dst.bc.dst_sel[chan] = SEL_MASK;
495
496		unsigned sel = SEL_MASK;
497
498		value *v = src.src[arg_start + chan];
499
500		if (!v || v->is_undef()) {
501			sel = SEL_MASK;
502		} else if (v->is_const()) {
503			literal l = v->literal_value;
504			if (l == literal(0))
505				sel = SEL_0;
506			else if (l == literal(1.0f))
507				sel = SEL_1;
508			else {
509				sblog << "invalid fetch constant operand  " << chan << " ";
510				dump::dump_op(&src);
511				sblog << "\n";
512				abort();
513			}
514
515		} else if (v->is_any_gpr()) {
516			unsigned vreg = v->gpr.sel();
517			unsigned vchan = v->gpr.chan();
518
519			if (reg == -1)
520				reg = vreg;
521			else if ((unsigned)reg != vreg) {
522				sblog << "invalid fetch source operand  " << chan << " ";
523				dump::dump_op(&src);
524				sblog << "\n";
525				abort();
526			}
527
528			sel = vchan;
529
530		} else {
531			sblog << "invalid fetch source operand  " << chan << " ";
532			dump::dump_op(&src);
533			sblog << "\n";
534			abort();
535		}
536
537		dst.bc.src_sel[chan] = sel;
538	}
539
540	if (reg >= 0)
541		update_ngpr(reg);
542
543	dst.bc.src_gpr = reg >= 0 ? reg : 0;
544}
545
546void bc_finalizer::emit_set_grad(fetch_node* f) {
547
548	assert(f->src.size() == 12 || f->src.size() == 13);
549	unsigned ops[2] = { FETCH_OP_SET_GRADIENTS_V, FETCH_OP_SET_GRADIENTS_H };
550
551	unsigned arg_start = 0;
552
553	for (unsigned op = 0; op < 2; ++op) {
554		fetch_node *n = sh.create_fetch();
555		n->bc.set_op(ops[op]);
556
557		arg_start += 4;
558
559		copy_fetch_src(*n, *f, arg_start);
560
561		f->insert_before(n);
562	}
563
564}
565
566void bc_finalizer::emit_set_texture_offsets(fetch_node &f) {
567	assert(f.src.size() == 8);
568
569	fetch_node *n = sh.create_fetch();
570
571	n->bc.set_op(FETCH_OP_SET_TEXTURE_OFFSETS);
572
573	copy_fetch_src(*n, f, 4);
574
575	f.insert_before(n);
576}
577
// Finalize a fetch (texture/vertex/GDS) instruction: emit auxiliary
// SET_GRADIENTS / SET_TEXTURE_OFFSETS fetches where required, then
// translate source and destination operands into GPR + swizzle
// bytecode fields.
void bc_finalizer::finalize_fetch(fetch_node* f) {

	int reg = -1;

	// src

	unsigned src_count = 4;

	unsigned flags = f->bc.op_ptr->flags;

	if (flags & FF_VTX) {
		src_count = 1;
	} else if (flags & FF_GDS) {
		src_count = 2;
	} else if (flags & FF_USEGRAD) {
		emit_set_grad(f);
	} else if (flags & FF_USE_TEXTURE_OFFSETS) {
		emit_set_texture_offsets(*f);
	}

	for (unsigned chan = 0; chan < src_count; ++chan) {

		unsigned sel = f->bc.src_sel[chan];

		// Channels already holding a non-GPR select are left untouched.
		if (sel > SEL_W)
			continue;

		value *v = f->src[chan];

		if (v->is_undef()) {
			sel = SEL_MASK;
		} else if (v->is_const()) {
			// Only 0 and 1.0f can be encoded as fetch constants.
			literal l = v->literal_value;
			if (l == literal(0))
				sel = SEL_0;
			else if (l == literal(1.0f))
				sel = SEL_1;
			else {
				sblog << "invalid fetch constant operand  " << chan << " ";
				dump::dump_op(f);
				sblog << "\n";
				abort();
			}

		} else if (v->is_any_gpr()) {
			// All GPR sources must come from the same register.
			unsigned vreg = v->gpr.sel();
			unsigned vchan = v->gpr.chan();

			if (reg == -1)
				reg = vreg;
			else if ((unsigned)reg != vreg) {
				sblog << "invalid fetch source operand  " << chan << " ";
				dump::dump_op(f);
				sblog << "\n";
				abort();
			}

			sel = vchan;

		} else {
			sblog << "invalid fetch source operand  " << chan << " ";
			dump::dump_op(f);
			sblog << "\n";
			abort();
		}

		f->bc.src_sel[chan] = sel;
	}

	if (reg >= 0)
		update_ngpr(reg);

	f->bc.src_gpr = reg >= 0 ? reg : 0;

	// dst

	reg = -1;

	unsigned dst_swz[4] = {SEL_MASK, SEL_MASK, SEL_MASK, SEL_MASK};

	for (unsigned chan = 0; chan < 4; ++chan) {

		unsigned sel = f->bc.dst_sel[chan];

		if (sel == SEL_MASK)
			continue;

		value *v = f->dst[chan];
		if (!v)
			continue;

		if (v->is_any_gpr()) {
			// All destination channels must target one register; the
			// swizzle is rebuilt indexed by the destination channel.
			unsigned vreg = v->gpr.sel();
			unsigned vchan = v->gpr.chan();

			if (reg == -1)
				reg = vreg;
			else if ((unsigned)reg != vreg) {
				sblog << "invalid fetch dst operand  " << chan << " ";
				dump::dump_op(f);
				sblog << "\n";
				abort();
			}

			dst_swz[vchan] = sel;

		} else {
			sblog << "invalid fetch dst operand  " << chan << " ";
			dump::dump_op(f);
			sblog << "\n";
			abort();
		}

	}

	for (unsigned i = 0; i < 4; ++i)
		f->bc.dst_sel[i] = dst_swz[i];

	// GDS instructions may have no GPR destination at all.
	if ((flags & FF_GDS) && reg == -1) {
		f->bc.dst_sel[0] = SEL_MASK;
		f->bc.dst_gpr = 0;
		return ;
	}
	assert(reg >= 0);

	if (reg >= 0)
		update_ngpr(reg);

	f->bc.dst_gpr = reg >= 0 ? reg : 0;
}
708
// Finalize a CF instruction: translate export operands into the
// rw_gpr/swizzle fields, memory-op operands into rw_gpr/comp_mask (and
// index_gpr for indexed writes), and account stack usage for calls.
void bc_finalizer::finalize_cf(cf_node* c) {

	unsigned flags = c->bc.op_ptr->flags;

	c->bc.end_of_program = 0;
	last_cf = c;

	if (flags & CF_EXP) {
		// Use plain EXPORT for now; run() later rewrites the last export
		// of each type to EXPORT_DONE.
		c->bc.set_op(CF_OP_EXPORT);
		last_export[c->bc.type] = c;

		int reg = -1;

		for (unsigned chan = 0; chan < 4; ++chan) {

			unsigned sel = c->bc.sel[chan];

			// Channels already holding a non-GPR select are untouched.
			if (sel > SEL_W)
				continue;

			value *v = c->src[chan];

			if (v->is_undef()) {
				sel = SEL_MASK;
			} else if (v->is_const()) {
				// Only 0 and 1.0f can be encoded in the export swizzle.
				literal l = v->literal_value;
				if (l == literal(0))
					sel = SEL_0;
				else if (l == literal(1.0f))
					sel = SEL_1;
				else {
					sblog << "invalid export constant operand  " << chan << " ";
					dump::dump_op(c);
					sblog << "\n";
					abort();
				}

			} else if (v->is_any_gpr()) {
				// All source channels must come from the same register.
				unsigned vreg = v->gpr.sel();
				unsigned vchan = v->gpr.chan();

				if (reg == -1)
					reg = vreg;
				else if ((unsigned)reg != vreg) {
					sblog << "invalid export source operand  " << chan << " ";
					dump::dump_op(c);
					sblog << "\n";
					abort();
				}

				sel = vchan;

			} else {
				sblog << "invalid export source operand  " << chan << " ";
				dump::dump_op(c);
				sblog << "\n";
				abort();
			}

			c->bc.sel[chan] = sel;
		}

		if (reg >= 0)
			update_ngpr(reg);

		c->bc.rw_gpr = reg >= 0 ? reg : 0;

	} else if (flags & CF_MEM) {

		int reg = -1;
		unsigned mask = 0;


		for (unsigned chan = 0; chan < 4; ++chan) {
			value *v;
			// R600 scratch ops of these types carry their operands in
			// dst rather than src.
			if (ctx.hw_class == HW_CLASS_R600 && c->bc.op == CF_OP_MEM_SCRATCH &&
			    (c->bc.type == 2 || c->bc.type == 3))
				v = c->dst[chan];
			else
				v = c->src[chan];

			if (!v || v->is_undef())
				continue;

			// Memory ops require unswizzled operands in one register.
			if (!v->is_any_gpr() || v->gpr.chan() != chan) {
				sblog << "invalid source operand  " << chan << " ";
				dump::dump_op(c);
				sblog << "\n";
				abort();
			}
			unsigned vreg = v->gpr.sel();
			if (reg == -1)
				reg = vreg;
			else if ((unsigned)reg != vreg) {
				sblog << "invalid source operand  " << chan << " ";
				dump::dump_op(c);
				sblog << "\n";
				abort();
			}

			mask |= (1 << chan);
		}

		if (reg >= 0)
			update_ngpr(reg);

		c->bc.rw_gpr = reg >= 0 ? reg : 0;
		c->bc.comp_mask = mask;

		// Indexed writes carry the index operands in src[4..7];
		// translate them into index_gpr.
		if (((flags & CF_RAT) || (!(flags & CF_STRM))) && (c->bc.type & 1)) {

			reg = -1;

			for (unsigned chan = 0; chan < 4; ++chan) {
				value *v = c->src[4 + chan];
				if (!v || v->is_undef())
					continue;

				if (!v->is_any_gpr() || v->gpr.chan() != chan) {
					sblog << "invalid source operand  " << chan << " ";
					dump::dump_op(c);
					sblog << "\n";
					abort();
				}
				unsigned vreg = v->gpr.sel();
				if (reg == -1)
					reg = vreg;
				else if ((unsigned)reg != vreg) {
					sblog << "invalid source operand  " << chan << " ";
					dump::dump_op(c);
					sblog << "\n";
					abort();
				}
			}

			assert(reg >= 0);

			if (reg >= 0)
				update_ngpr(reg);

			c->bc.index_gpr = reg >= 0 ? reg : 0;
		}
	} else if (flags & CF_CALL) {
		update_nstack(c->get_parent_region(), ctx.wavefront_size == 16 ? 2 : 1);
	}
}
855
// Translate a kcache constant reference into its final sel/chan pair
// using the kcache bank/line locks already assigned on the ALU clause.
// The four kcache sets map to the constant selects starting at 128,
// 160, 256 and 288 respectively.
sel_chan bc_finalizer::translate_kcache(cf_node* alu, value* v) {
	unsigned sel = v->select.kcache_sel();
	unsigned bank = v->select.kcache_bank();
	unsigned chan = v->select.chan();
	static const unsigned kc_base[] = {128, 160, 256, 288};

	sel &= 4095;

	// Each kcache line covers 16 constants.
	unsigned line = sel >> 4;

	for (unsigned k = 0; k < 4; ++k) {
		bc_kcache &kc = alu->bc.kc[k];

		// Locks are allocated contiguously; the first unused slot ends
		// the search.
		if (kc.mode == KC_LOCK_NONE)
			break;

		// A KC_LOCK_2 lock covers two consecutive lines.
		if (kc.bank == bank && (kc.addr == line ||
				(kc.mode == KC_LOCK_2 && kc.addr + 1 == line))) {

			sel = kc_base[k] + (sel - (kc.addr << 4));

			return sel_chan(sel, chan);
		}
	}

	assert(!"kcache translation error");
	return 0;
}
884
885void bc_finalizer::update_ngpr(unsigned gpr) {
886	if (gpr < MAX_GPR - ctx.alu_temp_gprs && gpr >= ngpr)
887		ngpr = gpr + 1;
888}
889
// Compute the number of stack elements needed at the nesting level of
// node 'n', assuming 'add' extra elements are already pushed.  Returns
// the total including chip-specific reserved elements, and reports the
// number of enclosing loop and if regions through 'loops'/'ifs'.
unsigned bc_finalizer::get_stack_depth(node *n, unsigned &loops,
                                           unsigned &ifs, unsigned add) {
	unsigned stack_elements = add;
	bool has_non_wqm_push = (add != 0);
	region_node *r = n->is_region() ?
			static_cast<region_node*>(n) : n->get_parent_region();

	loops = 0;
	ifs = 0;

	// Walk outward through all enclosing regions, counting loops and
	// ifs separately (loops use ctx.stack_entry_size elements each).
	while (r) {
		if (r->is_loop()) {
			++loops;
		} else {
			++ifs;
			has_non_wqm_push = true;
		}
		r = r->get_parent_region();
	}
	stack_elements += (loops * ctx.stack_entry_size) + ifs;

	// reserve additional elements in some cases
	switch (ctx.hw_class) {
	case HW_CLASS_R600:
	case HW_CLASS_R700:
		// If any non-WQM push is invoked, 2 elements should be reserved.
		if (has_non_wqm_push)
			stack_elements += 2;
		break;
	case HW_CLASS_CAYMAN:
		// If any stack operation is invoked, 2 elements should be reserved
		if (stack_elements)
			stack_elements += 2;
		break;
	case HW_CLASS_EVERGREEN:
		// According to the docs we need to reserve 1 element for each of the
		// following cases:
		//   1) non-WQM push is used with WQM/LOOP frames on stack
		//   2) ALU_ELSE_AFTER is used at the point of max stack usage
		// NOTE:
		// It was found that the conditions above are not sufficient, there are
		// other cases where we also need to reserve stack space, that's why
		// we always reserve 1 stack element if we have non-WQM push on stack.
		// Condition 2 is ignored for now because we don't use this instruction.
		if (has_non_wqm_push)
			++stack_elements;
		break;
	case HW_CLASS_UNKNOWN:
		assert(0);
	}
	return stack_elements;
}
942
943void bc_finalizer::update_nstack(region_node* r, unsigned add) {
944	unsigned loops = 0;
945	unsigned ifs = 0;
946	unsigned elems = r ? get_stack_depth(r, loops, ifs, add) : add;
947
948	// XXX all chips expect this value to be computed using 4 as entry size,
949	// not the real entry size
950	unsigned stack_entries = (elems + 3) >> 2;
951
952	if (nstack < stack_entries)
953		nstack = stack_entries;
954}
955
// Peephole pass over the top-level CF list: splits flagged
// ALU_PUSH_BEFORE clauses into PUSH + ALU (8xx/9xx stack workaround),
// resolves jump_after targets into concrete successors, folds
// ALU followed by POP into ALU_POP_AFTER, and removes trivial JUMPs.
void bc_finalizer::cf_peephole() {
	if (ctx.stack_workaround_8xx || ctx.stack_workaround_9xx) {
		// N is taken before any modification so node removal/insertion
		// doesn't invalidate the iteration.
		for (node_iterator N, I = sh.root->begin(), E = sh.root->end(); I != E;
				I = N) {
			N = I; ++N;
			cf_node *c = static_cast<cf_node*>(*I);

			if (c->bc.op == CF_OP_ALU_PUSH_BEFORE &&
					(c->flags & NF_ALU_STACK_WORKAROUND)) {
				cf_node *push = sh.create_cf(CF_OP_PUSH);
				c->insert_before(push);
				push->jump(c);
				c->bc.set_op(CF_OP_ALU);
			}
		}
	}

	for (node_iterator N, I = sh.root->begin(), E = sh.root->end(); I != E;
			I = N) {
		N = I; ++N;

		cf_node *c = static_cast<cf_node*>(*I);

		// Resolve "jump after target" into the target's real successor,
		// appending a NOP when the target is the last instruction.
		if (c->jump_after_target) {
			if (c->jump_target->next == NULL) {
				c->jump_target->insert_after(sh.create_cf(CF_OP_NOP));
				if (last_cf == c->jump_target)
					last_cf = static_cast<cf_node*>(c->jump_target->next);
			}
			c->jump_target = static_cast<cf_node*>(c->jump_target->next);
			c->jump_after_target = false;
		}

		if (c->is_cf_op(CF_OP_POP)) {
			// A POP directly following a plain ALU clause can be folded
			// into the clause as ALU_POP_AFTER.
			node *p = c->prev;
			if (p->is_alu_clause()) {
				cf_node *a = static_cast<cf_node*>(p);

				if (a->bc.op == CF_OP_ALU) {
					a->bc.set_op(CF_OP_ALU_POP_AFTER);
					c->remove();
				}
			}
		} else if (c->is_cf_op(CF_OP_JUMP) && c->jump_target == c->next) {
			// if JUMP is immediately followed by its jump target,
			// then JUMP is useless and we can eliminate it
			c->remove();
		}
	}
}
1006
1007} // namespace r600_sb
1008