1/*
2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 *      Vadim Girlin
25 */
26
27#include "sb_bc.h"
28#include "sb_shader.h"
29#include "sb_pass.h"
30#include "eg_sq.h" // V_SQ_CF_INDEX_0/1
31
32namespace r600_sb {
33
// Single-character names for channel/swizzle selector values, indexed by the
// selector value. Entries 0-3 are the x/y/z/w components; the remaining
// entries presumably stand for "constant 0", "constant 1", an unused code
// and a masked component (TODO confirm against the hardware encoding).
static const char* const chans = "xyzw01?_";

// Bank swizzle names printed for ALU instructions in the vector slots,
// indexed by the instruction's bank_swizzle value.
static const char* const vec_bs[] = {
		"VEC_012", "VEC_021", "VEC_120", "VEC_102", "VEC_201", "VEC_210"
};

// Bank swizzle names printed for ALU instructions in the trans slot,
// indexed by the instruction's bank_swizzle value.
static const char* const scl_bs[] = {
		"SCL_210", "SCL_122", "SCL_212", "SCL_221"
};
43
44
45bool bc_dump::visit(cf_node& n, bool enter) {
46	if (enter) {
47
48		id = n.bc.id << 1;
49
50		if ((n.bc.op_ptr->flags & CF_ALU) && n.bc.is_alu_extended()) {
51			dump_dw(id, 2);
52			id += 2;
53			sblog << "\n";
54		}
55
56		dump_dw(id, 2);
57		dump(n);
58
59		if (n.bc.op_ptr->flags & CF_CLAUSE) {
60			id = n.bc.addr << 1;
61			new_group = 1;
62		}
63	}
64	return true;
65}
66
67bool bc_dump::visit(alu_node& n, bool enter) {
68	if (enter) {
69		sblog << " ";
70		dump_dw(id, 2);
71
72		if (new_group) {
73			sblog.print_w(++group_index, 5);
74			sblog << " ";
75		} else
76			sblog << "      ";
77
78		dump(n);
79		id += 2;
80
81		new_group = n.bc.last;
82	} else {
83		if (n.bc.last) {
84			alu_group_node *g =
85					static_cast<alu_group_node*>(n.get_alu_group_node());
86			assert(g);
87			for (unsigned k = 0; k < g->literals.size(); ++k) {
88				sblog << " ";
89				dump_dw(id, 1);
90				id += 1;
91				sblog << "\n";
92			}
93
94			id = (id + 1) & ~1u;
95		}
96	}
97
98	return false;
99}
100
101bool bc_dump::visit(fetch_node& n, bool enter) {
102	if (enter) {
103		sblog << " ";
104		dump_dw(id, 3);
105		dump(n);
106		id += 4;
107	}
108	return false;
109}
110
111static void fill_to(sb_ostringstream &s, int pos) {
112	int l = s.str().length();
113	if (l < pos)
114		s << std::string(pos-l, ' ');
115}
116
117void bc_dump::dump(cf_node& n) {
118	sb_ostringstream s;
119	s << n.bc.op_ptr->name;
120
121	if (n.bc.op_ptr->flags & CF_EXP) {
122		static const char *exp_type[] = {"PIXEL", "POS  ", "PARAM"};
123
124		fill_to(s, 18);
125		s << " " << exp_type[n.bc.type] << " ";
126
127		if (n.bc.burst_count) {
128			sb_ostringstream s2;
129			s2 << n.bc.array_base << "-" << n.bc.array_base + n.bc.burst_count;
130			s.print_wl(s2.str(), 5);
131			s << " R" << n.bc.rw_gpr << "-" <<
132					n.bc.rw_gpr + n.bc.burst_count << ".";
133		} else {
134			s.print_wl(n.bc.array_base, 5);
135			s << " R" << n.bc.rw_gpr << ".";
136		}
137
138		for (int k = 0; k < 4; ++k)
139			s << chans[n.bc.sel[k]];
140
141	} else if (n.bc.op_ptr->flags & CF_MEM) {
142		static const char *exp_type[] = {"WRITE", "WRITE_IND", "WRITE_ACK",
143				"WRITE_IND_ACK"};
144		fill_to(s, 18);
145		s << " " << exp_type[n.bc.type] << " ";
146		s.print_wl(n.bc.array_base, 5);
147		s << " R" << n.bc.rw_gpr << ".";
148		for (int k = 0; k < 4; ++k)
149			s << ((n.bc.comp_mask & (1 << k)) ? chans[k] : '_');
150
151		if ((n.bc.op_ptr->flags & CF_RAT) && (n.bc.type & 1)) {
152			s << ", @R" << n.bc.index_gpr << ".xyz";
153		}
154		if ((n.bc.op_ptr->flags & CF_MEM) && (n.bc.type & 1)) {
155			s << ", @R" << n.bc.index_gpr << ".x";
156		}
157
158		s << "  ES:" << n.bc.elem_size;
159
160		if (n.bc.mark)
161			s << " MARK";
162
163	} else {
164
165		if (n.bc.op_ptr->flags & CF_CLAUSE) {
166			s << " " << n.bc.count+1;
167		}
168
169		s << " @" << (n.bc.addr << 1);
170
171		if (n.bc.op_ptr->flags & CF_ALU) {
172			static const char *index_mode[] = {"", " CF_INDEX_0", " CF_INDEX_1"};
173
174			for (int k = 0; k < 4; ++k) {
175				bc_kcache &kc = n.bc.kc[k];
176				if (kc.mode) {
177					s << " KC" << k << "[CB" << kc.bank << ":" <<
178							(kc.addr << 4) << "-" <<
179							(((kc.addr + kc.mode) << 4) - 1) << index_mode[kc.index_mode] << "]";
180				}
181			}
182		}
183
184		if (n.bc.cond)
185			s << " CND:" << n.bc.cond;
186
187		if (n.bc.pop_count)
188			s << " POP:" << n.bc.pop_count;
189
190		if (n.bc.count && (n.bc.op_ptr->flags & CF_EMIT))
191			s << " STREAM" << n.bc.count;
192	}
193
194	if (!n.bc.barrier)
195		s << "  NO_BARRIER";
196
197	if (n.bc.valid_pixel_mode)
198		s << "  VPM";
199
200	if (n.bc.whole_quad_mode)
201		s << "  WQM";
202
203	if (n.bc.end_of_program)
204		s << "  EOP";
205
206	sblog << s.str() << "\n";
207}
208
209
210static void print_sel(sb_ostream &s, int sel, int rel, int index_mode,
211                      int need_brackets) {
212	if (rel && index_mode >= 5 && sel < 128)
213		s << "G";
214	if (rel || need_brackets) {
215		s << "[";
216	}
217	s << sel;
218	if (rel) {
219		if (index_mode == 0 || index_mode == 6)
220			s << "+AR";
221		else if (index_mode == 4)
222			s << "+AL";
223	}
224	if (rel || need_brackets) {
225		s << "]";
226	}
227}
228
229static void print_dst(sb_ostream &s, bc_alu &alu)
230{
231	unsigned sel = alu.dst_gpr;
232	char reg_char = 'R';
233	if (sel >= 128 - 4) { // clause temporary gpr
234		sel -= 128 - 4;
235		reg_char = 'T';
236	}
237
238	if (alu.write_mask || (alu.op_ptr->src_count == 3 && alu.op < LDS_OP2_LDS_ADD)) {
239		s << reg_char;
240		print_sel(s, sel, alu.dst_rel, alu.index_mode, 0);
241	} else {
242		s << "__";
243	}
244	s << ".";
245	s << chans[alu.dst_chan];
246}
247
248static void print_src(sb_ostream &s, bc_alu &alu, unsigned idx)
249{
250	bc_alu_src *src = &alu.src[idx];
251	unsigned sel = src->sel, need_sel = 1, need_chan = 1, need_brackets = 0;
252
253	if (src->neg)
254		s <<"-";
255	if (src->abs)
256		s <<"|";
257
258	if (sel < 128 - 4) {
259		s << "R";
260	} else if (sel < 128) {
261		s << "T";
262		sel -= 128 - 4;
263	} else if (sel < 160) {
264		s << "KC0";
265		need_brackets = 1;
266		sel -= 128;
267	} else if (sel < 192) {
268		s << "KC1";
269		need_brackets = 1;
270		sel -= 160;
271	} else if (sel >= 448) {
272		s << "Param";
273		sel -= 448;
274	} else if (sel >= 288) {
275		s << "KC3";
276		need_brackets = 1;
277		sel -= 288;
278	} else if (sel >= 256) {
279		s << "KC2";
280		need_brackets = 1;
281		sel -= 256;
282	} else {
283		need_sel = 0;
284		need_chan = 0;
285		switch (sel) {
286		case ALU_SRC_LDS_OQ_A:
287			s << "LDS_OQ_A";
288			need_chan = 1;
289			break;
290		case ALU_SRC_LDS_OQ_B:
291			s << "LDS_OQ_B";
292			need_chan = 1;
293			break;
294		case ALU_SRC_LDS_OQ_A_POP:
295			s << "LDS_OQ_A_POP";
296			need_chan = 1;
297			break;
298		case ALU_SRC_LDS_OQ_B_POP:
299			s << "LDS_OQ_B_POP";
300			need_chan = 1;
301			break;
302		case ALU_SRC_LDS_DIRECT_A:
303			s << "LDS_A["; s.print_zw_hex(src->value.u, 8); s << "]";
304			break;
305		case ALU_SRC_LDS_DIRECT_B:
306			s << "LDS_B["; s.print_zw_hex(src->value.u, 8); s << "]";
307			break;
308		case ALU_SRC_PS:
309			s << "PS";
310			break;
311		case ALU_SRC_PV:
312			s << "PV";
313			need_chan = 1;
314			break;
315		case ALU_SRC_LITERAL:
316			s << "[0x";
317			s.print_zw_hex(src->value.u, 8);
318			s << " " << src->value.f << "]";
319			need_chan = 1;
320			break;
321		case ALU_SRC_0_5:
322			s << "0.5";
323			break;
324		case ALU_SRC_M_1_INT:
325			s << "-1";
326			break;
327		case ALU_SRC_1_INT:
328			s << "1";
329			break;
330		case ALU_SRC_1:
331			s << "1.0";
332			break;
333		case ALU_SRC_0:
334			s << "0";
335			break;
336		case ALU_SRC_TIME_LO:
337			s << "TIME_LO";
338			break;
339		case ALU_SRC_TIME_HI:
340			s << "TIME_HI";
341			break;
342		case ALU_SRC_MASK_LO:
343			s << "MASK_LO";
344			break;
345		case ALU_SRC_MASK_HI:
346			s << "MASK_HI";
347			break;
348		case ALU_SRC_HW_WAVE_ID:
349			s << "HW_WAVE_ID";
350			break;
351		case ALU_SRC_SIMD_ID:
352			s << "SIMD_ID";
353			break;
354		case ALU_SRC_SE_ID:
355			s << "SE_ID";
356			break;
357		default:
358			s << "??IMM_" <<  sel;
359			break;
360		}
361	}
362
363	if (need_sel)
364		print_sel(s, sel, src->rel, alu.index_mode, need_brackets);
365
366	if (need_chan) {
367		s << "." << chans[src->chan];
368	}
369
370	if (src->abs)
371		s << "|";
372}
373void bc_dump::dump(alu_node& n) {
374	sb_ostringstream s;
375	static const char *omod_str[] = {"","*2","*4","/2"};
376	static const char *slots = "xyzwt";
377
378	s << (n.bc.update_exec_mask ? "M" : " ");
379	s << (n.bc.update_pred ? "P" : " ");
380	s << " ";
381	s << (n.bc.pred_sel>=2 ? (n.bc.pred_sel == 2 ? "0" : "1") : " ");
382	s << " ";
383
384	s << slots[n.bc.slot] << ": ";
385
386	s << n.bc.op_ptr->name << omod_str[n.bc.omod] << (n.bc.clamp ? "_sat" : "");
387	fill_to(s, 26);
388	s << " ";
389
390	print_dst(s, n.bc);
391	for (int k = 0; k < n.bc.op_ptr->src_count; ++k) {
392		s << (k ? ", " : ",  ");
393		print_src(s, n.bc, k);
394	}
395
396	if (n.bc.bank_swizzle) {
397		fill_to(s, 55);
398		if (n.bc.slot == SLOT_TRANS)
399			s << "  " << scl_bs[n.bc.bank_swizzle];
400		else
401			s << "  " << vec_bs[n.bc.bank_swizzle];
402	}
403
404	if (ctx.is_cayman()) {
405		if (n.bc.op == ALU_OP1_MOVA_INT) {
406			static const char *mova_str[] = { " AR_X", " PC", " CF_IDX0", " CF_IDX1",
407				" Unknown MOVA_INT dest" };
408			s << mova_str[std::min(n.bc.dst_gpr, 4u)];  // CM_V_SQ_MOVA_DST_AR_*
409		}
410	}
411
412	if (n.bc.lds_idx_offset) {
413		s << " IDX_OFFSET:" << n.bc.lds_idx_offset;
414	}
415
416	sblog << s.str() << "\n";
417}
418
419int bc_dump::init() {
420	sb_ostringstream s;
421	s << "===== SHADER #" << sh.id;
422
423	if (sh.optimized)
424		s << " OPT";
425
426	s << " ";
427
428	std::string target = std::string(" ") +
429			sh.get_full_target_name() + " =====";
430
431	while (s.str().length() + target.length() < 80)
432		s << "=";
433
434	s << target;
435
436	sblog << "\n" << s.str() << "\n";
437
438	s.clear();
439
440	if (bc_data) {
441		s << "===== " << ndw << " dw ===== " << sh.ngpr
442				<< " gprs ===== " << sh.nstack << " stack ";
443	}
444
445	while (s.str().length() < 80)
446		s << "=";
447
448	sblog << s.str() << "\n";
449
450	return 0;
451}
452
453int bc_dump::done() {
454	sb_ostringstream s;
455	s << "===== SHADER_END ";
456
457	while (s.str().length() < 80)
458		s << "=";
459
460	sblog << s.str() << "\n\n";
461
462	return 0;
463}
464
465bc_dump::bc_dump(shader& s, bytecode* bc)  :
466	vpass(s), bc_data(), ndw(), id(),
467	new_group(), group_index() {
468
469	if (bc) {
470		bc_data = bc->data();
471		ndw = bc->ndw();
472	}
473}
474
475void bc_dump::dump(fetch_node& n) {
476	sb_ostringstream s;
477	static const char * fetch_type[] = {"VERTEX", "INSTANCE", ""};
478	unsigned gds = n.bc.op_ptr->flags & FF_GDS;
479	bool gds_has_ret = gds && n.bc.op >= FETCH_OP_GDS_ADD_RET &&
480		n.bc.op <= FETCH_OP_GDS_USHORT_READ_RET;
481	bool show_dst = !gds || (gds && gds_has_ret);
482
483	s << n.bc.op_ptr->name;
484	fill_to(s, 20);
485
486	if (show_dst) {
487		s << "R";
488		print_sel(s, n.bc.dst_gpr, n.bc.dst_rel, INDEX_LOOP, 0);
489		s << ".";
490		for (int k = 0; k < 4; ++k)
491			s << chans[n.bc.dst_sel[k]];
492		s << ", ";
493	}
494
495	s << "R";
496	print_sel(s, n.bc.src_gpr, n.bc.src_rel, INDEX_LOOP, 0);
497	s << ".";
498
499	unsigned vtx = n.bc.op_ptr->flags & FF_VTX;
500	unsigned num_src_comp = gds ? 3 : vtx ? ctx.is_cayman() ? 2 : 1 : 4;
501
502	for (unsigned k = 0; k < num_src_comp; ++k)
503		s << chans[n.bc.src_sel[k]];
504
505	if (vtx && n.bc.offset[0]) {
506		s << " + " << n.bc.offset[0] << "b ";
507	}
508
509	if (!gds)
510		s << ",   RID:" << n.bc.resource_id;
511
512	if (gds) {
513		s << " UAV:" << n.bc.uav_id;
514		if (n.bc.uav_index_mode)
515			s << " UAV:SQ_CF_INDEX_" << (n.bc.uav_index_mode - V_SQ_CF_INDEX_0);
516		if (n.bc.bcast_first_req)
517			s << " BFQ";
518		if (n.bc.alloc_consume)
519			s << " AC";
520	} else if (vtx) {
521		s << "  " << fetch_type[n.bc.fetch_type];
522		if (!ctx.is_cayman() && n.bc.mega_fetch_count)
523			s << " MFC:" << n.bc.mega_fetch_count;
524		if (n.bc.fetch_whole_quad)
525			s << " FWQ";
526		if (ctx.is_egcm() && n.bc.resource_index_mode)
527			s << " RIM:SQ_CF_INDEX_" << (n.bc.resource_index_mode - V_SQ_CF_INDEX_0);
528		if (ctx.is_egcm() && n.bc.sampler_index_mode)
529			s << " SID:SQ_CF_INDEX_" << (n.bc.sampler_index_mode - V_SQ_CF_INDEX_0);
530
531		s << " UCF:" << n.bc.use_const_fields
532				<< " FMT(DTA:" << n.bc.data_format
533				<< " NUM:" << n.bc.num_format_all
534				<< " COMP:" << n.bc.format_comp_all
535				<< " MODE:" << n.bc.srf_mode_all << ")";
536	} else {
537		s << ", SID:" << n.bc.sampler_id;
538		if (n.bc.lod_bias)
539			s << " LB:" << n.bc.lod_bias;
540		s << " CT:";
541		for (unsigned k = 0; k < 4; ++k)
542			s << (n.bc.coord_type[k] ? "N" : "U");
543		for (unsigned k = 0; k < 3; ++k)
544			if (n.bc.offset[k])
545				s << " O" << chans[k] << ":" << n.bc.offset[k];
546		if (ctx.is_egcm() && n.bc.resource_index_mode)
547			s << " RIM:SQ_CF_INDEX_" << (n.bc.resource_index_mode - V_SQ_CF_INDEX_0);
548		if (ctx.is_egcm() && n.bc.sampler_index_mode)
549			s << " SID:SQ_CF_INDEX_" << (n.bc.sampler_index_mode - V_SQ_CF_INDEX_0);
550	}
551
552	if (n.bc.op_ptr->flags & FF_MEM) {
553		s << ", ELEM_SIZE:" << n.bc.elem_size;
554		if (n.bc.uncached)
555			s << ", UNCACHED";
556		if (n.bc.indexed)
557			s << ", INDEXED";
558		if (n.bc.burst_count)
559			s << ", BURST_COUNT:" << n.bc.burst_count;
560		s << ", ARRAY_BASE:" << n.bc.array_base;
561		s << ", ARRAY_SIZE:" << n.bc.array_size;
562	}
563
564	sblog << s.str() << "\n";
565}
566
567void bc_dump::dump_dw(unsigned dw_id, unsigned count) {
568	if (!bc_data)
569		return;
570
571	assert(dw_id + count <= ndw);
572
573	sblog.print_zw(dw_id, 4);
574	sblog << "  ";
575	while (count--) {
576		sblog.print_zw_hex(bc_data[dw_id++], 8);
577		sblog << " ";
578	}
579}
580
581} // namespace r600_sb
582