1/*
2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 *      Vadim Girlin
25 */
26
27#ifndef SB_BC_H_
28#define SB_BC_H_
29
#include <stdint.h>
#include "r600_isa.h"

#include <algorithm>
#include <cstdio>
#include <stack>
#include <string>
#include <vector>
37
// Forward declarations of types declared outside namespace r600_sb; this
// header only uses them through pointers (see bc_parser).
struct r600_bytecode;
struct r600_shader;
40
41namespace r600_sb {
42
// Forward declarations. hw_encoding_format is defined later in this header;
// the remaining classes are not defined here and are only referred to through
// pointers or references.
class hw_encoding_format;
class node;
class alu_node;
class cf_node;
class fetch_node;
class alu_group_node;
class region_node;
class shader;
class value;
52
// Minimal abstract text output stream.
//
// Derived classes implement write(); every formatting helper below converts
// its argument to a NUL-terminated string and forwards it to write().
//
// NOTE(review): the class has virtual functions but no virtual destructor,
// so derived streams must not be deleted through an sb_ostream pointer.
class sb_ostream {
public:
	sb_ostream() {}

	// Emits the NUL-terminated string s to the underlying sink.
	virtual void write(const char *s) = 0;

	sb_ostream& operator <<(const char *s) {
		write(s);
		return *this;
	}

	sb_ostream& operator <<(const std::string& s) {
		return *this << s.c_str();
	}

	// Prints a pointer using the implementation-defined "%p" format.
	sb_ostream& operator <<(void *p) {
		char b[32];
		snprintf(b, sizeof(b), "%p", p);
		return *this << b;
	}

	sb_ostream& operator <<(char c) {
		const char b[2] = { c, '\0' };
		return *this << b;
	}

	sb_ostream& operator <<(int n) {
		char b[32];
		snprintf(b, sizeof(b), "%d", n);
		return *this << b;
	}

	sb_ostream& operator <<(unsigned n) {
		char b[32];
		snprintf(b, sizeof(b), "%u", n);
		return *this << b;
	}

	sb_ostream& operator <<(double d) {
		char b[32];
		snprintf(b, sizeof(b), "%g", d);
		return *this << b;
	}

	// The print_* helpers below pass the width through the '*' specifier
	// instead of building a format string at run time: the old 8-byte format
	// buffer overflowed for widths of six or more digits. Output longer than
	// 255 characters is truncated by the fixed-size buffer.

	// print as field of specified width, right aligned
	void print_w(int n, int width) {
		char b[256];
		snprintf(b, sizeof(b), "%*d", width, n);
		write(b);
	}

	// print as field of specified width, left aligned
	void print_wl(int n, int width) {
		char b[256];
		snprintf(b, sizeof(b), "%-*d", width, n);
		write(b);
	}

	// print as field of specified width, left aligned
	// (the string is written in full, then padded with spaces up to width)
	void print_wl(const std::string &s, int width) {
		write(s.c_str());
		int l = s.length();
		while (l++ < width) {
			write(" ");
		}
	}

	// print int as field of specified width, right aligned, zero-padded
	void print_zw(int n, int width) {
		char b[256];
		snprintf(b, sizeof(b), "%0*d", width, n);
		write(b);
	}

	// print int as field of specified width, right aligned, zero-padded, hex
	void print_zw_hex(int n, int width) {
		char b[256];
		// %x expects an unsigned argument
		snprintf(b, sizeof(b), "%0*x", width, static_cast<unsigned>(n));
		write(b);
	}
};
139
140class sb_ostringstream : public sb_ostream {
141	std::string data;
142public:
143	sb_ostringstream() : data() {}
144
145	virtual void write(const char *s) {
146		data += s;
147	}
148
149	void clear() { data.clear(); }
150
151	const char* c_str() { return data.c_str(); }
152	std::string& str() { return data; }
153};
154
155class sb_log : public sb_ostream {
156	FILE *o;
157public:
158	sb_log() : o(stderr) {}
159
160	virtual void write(const char *s) {
161		fputs(s, o);
162	}
163};
164
// Shared log stream object; only declared here, the definition lives elsewhere.
extern sb_log sblog;
166
// Shader target kinds. TARGET_NUM equals the number of entries before it.
enum shader_target {
	TARGET_UNKNOWN = 0,
	TARGET_VS      = 1,
	TARGET_ES      = 2,
	TARGET_PS      = 3,
	TARGET_GS      = 4,
	TARGET_GS_COPY = 5,
	TARGET_COMPUTE = 6,
	TARGET_FETCH   = 7,
	TARGET_HS      = 8,
	TARGET_LS      = 9,

	TARGET_NUM     = 10
};
182
// Bit flags, one per hardware class (sb_context::hw_class_bit() maps
// HW_CLASS_* values onto the four single-bit entries), plus named unions.
enum sb_hw_class_bits {
	HB_R6 = 0x1,
	HB_R7 = 0x2,
	HB_EG = 0x4,
	HB_CM = 0x8,

	// combinations of the single-bit flags above
	HB_R6R7   = HB_R6 | HB_R7,
	HB_EGCM   = HB_EG | HB_CM,
	HB_R6R7EG = HB_R6R7 | HB_EG,
	HB_R7EGCM = HB_R7 | HB_EGCM,

	HB_ALL    = HB_R6R7 | HB_EGCM
};
197
// Identifiers of individual chips, numbered consecutively from 0.
enum sb_hw_chip {
	HW_CHIP_UNKNOWN = 0,
	HW_CHIP_R600    = 1,
	HW_CHIP_RV610   = 2,
	HW_CHIP_RV630   = 3,
	HW_CHIP_RV670   = 4,
	HW_CHIP_RV620   = 5,
	HW_CHIP_RV635   = 6,
	HW_CHIP_RS780   = 7,
	HW_CHIP_RS880   = 8,
	HW_CHIP_RV770   = 9,
	HW_CHIP_RV730   = 10,
	HW_CHIP_RV710   = 11,
	HW_CHIP_RV740   = 12,
	HW_CHIP_CEDAR   = 13,
	HW_CHIP_REDWOOD = 14,
	HW_CHIP_JUNIPER = 15,
	HW_CHIP_CYPRESS = 16,
	HW_CHIP_HEMLOCK = 17,
	HW_CHIP_PALM    = 18,
	HW_CHIP_SUMO    = 19,
	HW_CHIP_SUMO2   = 20,
	HW_CHIP_BARTS   = 21,
	HW_CHIP_TURKS   = 22,
	HW_CHIP_CAICOS  = 23,
	HW_CHIP_CAYMAN  = 24,
	HW_CHIP_ARUBA   = 25
};
227
// Hardware classes. The numeric order matters: sb_context::is_egcm() tests
// hw_class >= HW_CLASS_EVERGREEN.
enum sb_hw_class {
	HW_CLASS_UNKNOWN   = 0,
	HW_CLASS_R600      = 1,
	HW_CLASS_R700      = 2,
	HW_CLASS_EVERGREEN = 3,
	HW_CLASS_CAYMAN    = 4
};
236
// ALU slot indices, numbered consecutively from 0 (X) to 4 (TRANS).
enum alu_slots {
	SLOT_X = 0,
	SLOT_Y,
	SLOT_Z,
	SLOT_W,
	SLOT_TRANS
};
244
// Miscellaneous named constants.
// NOTE(review): presumably upper bounds used for sizing tables elsewhere -
// confirm against the users of these names.
enum misc_consts {
	MAX_CHAN         = 4,
	MAX_ALU_LITERALS = 4,

	MAX_GPR          = 128,
	MAX_ALU_SLOTS    = 128
};
252
// Named ALU source select values. Only the first entry of each consecutive
// run carries an explicit value; the trailing comments give the value of
// every implicitly numbered entry.
enum alu_src_sel {

	ALU_SRC_LDS_OQ_A = 219,
	ALU_SRC_LDS_OQ_B,               // 220
	ALU_SRC_LDS_OQ_A_POP,           // 221
	ALU_SRC_LDS_OQ_B_POP,           // 222
	ALU_SRC_LDS_DIRECT_A,           // 223
	ALU_SRC_LDS_DIRECT_B,           // 224

	ALU_SRC_TIME_HI = 227,
	ALU_SRC_TIME_LO,                // 228
	ALU_SRC_MASK_HI,                // 229
	ALU_SRC_MASK_LO,                // 230
	ALU_SRC_HW_WAVE_ID,             // 231
	ALU_SRC_SIMD_ID,                // 232
	ALU_SRC_SE_ID,                  // 233
	ALU_SRC_HW_THREADGRP_ID,        // 234
	ALU_SRC_WAVE_ID_IN_GRP,         // 235
	ALU_SRC_NUM_THREADGRP_WAVES,    // 236
	ALU_SRC_HW_ALU_ODD,             // 237
	ALU_SRC_LOOP_IDX,               // 238

	ALU_SRC_PARAM_BASE_ADDR = 240,
	ALU_SRC_NEW_PRIM_MASK,          // 241
	ALU_SRC_PRIM_MASK_HI,           // 242
	ALU_SRC_PRIM_MASK_LO,           // 243
	ALU_SRC_1_DBL_L,                // 244
	ALU_SRC_1_DBL_M,                // 245
	ALU_SRC_0_5_DBL_L,              // 246
	ALU_SRC_0_5_DBL_M,              // 247
	ALU_SRC_0,                      // 248
	ALU_SRC_1,                      // 249
	ALU_SRC_1_INT,                  // 250
	ALU_SRC_M_1_INT,                // 251
	ALU_SRC_0_5,                    // 252
	ALU_SRC_LITERAL,                // 253
	ALU_SRC_PV,                     // 254
	ALU_SRC_PS,                     // 255

	ALU_SRC_PARAM_OFFSET = 448
};
292
// Predicate select values; value 1 is reserved and has no enumerator.
enum alu_predicate_select {
	PRED_SEL_OFF = 0,
	/* 1 is RESERVED */
	PRED_SEL_0   = 2,
	PRED_SEL_1          // 3
};
300
301
// Output modifier values, numbered consecutively from 0.
enum alu_omod {
	OMOD_OFF = 0,
	OMOD_M2,        // 1
	OMOD_M4,        // 2
	OMOD_D2         // 3
};
308
// ALU index mode values, numbered consecutively from 0 to 6.
enum alu_index_mode {
	INDEX_AR_X = 0,
	INDEX_AR_Y_R600,        // 1
	INDEX_AR_Z_R600,        // 2
	INDEX_AR_W_R600,        // 3

	INDEX_LOOP,             // 4
	INDEX_GLOBAL,           // 5
	INDEX_GLOBAL_AR_X       // 6
};
319
// Cayman MOVA destination selectors, numbered consecutively from 0.
enum alu_cayman_mova_dst {
	CM_MOVADST_AR_X = 0,
	CM_MOVADST_PC   = 1,
	CM_MOVADST_IDX0 = 2,
	CM_MOVADST_IDX1 = 3,
	CM_MOVADST_CG0  = 4,	// clause-global byte 0
	CM_MOVADST_CG1  = 5,
	CM_MOVADST_CG2  = 6,
	CM_MOVADST_CG3  = 7
};
330
// Cayman execute-mask operation codes, numbered consecutively from 0.
enum alu_cayman_exec_mask_op {
	CM_EMO_DEACTIVATE = 0,
	CM_EMO_BREAK      = 1,
	CM_EMO_CONTINUE   = 2,
	CM_EMO_KILL       = 3
};
337
338
// Export types. EXP_TYPE_COUNT equals the number of entries before it.
enum cf_exp_type {
	EXP_PIXEL = 0,
	EXP_POS   = 1,
	EXP_PARAM = 2,

	EXP_TYPE_COUNT = 3
};
346
// Memory write types, numbered consecutively from 0.
enum cf_mem_type {
	MEM_WRITE         = 0,
	MEM_WRITE_IND     = 1,
	MEM_WRITE_ACK     = 2,
	MEM_WRITE_IND_ACK = 3
};
353
354
// Values stored in bc_kcache::mode; bc_cf::is_alu_extended() compares
// against KC_LOCK_NONE.
enum alu_kcache_mode {
	KC_LOCK_NONE = 0,
	KC_LOCK_1    = 1,
	KC_LOCK_2    = 2,
	KC_LOCK_LOOP = 3
};
361
// Values stored in bc_kcache::index_mode; bc_cf::is_alu_extended() compares
// against KC_INDEX_NONE.
enum alu_kcache_index_mode {
	KC_INDEX_NONE    = 0,
	KC_INDEX_0       = 1,
	KC_INDEX_1       = 2,
	KC_INDEX_INVALID = 3
};
368
// Channel select values; value 6 is reserved and has no enumerator.
enum chan_select {
	SEL_X = 0,
	SEL_Y,          // 1
	SEL_Z,          // 2
	SEL_W,          // 3
	SEL_0,          // 4
	SEL_1,          // 5
	/* 6 is RESERVED */
	SEL_MASK = 7
};
379
// Bank swizzle values. Two independent numbering ranges share this enum:
// the VEC_* entries (0..5, VEC_NUM = 6) and the SCL_* entries (0..3,
// SCL_NUM = 4), so VEC_* and SCL_* values overlap numerically.
enum bank_swizzle {
	VEC_012 = 0,
	VEC_021,        // 1
	VEC_120,        // 2
	VEC_102,        // 3
	VEC_201,        // 4
	VEC_210,        // 5

	VEC_NUM,        // 6

	SCL_210 = 0,
	SCL_122,        // 1
	SCL_212,        // 2
	SCL_221,        // 3

	SCL_NUM         // 4
};
398
// Scheduler queue identifiers. SQ_NUM equals the number of entries before it.
enum sched_queue_id {
	SQ_CF  = 0,
	SQ_ALU = 1,
	SQ_TEX = 2,
	SQ_VTX = 3,
	SQ_GDS = 4,

	SQ_NUM = 5
};
408
// A 32-bit value that can be viewed as a signed integer, an unsigned integer
// or a float; all three views share the same storage.
struct literal {
	union {
		int32_t i;
		uint32_t u;
		float f;
	};

	// Each constructor stores its argument through the matching view;
	// a double argument is converted to float first.
	literal(int32_t i = 0) { this->i = i; }
	literal(uint32_t u) { this->u = u; }
	literal(float f) { this->f = f; }
	literal(double f) { this->f = static_cast<float>(f); }

	// Implicit conversion yields the unsigned view.
	operator uint32_t() const {
		return u;
	}

	// literal/unsigned comparisons use the unsigned view, the int
	// comparison uses the signed view.
	bool operator ==(literal l) {
		return l.u == u;
	}
	bool operator ==(int v_int) {
		return v_int == i;
	}
	bool operator ==(unsigned v_uns) {
		return v_uns == u;
	}
};
425
// One kcache descriptor; bc_cf carries an array of four of these (kc[4]).
// NOTE(review): bank/addr semantics are not visible in this header - confirm
// against the decoder/builder before documenting them further.
struct bc_kcache {
	unsigned mode;		// compared against KC_LOCK_NONE in bc_cf::is_alu_extended()
	unsigned bank;
	unsigned addr;
	unsigned index_mode;	// compared against KC_INDEX_NONE in bc_cf::is_alu_extended()
} ;
432
433// TODO optimize bc structures
434
435struct bc_cf {
436
437	bc_kcache kc[4];
438
439	unsigned id;
440
441
442	const cf_op_info * op_ptr;
443	unsigned op;
444
445	unsigned addr:32;
446
447	unsigned alt_const:1;
448	unsigned uses_waterfall:1;
449
450	unsigned barrier:1;
451	unsigned count:7;
452	unsigned pop_count:3;
453	unsigned call_count:6;
454	unsigned whole_quad_mode:1;
455	unsigned valid_pixel_mode:1;
456
457	unsigned jumptable_sel:3;
458	unsigned cf_const:5;
459	unsigned cond:2;
460	unsigned end_of_program:1;
461
462	unsigned array_base:13;
463	unsigned elem_size:2;
464	unsigned index_gpr:7;
465	unsigned rw_gpr:7;
466	unsigned rw_rel:1;
467	unsigned type:2;
468
469	unsigned burst_count:4;
470	unsigned mark:1;
471	unsigned sel[4];
472
473	unsigned array_size:12;
474	unsigned comp_mask:4;
475
476	unsigned rat_id:4;
477	unsigned rat_inst:6;
478	unsigned rat_index_mode:2;
479
480	void set_op(unsigned op) { this->op = op; op_ptr = r600_isa_cf(op); }
481
482	bool is_alu_extended() {
483		assert(op_ptr->flags & CF_ALU);
484		return kc[2].mode != KC_LOCK_NONE || kc[3].mode != KC_LOCK_NONE ||
485			kc[0].index_mode != KC_INDEX_NONE || kc[1].index_mode != KC_INDEX_NONE ||
486			kc[2].index_mode != KC_INDEX_NONE || kc[3].index_mode != KC_INDEX_NONE;
487	}
488
489};
490
// One source operand of an ALU instruction (bc_alu holds three of these).
// The bitfield order and widths define the object layout.
struct bc_alu_src {
	unsigned sel:9;		// 9 bits wide, so it can hold values up to 511 (e.g. ALU_SRC_PARAM_OFFSET = 448)
	unsigned chan:2;
	unsigned neg:1;
	unsigned abs:1;
	unsigned rel:1;
	literal value;		// NOTE(review): presumably the constant used when sel == ALU_SRC_LITERAL - confirm
};
499
// Record holding the fields of one ALU instruction. Most members are
// bitfields whose order and widths define the object layout.
struct bc_alu {
	// op is the opcode value passed to set_op(); op_ptr is the matching
	// descriptor returned by r600_isa_alu().
	const alu_op_info * op_ptr;
	unsigned op;

	// up to three source operands
	bc_alu_src src[3];

	unsigned dst_gpr:7;
	unsigned dst_chan:2;
	unsigned dst_rel:1;
	unsigned clamp:1;
	unsigned omod:2;
	unsigned bank_swizzle:3;

	unsigned index_mode:3;
	unsigned last:1;
	unsigned pred_sel:2;

	unsigned fog_merge:1;
	unsigned write_mask:1;
	unsigned update_exec_mask:1;
	unsigned update_pred:1;

	unsigned slot:3;

	unsigned lds_idx_offset:6;

	alu_op_flags slot_flags;

	// Stores the opcode and looks up its descriptor.
	void set_op(unsigned op) {
		this->op = op;
		op_ptr = r600_isa_alu(op);
	}
};
533
// Record holding the fields of one fetch instruction. Most members are
// bitfields whose order and widths define the object layout. The existing
// comments mark the fields that only apply to GDS or MEM ops.
struct bc_fetch {
	// op is the opcode value passed to set_op(); op_ptr is the matching
	// descriptor returned by r600_isa_fetch().
	const fetch_op_info * op_ptr;
	unsigned op;

	unsigned bc_frac_mode:1;
	unsigned fetch_whole_quad:1;
	unsigned resource_id:8;

	// source register and per-component selects
	unsigned src_gpr:7;
	unsigned src_rel:1;
	unsigned src_rel_global:1; /* for GDS ops */
	unsigned src_sel[4];

	// destination register and per-component selects
	unsigned dst_gpr:7;
	unsigned dst_rel:1;
	unsigned dst_rel_global:1; /* for GDS ops */
	unsigned dst_sel[4];

	unsigned alt_const:1;

	unsigned inst_mod:2;
	unsigned resource_index_mode:2;
	unsigned sampler_index_mode:2;

	unsigned coord_type[4];
	unsigned lod_bias:7;

	unsigned offset[3];

	unsigned sampler_id:5;


	unsigned fetch_type:2;
	unsigned mega_fetch_count:6;
	unsigned coalesced_read:1;
	unsigned structured_read:2;
	unsigned lds_req:1;

	unsigned data_format:6;
	unsigned format_comp_all:1;
	unsigned num_format_all:2;
	unsigned semantic_id:8;
	unsigned srf_mode_all:1;
	unsigned use_const_fields:1;

	unsigned const_buf_no_stride:1;
	unsigned endian_swap:2;
	unsigned mega_fetch:1;

	unsigned src2_gpr:7; /* for GDS */
	unsigned alloc_consume:1;
	unsigned uav_id:4;
	unsigned uav_index_mode:2;
	unsigned bcast_first_req:1;

	/* for MEM ops */
	unsigned elem_size:2;
	unsigned uncached:1;
	unsigned indexed:1;
	unsigned burst_count:4;
	unsigned array_base:13;
	unsigned array_size:12;

	// Stores the opcode and looks up its descriptor.
	void set_op(unsigned op) { this->op = op; op_ptr = r600_isa_fetch(op); }
};
599
600struct shader_stats {
601	unsigned	ndw;
602	unsigned	ngpr;
603	unsigned	nstack;
604
605	unsigned	cf; // clause instructions not included
606	unsigned	alu;
607	unsigned	alu_clauses;
608	unsigned	fetch_clauses;
609	unsigned	fetch;
610	unsigned	alu_groups;
611
612	unsigned	shaders;		// number of shaders (for accumulated stats)
613
614	shader_stats() : ndw(), ngpr(), nstack(), cf(), alu(), alu_clauses(),
615			fetch_clauses(), fetch(), alu_groups(), shaders() {}
616
617	void collect(node *n);
618	void accumulate(shader_stats &s);
619	void dump();
620	void dump_diff(shader_stats &s);
621};
622
623class sb_context {
624
625public:
626
627	shader_stats src_stats, opt_stats;
628
629	r600_isa *isa;
630
631	sb_hw_chip hw_chip;
632	sb_hw_class hw_class;
633
634	unsigned alu_temp_gprs;
635	unsigned max_fetch;
636	bool has_trans;
637	unsigned vtx_src_num;
638	unsigned num_slots;
639	bool uses_mova_gpr;
640
641	bool r6xx_gpr_index_workaround;
642
643	bool stack_workaround_8xx;
644	bool stack_workaround_9xx;
645
646	unsigned wavefront_size;
647	unsigned stack_entry_size;
648
649	static unsigned dump_pass;
650	static unsigned dump_stat;
651
652	static unsigned dry_run;
653	static unsigned no_fallback;
654	static unsigned safe_math;
655
656	static unsigned dskip_start;
657	static unsigned dskip_end;
658	static unsigned dskip_mode;
659
660	sb_context() : src_stats(), opt_stats(), isa(0),
661			hw_chip(HW_CHIP_UNKNOWN), hw_class(HW_CLASS_UNKNOWN) {}
662
663	int init(r600_isa *isa, sb_hw_chip chip, sb_hw_class cclass);
664
665	bool is_r600() {return hw_class == HW_CLASS_R600;}
666	bool is_r700() {return hw_class == HW_CLASS_R700;}
667	bool is_evergreen() {return hw_class == HW_CLASS_EVERGREEN;}
668	bool is_cayman() {return hw_class == HW_CLASS_CAYMAN;}
669	bool is_egcm() {return hw_class >= HW_CLASS_EVERGREEN;}
670
671	bool needs_8xx_stack_workaround() {
672		if (!is_evergreen())
673			return false;
674
675		switch (hw_chip) {
676		case HW_CHIP_HEMLOCK:
677		case HW_CHIP_CYPRESS:
678		case HW_CHIP_JUNIPER:
679			return false;
680		default:
681			return true;
682		}
683	}
684
685	bool needs_9xx_stack_workaround() {
686		return is_cayman();
687	}
688
689	sb_hw_class_bits hw_class_bit() {
690		switch (hw_class) {
691		case HW_CLASS_R600:return HB_R6;
692		case HW_CLASS_R700:return HB_R7;
693		case HW_CLASS_EVERGREEN:return HB_EG;
694		case HW_CLASS_CAYMAN:return HB_CM;
695		default: assert(!"unknown hw class"); return (sb_hw_class_bits)0;
696
697		}
698	}
699
700	unsigned cf_opcode(unsigned op) {
701		return r600_isa_cf_opcode(isa->hw_class, op);
702	}
703
704	unsigned alu_opcode(unsigned op) {
705		return r600_isa_alu_opcode(isa->hw_class, op);
706	}
707
708	unsigned alu_slots(unsigned op) {
709		return r600_isa_alu_slots(isa->hw_class, op);
710	}
711
712	unsigned alu_slots(const alu_op_info * op_ptr) {
713		return op_ptr->slots[isa->hw_class];
714	}
715
716	unsigned alu_slots_mask(const alu_op_info * op_ptr) {
717		unsigned mask = 0;
718		unsigned slot_flags = alu_slots(op_ptr);
719		if (slot_flags & AF_V)
720			mask = 0x0F;
721		if (!is_cayman() && (slot_flags & AF_S))
722			mask |= 0x10;
723		/* Force LDS_IDX ops into SLOT_X */
724		if (op_ptr->opcode[0] == -1 && ((op_ptr->opcode[1] & 0xFF) == 0x11))
725			mask = 0x01;
726		return mask;
727	}
728
729	unsigned fetch_opcode(unsigned op) {
730		return r600_isa_fetch_opcode(isa->hw_class, op);
731	}
732
733	bool is_kcache_sel(unsigned sel) {
734		return ((sel >= 128 && sel < 192) || (sel >= 256 && sel < 320));
735	}
736
737	bool is_lds_oq(unsigned sel) {
738		return (sel >= 0xdb && sel <= 0xde);
739	}
740
741	const char * get_hw_class_name();
742	const char * get_hw_chip_name();
743
744};
745
// Execute the statement(s) `a` only when the corresponding static flag of
// sb_context (dump_stat / dump_pass) is non-zero. The do/while(0) wrapper
// makes each macro usable as a single statement.
#define SB_DUMP_STAT(a) do { if (sb_context::dump_stat) { a } } while (0)
#define SB_DUMP_PASS(a) do { if (sb_context::dump_pass) { a } } while (0)
748
749class bc_decoder {
750
751	sb_context &ctx;
752
753	uint32_t* dw;
754	unsigned ndw;
755
756public:
757
758	bc_decoder(sb_context &sctx, uint32_t *data, unsigned size)
759		: ctx(sctx), dw(data), ndw(size) {}
760
761	int decode_cf(unsigned &i, bc_cf &bc);
762	int decode_alu(unsigned &i, bc_alu &bc);
763	int decode_fetch(unsigned &i, bc_fetch &bc);
764
765private:
766	int decode_cf_alu(unsigned &i, bc_cf &bc);
767	int decode_cf_exp(unsigned &i, bc_cf &bc);
768	int decode_cf_mem(unsigned &i, bc_cf &bc);
769
770	int decode_fetch_vtx(unsigned &i, bc_fetch &bc);
771	int decode_fetch_gds(unsigned &i, bc_fetch &bc);
772	int decode_fetch_mem(unsigned &i, bc_fetch &bc);
773};
774
775// bytecode format definition
776
// Base class for a 32-bit encoded word together with the set of hardware
// classes (sb_hw_class_bits mask) it was created for. Derived format classes
// are generated by the BC_FORMAT_* / BC_FIELD macros.
class hw_encoding_format {
	const sb_hw_class_bits hw_target; //FIXME: debug - remove after testing
	// Declared private and not defined in this header, so outside code
	// cannot default-construct an object without a hardware mask.
	hw_encoding_format();
protected:
	uint32_t value;	// the raw encoded word, modified by derived-class field setters
public:
	// Starts with an all-zero word.
	hw_encoding_format(sb_hw_class_bits hw)
		: hw_target(hw), value(0) {}
	// Wraps an existing encoded word v.
	hw_encoding_format(uint32_t v, sb_hw_class_bits hw)
		: hw_target(hw), value(v) {}
	// Returns the raw word. Asserts that every bit set in hw is also set in
	// hw_target, i.e. the format is valid for the requested hardware.
	uint32_t get_value(sb_hw_class_bits hw) const {
		assert((hw & hw_target) == hw);
		return value;
	}
};
792
// BC_FORMAT_BEGIN_HW(fmt, hwset) opens a class named fmt_hwset derived from
// hw_encoding_format, bound to the hardware mask HB_hwset, with a default
// constructor (zero word) and a constructor from an existing uint32_t word.
// `thistype` is the return type used by the BC_FIELD setters.
// BC_FORMAT_BEGIN(fmt) is the same for HB_ALL; BC_FORMAT_END closes the class.
#define BC_FORMAT_BEGIN_HW(fmt, hwset) \
class fmt##_##hwset : public hw_encoding_format {\
	typedef fmt##_##hwset thistype; \
public: \
	fmt##_##hwset() : hw_encoding_format(HB_##hwset) {}; \
	fmt##_##hwset(uint32_t v) : hw_encoding_format(v, HB_##hwset) {};

#define BC_FORMAT_BEGIN(fmt) BC_FORMAT_BEGIN_HW(fmt, ALL)

#define BC_FORMAT_END(fmt) };
803
804// bytecode format field definition
805
// BC_FIELD generates two members for the bit range [first_bit, last_bit]:
//  - name(v): masks v to the field width, shifts it into place and ORs it
//    into `value` (bits already set are not cleared); returns *this.
//  - get_name(): extracts the field from `value`.
// The mask is computed with 1ull so that the shift stays well defined for a
// field as wide as 32 bits. `fmt` and `shortname` are not used by this
// definition. BC_RSRVD expands to nothing.
#define BC_FIELD(fmt, name, shortname, last_bit, first_bit) \
	thistype & name(unsigned v) { \
		value |= ((v&((1ull<<((last_bit)-(first_bit)+1))-1))<<(first_bit)); \
		return *this; \
	} \
	unsigned get_##name() const { \
		return (value>>(first_bit))&((1ull<<((last_bit)-(first_bit)+1))-1); \
	}

#define BC_RSRVD(fmt, last_bit, first_bit)
816
// CLAMP macro defined elsewhere interferes with bytecode field name
#undef CLAMP
// Expands the format definition list with the BC_* macros defined above.
#include "sb_bc_fmt_def.inc"

// The helper macros are not needed past this point.
// NOTE(review): BC_FORMAT_BEGIN_HW is not undefined here and stays visible to
// every includer - confirm whether that is intentional.
#undef BC_FORMAT_BEGIN
#undef BC_FORMAT_END
#undef BC_FIELD
#undef BC_RSRVD
825
826class bc_parser {
827	sb_context & ctx;
828
829	bc_decoder *dec;
830
831	r600_bytecode *bc;
832	r600_shader *pshader;
833
834	uint32_t *dw;
835	unsigned bc_ndw;
836
837	unsigned max_cf;
838
839	shader *sh;
840
841	int error;
842
843	alu_node *slots[2][5];
844	unsigned cgroup;
845
846	typedef std::vector<cf_node*> id_cf_map;
847	id_cf_map cf_map;
848
849	typedef std::stack<region_node*> region_stack;
850	region_stack loop_stack;
851
852	bool gpr_reladdr;
853
854	// Note: currently relies on input emitting SET_CF in same basic block as uses
855	value *cf_index_value[2];
856	alu_node *mova;
857public:
858
859	bc_parser(sb_context &sctx, r600_bytecode *bc, r600_shader* pshader) :
860		ctx(sctx), dec(), bc(bc), pshader(pshader),
861		dw(), bc_ndw(), max_cf(),
862		sh(), error(), slots(), cgroup(),
863		cf_map(), loop_stack(), gpr_reladdr(), cf_index_value(), mova() { }
864
865	int decode();
866	int prepare();
867
868	shader* get_shader() { assert(!error); return sh; }
869
870private:
871
872	int decode_shader();
873
874	int parse_decls();
875
876	int decode_cf(unsigned &i, bool &eop);
877
878	int decode_alu_clause(cf_node *cf);
879	int decode_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt);
880
881	int decode_fetch_clause(cf_node *cf);
882
883	int prepare_ir();
884	int prepare_alu_clause(cf_node *cf);
885	int prepare_alu_group(cf_node* cf, alu_group_node *g);
886	int prepare_fetch_clause(cf_node *cf);
887
888	int prepare_loop(cf_node *c);
889	int prepare_if(cf_node *c);
890
891	void save_set_cf_index(value *val, unsigned idx);
892	value *get_cf_index_value(unsigned idx);
893	void save_mova(alu_node *mova);
894	alu_node *get_mova();
895};
896
897
898
899
900class bytecode {
901	typedef std::vector<uint32_t> bc_vector;
902	sb_hw_class_bits hw_class_bit;
903
904	bc_vector bc;
905
906	unsigned pos;
907
908public:
909
910	bytecode(sb_hw_class_bits hw, unsigned rdw = 256)
911		: hw_class_bit(hw), pos(0) { bc.reserve(rdw); }
912
913	unsigned ndw() { return bc.size(); }
914
915	void write_data(uint32_t* dst) {
916		std::copy(bc.begin(), bc.end(), dst);
917	}
918
919	void align(unsigned a) {
920		unsigned size = bc.size();
921		size = (size + a - 1) & ~(a-1);
922		bc.resize(size);
923	}
924
925	void set_size(unsigned sz) {
926		assert(sz >= bc.size());
927		bc.resize(sz);
928	}
929
930	void seek(unsigned p) {
931		if (p != pos) {
932			if (p > bc.size()) {
933				bc.resize(p);
934			}
935			pos = p;
936		}
937	}
938
939	unsigned get_pos() { return pos; }
940	uint32_t *data() { return &bc[0]; }
941
942	bytecode & operator <<(uint32_t v) {
943		if (pos == ndw()) {
944			bc.push_back(v);
945		} else
946			bc.at(pos) = v;
947		++pos;
948		return *this;
949	}
950
951	bytecode & operator <<(const hw_encoding_format &e) {
952		*this << e.get_value(hw_class_bit);
953		return *this;
954	}
955
956	bytecode & operator <<(const bytecode &b) {
957		bc.insert(bc.end(), b.bc.begin(), b.bc.end());
958		return *this;
959	}
960
961	uint32_t at(unsigned dw_id) { return bc.at(dw_id); }
962};
963
964
965class bc_builder {
966	shader &sh;
967	sb_context &ctx;
968	bytecode bb;
969	int error;
970
971public:
972
973	bc_builder(shader &s);
974	int build();
975	bytecode& get_bytecode() { assert(!error); return bb; }
976
977private:
978
979	int build_cf(cf_node *n);
980
981	int build_cf_alu(cf_node *n);
982	int build_cf_mem(cf_node *n);
983	int build_cf_exp(cf_node *n);
984
985	int build_alu_clause(cf_node *n);
986	int build_alu_group(alu_group_node *n);
987	int build_alu(alu_node *n);
988
989	int build_fetch_clause(cf_node *n);
990	int build_fetch_tex(fetch_node *n);
991	int build_fetch_vtx(fetch_node *n);
992	int build_fetch_gds(fetch_node *n);
993	int build_fetch_mem(fetch_node* n);
994};
995
996} // namespace r600_sb
997
998#endif /* SB_BC_H_ */
999