1/*
2 * Copyright (C) 2005 Ben Skeggs.
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28/**
29 * \file
30 *
31 * Emit the r300_fragment_program_code that can be understood by the hardware.
32 * Input is a pre-transformed radeon_program.
33 *
34 * \author Ben Skeggs <darktama@iinet.net.au>
35 *
36 * \author Jerome Glisse <j.glisse@gmail.com>
37 */
38
39#include "r300_fragprog.h"
40
41#include "r300_reg.h"
42
43#include "radeon_program_pair.h"
44#include "r300_fragprog_swizzle.h"
45
46#include "util/compiler.h"
47
48
/**
 * Emission state carried through the translation of one fragment program.
 *
 * node_first_tex and node_first_alu snapshot code->tex.length and
 * code->alu.length at the point where the current node was started.
 * finish_node() feeds them to get_msbs_tex()/get_msbs_alu(), which read bits
 * above bit 7, so they must be able to hold more than 8 bits; they are plain
 * unsigned ints rather than narrow bit-fields to avoid silent truncation.
 */
struct r300_emit_state {
	struct r300_fragment_program_compiler * compiler;

	/* Node currently being filled; begin_tex() never advances past 3. */
	unsigned current_node : 2;
	/* Value of code->tex.length when the current node was started. */
	unsigned node_first_tex;
	/* Value of code->alu.length when the current node was started. */
	unsigned node_first_alu;
	/* Flag bits OR-ed into this node's code_addr word by finish_node(). */
	uint32_t node_flags;
};
57
/*
 * Declares the two locals used throughout the emit helpers: `c`, the
 * fragment program compiler taken from a variable named `emit` that must be
 * in scope at the point of use, and `code`, a pointer to the r300 code block
 * inside that compiler's output.  The expansion has no trailing semicolon;
 * the user supplies it.
 */
#define PROG_CODE \
	struct r300_fragment_program_compiler *c = emit->compiler; \
	struct r300_fragment_program_code *code = &c->code->code.r300
61
/*
 * Report an error through rc_error(), prefixing the message with the current
 * file and function name and appending a newline.  Requires a compiler
 * pointer named `c` in the calling scope.
 */
#define error(fmt, args...) do {			\
		rc_error(&c->Base, "%s::%s(): " fmt "\n",	\
			__FILE__, __FUNCTION__, ##args);	\
	} while(0)
66
/**
 * Extract the three bits located directly above the low six bits of an ALU
 * offset or size, i.e. bits 6..8 of \p bits.
 */
static unsigned int get_msbs_alu(unsigned int bits)
{
	const unsigned int low_bit_count = 6;
	const unsigned int msb_mask = 0x7;
	unsigned int upper = bits >> low_bit_count;

	return upper & msb_mask;
}
71
/**
 * Extract the most significant bits of a TEX offset or size.
 *
 * @param bits The full offset or size value
 * @param lsbs The number of least significant bits to discard
 * @return The four bits of \p bits located directly above the low \p lsbs
 *         bits.
 */
static unsigned int get_msbs_tex(unsigned int bits, unsigned int lsbs)
{
	/* Contiguous 4-bit mask (decimal 15).  A non-contiguous mask such as
	 * 0x15 would drop bits 1 and 3 of the extracted value.
	 * NOTE(review): presumably matches the 4-bit R400_TEX_*_MSB register
	 * fields - confirm against r300_reg.h. */
	return (bits >> lsbs) & 0xf;
}
79
/* Shift \p x right by \p lsbs bits and keep the bits selected by \p mask.
 * Every argument is parenthesised so that compound expressions (for example
 * a mask built with `|`) are evaluated as a unit. */
#define R400_EXT_GET_MSBS(x, lsbs, mask) (((x) >> (lsbs)) & (mask))
81
82/**
83 * Mark a temporary register as used.
84 */
85static void use_temporary(struct r300_fragment_program_code *code, unsigned int index)
86{
87	if (index > code->pixsize)
88		code->pixsize = index;
89}
90
91static unsigned int use_source(struct r300_fragment_program_code* code, struct rc_pair_instruction_source src)
92{
93	if (!src.Used)
94		return 0;
95
96	if (src.File == RC_FILE_CONSTANT) {
97		return src.Index | (1 << 5);
98	} else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) {
99		use_temporary(code, src.Index);
100		return src.Index & 0x1f;
101	}
102
103	return 0;
104}
105
106
107static unsigned int translate_rgb_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode)
108{
109	switch(opcode) {
110	case RC_OPCODE_CMP: return R300_ALU_OUTC_CMP;
111	case RC_OPCODE_CND: return R300_ALU_OUTC_CND;
112	case RC_OPCODE_DP3: return R300_ALU_OUTC_DP3;
113	case RC_OPCODE_DP4: return R300_ALU_OUTC_DP4;
114	case RC_OPCODE_FRC: return R300_ALU_OUTC_FRC;
115	default:
116		error("translate_rgb_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name);
117		FALLTHROUGH;
118	case RC_OPCODE_NOP:
119		FALLTHROUGH;
120	case RC_OPCODE_MAD: return R300_ALU_OUTC_MAD;
121	case RC_OPCODE_MAX: return R300_ALU_OUTC_MAX;
122	case RC_OPCODE_MIN: return R300_ALU_OUTC_MIN;
123	case RC_OPCODE_REPL_ALPHA: return R300_ALU_OUTC_REPL_ALPHA;
124	}
125}
126
127static unsigned int translate_alpha_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode)
128{
129	switch(opcode) {
130	case RC_OPCODE_CMP: return R300_ALU_OUTA_CMP;
131	case RC_OPCODE_CND: return R300_ALU_OUTA_CND;
132	case RC_OPCODE_DP3: return R300_ALU_OUTA_DP4;
133	case RC_OPCODE_DP4: return R300_ALU_OUTA_DP4;
134	case RC_OPCODE_EX2: return R300_ALU_OUTA_EX2;
135	case RC_OPCODE_FRC: return R300_ALU_OUTA_FRC;
136	case RC_OPCODE_LG2: return R300_ALU_OUTA_LG2;
137	default:
138		error("translate_rgb_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name);
139		FALLTHROUGH;
140	case RC_OPCODE_NOP:
141		FALLTHROUGH;
142	case RC_OPCODE_MAD: return R300_ALU_OUTA_MAD;
143	case RC_OPCODE_MAX: return R300_ALU_OUTA_MAX;
144	case RC_OPCODE_MIN: return R300_ALU_OUTA_MIN;
145	case RC_OPCODE_RCP: return R300_ALU_OUTA_RCP;
146	case RC_OPCODE_RSQ: return R300_ALU_OUTA_RSQ;
147	}
148}
149
150/**
151 * Emit one paired ALU instruction.
152 */
153static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* inst)
154{
155	int ip;
156	int j;
157	PROG_CODE;
158
159	if (code->alu.length >= c->Base.max_alu_insts) {
160		error("Too many ALU instructions");
161		return 0;
162	}
163
164	ip = code->alu.length++;
165
166	code->alu.inst[ip].rgb_inst = translate_rgb_opcode(c, inst->RGB.Opcode);
167	code->alu.inst[ip].alpha_inst = translate_alpha_opcode(c, inst->Alpha.Opcode);
168
169	for(j = 0; j < 3; ++j) {
170		/* Set the RGB address */
171		unsigned int src = use_source(code, inst->RGB.Src[j]);
172		unsigned int arg;
173		if (inst->RGB.Src[j].Index >= R300_PFS_NUM_TEMP_REGS)
174			code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_RGB_MSB_BIT(j);
175
176		code->alu.inst[ip].rgb_addr |= src << (6*j);
177
178		/* Set the Alpha address */
179		src = use_source(code, inst->Alpha.Src[j]);
180		if (inst->Alpha.Src[j].Index >= R300_PFS_NUM_TEMP_REGS)
181			code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_A_MSB_BIT(j);
182
183		code->alu.inst[ip].alpha_addr |= src << (6*j);
184
185		arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle);
186		arg |= inst->RGB.Arg[j].Abs << 6;
187		arg |= inst->RGB.Arg[j].Negate << 5;
188		code->alu.inst[ip].rgb_inst |= arg << (7*j);
189
190		arg = r300FPTranslateAlphaSwizzle(inst->Alpha.Arg[j].Source, inst->Alpha.Arg[j].Swizzle);
191		arg |= inst->Alpha.Arg[j].Abs << 6;
192		arg |= inst->Alpha.Arg[j].Negate << 5;
193		code->alu.inst[ip].alpha_inst |= arg << (7*j);
194	}
195
196	/* Presubtract */
197	if (inst->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
198		switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) {
199		case RC_PRESUB_BIAS:
200			code->alu.inst[ip].rgb_inst |=
201						R300_ALU_SRCP_1_MINUS_2_SRC0;
202			break;
203		case RC_PRESUB_ADD:
204			code->alu.inst[ip].rgb_inst |=
205						R300_ALU_SRCP_SRC1_PLUS_SRC0;
206			break;
207		case RC_PRESUB_SUB:
208			code->alu.inst[ip].rgb_inst |=
209						R300_ALU_SRCP_SRC1_MINUS_SRC0;
210			break;
211		case RC_PRESUB_INV:
212			code->alu.inst[ip].rgb_inst |=
213						R300_ALU_SRCP_1_MINUS_SRC0;
214			break;
215		default:
216			break;
217		}
218	}
219
220	if (inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
221		switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) {
222		case RC_PRESUB_BIAS:
223			code->alu.inst[ip].alpha_inst |=
224						R300_ALU_SRCP_1_MINUS_2_SRC0;
225			break;
226		case RC_PRESUB_ADD:
227			code->alu.inst[ip].alpha_inst |=
228						R300_ALU_SRCP_SRC1_PLUS_SRC0;
229			break;
230		case RC_PRESUB_SUB:
231			code->alu.inst[ip].alpha_inst |=
232						R300_ALU_SRCP_SRC1_MINUS_SRC0;
233			break;
234		case RC_PRESUB_INV:
235			code->alu.inst[ip].alpha_inst |=
236						R300_ALU_SRCP_1_MINUS_SRC0;
237			break;
238		default:
239			break;
240		}
241	}
242
243	if (inst->RGB.Saturate)
244		code->alu.inst[ip].rgb_inst |= R300_ALU_OUTC_CLAMP;
245	if (inst->Alpha.Saturate)
246		code->alu.inst[ip].alpha_inst |= R300_ALU_OUTA_CLAMP;
247
248	if (inst->RGB.WriteMask) {
249		use_temporary(code, inst->RGB.DestIndex);
250		if (inst->RGB.DestIndex >= R300_PFS_NUM_TEMP_REGS)
251			code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_RGB_MSB_BIT;
252		code->alu.inst[ip].rgb_addr |=
253			((inst->RGB.DestIndex & 0x1f) << R300_ALU_DSTC_SHIFT) |
254			(inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT);
255	}
256	if (inst->RGB.OutputWriteMask) {
257		code->alu.inst[ip].rgb_addr |=
258            (inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT) |
259            R300_RGB_TARGET(inst->RGB.Target);
260		emit->node_flags |= R300_RGBA_OUT;
261	}
262
263	if (inst->Alpha.WriteMask) {
264		use_temporary(code, inst->Alpha.DestIndex);
265		if (inst->Alpha.DestIndex >= R300_PFS_NUM_TEMP_REGS)
266			code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_A_MSB_BIT;
267		code->alu.inst[ip].alpha_addr |=
268			((inst->Alpha.DestIndex & 0x1f) << R300_ALU_DSTA_SHIFT) |
269			R300_ALU_DSTA_REG;
270	}
271	if (inst->Alpha.OutputWriteMask) {
272		code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_OUTPUT |
273            R300_ALPHA_TARGET(inst->Alpha.Target);
274		emit->node_flags |= R300_RGBA_OUT;
275	}
276	if (inst->Alpha.DepthWriteMask) {
277		code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_DEPTH;
278		emit->node_flags |= R300_W_OUT;
279		c->code->writes_depth = 1;
280	}
281	if (inst->Nop)
282		code->alu.inst[ip].rgb_inst |= R300_ALU_INSERT_NOP;
283
284	/* Handle Output Modifier
285	 * According to the r300 docs, there is no RC_OMOD_DISABLE for r300 */
286	if (inst->RGB.Omod) {
287		if (inst->RGB.Omod == RC_OMOD_DISABLE) {
288			rc_error(&c->Base, "RC_OMOD_DISABLE not supported");
289		}
290		code->alu.inst[ip].rgb_inst |=
291			(inst->RGB.Omod << R300_ALU_OUTC_MOD_SHIFT);
292	}
293	if (inst->Alpha.Omod) {
294		if (inst->Alpha.Omod == RC_OMOD_DISABLE) {
295			rc_error(&c->Base, "RC_OMOD_DISABLE not supported");
296		}
297		code->alu.inst[ip].alpha_inst |=
298			(inst->Alpha.Omod << R300_ALU_OUTC_MOD_SHIFT);
299	}
300	return 1;
301}
302
303
304/**
305 * Finish the current node without advancing to the next one.
306 */
307static int finish_node(struct r300_emit_state * emit)
308{
309	struct r300_fragment_program_compiler * c = emit->compiler;
310	struct r300_fragment_program_code *code = &emit->compiler->code->code.r300;
311	unsigned alu_offset;
312	unsigned alu_end;
313	unsigned tex_offset;
314	unsigned tex_end;
315
316	unsigned int alu_offset_msbs, alu_end_msbs;
317
318	if (code->alu.length == emit->node_first_alu) {
319		/* Generate a single NOP for this node */
320		struct rc_pair_instruction inst;
321		memset(&inst, 0, sizeof(inst));
322		if (!emit_alu(emit, &inst))
323			return 0;
324	}
325
326	alu_offset = emit->node_first_alu;
327	alu_end = code->alu.length - alu_offset - 1;
328	tex_offset = emit->node_first_tex;
329	tex_end = code->tex.length - tex_offset - 1;
330
331	if (code->tex.length == emit->node_first_tex) {
332		if (emit->current_node > 0) {
333			error("Node %i has no TEX instructions", emit->current_node);
334			return 0;
335		}
336
337		tex_end = 0;
338	} else {
339		if (emit->current_node == 0)
340			code->config |= R300_PFS_CNTL_FIRST_NODE_HAS_TEX;
341	}
342
343	/* Write the config register.
344	 * Note: The order in which the words for each node are written
345	 * is not correct here and needs to be fixed up once we're entirely
346	 * done
347	 *
348	 * Also note that the register specification from AMD is slightly
349	 * incorrect in its description of this register. */
350	code->code_addr[emit->current_node]  =
351			((alu_offset << R300_ALU_START_SHIFT)
352				& R300_ALU_START_MASK)
353			| ((alu_end << R300_ALU_SIZE_SHIFT)
354				& R300_ALU_SIZE_MASK)
355			| ((tex_offset << R300_TEX_START_SHIFT)
356				& R300_TEX_START_MASK)
357			| ((tex_end << R300_TEX_SIZE_SHIFT)
358				& R300_TEX_SIZE_MASK)
359			| emit->node_flags
360			| (get_msbs_tex(tex_offset, 5)
361				<< R400_TEX_START_MSB_SHIFT)
362			| (get_msbs_tex(tex_end, 5)
363				<< R400_TEX_SIZE_MSB_SHIFT)
364			;
365
366	/* Write r400 extended instruction fields.  These will be ignored on
367	 * r300 cards.  */
368	alu_offset_msbs = get_msbs_alu(alu_offset);
369	alu_end_msbs = get_msbs_alu(alu_end);
370	switch(emit->current_node) {
371	case 0:
372		code->r400_code_offset_ext |=
373			alu_offset_msbs << R400_ALU_START3_MSB_SHIFT
374			| alu_end_msbs << R400_ALU_SIZE3_MSB_SHIFT;
375		break;
376	case 1:
377		code->r400_code_offset_ext |=
378			alu_offset_msbs << R400_ALU_START2_MSB_SHIFT
379			| alu_end_msbs << R400_ALU_SIZE2_MSB_SHIFT;
380		break;
381	case 2:
382		code->r400_code_offset_ext |=
383			alu_offset_msbs << R400_ALU_START1_MSB_SHIFT
384			| alu_end_msbs << R400_ALU_SIZE1_MSB_SHIFT;
385		break;
386	case 3:
387		code->r400_code_offset_ext |=
388			alu_offset_msbs << R400_ALU_START0_MSB_SHIFT
389			| alu_end_msbs << R400_ALU_SIZE0_MSB_SHIFT;
390		break;
391	}
392	return 1;
393}
394
395
396/**
397 * Begin a block of texture instructions.
398 * Create the necessary indirection.
399 */
400static int begin_tex(struct r300_emit_state * emit)
401{
402	PROG_CODE;
403
404	if (code->alu.length == emit->node_first_alu &&
405	    code->tex.length == emit->node_first_tex) {
406		return 1;
407	}
408
409	if (emit->current_node == 3) {
410		error("Too many texture indirections");
411		return 0;
412	}
413
414	if (!finish_node(emit))
415		return 0;
416
417	emit->current_node++;
418	emit->node_first_tex = code->tex.length;
419	emit->node_first_alu = code->alu.length;
420	emit->node_flags = 0;
421	return 1;
422}
423
424
425static int emit_tex(struct r300_emit_state * emit, struct rc_instruction * inst)
426{
427	unsigned int unit;
428	unsigned int dest;
429	unsigned int opcode;
430	PROG_CODE;
431
432	if (code->tex.length >= emit->compiler->Base.max_tex_insts) {
433		error("Too many TEX instructions");
434		return 0;
435	}
436
437	unit = inst->U.I.TexSrcUnit;
438	dest = inst->U.I.DstReg.Index;
439
440	switch(inst->U.I.Opcode) {
441	case RC_OPCODE_KIL: opcode = R300_TEX_OP_KIL; break;
442	case RC_OPCODE_TEX: opcode = R300_TEX_OP_LD; break;
443	case RC_OPCODE_TXB: opcode = R300_TEX_OP_TXB; break;
444	case RC_OPCODE_TXP: opcode = R300_TEX_OP_TXP; break;
445	default:
446		error("Unknown texture opcode %s", rc_get_opcode_info(inst->U.I.Opcode)->Name);
447		return 0;
448	}
449
450	if (inst->U.I.Opcode == RC_OPCODE_KIL) {
451		unit = 0;
452		dest = 0;
453	} else {
454		use_temporary(code, dest);
455	}
456
457	use_temporary(code, inst->U.I.SrcReg[0].Index);
458
459	code->tex.inst[code->tex.length++] =
460		((inst->U.I.SrcReg[0].Index << R300_SRC_ADDR_SHIFT)
461			& R300_SRC_ADDR_MASK)
462		| ((dest << R300_DST_ADDR_SHIFT)
463			& R300_DST_ADDR_MASK)
464		| (unit << R300_TEX_ID_SHIFT)
465		| (opcode << R300_TEX_INST_SHIFT)
466		| (inst->U.I.SrcReg[0].Index >= R300_PFS_NUM_TEMP_REGS ?
467			R400_SRC_ADDR_EXT_BIT : 0)
468		| (dest >= R300_PFS_NUM_TEMP_REGS ?
469			R400_DST_ADDR_EXT_BIT : 0)
470		;
471	return 1;
472}
473
474
475/**
476 * Final compilation step: Turn the intermediate radeon_program into
477 * machine-readable instructions.
478 */
479void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)
480{
481	struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c;
482	struct r300_emit_state emit;
483	struct r300_fragment_program_code *code = &compiler->code->code.r300;
484	unsigned int tex_end;
485
486	memset(&emit, 0, sizeof(emit));
487	emit.compiler = compiler;
488
489	memset(code, 0, sizeof(struct r300_fragment_program_code));
490
491	for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next;
492	    inst != &compiler->Base.Program.Instructions && !compiler->Base.Error;
493	    inst = inst->Next) {
494		if (inst->Type == RC_INSTRUCTION_NORMAL) {
495			if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) {
496				begin_tex(&emit);
497				continue;
498			}
499
500			emit_tex(&emit, inst);
501		} else {
502			emit_alu(&emit, &inst->U.P);
503		}
504	}
505
506	if (code->pixsize >= compiler->Base.max_temp_regs)
507		rc_error(&compiler->Base, "Too many hardware temporaries used.\n");
508
509	if (compiler->Base.Error)
510		return;
511
512	/* Finish the program */
513	finish_node(&emit);
514
515	code->config |= emit.current_node; /* FIRST_NODE_HAS_TEX set by finish_node */
516
517	/* Set r400 extended instruction fields.  These values will be ignored
518	 * on r300 cards. */
519	code->r400_code_offset_ext |=
520		(get_msbs_alu(0)
521				<< R400_ALU_OFFSET_MSB_SHIFT)
522		| (get_msbs_alu(code->alu.length - 1)
523				<< R400_ALU_SIZE_MSB_SHIFT);
524
525	tex_end = code->tex.length ? code->tex.length - 1 : 0;
526	code->code_offset =
527		((0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT)
528			& R300_PFS_CNTL_ALU_OFFSET_MASK)
529		| (((code->alu.length - 1) << R300_PFS_CNTL_ALU_END_SHIFT)
530			& R300_PFS_CNTL_ALU_END_MASK)
531		| ((0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT)
532			& R300_PFS_CNTL_TEX_OFFSET_MASK)
533		| ((tex_end << R300_PFS_CNTL_TEX_END_SHIFT)
534			& R300_PFS_CNTL_TEX_END_MASK)
535		| (get_msbs_tex(0, 5) << R400_TEX_START_MSB_SHIFT)
536		| (get_msbs_tex(tex_end, 6) << R400_TEX_SIZE_MSB_SHIFT)
537		;
538
539	if (emit.current_node < 3) {
540		int shift = 3 - emit.current_node;
541		int i;
542		for(i = emit.current_node; i >= 0; --i)
543			code->code_addr[shift + i] = code->code_addr[i];
544		for(i = 0; i < shift; ++i)
545			code->code_addr[i] = 0;
546	}
547
548	if (code->pixsize >= R300_PFS_NUM_TEMP_REGS
549	    || code->alu.length > R300_PFS_MAX_ALU_INST
550	    || code->tex.length > R300_PFS_MAX_TEX_INST) {
551
552		code->r390_mode = 1;
553	}
554}
555