1/*
2 * Copyright (C) 2009 Nicolai Haehnle.
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28#include "radeon_program_pair.h"
29
30#include "radeon_compiler.h"
31#include "radeon_compiler_util.h"
32
33#include "util/compiler.h"
34
35
36/**
37 * Finally rewrite ADD, MOV, MUL as the appropriate native instruction
38 * and reverse the order of arguments for CMP.
39 */
40static void final_rewrite(struct rc_sub_instruction *inst)
41{
42	struct rc_src_register tmp;
43
44	switch(inst->Opcode) {
45	case RC_OPCODE_ADD:
46		inst->SrcReg[2] = inst->SrcReg[1];
47		inst->SrcReg[1].File = RC_FILE_NONE;
48		inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111;
49		inst->SrcReg[1].Negate = RC_MASK_NONE;
50		inst->Opcode = RC_OPCODE_MAD;
51		break;
52	case RC_OPCODE_CMP:
53		tmp = inst->SrcReg[2];
54		inst->SrcReg[2] = inst->SrcReg[0];
55		inst->SrcReg[0] = tmp;
56		break;
57	case RC_OPCODE_MOV:
58		/* AMD say we should use CMP.
59		 * However, when we transform
60		 *  KIL -r0;
61		 * into
62		 *  CMP tmp, -r0, -r0, 0;
63		 *  KIL tmp;
64		 * we get incorrect behaviour on R500 when r0 == 0.0.
65		 * It appears that the R500 KIL hardware treats -0.0 as less
66		 * than zero.
67		 */
68		inst->SrcReg[1].File = RC_FILE_NONE;
69		inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111;
70		inst->SrcReg[2].File = RC_FILE_NONE;
71		inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000;
72		inst->Opcode = RC_OPCODE_MAD;
73		break;
74	case RC_OPCODE_MUL:
75		inst->SrcReg[2].File = RC_FILE_NONE;
76		inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000;
77		inst->Opcode = RC_OPCODE_MAD;
78		break;
79	default:
80		/* nothing to do */
81		break;
82	}
83}
84
85
86/**
87 * Classify an instruction according to which ALUs etc. it needs
88 */
89static void classify_instruction(struct rc_sub_instruction * inst,
90	int * needrgb, int * needalpha, int * istranscendent)
91{
92	*needrgb = (inst->DstReg.WriteMask & RC_MASK_XYZ) ? 1 : 0;
93	*needalpha = (inst->DstReg.WriteMask & RC_MASK_W) ? 1 : 0;
94	*istranscendent = 0;
95
96	if (inst->WriteALUResult == RC_ALURESULT_X)
97		*needrgb = 1;
98	else if (inst->WriteALUResult == RC_ALURESULT_W)
99		*needalpha = 1;
100
101	switch(inst->Opcode) {
102	case RC_OPCODE_ADD:
103	case RC_OPCODE_CMP:
104	case RC_OPCODE_CND:
105	case RC_OPCODE_DDX:
106	case RC_OPCODE_DDY:
107	case RC_OPCODE_FRC:
108	case RC_OPCODE_MAD:
109	case RC_OPCODE_MAX:
110	case RC_OPCODE_MIN:
111	case RC_OPCODE_MOV:
112	case RC_OPCODE_MUL:
113		break;
114	case RC_OPCODE_COS:
115	case RC_OPCODE_EX2:
116	case RC_OPCODE_LG2:
117	case RC_OPCODE_RCP:
118	case RC_OPCODE_RSQ:
119	case RC_OPCODE_SIN:
120		*istranscendent = 1;
121		*needalpha = 1;
122		break;
123	case RC_OPCODE_DP4:
124		*needalpha = 1;
125		FALLTHROUGH;
126	case RC_OPCODE_DP3:
127		*needrgb = 1;
128		break;
129	default:
130		break;
131	}
132}
133
134static void src_uses(struct rc_src_register src, unsigned int * rgb,
135							unsigned int * alpha)
136{
137	int j;
138	for(j = 0; j < 4; ++j) {
139		unsigned int swz = GET_SWZ(src.Swizzle, j);
140		if (swz < 3)
141			*rgb = 1;
142		else if (swz < 4)
143			*alpha = 1;
144	}
145}
146
147/**
148 * Fill the given ALU instruction's opcodes and source operands into the given pair,
149 * if possible.
150 */
151static void set_pair_instruction(struct r300_fragment_program_compiler *c,
152	struct rc_pair_instruction * pair,
153	struct rc_sub_instruction * inst)
154{
155	int needrgb, needalpha, istranscendent;
156	const struct rc_opcode_info * opcode;
157	int i;
158
159	memset(pair, 0, sizeof(struct rc_pair_instruction));
160
161	classify_instruction(inst, &needrgb, &needalpha, &istranscendent);
162
163	if (needrgb) {
164		if (istranscendent)
165			pair->RGB.Opcode = RC_OPCODE_REPL_ALPHA;
166		else
167			pair->RGB.Opcode = inst->Opcode;
168		if (inst->SaturateMode == RC_SATURATE_ZERO_ONE)
169			pair->RGB.Saturate = 1;
170	}
171	if (needalpha) {
172		pair->Alpha.Opcode = inst->Opcode;
173		if (inst->SaturateMode == RC_SATURATE_ZERO_ONE)
174			pair->Alpha.Saturate = 1;
175	}
176
177	opcode = rc_get_opcode_info(inst->Opcode);
178
179	/* Presubtract handling:
180	 * We need to make sure that the values used by the presubtract
181	 * operation end up in src0 or src1. */
182	if(inst->PreSub.Opcode != RC_PRESUB_NONE) {
183		/* rc_pair_alloc_source() will fill in data for
184		 * pair->{RGB,ALPHA}.Src[RC_PAIR_PRESUB_SRC] */
185		int j;
186		for(j = 0; j < 3; j++) {
187			int src_regs;
188			if(inst->SrcReg[j].File != RC_FILE_PRESUB)
189				continue;
190
191			src_regs = rc_presubtract_src_reg_count(
192							inst->PreSub.Opcode);
193			for(i = 0; i < src_regs; i++) {
194				unsigned int rgb = 0;
195				unsigned int alpha = 0;
196				src_uses(inst->SrcReg[j], &rgb, &alpha);
197				if(rgb) {
198					pair->RGB.Src[i].File =
199						inst->PreSub.SrcReg[i].File;
200					pair->RGB.Src[i].Index =
201						inst->PreSub.SrcReg[i].Index;
202					pair->RGB.Src[i].Used = 1;
203				}
204				if(alpha) {
205					pair->Alpha.Src[i].File =
206						inst->PreSub.SrcReg[i].File;
207					pair->Alpha.Src[i].Index =
208						inst->PreSub.SrcReg[i].Index;
209					pair->Alpha.Src[i].Used = 1;
210				}
211			}
212		}
213	}
214
215	for(i = 0; i < opcode->NumSrcRegs; ++i) {
216		int source;
217		if (needrgb && !istranscendent) {
218			unsigned int srcrgb = 0;
219			unsigned int srcalpha = 0;
220			unsigned int srcmask = 0;
221			int j;
222			/* We don't care about the alpha channel here.  We only
223			 * want the part of the swizzle that writes to rgb,
224			 * since we are creating an rgb instruction. */
225			for(j = 0; j < 3; ++j) {
226				unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, j);
227
228				if (swz < RC_SWIZZLE_W)
229					srcrgb = 1;
230				else if (swz == RC_SWIZZLE_W)
231					srcalpha = 1;
232
233				if (swz < RC_SWIZZLE_UNUSED)
234					srcmask |= 1 << j;
235			}
236			source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
237							inst->SrcReg[i].File, inst->SrcReg[i].Index);
238			if (source < 0) {
239				rc_error(&c->Base, "Failed to translate "
240							"rgb instruction.\n");
241				return;
242			}
243			pair->RGB.Arg[i].Source = source;
244			pair->RGB.Arg[i].Swizzle =
245				rc_init_swizzle(inst->SrcReg[i].Swizzle, 3);
246			pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs;
247			pair->RGB.Arg[i].Negate = !!(srcmask & inst->SrcReg[i].Negate & (RC_MASK_X | RC_MASK_Y | RC_MASK_Z));
248		}
249		if (needalpha) {
250			unsigned int srcrgb = 0;
251			unsigned int srcalpha = 0;
252			unsigned int swz;
253			if (istranscendent) {
254				swz = rc_get_scalar_src_swz(inst->SrcReg[i].Swizzle);
255			} else {
256				swz = GET_SWZ(inst->SrcReg[i].Swizzle, 3);
257			}
258
259			if (swz < 3)
260				srcrgb = 1;
261			else if (swz < 4)
262				srcalpha = 1;
263			source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
264							inst->SrcReg[i].File, inst->SrcReg[i].Index);
265			if (source < 0) {
266				rc_error(&c->Base, "Failed to translate "
267							"alpha instruction.\n");
268				return;
269			}
270			pair->Alpha.Arg[i].Source = source;
271			pair->Alpha.Arg[i].Swizzle = rc_init_swizzle(swz, 1);
272			pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs;
273
274			if (istranscendent) {
275				pair->Alpha.Arg[i].Negate =
276					!!(inst->SrcReg[i].Negate &
277							inst->DstReg.WriteMask);
278			} else {
279				pair->Alpha.Arg[i].Negate =
280					!!(inst->SrcReg[i].Negate & RC_MASK_W);
281			}
282		}
283	}
284
285	/* Destination handling */
286	if (inst->DstReg.File == RC_FILE_OUTPUT) {
287        if (inst->DstReg.Index == c->OutputDepth) {
288            pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
289        } else {
290            for (i = 0; i < 4; i++) {
291                if (inst->DstReg.Index == c->OutputColor[i]) {
292                    pair->RGB.Target = i;
293                    pair->Alpha.Target = i;
294                    pair->RGB.OutputWriteMask |=
295                        inst->DstReg.WriteMask & RC_MASK_XYZ;
296                    pair->Alpha.OutputWriteMask |=
297                        GET_BIT(inst->DstReg.WriteMask, 3);
298                    break;
299                }
300            }
301        }
302	} else {
303		if (needrgb) {
304			pair->RGB.DestIndex = inst->DstReg.Index;
305			pair->RGB.WriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ;
306		}
307
308		if (needalpha) {
309			pair->Alpha.WriteMask |= (GET_BIT(inst->DstReg.WriteMask, 3) << 3);
310			if (pair->Alpha.WriteMask) {
311				pair->Alpha.DestIndex = inst->DstReg.Index;
312			}
313		}
314	}
315
316	if (needrgb) {
317		pair->RGB.Omod = inst->Omod;
318	}
319	if (needalpha) {
320		pair->Alpha.Omod = inst->Omod;
321	}
322
323	if (inst->WriteALUResult) {
324		pair->WriteALUResult = inst->WriteALUResult;
325		pair->ALUResultCompare = inst->ALUResultCompare;
326	}
327}
328
329
330static void check_opcode_support(struct r300_fragment_program_compiler *c,
331				 struct rc_sub_instruction *inst)
332{
333	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
334
335	if (opcode->HasDstReg) {
336		if (inst->SaturateMode == RC_SATURATE_MINUS_PLUS_ONE) {
337			rc_error(&c->Base, "Fragment program does not support signed Saturate.\n");
338			return;
339		}
340	}
341
342	for (unsigned i = 0; i < opcode->NumSrcRegs; i++) {
343		if (inst->SrcReg[i].RelAddr) {
344			rc_error(&c->Base, "Fragment program does not support relative addressing "
345				 " of source operands.\n");
346			return;
347		}
348	}
349}
350
351
352/**
353 * Translate all ALU instructions into corresponding pair instructions,
354 * performing no other changes.
355 */
356void rc_pair_translate(struct radeon_compiler *cc, void *user)
357{
358	struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
359
360	for(struct rc_instruction * inst = c->Base.Program.Instructions.Next;
361	    inst != &c->Base.Program.Instructions;
362	    inst = inst->Next) {
363		const struct rc_opcode_info * opcode;
364		struct rc_sub_instruction copy;
365
366		if (inst->Type != RC_INSTRUCTION_NORMAL)
367			continue;
368
369		opcode = rc_get_opcode_info(inst->U.I.Opcode);
370
371		if (opcode->HasTexture || opcode->IsFlowControl || opcode->Opcode == RC_OPCODE_KIL)
372			continue;
373
374		copy = inst->U.I;
375
376		check_opcode_support(c, &copy);
377
378		final_rewrite(&copy);
379		inst->Type = RC_INSTRUCTION_PAIR;
380		set_pair_instruction(c, &inst->U.P, &copy);
381	}
382}
383