1/*
2 * Copyright 2012 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Author: Tom Stellard <thomas.stellard@amd.com>
24 */
25
26#include "radeon_compiler.h"
27#include "radeon_compiler_util.h"
28#include "radeon_dataflow.h"
29#include "radeon_program.h"
30#include "radeon_program_constants.h"
31
/* Bookkeeping carried across instructions while rc_vert_fc() walks the
 * program and lowers control flow to predicate instructions. */
struct vert_fc_state {
	/* Compiler whose instruction list is being rewritten; also the
	 * target of rc_error() reports. */
	struct radeon_compiler *C;
	/* Incremented for every IF and decremented for every ENDIF. */
	unsigned BranchDepth;
	/* Incremented for every BGNLOOP and decremented for every ENDLOOP. */
	unsigned LoopDepth;
	/* Compared against the loop depth limits in lower_bgnloop().
	 * NOTE(review): nothing in this file changes it after the initial
	 * zeroing in rc_vert_fc(). */
	unsigned LoopsReserved;
	/* Predicate register indices saved by lower_bgnloop() (slot
	 * LoopDepth) and restored by lower_endloop() (slot LoopDepth - 1). */
	int PredStack[R500_PVS_MAX_LOOP_DEPTH];
	/* Index of the temporary currently used as the predicate register,
	 * or -1 while none has been reserved. */
	int PredicateReg;
	/* Set by lower_if() when the IF is immediately followed by a BRK;
	 * cleared again when the ENDIF is processed. */
	unsigned InCFBreak;
};
41
42static void build_pred_src(
43	struct rc_src_register * src,
44	struct vert_fc_state * fc_state)
45{
46	src->Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED,
47					RC_SWIZZLE_UNUSED, RC_SWIZZLE_W);
48	src->File = RC_FILE_TEMPORARY;
49	src->Index = fc_state->PredicateReg;
50}
51
52static void build_pred_dst(
53	struct rc_dst_register * dst,
54	struct vert_fc_state * fc_state)
55{
56	dst->WriteMask = RC_MASK_W;
57	dst->File = RC_FILE_TEMPORARY;
58	dst->Index = fc_state->PredicateReg;
59}
60
61static void mark_write(void * userdata,	struct rc_instruction * inst,
62		rc_register_file file,	unsigned int index, unsigned int mask)
63{
64	unsigned int * writemasks = userdata;
65
66	if (file != RC_FILE_TEMPORARY)
67		return;
68
69	if (index >= R300_VS_MAX_TEMPS)
70		return;
71
72	writemasks[index] |= mask;
73}
74
75static int reserve_predicate_reg(struct vert_fc_state * fc_state)
76{
77	int i;
78	unsigned int writemasks[RC_REGISTER_MAX_INDEX];
79	struct rc_instruction * inst;
80	memset(writemasks, 0, sizeof(writemasks));
81	for(inst = fc_state->C->Program.Instructions.Next;
82				inst != &fc_state->C->Program.Instructions;
83				inst = inst->Next) {
84		rc_for_all_writes_mask(inst, mark_write, writemasks);
85	}
86
87	for(i = 0; i < fc_state->C->max_temp_regs; i++) {
88		/* Most of the control flow instructions only write the
89		 * W component of the Predicate Register, but
90		 * the docs say that ME_PRED_SET_CLR and
91		 * ME_PRED_SET_RESTORE write all components of the
92		 * register, so we must reserve a register that has
93		 * all its components free. */
94		if (!writemasks[i]) {
95			fc_state->PredicateReg = i;
96			break;
97		}
98	}
99	if (i == fc_state->C->max_temp_regs) {
100		rc_error(fc_state->C, "No free temporary to use for"
101				" predicate stack counter.\n");
102		return -1;
103	}
104	return 1;
105}
106
107static void lower_bgnloop(
108	struct rc_instruction * inst,
109	struct vert_fc_state * fc_state)
110{
111	struct rc_instruction * new_inst =
112			rc_insert_new_instruction(fc_state->C, inst->Prev);
113
114	if ((!fc_state->C->is_r500
115		&& fc_state->LoopsReserved >= R300_VS_MAX_LOOP_DEPTH)
116	     || fc_state->LoopsReserved >= R500_PVS_MAX_LOOP_DEPTH) {
117		rc_error(fc_state->C, "Loops are nested too deep.");
118		return;
119	}
120
121	if (fc_state->LoopDepth == 0 && fc_state->BranchDepth == 0) {
122		if (fc_state->PredicateReg == -1) {
123			if (reserve_predicate_reg(fc_state) == -1) {
124				return;
125			}
126		}
127
128		/* Initialize the predicate bit to true. */
129		new_inst->U.I.Opcode = RC_ME_PRED_SEQ;
130		build_pred_dst(&new_inst->U.I.DstReg, fc_state);
131		new_inst->U.I.SrcReg[0].Index = 0;
132		new_inst->U.I.SrcReg[0].File = RC_FILE_NONE;
133		new_inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
134	} else {
135		fc_state->PredStack[fc_state->LoopDepth] =
136						fc_state->PredicateReg;
137		/* Copy the current predicate value to this loop's
138		 * predicate register */
139
140		/* Use the old predicate value for src0 */
141		build_pred_src(&new_inst->U.I.SrcReg[0], fc_state);
142
143		/* Reserve this loop's predicate register */
144		if (reserve_predicate_reg(fc_state) == -1) {
145			return;
146		}
147
148		/* Copy the old predicate value to the new register */
149		new_inst->U.I.Opcode = RC_OPCODE_ADD;
150		build_pred_dst(&new_inst->U.I.DstReg, fc_state);
151		new_inst->U.I.SrcReg[1].Index = 0;
152		new_inst->U.I.SrcReg[1].File = RC_FILE_NONE;
153		new_inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_0000;
154	}
155
156}
157
158static void lower_brk(
159	struct rc_instruction * inst,
160	struct vert_fc_state * fc_state)
161{
162	if (fc_state->LoopDepth == 1) {
163		inst->U.I.Opcode = RC_OPCODE_RCP;
164		inst->U.I.DstReg.Pred = RC_PRED_INV;
165		inst->U.I.SrcReg[0].Index = 0;
166		inst->U.I.SrcReg[0].File = RC_FILE_NONE;
167		inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
168	} else {
169		inst->U.I.Opcode = RC_ME_PRED_SET_CLR;
170		inst->U.I.DstReg.Pred = RC_PRED_SET;
171	}
172
173	build_pred_dst(&inst->U.I.DstReg, fc_state);
174}
175
176static void lower_endloop(
177	struct rc_instruction * inst,
178	struct vert_fc_state * fc_state)
179{
180	struct rc_instruction * new_inst =
181			rc_insert_new_instruction(fc_state->C, inst);
182
183	new_inst->U.I.Opcode = RC_ME_PRED_SET_RESTORE;
184	build_pred_dst(&new_inst->U.I.DstReg, fc_state);
185	/* Restore the previous predicate register. */
186	fc_state->PredicateReg = fc_state->PredStack[fc_state->LoopDepth - 1];
187	build_pred_src(&new_inst->U.I.SrcReg[0], fc_state);
188}
189
190static void lower_if(
191	struct rc_instruction * inst,
192	struct vert_fc_state * fc_state)
193{
194	/* Reserve a temporary to use as our predicate stack counter, if we
195	 * don't already have one. */
196	if (fc_state->PredicateReg == -1) {
197		/* If we are inside a loop, the Predicate Register should
198		 * have already been defined. */
199		assert(fc_state->LoopDepth == 0);
200
201		if (reserve_predicate_reg(fc_state) == -1) {
202			return;
203		}
204	}
205
206	if (inst->Next->U.I.Opcode == RC_OPCODE_BRK) {
207		fc_state->InCFBreak = 1;
208	}
209	if ((fc_state->BranchDepth == 0 && fc_state->LoopDepth == 0)
210			|| (fc_state->LoopDepth == 1 && fc_state->InCFBreak)) {
211		if (fc_state->InCFBreak) {
212			inst->U.I.Opcode = RC_ME_PRED_SEQ;
213			inst->U.I.DstReg.Pred = RC_PRED_SET;
214		} else {
215			inst->U.I.Opcode = RC_ME_PRED_SNEQ;
216		}
217	} else {
218		unsigned swz;
219		inst->U.I.Opcode = RC_VE_PRED_SNEQ_PUSH;
220		memcpy(&inst->U.I.SrcReg[1], &inst->U.I.SrcReg[0],
221						sizeof(inst->U.I.SrcReg[1]));
222		swz = rc_get_scalar_src_swz(inst->U.I.SrcReg[1].Swizzle);
223		/* VE_PRED_SNEQ_PUSH needs to the branch condition to be in the
224		 * w component */
225		inst->U.I.SrcReg[1].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_UNUSED,
226				RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, swz);
227		build_pred_src(&inst->U.I.SrcReg[0], fc_state);
228	}
229	build_pred_dst(&inst->U.I.DstReg, fc_state);
230}
231
232void rc_vert_fc(struct radeon_compiler *c, void *user)
233{
234	struct rc_instruction * inst;
235	struct vert_fc_state fc_state;
236
237	memset(&fc_state, 0, sizeof(fc_state));
238	fc_state.PredicateReg = -1;
239	fc_state.C = c;
240
241	for(inst = c->Program.Instructions.Next;
242					inst != &c->Program.Instructions;
243					inst = inst->Next) {
244
245		switch (inst->U.I.Opcode) {
246
247		case RC_OPCODE_BGNLOOP:
248			lower_bgnloop(inst, &fc_state);
249			fc_state.LoopDepth++;
250			break;
251
252		case RC_OPCODE_BRK:
253			lower_brk(inst, &fc_state);
254			break;
255
256		case RC_OPCODE_ENDLOOP:
257			if (fc_state.BranchDepth != 0
258					|| fc_state.LoopDepth != 1) {
259				lower_endloop(inst, &fc_state);
260			}
261			fc_state.LoopDepth--;
262			/* Skip PRED_RESTORE */
263			inst = inst->Next;
264			break;
265		case RC_OPCODE_IF:
266			lower_if(inst, &fc_state);
267			fc_state.BranchDepth++;
268			break;
269
270		case RC_OPCODE_ELSE:
271			inst->U.I.Opcode = RC_ME_PRED_SET_INV;
272			build_pred_dst(&inst->U.I.DstReg, &fc_state);
273			build_pred_src(&inst->U.I.SrcReg[0], &fc_state);
274			break;
275
276		case RC_OPCODE_ENDIF:
277			if (fc_state.LoopDepth == 1 && fc_state.InCFBreak) {
278				struct rc_instruction * to_delete = inst;
279				inst = inst->Prev;
280				rc_remove_instruction(to_delete);
281				/* XXX: Delete the endif instruction */
282			} else {
283				inst->U.I.Opcode = RC_ME_PRED_SET_POP;
284				build_pred_dst(&inst->U.I.DstReg, &fc_state);
285				build_pred_src(&inst->U.I.SrcReg[0], &fc_state);
286			}
287			fc_state.InCFBreak = 0;
288			fc_state.BranchDepth--;
289			break;
290
291		default:
292			if (fc_state.BranchDepth || fc_state.LoopDepth) {
293				inst->U.I.DstReg.Pred = RC_PRED_SET;
294			}
295			break;
296		}
297
298		if (c->Error) {
299			return;
300		}
301	}
302}
303