1/*
2 * Copyright (C) 2009 Nicolai Haehnle.
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28#include "radeon_dataflow.h"
29
30#include "radeon_compiler.h"
31
32
33struct updatemask_state {
34	unsigned char Output[RC_REGISTER_MAX_INDEX];
35	unsigned char Temporary[RC_REGISTER_MAX_INDEX];
36	unsigned char Address;
37	unsigned char Special[RC_NUM_SPECIAL_REGISTERS];
38};
39
40struct instruction_state {
41	unsigned char WriteMask:4;
42	unsigned char WriteALUResult:1;
43	unsigned char SrcReg[3];
44};
45
46struct loopinfo {
47	struct updatemask_state * Breaks;
48	unsigned int BreakCount;
49	unsigned int BreaksReserved;
50};
51
52struct branchinfo {
53	unsigned int HaveElse:1;
54
55	struct updatemask_state StoreEndif;
56	struct updatemask_state StoreElse;
57};
58
59struct deadcode_state {
60	struct radeon_compiler * C;
61	struct instruction_state * Instructions;
62
63	struct updatemask_state R;
64
65	struct branchinfo * BranchStack;
66	unsigned int BranchStackSize;
67	unsigned int BranchStackReserved;
68
69	struct loopinfo * LoopStack;
70	unsigned int LoopStackSize;
71	unsigned int LoopStackReserved;
72};
73
74
75static void or_updatemasks(
76	struct updatemask_state * dst,
77	struct updatemask_state * a,
78	struct updatemask_state * b)
79{
80	for(unsigned int i = 0; i < RC_REGISTER_MAX_INDEX; ++i) {
81		dst->Output[i] = a->Output[i] | b->Output[i];
82		dst->Temporary[i] = a->Temporary[i] | b->Temporary[i];
83	}
84
85	for(unsigned int i = 0; i < RC_NUM_SPECIAL_REGISTERS; ++i)
86		dst->Special[i] = a->Special[i] | b->Special[i];
87
88	dst->Address = a->Address | b->Address;
89}
90
91static void push_break(struct deadcode_state *s)
92{
93	struct loopinfo * loop = &s->LoopStack[s->LoopStackSize - 1];
94	memory_pool_array_reserve(&s->C->Pool, struct updatemask_state,
95		loop->Breaks, loop->BreakCount, loop->BreaksReserved, 1);
96
97	memcpy(&loop->Breaks[loop->BreakCount++], &s->R, sizeof(s->R));
98}
99
100static void push_loop(struct deadcode_state * s)
101{
102	memory_pool_array_reserve(&s->C->Pool, struct loopinfo, s->LoopStack,
103			s->LoopStackSize, s->LoopStackReserved, 1);
104	memset(&s->LoopStack[s->LoopStackSize++], 0, sizeof(struct loopinfo));
105}
106
107static void push_branch(struct deadcode_state * s)
108{
109	struct branchinfo * branch;
110
111	memory_pool_array_reserve(&s->C->Pool, struct branchinfo, s->BranchStack,
112			s->BranchStackSize, s->BranchStackReserved, 1);
113
114	branch = &s->BranchStack[s->BranchStackSize++];
115	branch->HaveElse = 0;
116	memcpy(&branch->StoreEndif, &s->R, sizeof(s->R));
117}
118
119static unsigned char * get_used_ptr(struct deadcode_state *s, rc_register_file file, unsigned int index)
120{
121	if (file == RC_FILE_OUTPUT || file == RC_FILE_TEMPORARY) {
122		if (index >= RC_REGISTER_MAX_INDEX) {
123			rc_error(s->C, "%s: index %i is out of bounds for file %i\n", __FUNCTION__, index, file);
124			return 0;
125		}
126
127		if (file == RC_FILE_OUTPUT)
128			return &s->R.Output[index];
129		else
130			return &s->R.Temporary[index];
131	} else if (file == RC_FILE_ADDRESS) {
132		return &s->R.Address;
133	} else if (file == RC_FILE_SPECIAL) {
134		if (index >= RC_NUM_SPECIAL_REGISTERS) {
135			rc_error(s->C, "%s: special file index %i out of bounds\n", __FUNCTION__, index);
136			return 0;
137		}
138
139		return &s->R.Special[index];
140	}
141
142	return 0;
143}
144
145static void mark_used(struct deadcode_state * s, rc_register_file file, unsigned int index, unsigned int mask)
146{
147	unsigned char * pused = get_used_ptr(s, file, index);
148	if (pused)
149		*pused |= mask;
150}
151
152static void update_instruction(struct deadcode_state * s, struct rc_instruction * inst)
153{
154	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
155	struct instruction_state * insts = &s->Instructions[inst->IP];
156	unsigned int usedmask = 0;
157	unsigned int srcmasks[3];
158
159	if (opcode->HasDstReg) {
160		unsigned char * pused = get_used_ptr(s, inst->U.I.DstReg.File, inst->U.I.DstReg.Index);
161		if (pused) {
162			usedmask = *pused & inst->U.I.DstReg.WriteMask;
163			*pused &= ~usedmask;
164		}
165	}
166
167	insts->WriteMask |= usedmask;
168
169	if (inst->U.I.WriteALUResult) {
170		unsigned char * pused = get_used_ptr(s, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT);
171		if (pused && *pused) {
172			if (inst->U.I.WriteALUResult == RC_ALURESULT_X)
173				usedmask |= RC_MASK_X;
174			else if (inst->U.I.WriteALUResult == RC_ALURESULT_W)
175				usedmask |= RC_MASK_W;
176
177			*pused = 0;
178			insts->WriteALUResult = 1;
179		}
180	}
181
182	rc_compute_sources_for_writemask(inst, usedmask, srcmasks);
183
184	for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
185		unsigned int refmask = 0;
186		unsigned int newsrcmask = srcmasks[src] & ~insts->SrcReg[src];
187		insts->SrcReg[src] |= newsrcmask;
188
189		for(unsigned int chan = 0; chan < 4; ++chan) {
190			if (GET_BIT(newsrcmask, chan))
191				refmask |= 1 << GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan);
192		}
193
194		/* get rid of spurious bits from ZERO, ONE, etc. swizzles */
195		refmask &= RC_MASK_XYZW;
196
197		if (!refmask)
198			continue;
199
200		mark_used(s, inst->U.I.SrcReg[src].File, inst->U.I.SrcReg[src].Index, refmask);
201
202		if (inst->U.I.SrcReg[src].RelAddr)
203			mark_used(s, RC_FILE_ADDRESS, 0, RC_MASK_X);
204	}
205}
206
207static void mark_output_use(void * data, unsigned int index, unsigned int mask)
208{
209	struct deadcode_state * s = data;
210
211	mark_used(s, RC_FILE_OUTPUT, index, mask);
212}
213
214void rc_dataflow_deadcode(struct radeon_compiler * c, void *user)
215{
216	struct deadcode_state s;
217	unsigned int nr_instructions;
218	rc_dataflow_mark_outputs_fn dce = (rc_dataflow_mark_outputs_fn)user;
219	unsigned int ip;
220
221	memset(&s, 0, sizeof(s));
222	s.C = c;
223
224	nr_instructions = rc_recompute_ips(c);
225	s.Instructions = memory_pool_malloc(&c->Pool, sizeof(struct instruction_state)*nr_instructions);
226	memset(s.Instructions, 0, sizeof(struct instruction_state)*nr_instructions);
227
228	dce(c, &s, &mark_output_use);
229
230	for(struct rc_instruction * inst = c->Program.Instructions.Prev;
231	    inst != &c->Program.Instructions;
232	    inst = inst->Prev) {
233		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
234
235		switch(opcode->Opcode){
236		/* Mark all sources in the loop body as used before doing
237		 * normal deadcode analysis.  This is probably not optimal.
238		 */
239		case RC_OPCODE_ENDLOOP:
240		{
241			int endloops = 1;
242			struct rc_instruction *ptr;
243			for(ptr = inst->Prev; endloops > 0; ptr = ptr->Prev){
244				opcode = rc_get_opcode_info(ptr->U.I.Opcode);
245				if(ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){
246					endloops--;
247					continue;
248				}
249				if(ptr->U.I.Opcode == RC_OPCODE_ENDLOOP){
250					endloops++;
251					continue;
252				}
253				if(opcode->HasDstReg){
254					int src = 0;
255					unsigned int srcmasks[3];
256					unsigned int writemask = ptr->U.I.DstReg.WriteMask;
257					if (ptr->U.I.WriteALUResult == RC_ALURESULT_X)
258						writemask |= RC_MASK_X;
259					else if (ptr->U.I.WriteALUResult == RC_ALURESULT_W)
260						writemask |= RC_MASK_W;
261
262					rc_compute_sources_for_writemask(ptr, writemask, srcmasks);
263					for(src=0; src < opcode->NumSrcRegs; src++){
264						mark_used(&s,
265							ptr->U.I.SrcReg[src].File,
266							ptr->U.I.SrcReg[src].Index,
267							srcmasks[src]);
268					}
269				}
270			}
271			push_loop(&s);
272			break;
273		}
274		case RC_OPCODE_BRK:
275			push_break(&s);
276			break;
277		case RC_OPCODE_BGNLOOP:
278		{
279			unsigned int i;
280			struct loopinfo * loop = &s.LoopStack[s.LoopStackSize-1];
281			for(i = 0; i < loop->BreakCount; i++) {
282				or_updatemasks(&s.R, &s.R, &loop->Breaks[i]);
283			}
284			break;
285		}
286		case RC_OPCODE_CONT:
287			break;
288		case RC_OPCODE_ENDIF:
289			push_branch(&s);
290			break;
291		default:
292			if (opcode->IsFlowControl && s.BranchStackSize) {
293				struct branchinfo * branch = &s.BranchStack[s.BranchStackSize-1];
294				if (opcode->Opcode == RC_OPCODE_IF) {
295					or_updatemasks(&s.R,
296							&s.R,
297							branch->HaveElse ? &branch->StoreElse : &branch->StoreEndif);
298
299					s.BranchStackSize--;
300				} else if (opcode->Opcode == RC_OPCODE_ELSE) {
301					if (branch->HaveElse) {
302						rc_error(c, "%s: Multiple ELSE for one IF/ENDIF\n", __FUNCTION__);
303					} else {
304						memcpy(&branch->StoreElse, &s.R, sizeof(s.R));
305						memcpy(&s.R, &branch->StoreEndif, sizeof(s.R));
306						branch->HaveElse = 1;
307					}
308				} else {
309					rc_error(c, "%s: Unhandled control flow instruction %s\n", __FUNCTION__, opcode->Name);
310				}
311			}
312		}
313
314		update_instruction(&s, inst);
315	}
316
317	ip = 0;
318	for(struct rc_instruction * inst = c->Program.Instructions.Next;
319	    inst != &c->Program.Instructions;
320	    inst = inst->Next, ++ip) {
321		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
322		int dead = 1;
323		unsigned int srcmasks[3];
324		unsigned int usemask;
325
326		if (!opcode->HasDstReg) {
327			dead = 0;
328		} else {
329			inst->U.I.DstReg.WriteMask = s.Instructions[ip].WriteMask;
330			if (s.Instructions[ip].WriteMask)
331				dead = 0;
332
333			if (s.Instructions[ip].WriteALUResult)
334				dead = 0;
335			else
336				inst->U.I.WriteALUResult = RC_ALURESULT_NONE;
337		}
338
339		if (dead) {
340			struct rc_instruction * todelete = inst;
341			inst = inst->Prev;
342			rc_remove_instruction(todelete);
343			continue;
344		}
345
346		usemask = s.Instructions[ip].WriteMask;
347
348		if (inst->U.I.WriteALUResult == RC_ALURESULT_X)
349			usemask |= RC_MASK_X;
350		else if (inst->U.I.WriteALUResult == RC_ALURESULT_W)
351			usemask |= RC_MASK_W;
352
353		rc_compute_sources_for_writemask(inst, usemask, srcmasks);
354
355		for(unsigned int src = 0; src < 3; ++src) {
356			for(unsigned int chan = 0; chan < 4; ++chan) {
357				if (!GET_BIT(srcmasks[src], chan))
358					SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED);
359			}
360		}
361	}
362
363	rc_calculate_inputs_outputs(c);
364}
365