/*
 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */
26b8e80941Smrg
27b8e80941Smrg#include "ir3.h"
28b8e80941Smrg
/*
 * Find/group instruction neighbors:
 */
32b8e80941Smrg
/* The same group_n() logic has to run over two differently shaped
 * containers: shader inputs/outputs (a plain instr[] array) and fanin
 * nodes (where each element is reached through reg->instr).  This pair
 * of callbacks hides that difference from group_n().
 */
struct group_ops {
	/* return the instruction currently occupying 'slot' of 'container': */
	struct ir3_instruction *(*get)(void *container, int slot);
	/* re-route 'slot' of 'container' through a mov involving 'instr': */
	void (*insert_mov)(void *container, int slot, struct ir3_instruction *instr);
};
41b8e80941Smrg
/* group_ops 'get' callback for a plain array of instruction pointers:
 * 'arr' is treated as a struct ir3_instruction *[] and element 'idx'
 * is returned as-is (it may be NULL).
 */
static struct ir3_instruction *arr_get(void *arr, int idx)
{
	struct ir3_instruction **slots = arr;

	return slots[idx];
}
46b8e80941Smrgstatic void arr_insert_mov_out(void *arr, int idx, struct ir3_instruction *instr)
47b8e80941Smrg{
48b8e80941Smrg	((struct ir3_instruction **)arr)[idx] =
49b8e80941Smrg			ir3_MOV(instr->block, instr, TYPE_F32);
50b8e80941Smrg}
51b8e80941Smrgstatic void arr_insert_mov_in(void *arr, int idx, struct ir3_instruction *instr)
52b8e80941Smrg{
53b8e80941Smrg	/* so, we can't insert a mov in front of a meta:in.. and the downstream
54b8e80941Smrg	 * instruction already has a pointer to 'instr'.  So we cheat a bit and
55b8e80941Smrg	 * morph the meta:in instruction into a mov and insert a new meta:in
56b8e80941Smrg	 * in front.
57b8e80941Smrg	 */
58b8e80941Smrg	struct ir3_instruction *in;
59b8e80941Smrg
60b8e80941Smrg	debug_assert(instr->regs_count == 1);
61b8e80941Smrg
62b8e80941Smrg	in = ir3_instr_create(instr->block, OPC_META_INPUT);
63b8e80941Smrg	in->inout.block = instr->block;
64b8e80941Smrg	ir3_reg_create(in, instr->regs[0]->num, 0);
65b8e80941Smrg
66b8e80941Smrg	/* create src reg for meta:in and fixup to now be a mov: */
67b8e80941Smrg	ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = in;
68b8e80941Smrg	instr->opc = OPC_MOV;
69b8e80941Smrg	instr->cat1.src_type = TYPE_F32;
70b8e80941Smrg	instr->cat1.dst_type = TYPE_F32;
71b8e80941Smrg
72b8e80941Smrg	((struct ir3_instruction **)arr)[idx] = in;
73b8e80941Smrg}
74b8e80941Smrgstatic struct group_ops arr_ops_out = { arr_get, arr_insert_mov_out };
75b8e80941Smrgstatic struct group_ops arr_ops_in = { arr_get, arr_insert_mov_in };
76b8e80941Smrg
77b8e80941Smrgstatic struct ir3_instruction *instr_get(void *arr, int idx)
78b8e80941Smrg{
79b8e80941Smrg	return ssa(((struct ir3_instruction *)arr)->regs[idx+1]);
80b8e80941Smrg}
81b8e80941Smrgstatic void
82b8e80941Smrginstr_insert_mov(void *arr, int idx, struct ir3_instruction *instr)
83b8e80941Smrg{
84b8e80941Smrg	((struct ir3_instruction *)arr)->regs[idx+1]->instr =
85b8e80941Smrg			ir3_MOV(instr->block, instr, TYPE_F32);
86b8e80941Smrg}
87b8e80941Smrgstatic struct group_ops instr_ops = { instr_get, instr_insert_mov };
88b8e80941Smrg
89b8e80941Smrg/* verify that cur != instr, but cur is also not in instr's neighbor-list: */
90b8e80941Smrgstatic bool
91b8e80941Smrgin_neighbor_list(struct ir3_instruction *instr, struct ir3_instruction *cur, int pos)
92b8e80941Smrg{
93b8e80941Smrg	int idx = 0;
94b8e80941Smrg
95b8e80941Smrg	if (!instr)
96b8e80941Smrg		return false;
97b8e80941Smrg
98b8e80941Smrg	if (instr == cur)
99b8e80941Smrg		return true;
100b8e80941Smrg
101b8e80941Smrg	for (instr = ir3_neighbor_first(instr); instr; instr = instr->cp.right)
102b8e80941Smrg		if ((idx++ != pos) && (instr == cur))
103b8e80941Smrg			return true;
104b8e80941Smrg
105b8e80941Smrg	return false;
106b8e80941Smrg}
107b8e80941Smrg
108b8e80941Smrgstatic void
109b8e80941Smrggroup_n(struct group_ops *ops, void *arr, unsigned n)
110b8e80941Smrg{
111b8e80941Smrg	unsigned i, j;
112b8e80941Smrg
113b8e80941Smrg	/* first pass, figure out what has conflicts and needs a mov
114b8e80941Smrg	 * inserted.  Do this up front, before starting to setup
115b8e80941Smrg	 * left/right neighbor pointers.  Trying to do it in a single
116b8e80941Smrg	 * pass could result in a situation where we can't even setup
117b8e80941Smrg	 * the mov's right neighbor ptr if the next instr also needs
118b8e80941Smrg	 * a mov.
119b8e80941Smrg	 */
120b8e80941Smrgrestart:
121b8e80941Smrg	for (i = 0; i < n; i++) {
122b8e80941Smrg		struct ir3_instruction *instr = ops->get(arr, i);
123b8e80941Smrg		if (instr) {
124b8e80941Smrg			struct ir3_instruction *left = (i > 0) ? ops->get(arr, i - 1) : NULL;
125b8e80941Smrg			struct ir3_instruction *right = (i < (n-1)) ? ops->get(arr, i + 1) : NULL;
126b8e80941Smrg			bool conflict;
127b8e80941Smrg
128b8e80941Smrg			/* check for left/right neighbor conflicts: */
129b8e80941Smrg			conflict = conflicts(instr->cp.left, left) ||
130b8e80941Smrg				conflicts(instr->cp.right, right);
131b8e80941Smrg
132b8e80941Smrg			/* Mixing array elements and higher register classes
133b8e80941Smrg			 * (ie. groups) doesn't really work out in RA.  See:
134b8e80941Smrg			 *
135b8e80941Smrg			 * https://trello.com/c/DqeDkeVf/156-bug-with-stk-70frag
136b8e80941Smrg			 */
137b8e80941Smrg			if (instr->regs[0]->flags & IR3_REG_ARRAY)
138b8e80941Smrg				conflict = true;
139b8e80941Smrg
140b8e80941Smrg			/* we also can't have an instr twice in the group: */
141b8e80941Smrg			for (j = i + 1; (j < n) && !conflict; j++)
142b8e80941Smrg				if (in_neighbor_list(ops->get(arr, j), instr, i))
143b8e80941Smrg					conflict = true;
144b8e80941Smrg
145b8e80941Smrg			if (conflict) {
146b8e80941Smrg				ops->insert_mov(arr, i, instr);
147b8e80941Smrg				/* inserting the mov may have caused a conflict
148b8e80941Smrg				 * against the previous:
149b8e80941Smrg				 */
150b8e80941Smrg				goto restart;
151b8e80941Smrg			}
152b8e80941Smrg		}
153b8e80941Smrg	}
154b8e80941Smrg
155b8e80941Smrg	/* second pass, now that we've inserted mov's, fixup left/right
156b8e80941Smrg	 * neighbors.  This is guaranteed to succeed, since by definition
157b8e80941Smrg	 * the newly inserted mov's cannot conflict with anything.
158b8e80941Smrg	 */
159b8e80941Smrg	for (i = 0; i < n; i++) {
160b8e80941Smrg		struct ir3_instruction *instr = ops->get(arr, i);
161b8e80941Smrg		if (instr) {
162b8e80941Smrg			struct ir3_instruction *left = (i > 0) ? ops->get(arr, i - 1) : NULL;
163b8e80941Smrg			struct ir3_instruction *right = (i < (n-1)) ? ops->get(arr, i + 1) : NULL;
164b8e80941Smrg
165b8e80941Smrg			debug_assert(!conflicts(instr->cp.left, left));
166b8e80941Smrg			if (left) {
167b8e80941Smrg				instr->cp.left_cnt++;
168b8e80941Smrg				instr->cp.left = left;
169b8e80941Smrg			}
170b8e80941Smrg
171b8e80941Smrg			debug_assert(!conflicts(instr->cp.right, right));
172b8e80941Smrg			if (right) {
173b8e80941Smrg				instr->cp.right_cnt++;
174b8e80941Smrg				instr->cp.right = right;
175b8e80941Smrg			}
176b8e80941Smrg		}
177b8e80941Smrg	}
178b8e80941Smrg}
179b8e80941Smrg
180b8e80941Smrgstatic void
181b8e80941Smrginstr_find_neighbors(struct ir3_instruction *instr)
182b8e80941Smrg{
183b8e80941Smrg	struct ir3_instruction *src;
184b8e80941Smrg
185b8e80941Smrg	if (ir3_instr_check_mark(instr))
186b8e80941Smrg		return;
187b8e80941Smrg
188b8e80941Smrg	if (instr->opc == OPC_META_FI)
189b8e80941Smrg		group_n(&instr_ops, instr, instr->regs_count - 1);
190b8e80941Smrg
191b8e80941Smrg	foreach_ssa_src(src, instr)
192b8e80941Smrg		instr_find_neighbors(src);
193b8e80941Smrg}
194b8e80941Smrg
195b8e80941Smrg/* a bit of sadness.. we can't have "holes" in inputs from PoV of
196b8e80941Smrg * register assignment, they still need to be grouped together.  So
197b8e80941Smrg * we need to insert dummy/padding instruction for grouping, and
198b8e80941Smrg * then take it back out again before anyone notices.
199b8e80941Smrg */
200b8e80941Smrgstatic void
201b8e80941Smrgpad_and_group_input(struct ir3_instruction **input, unsigned n)
202b8e80941Smrg{
203b8e80941Smrg	int i, mask = 0;
204b8e80941Smrg	struct ir3_block *block = NULL;
205b8e80941Smrg
206b8e80941Smrg	for (i = n - 1; i >= 0; i--) {
207b8e80941Smrg		struct ir3_instruction *instr = input[i];
208b8e80941Smrg		if (instr) {
209b8e80941Smrg			block = instr->block;
210b8e80941Smrg		} else if (block) {
211b8e80941Smrg			instr = ir3_NOP(block);
212b8e80941Smrg			ir3_reg_create(instr, 0, IR3_REG_SSA);    /* dummy dst */
213b8e80941Smrg			input[i] = instr;
214b8e80941Smrg			mask |= (1 << i);
215b8e80941Smrg		}
216b8e80941Smrg	}
217b8e80941Smrg
218b8e80941Smrg	group_n(&arr_ops_in, input, n);
219b8e80941Smrg
220b8e80941Smrg	for (i = 0; i < n; i++) {
221b8e80941Smrg		if (mask & (1 << i))
222b8e80941Smrg			input[i] = NULL;
223b8e80941Smrg	}
224b8e80941Smrg}
225b8e80941Smrg
226b8e80941Smrgstatic void
227b8e80941Smrgfind_neighbors(struct ir3 *ir)
228b8e80941Smrg{
229b8e80941Smrg	unsigned i;
230b8e80941Smrg
231b8e80941Smrg	/* shader inputs/outputs themselves must be contiguous as well:
232b8e80941Smrg	 *
233b8e80941Smrg	 * NOTE: group inputs first, since we only insert mov's
234b8e80941Smrg	 * *before* the conflicted instr (and that would go badly
235b8e80941Smrg	 * for inputs).  By doing inputs first, we should never
236b8e80941Smrg	 * have a conflict on inputs.. pushing any conflict to
237b8e80941Smrg	 * resolve to the outputs, for stuff like:
238b8e80941Smrg	 *
239b8e80941Smrg	 *     MOV OUT[n], IN[m].wzyx
240b8e80941Smrg	 *
241b8e80941Smrg	 * NOTE: we assume here inputs/outputs are grouped in vec4.
242b8e80941Smrg	 * This logic won't quite cut it if we don't align smaller
243b8e80941Smrg	 * on vec4 boundaries
244b8e80941Smrg	 */
245b8e80941Smrg	for (i = 0; i < ir->ninputs; i += 4)
246b8e80941Smrg		pad_and_group_input(&ir->inputs[i], 4);
247b8e80941Smrg	for (i = 0; i < ir->noutputs; i += 4)
248b8e80941Smrg		group_n(&arr_ops_out, &ir->outputs[i], 4);
249b8e80941Smrg
250b8e80941Smrg	for (i = 0; i < ir->noutputs; i++) {
251b8e80941Smrg		if (ir->outputs[i]) {
252b8e80941Smrg			struct ir3_instruction *instr = ir->outputs[i];
253b8e80941Smrg			instr_find_neighbors(instr);
254b8e80941Smrg		}
255b8e80941Smrg	}
256b8e80941Smrg
257b8e80941Smrg	list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
258b8e80941Smrg		for (i = 0; i < block->keeps_count; i++) {
259b8e80941Smrg			struct ir3_instruction *instr = block->keeps[i];
260b8e80941Smrg			instr_find_neighbors(instr);
261b8e80941Smrg		}
262b8e80941Smrg
263b8e80941Smrg		/* We also need to account for if-condition: */
264b8e80941Smrg		if (block->condition)
265b8e80941Smrg			instr_find_neighbors(block->condition);
266b8e80941Smrg	}
267b8e80941Smrg}
268b8e80941Smrg
/* Entry point of the grouping pass: clear the marks on 'ir' (the
 * recursive walk relies on ir3_instr_check_mark() to stop at
 * instructions it has already handled), then build the neighbor
 * groups.
 */
void
ir3_group(struct ir3 *ir)
{
	ir3_clear_mark(ir);
	find_neighbors(ir);
}
275