1/*
2 * Copyright (C) 2018 Jonathan Marek <jonathan@marek.ca>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 *    Jonathan Marek <jonathan@marek.ca>
25 */
26
27#include "ir2_private.h"
28
29static bool is_mov(struct ir2_instr *instr)
30{
31	return instr->type == IR2_ALU && instr->alu.vector_opc == MAXv &&
32		instr->src_count == 1;
33}
34
35static void src_combine(struct ir2_src *src, struct ir2_src b)
36{
37	src->num = b.num;
38	src->type = b.type;
39	src->swizzle = swiz_merge(b.swizzle, src->swizzle);
40	if (!src->abs) /* if we have abs we don't care about previous negate */
41		src->negate ^= b.negate;
42	src->abs |= b.abs;
43}
44
45/* cp_src: replace src regs when they refer to a mov instruction
46 * example:
47 *	ALU:      MAXv    R7 = C7, C7
48 *	ALU:      MULADDv R7 = R7, R10, R0.xxxx
49 * becomes:
50 *	ALU:      MULADDv R7 = C7, R10, R0.xxxx
51 */
52void cp_src(struct ir2_context *ctx)
53{
54	struct ir2_instr *p;
55
56	ir2_foreach_instr(instr, ctx) {
57		ir2_foreach_src(src, instr) {
58			/* loop to replace recursively */
59			do {
60				if (src->type != IR2_SRC_SSA)
61					break;
62
63				p = &ctx->instr[src->num];
64				/* don't work across blocks to avoid possible issues */
65				if (p->block_idx != instr->block_idx)
66					break;
67
68				if (!is_mov(p))
69					break;
70
71				/* cant apply abs to const src, const src only for alu */
72				if (p->src[0].type == IR2_SRC_CONST &&
73					(src->abs || instr->type != IR2_ALU))
74					break;
75
76				src_combine(src, p->src[0]);
77			} while (1);
78		}
79	}
80}
81
82/* cp_export: replace mov to export when possible
83 * in the cp_src pass we bypass any mov instructions related
84 * to the src registers, but for exports for need something different
85 * example:
86 *	ALU:      MAXv    R3.x___ = C9.x???, C9.x???
87 *	ALU:      MAXv    R3._y__ = R0.?x??, C8.?x??
88 *	ALU:      MAXv    export0 = R3.yyyx, R3.yyyx
89 * becomes:
90 *	ALU:      MAXv    export0.___w = C9.???x, C9.???x
91 *	ALU:      MAXv    export0.xyz_ = R0.xxx?, C8.xxx?
92 *
93 */
94void cp_export(struct ir2_context *ctx)
95{
96	struct ir2_instr *c[4], *ins[4];
97	struct ir2_src *src;
98	struct ir2_reg *reg;
99	unsigned ncomp;
100
101	ir2_foreach_instr(instr, ctx) {
102		if (!is_export(instr)) /* TODO */
103			continue;
104
105		if (!is_mov(instr))
106			continue;
107
108		src = &instr->src[0];
109
110		if (src->negate || src->abs) /* TODO handle these cases */
111			continue;
112
113		if (src->type == IR2_SRC_INPUT || src->type == IR2_SRC_CONST)
114			continue;
115
116		reg = get_reg_src(ctx, src);
117		ncomp = dst_ncomp(instr);
118
119		unsigned reswiz[4] = {};
120		unsigned num_instr = 0;
121
122		/* fill array c with pointers to instrs that write each component */
123		if (src->type == IR2_SRC_SSA) {
124			struct ir2_instr *instr = &ctx->instr[src->num];
125
126			if (instr->type != IR2_ALU)
127				continue;
128
129			for (int i = 0; i < ncomp; i++)
130				c[i] = instr;
131
132			ins[num_instr++] = instr;
133			reswiz[0] = src->swizzle;
134		} else {
135			bool ok = true;
136			unsigned write_mask = 0;
137
138			ir2_foreach_instr(instr, ctx) {
139				if (instr->is_ssa || instr->reg != reg)
140					continue;
141
142				/* set by non-ALU */
143				if (instr->type != IR2_ALU) {
144					ok = false;
145					break;
146				}
147
148				/* component written more than once */
149				if (write_mask & instr->alu.write_mask) {
150					ok = false;
151					break;
152				}
153
154				write_mask |= instr->alu.write_mask;
155
156				/* src pointers for components */
157				for (int i = 0, j = 0; i < 4; i++) {
158					unsigned k = swiz_get(src->swizzle, i);
159					if (instr->alu.write_mask & 1 << k) {
160						c[i] = instr;
161
162						/* reswiz = compressed src->swizzle */
163						unsigned x = 0;
164						for (int i = 0; i < k; i++)
165							x += !!(instr->alu.write_mask & 1 << i);
166
167						assert(src->swizzle || x == j);
168						reswiz[num_instr] |= swiz_set(x, j++);
169					}
170				}
171				ins[num_instr++] = instr;
172			}
173			if (!ok)
174				continue;
175		}
176
177		bool redirect = true;
178
179		/* must all be in same block */
180		for (int i = 0; i < ncomp; i++)
181			redirect &= (c[i]->block_idx == instr->block_idx);
182
183		/* no other instr using the value */
184		ir2_foreach_instr(p, ctx) {
185			if (p == instr)
186				continue;
187			ir2_foreach_src(src, p)
188				redirect &= reg != get_reg_src(ctx, src);
189		}
190
191		if (!redirect)
192			continue;
193
194		/* redirect the instructions writing to the register */
195		for (int i = 0; i < num_instr; i++) {
196			struct ir2_instr *p = ins[i];
197
198			p->alu.export = instr->alu.export;
199			p->alu.write_mask = 0;
200			p->is_ssa = true;
201			p->ssa.ncomp = 0;
202			memset(p->ssa.comp, 0, sizeof(p->ssa.comp));
203
204			switch (instr->alu.vector_opc) {
205			case PRED_SETE_PUSHv ... PRED_SETGTE_PUSHv:
206			case DOT2ADDv:
207			case DOT3v:
208			case DOT4v:
209			case CUBEv:
210				continue;
211			default:
212				break;
213			}
214			ir2_foreach_src(s, p)
215				swiz_merge_p(&s->swizzle, reswiz[i]);
216		}
217
218		for (int i = 0; i < ncomp; i++) {
219			c[i]->alu.write_mask |= (1 << i);
220			c[i]->ssa.ncomp++;
221		}
222		instr->type = IR2_NONE;
223		instr->need_emit = false;
224	}
225}
226