1b8e80941Smrg/* Author(s):
2b8e80941Smrg *  Alyssa Rosenzweig
3b8e80941Smrg *
4b8e80941Smrg * Copyright (c) 2018 Alyssa Rosenzweig (alyssa@rosenzweig.io)
5b8e80941Smrg *
6b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a copy
7b8e80941Smrg * of this software and associated documentation files (the "Software"), to deal
8b8e80941Smrg * in the Software without restriction, including without limitation the rights
9b8e80941Smrg * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10b8e80941Smrg * copies of the Software, and to permit persons to whom the Software is
11b8e80941Smrg * furnished to do so, subject to the following conditions:
12b8e80941Smrg *
13b8e80941Smrg * The above copyright notice and this permission notice shall be included in
14b8e80941Smrg * all copies or substantial portions of the Software.
15b8e80941Smrg *
16b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19b8e80941Smrg * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21b8e80941Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22b8e80941Smrg * THE SOFTWARE.
23b8e80941Smrg */
24b8e80941Smrg
/* Opcode classification helpers. Each macro compares its argument against a
 * fixed set of opcodes. The argument is parenthesized so that any expression
 * may be passed, but it is evaluated more than once, so it must not have
 * side effects. */

/* True if op is midgard_op_store_vary_16 or midgard_op_store_vary_32 */
#define OP_IS_STORE_VARY(op) ( \
                (op) == midgard_op_store_vary_16 || \
                (op) == midgard_op_store_vary_32 \
        )

/* True if op is a varying store or midgard_op_store_cubemap_coords */
#define OP_IS_STORE(op) ( \
                OP_IS_STORE_VARY(op) || \
                (op) == midgard_op_store_cubemap_coords \
        )

/* True if op is one of the ALU moves, midgard_alu_op_fmov or
 * midgard_alu_op_imov */
#define OP_IS_MOVE(op) ( \
                (op) == midgard_alu_op_fmov || \
                (op) == midgard_alu_op_imov \
        )
39b8e80941Smrg
/* ALU control words are single bit fields with a lot of space. Each enable
 * flag is exactly one bit; the bit index is given next to its mask. */

#define ALU_ENAB_VEC_MUL    0x00020000 /* bit 17 */
#define ALU_ENAB_SCAL_ADD   0x00080000 /* bit 19 */
#define ALU_ENAB_VEC_ADD    0x00200000 /* bit 21 */
#define ALU_ENAB_SCAL_MUL   0x00800000 /* bit 23 */
#define ALU_ENAB_VEC_LUT    0x02000000 /* bit 25 */
#define ALU_ENAB_BR_COMPACT 0x04000000 /* bit 26 */
#define ALU_ENAB_BRANCH     0x08000000 /* bit 27 */
49b8e80941Smrg
/* Other opcode properties that don't conflict with the ALU_ENABs, non-ISA.
 * They occupy bits 0-3 of a props word, well below the ALU_ENAB bits. */

/* Denotes an opcode that takes a vector input with a fixed number of
 * channels, but outputs to only a single output channel, like dot products.
 * For these, to determine the effective mask, this quirk can be set. We have
 * an intentional off-by-one (a la MALI_POSITIVE), since 0-channel makes no
 * sense but we need to fit 4 channels in 2-bits. Similarly, 1-channel doesn't
 * make sense (since then why are we quirked?), so that corresponds to "no
 * count set".
 *
 * OP_CHANNEL_COUNT(c) encodes a channel count c as (c - 1) in bits 0-1.
 * GET_CHANNEL_COUNT(c) takes a props word c and yields the stored count, or
 * 0 when bits 0-1 are clear. Arguments are parenthesized so that compound
 * expressions such as (a | b) are masked as a whole; GET_CHANNEL_COUNT
 * evaluates its argument more than once, so avoid side effects. */

#define OP_CHANNEL_COUNT(c) (((c) - 1) << 0)
#define GET_CHANNEL_COUNT(c) (((c) & (0x3 << 0)) ? (((c) & (0x3 << 0)) + 1) : 0)

/* For instructions that take a single argument, normally the first argument
 * slot is used for the argument and the second slot is a dummy #0 constant.
 * However, there are exceptions: instructions like fmov store their argument
 * in the _second_ slot and store a dummy r24 in the first slot, designated by
 * QUIRK_FLIPPED_R24 */

#define QUIRK_FLIPPED_R24 (1 << 2)

/* Is the op commutative? */
#define OP_COMMUTES (1 << 3)
73b8e80941Smrg
/* Vector-independent shorthands for the ALU units; these numbers are
 * arbitrary and not from the ISA. Convert to the ALU_ENAB_* enable bits with
 * unit_enum_to_midgard */

#define UNIT_MUL 0
#define UNIT_ADD 1
#define UNIT_LUT 2
80b8e80941Smrg
/* 4-bit type tags, one value per kind of instruction word (texture,
 * load/store, ALU).
 * NOTE(review): the numeric suffix (_4, _8, _12, _16) presumably gives the
 * size of the tagged word -- confirm against the ISA notes */

#define TAG_TEXTURE_4 0x3
#define TAG_LOAD_STORE_4 0x5
#define TAG_ALU_4 0x8
#define TAG_ALU_8 0x9
#define TAG_ALU_12 0xA
#define TAG_ALU_16 0xB
89b8e80941Smrg
/* Special register aliases */

/* Number of work registers.
 * NOTE(review): presumably these are r0..r15 -- confirm */
#define MAX_WORK_REGISTERS 16

/* Uniforms begin at (REGISTER_UNIFORMS - uniform_count) */
#define REGISTER_UNIFORMS 24

/* Note that some aliases share a number: REGISTER_UNUSED equals
 * REGISTER_UNIFORMS (24), and REGISTER_CONSTANT equals REGISTER_VARYING_BASE
 * (26).
 * NOTE(review): which meaning applies presumably depends on the instruction
 * using the register -- confirm */
#define REGISTER_UNUSED 24
#define REGISTER_CONSTANT 26
#define REGISTER_VARYING_BASE 26
#define REGISTER_OFFSET 27
#define REGISTER_TEXTURE_BASE 28
#define REGISTER_SELECT 31
103b8e80941Smrg
/* SSA helper aliases to mimic the registers. UNUSED_0 encoded as an inline
 * constant. UNUSED_1 encoded as REGISTER_UNUSED */

#define SSA_UNUSED_0 0
#define SSA_UNUSED_1 (-2)

/* Fixed registers are encoded into an index as (reg + 1) << SSA_FIXED_SHIFT,
 * so register 0 maps to the nonzero value SSA_FIXED_MINIMUM.
 * SSA_REG_FROM_FIXED undoes the encoding. Arguments are parenthesized so
 * compound expressions (e.g. a | b) are encoded/decoded as a whole.
 * NOTE(review): presumably any index >= SSA_FIXED_MINIMUM is treated as a
 * fixed register by callers -- confirm */

#define SSA_FIXED_SHIFT 24
#define SSA_FIXED_REGISTER(reg) ((1 + (reg)) << SSA_FIXED_SHIFT)
#define SSA_REG_FROM_FIXED(reg) (((reg) >> SSA_FIXED_SHIFT) - 1)
#define SSA_FIXED_MINIMUM SSA_FIXED_REGISTER(0)
114b8e80941Smrg
/* Swizzle support.
 *
 * SWIZZLE(A, B, C, D) packs four 2-bit component selectors into one byte:
 * A in bits 0-1, B in bits 2-3, C in bits 4-5, D in bits 6-7.
 * SWIZZLE_FROM_ARRAY(r) does the same from the first four elements of r.
 * Arguments are parenthesized so that arbitrary expressions (including
 * pointer arithmetic for r) expand correctly. Selectors are not masked, so
 * each must already be in the range 0-3. */

#define SWIZZLE(A, B, C, D) (((D) << 6) | ((C) << 4) | ((B) << 2) | ((A) << 0))
#define SWIZZLE_FROM_ARRAY(r) SWIZZLE((r)[0], (r)[1], (r)[2], (r)[3])
#define COMPONENT_X 0x0
#define COMPONENT_Y 0x1
#define COMPONENT_Z 0x2
#define COMPONENT_W 0x3
123b8e80941Smrg
/* Value named for a load/store NOP. See ISA notes (not part of this file).
 * NOTE(review): presumably a load/store opcode used where no real operation
 * is wanted -- confirm */

#define LDST_NOP (3)
127b8e80941Smrg
/* There are five ALU units: VMUL, VADD, SMUL, SADD, LUT. A given opcode is
 * implemented on some subset of these units (or occasionally all of them).
 * The masks below name those subsets; the opcode table stores one such bit
 * mask per opcode, so the scheduler can figure out where to place the
 * instruction. */

/* One mask per unit, aliasing the corresponding ALU enable bit */
#define UNIT_VMUL (ALU_ENAB_VEC_MUL)
#define UNIT_SADD (ALU_ENAB_SCAL_ADD)
#define UNIT_VADD (ALU_ENAB_VEC_ADD)
#define UNIT_SMUL (ALU_ENAB_SCAL_MUL)
#define UNIT_VLUT (ALU_ENAB_VEC_LUT)

/* Grouped by operation: both multiply units, both add units */
#define UNITS_MUL (UNIT_SMUL | UNIT_VMUL)
#define UNITS_ADD (UNIT_SADD | UNIT_VADD)

/* Grouped by width: both scalar units, both vector units, and the vector
 * units together with the LUT unit */
#define UNITS_SCALAR (UNIT_SMUL | UNIT_SADD)
#define UNITS_VECTOR (UNIT_VADD | UNIT_VMUL)
#define UNITS_ANY_VECTOR (UNIT_VLUT | UNITS_VECTOR)

/* Everything except the LUT unit, and every unit */
#define UNITS_MOST (UNITS_SCALAR | UNITS_VECTOR)
#define UNITS_ALL (UNIT_VLUT | UNITS_MOST)
149b8e80941Smrg
150b8e80941Smrg/* Table of mapping opcodes to accompanying properties relevant to
151b8e80941Smrg * scheduling/emission/etc */
152b8e80941Smrg
153b8e80941Smrgstatic struct {
154b8e80941Smrg        const char *name;
155b8e80941Smrg        unsigned props;
156b8e80941Smrg} alu_opcode_props[256] = {
157b8e80941Smrg        [midgard_alu_op_fadd]		 = {"fadd", UNITS_ADD | OP_COMMUTES},
158b8e80941Smrg        [midgard_alu_op_fmul]		 = {"fmul", UNITS_MUL | UNIT_VLUT | OP_COMMUTES},
159b8e80941Smrg        [midgard_alu_op_fmin]		 = {"fmin", UNITS_MUL | UNITS_ADD | OP_COMMUTES},
160b8e80941Smrg        [midgard_alu_op_fmax]		 = {"fmax", UNITS_MUL | UNITS_ADD | OP_COMMUTES},
161b8e80941Smrg        [midgard_alu_op_imin]		 = {"imin", UNITS_MOST | OP_COMMUTES},
162b8e80941Smrg        [midgard_alu_op_imax]		 = {"imax", UNITS_MOST | OP_COMMUTES},
163b8e80941Smrg        [midgard_alu_op_umin]		 = {"umin", UNITS_MOST | OP_COMMUTES},
164b8e80941Smrg        [midgard_alu_op_umax]		 = {"umax", UNITS_MOST | OP_COMMUTES},
165b8e80941Smrg        [midgard_alu_op_fmov]		 = {"fmov", UNITS_ALL | QUIRK_FLIPPED_R24},
166b8e80941Smrg        [midgard_alu_op_fround]          = {"fround", UNITS_ADD},
167b8e80941Smrg        [midgard_alu_op_froundeven]      = {"froundeven", UNITS_ADD},
168b8e80941Smrg        [midgard_alu_op_ftrunc]          = {"ftrunc", UNITS_ADD},
169b8e80941Smrg        [midgard_alu_op_ffloor]		 = {"ffloor", UNITS_ADD},
170b8e80941Smrg        [midgard_alu_op_fceil]		 = {"fceil", UNITS_ADD},
171b8e80941Smrg        [midgard_alu_op_ffma]		 = {"ffma", UNIT_VLUT},
172b8e80941Smrg
173b8e80941Smrg        /* Though they output a scalar, they need to run on a vector unit
174b8e80941Smrg         * since they process vectors */
175b8e80941Smrg        [midgard_alu_op_fdot3]		 = {"fdot3", UNIT_VMUL | OP_CHANNEL_COUNT(3) | OP_COMMUTES},
176b8e80941Smrg        [midgard_alu_op_fdot3r]		 = {"fdot3r", UNIT_VMUL | OP_CHANNEL_COUNT(3) | OP_COMMUTES},
177b8e80941Smrg        [midgard_alu_op_fdot4]		 = {"fdot4", UNIT_VMUL | OP_CHANNEL_COUNT(4) | OP_COMMUTES},
178b8e80941Smrg
179b8e80941Smrg        /* Incredibly, iadd can run on vmul, etc */
180b8e80941Smrg        [midgard_alu_op_iadd]		 = {"iadd", UNITS_MOST | OP_COMMUTES},
181b8e80941Smrg        [midgard_alu_op_iabs]		 = {"iabs", UNITS_ADD},
182b8e80941Smrg        [midgard_alu_op_isub]		 = {"isub", UNITS_MOST},
183b8e80941Smrg        [midgard_alu_op_imul]		 = {"imul", UNITS_MUL | OP_COMMUTES},
184b8e80941Smrg        [midgard_alu_op_imov]		 = {"imov", UNITS_MOST | QUIRK_FLIPPED_R24},
185b8e80941Smrg
186b8e80941Smrg        /* For vector comparisons, use ball etc */
187b8e80941Smrg        [midgard_alu_op_feq]		 = {"feq", UNITS_MOST | OP_COMMUTES},
188b8e80941Smrg        [midgard_alu_op_fne]		 = {"fne", UNITS_MOST | OP_COMMUTES},
189b8e80941Smrg        [midgard_alu_op_fle]		 = {"fle", UNITS_MOST},
190b8e80941Smrg        [midgard_alu_op_flt]		 = {"flt", UNITS_MOST},
191b8e80941Smrg        [midgard_alu_op_ieq]		 = {"ieq", UNITS_MOST | OP_COMMUTES},
192b8e80941Smrg        [midgard_alu_op_ine]		 = {"ine", UNITS_MOST | OP_COMMUTES},
193b8e80941Smrg        [midgard_alu_op_ilt]		 = {"ilt", UNITS_MOST},
194b8e80941Smrg        [midgard_alu_op_ile]		 = {"ile", UNITS_MOST},
195b8e80941Smrg        [midgard_alu_op_ult]		 = {"ult", UNITS_MOST},
196b8e80941Smrg        [midgard_alu_op_ule]		 = {"ule", UNITS_MOST},
197b8e80941Smrg
198b8e80941Smrg        [midgard_alu_op_icsel]		 = {"icsel", UNITS_ADD},
199b8e80941Smrg        [midgard_alu_op_fcsel_i]	 = {"fcsel_i", UNITS_ADD},
200b8e80941Smrg        [midgard_alu_op_fcsel]		 = {"fcsel", UNITS_ADD | UNIT_SMUL},
201b8e80941Smrg
202b8e80941Smrg        [midgard_alu_op_frcp]		 = {"frcp", UNIT_VLUT},
203b8e80941Smrg        [midgard_alu_op_frsqrt]		 = {"frsqrt", UNIT_VLUT},
204b8e80941Smrg        [midgard_alu_op_fsqrt]		 = {"fsqrt", UNIT_VLUT},
205b8e80941Smrg        [midgard_alu_op_fpow_pt1]	 = {"fpow_pt1", UNIT_VLUT},
206b8e80941Smrg        [midgard_alu_op_fexp2]		 = {"fexp2", UNIT_VLUT},
207b8e80941Smrg        [midgard_alu_op_flog2]		 = {"flog2", UNIT_VLUT},
208b8e80941Smrg
209b8e80941Smrg        [midgard_alu_op_f2i]		 = {"f2i", UNITS_ADD},
210b8e80941Smrg        [midgard_alu_op_f2u]		 = {"f2u", UNITS_ADD},
211b8e80941Smrg        [midgard_alu_op_f2u8]		 = {"f2u8", UNITS_ADD},
212b8e80941Smrg        [midgard_alu_op_i2f]		 = {"i2f", UNITS_ADD},
213b8e80941Smrg        [midgard_alu_op_u2f]		 = {"u2f", UNITS_ADD},
214b8e80941Smrg
215b8e80941Smrg        [midgard_alu_op_fsin]		 = {"fsin", UNIT_VLUT},
216b8e80941Smrg        [midgard_alu_op_fcos]		 = {"fcos", UNIT_VLUT},
217b8e80941Smrg
218b8e80941Smrg        /* XXX: Test case where it's right on smul but not sadd */
219b8e80941Smrg        [midgard_alu_op_iand]		 = {"iand", UNITS_MOST | OP_COMMUTES},
220b8e80941Smrg        [midgard_alu_op_iandnot]         = {"iandnot", UNITS_MOST},
221b8e80941Smrg
222b8e80941Smrg        [midgard_alu_op_ior]		 = {"ior", UNITS_MOST | OP_COMMUTES},
223b8e80941Smrg        [midgard_alu_op_iornot]		 = {"iornot", UNITS_MOST | OP_COMMUTES},
224b8e80941Smrg        [midgard_alu_op_inor]		 = {"inor", UNITS_MOST | OP_COMMUTES},
225b8e80941Smrg        [midgard_alu_op_ixor]		 = {"ixor", UNITS_MOST | OP_COMMUTES},
226b8e80941Smrg        [midgard_alu_op_inxor]		 = {"inxor", UNITS_MOST | OP_COMMUTES},
227b8e80941Smrg        [midgard_alu_op_iclz]		 = {"iclz", UNITS_ADD},
228b8e80941Smrg        [midgard_alu_op_ibitcount8]	 = {"ibitcount8", UNITS_ADD},
229b8e80941Smrg        [midgard_alu_op_inand]		 = {"inand", UNITS_MOST},
230b8e80941Smrg        [midgard_alu_op_ishl]		 = {"ishl", UNITS_ADD},
231b8e80941Smrg        [midgard_alu_op_iasr]		 = {"iasr", UNITS_ADD},
232b8e80941Smrg        [midgard_alu_op_ilsr]		 = {"ilsr", UNITS_ADD},
233b8e80941Smrg
234b8e80941Smrg        [midgard_alu_op_fball_eq]	 = {"fball_eq", UNITS_VECTOR | OP_COMMUTES},
235b8e80941Smrg        [midgard_alu_op_fbany_neq]	 = {"fbany_neq", UNITS_VECTOR | OP_COMMUTES},
236b8e80941Smrg        [midgard_alu_op_iball_eq]	 = {"iball_eq", UNITS_VECTOR | OP_COMMUTES},
237b8e80941Smrg        [midgard_alu_op_iball_neq]	 = {"iball_neq", UNITS_VECTOR | OP_COMMUTES},
238b8e80941Smrg        [midgard_alu_op_ibany_eq]	 = {"ibany_eq", UNITS_VECTOR | OP_COMMUTES},
239b8e80941Smrg        [midgard_alu_op_ibany_neq]	 = {"ibany_neq", UNITS_VECTOR | OP_COMMUTES},
240b8e80941Smrg
241b8e80941Smrg        /* These instructions are not yet emitted by the compiler, so
242b8e80941Smrg         * don't speculate about units yet */
243b8e80941Smrg        [midgard_alu_op_ishladd]        = {"ishladd", 0},
244b8e80941Smrg
245b8e80941Smrg        [midgard_alu_op_uball_lt]       = {"uball_lt", 0},
246b8e80941Smrg        [midgard_alu_op_uball_lte]      = {"uball_lte", 0},
247b8e80941Smrg        [midgard_alu_op_iball_lt]       = {"iball_lt", 0},
248b8e80941Smrg        [midgard_alu_op_iball_lte]      = {"iball_lte", 0},
249b8e80941Smrg        [midgard_alu_op_ubany_lt]       = {"ubany_lt", 0},
250b8e80941Smrg        [midgard_alu_op_ubany_lte]      = {"ubany_lte", 0},
251b8e80941Smrg        [midgard_alu_op_ibany_lt]       = {"ibany_lt", 0},
252b8e80941Smrg        [midgard_alu_op_ibany_lte]      = {"ibany_lte", 0},
253b8e80941Smrg
254b8e80941Smrg        [midgard_alu_op_freduce]        = {"freduce", 0},
255b8e80941Smrg        [midgard_alu_op_bball_eq]       = {"bball_eq", 0 | OP_COMMUTES},
256b8e80941Smrg        [midgard_alu_op_bbany_neq]      = {"bball_eq", 0 | OP_COMMUTES},
257b8e80941Smrg        [midgard_alu_op_fatan2_pt1]     = {"fatan2_pt1", 0},
258b8e80941Smrg        [midgard_alu_op_fatan_pt2]      = {"fatan_pt2", 0},
259b8e80941Smrg};
260b8e80941Smrg
261b8e80941Smrg/* Is this opcode that of an integer (regardless of signedness)? Instruction
262b8e80941Smrg * names authoritatively determine types */
263b8e80941Smrg
264b8e80941Smrgstatic bool
265b8e80941Smrgmidgard_is_integer_op(int op)
266b8e80941Smrg{
267b8e80941Smrg        const char *name = alu_opcode_props[op].name;
268b8e80941Smrg
269b8e80941Smrg        if (!name)
270b8e80941Smrg                return false;
271b8e80941Smrg
272b8e80941Smrg        return (name[0] == 'i') || (name[0] == 'u');
273b8e80941Smrg}
274