/*
 * Copyright © 2016 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
23b8e80941Smrg
24b8e80941Smrg#include <string.h>
25b8e80941Smrg#include "util/macros.h"
26b8e80941Smrg
27b8e80941Smrg#include "broadcom/common/v3d_device_info.h"
28b8e80941Smrg#include "qpu_instr.h"
29b8e80941Smrg
#ifndef QPU_MASK
/* 64-bit mask with bits low..high (inclusive) set.  The field width
 * (high - low + 1) must stay below 64 so the shift is well defined.
 */
#define QPU_MASK(high, low) \
        ((((uint64_t)1 << ((high) - (low) + 1)) - 1) << (low))

/* Moves value into the position of the named field (using field_SHIFT) and
 * asserts that nothing falls outside field_MASK.  Written with the GNU
 * statement expression extension so the shifted value is computed once.
 */
#define QPU_SET_FIELD(value, field)                                       \
        ({                                                                \
                uint64_t fieldval = (uint64_t)(value) << field ## _SHIFT; \
                assert((fieldval & ~ field ## _MASK) == 0);               \
                fieldval & field ## _MASK;                                \
         })

/* Extracts the named field from word, shifted down to bit 0. */
#define QPU_GET_FIELD(word, field) \
        ((uint32_t)(((word)  & field ## _MASK) >> field ## _SHIFT))

/* Returns inst with the named field cleared and replaced by value. */
#define QPU_UPDATE_FIELD(inst, value, field)                              \
        (((inst) & ~(field ## _MASK)) | QPU_SET_FIELD(value, field))
#endif /* QPU_MASK */
45b8e80941Smrg
/* Bit positions of the fields inside a 64-bit instruction word.  Each field
 * has a _SHIFT (lowest bit) and a _MASK (built with QPU_MASK(high, low));
 * single-bit flags only have a mask.  The BRANCH_* fields overlap the other
 * fields' bit ranges (e.g. OP_ADD and BRANCH_ADDR_HIGH both cover 31:24).
 */

/* Fields without the BRANCH prefix, from the top of the word down. */
#define VC5_QPU_OP_MUL_SHIFT                58
#define VC5_QPU_OP_MUL_MASK                 QPU_MASK(63, 58)

#define VC5_QPU_SIG_SHIFT                   53
#define VC5_QPU_SIG_MASK                    QPU_MASK(57, 53)

#define VC5_QPU_COND_SHIFT                  46
#define VC5_QPU_COND_MASK                   QPU_MASK(52, 46)
#define VC5_QPU_COND_SIG_MAGIC_ADDR         (1 << 6)

#define VC5_QPU_MM                          QPU_MASK(45, 45)
#define VC5_QPU_MA                          QPU_MASK(44, 44)

#define V3D_QPU_WADDR_M_SHIFT               38
#define V3D_QPU_WADDR_M_MASK                QPU_MASK(43, 38)

#define V3D_QPU_WADDR_A_SHIFT               32
#define V3D_QPU_WADDR_A_MASK                QPU_MASK(37, 32)

#define VC5_QPU_OP_ADD_SHIFT                24
#define VC5_QPU_OP_ADD_MASK                 QPU_MASK(31, 24)

#define VC5_QPU_MUL_B_SHIFT                 21
#define VC5_QPU_MUL_B_MASK                  QPU_MASK(23, 21)

#define VC5_QPU_MUL_A_SHIFT                 18
#define VC5_QPU_MUL_A_MASK                  QPU_MASK(20, 18)

#define VC5_QPU_ADD_B_SHIFT                 15
#define VC5_QPU_ADD_B_MASK                  QPU_MASK(17, 15)

#define VC5_QPU_ADD_A_SHIFT                 12
#define VC5_QPU_ADD_A_MASK                  QPU_MASK(14, 12)

#define VC5_QPU_RADDR_A_SHIFT               6
#define VC5_QPU_RADDR_A_MASK                QPU_MASK(11, 6)

#define VC5_QPU_RADDR_B_SHIFT               0
#define VC5_QPU_RADDR_B_MASK                QPU_MASK(5, 0)

/* Fields carrying the BRANCH prefix, from the top of the word down. */
#define VC5_QPU_BRANCH_ADDR_LOW_SHIFT       35
#define VC5_QPU_BRANCH_ADDR_LOW_MASK        QPU_MASK(55, 35)

#define VC5_QPU_BRANCH_COND_SHIFT           32
#define VC5_QPU_BRANCH_COND_MASK            QPU_MASK(34, 32)

#define VC5_QPU_BRANCH_ADDR_HIGH_SHIFT      24
#define VC5_QPU_BRANCH_ADDR_HIGH_MASK       QPU_MASK(31, 24)

#define VC5_QPU_BRANCH_MSFIGN_SHIFT         21
#define VC5_QPU_BRANCH_MSFIGN_MASK          QPU_MASK(22, 21)

#define VC5_QPU_BRANCH_BDU_SHIFT            15
#define VC5_QPU_BRANCH_BDU_MASK             QPU_MASK(17, 15)

#define VC5_QPU_BRANCH_UB                   QPU_MASK(14, 14)

#define VC5_QPU_BRANCH_BDI_SHIFT            12
#define VC5_QPU_BRANCH_BDI_MASK             QPU_MASK(13, 12)
105b8e80941Smrg
/* Shorthand designated initializers for the boolean members of
 * struct v3d_qpu_sig, so the signal tables below can be written as
 * `{ THRSW, LDUNIF }` rather than `{ .thrsw = true, .ldunif = true }`.
 */
#define THRSW     .thrsw = true
#define SMIMM     .small_imm = true
#define UCB       .ucb = true
#define ROT       .rotate = true

#define LDUNIF    .ldunif = true
#define LDUNIFRF  .ldunifrf = true
#define LDUNIFA   .ldunifa = true
#define LDUNIFARF .ldunifarf = true

#define LDTMU     .ldtmu = true
#define LDVARY    .ldvary = true
#define LDVPM     .ldvpm = true
#define LDTLB     .ldtlb = true
#define LDTLBU    .ldtlbu = true
#define WRTMUC    .wrtmuc = true
120b8e80941Smrg
121b8e80941Smrgstatic const struct v3d_qpu_sig v33_sig_map[] = {
122b8e80941Smrg        /*      MISC   R3       R4      R5 */
123b8e80941Smrg        [0]  = {                               },
124b8e80941Smrg        [1]  = { THRSW,                        },
125b8e80941Smrg        [2]  = {                        LDUNIF },
126b8e80941Smrg        [3]  = { THRSW,                 LDUNIF },
127b8e80941Smrg        [4]  = {                LDTMU,         },
128b8e80941Smrg        [5]  = { THRSW,         LDTMU,         },
129b8e80941Smrg        [6]  = {                LDTMU,  LDUNIF },
130b8e80941Smrg        [7]  = { THRSW,         LDTMU,  LDUNIF },
131b8e80941Smrg        [8]  = {        LDVARY,                },
132b8e80941Smrg        [9]  = { THRSW, LDVARY,                },
133b8e80941Smrg        [10] = {        LDVARY,         LDUNIF },
134b8e80941Smrg        [11] = { THRSW, LDVARY,         LDUNIF },
135b8e80941Smrg        [12] = {        LDVARY, LDTMU,         },
136b8e80941Smrg        [13] = { THRSW, LDVARY, LDTMU,         },
137b8e80941Smrg        [14] = { SMIMM, LDVARY,                },
138b8e80941Smrg        [15] = { SMIMM,                        },
139b8e80941Smrg        [16] = {        LDTLB,                 },
140b8e80941Smrg        [17] = {        LDTLBU,                },
141b8e80941Smrg        /* 18-21 reserved */
142b8e80941Smrg        [22] = { UCB,                          },
143b8e80941Smrg        [23] = { ROT,                          },
144b8e80941Smrg        [24] = {        LDVPM,                 },
145b8e80941Smrg        [25] = { THRSW, LDVPM,                 },
146b8e80941Smrg        [26] = {        LDVPM,          LDUNIF },
147b8e80941Smrg        [27] = { THRSW, LDVPM,          LDUNIF },
148b8e80941Smrg        [28] = {        LDVPM, LDTMU,          },
149b8e80941Smrg        [29] = { THRSW, LDVPM, LDTMU,          },
150b8e80941Smrg        [30] = { SMIMM, LDVPM,                 },
151b8e80941Smrg        [31] = { SMIMM,                        },
152b8e80941Smrg};
153b8e80941Smrg
154b8e80941Smrgstatic const struct v3d_qpu_sig v40_sig_map[] = {
155b8e80941Smrg        /*      MISC    R3      R4      R5 */
156b8e80941Smrg        [0]  = {                               },
157b8e80941Smrg        [1]  = { THRSW,                        },
158b8e80941Smrg        [2]  = {                        LDUNIF },
159b8e80941Smrg        [3]  = { THRSW,                 LDUNIF },
160b8e80941Smrg        [4]  = {                LDTMU,         },
161b8e80941Smrg        [5]  = { THRSW,         LDTMU,         },
162b8e80941Smrg        [6]  = {                LDTMU,  LDUNIF },
163b8e80941Smrg        [7]  = { THRSW,         LDTMU,  LDUNIF },
164b8e80941Smrg        [8]  = {        LDVARY,                },
165b8e80941Smrg        [9]  = { THRSW, LDVARY,                },
166b8e80941Smrg        [10] = {        LDVARY,         LDUNIF },
167b8e80941Smrg        [11] = { THRSW, LDVARY,         LDUNIF },
168b8e80941Smrg        /* 12-13 reserved */
169b8e80941Smrg        [14] = { SMIMM, LDVARY,                },
170b8e80941Smrg        [15] = { SMIMM,                        },
171b8e80941Smrg        [16] = {        LDTLB,                 },
172b8e80941Smrg        [17] = {        LDTLBU,                },
173b8e80941Smrg        [18] = {                        WRTMUC },
174b8e80941Smrg        [19] = { THRSW,                 WRTMUC },
175b8e80941Smrg        [20] = {        LDVARY,         WRTMUC },
176b8e80941Smrg        [21] = { THRSW, LDVARY,         WRTMUC },
177b8e80941Smrg        [22] = { UCB,                          },
178b8e80941Smrg        [23] = { ROT,                          },
179b8e80941Smrg        /* 24-30 reserved */
180b8e80941Smrg        [31] = { SMIMM,         LDTMU,         },
181b8e80941Smrg};
182b8e80941Smrg
183b8e80941Smrgstatic const struct v3d_qpu_sig v41_sig_map[] = {
184b8e80941Smrg        /*      MISC       phys    R5 */
185b8e80941Smrg        [0]  = {                          },
186b8e80941Smrg        [1]  = { THRSW,                   },
187b8e80941Smrg        [2]  = {                   LDUNIF },
188b8e80941Smrg        [3]  = { THRSW,            LDUNIF },
189b8e80941Smrg        [4]  = {           LDTMU,         },
190b8e80941Smrg        [5]  = { THRSW,    LDTMU,         },
191b8e80941Smrg        [6]  = {           LDTMU,  LDUNIF },
192b8e80941Smrg        [7]  = { THRSW,    LDTMU,  LDUNIF },
193b8e80941Smrg        [8]  = {           LDVARY,        },
194b8e80941Smrg        [9]  = { THRSW,    LDVARY,        },
195b8e80941Smrg        [10] = {           LDVARY, LDUNIF },
196b8e80941Smrg        [11] = { THRSW,    LDVARY, LDUNIF },
197b8e80941Smrg        [12] = { LDUNIFRF                 },
198b8e80941Smrg        [13] = { THRSW,    LDUNIFRF       },
199b8e80941Smrg        [14] = { SMIMM,    LDVARY,        },
200b8e80941Smrg        [15] = { SMIMM,                   },
201b8e80941Smrg        [16] = {           LDTLB,         },
202b8e80941Smrg        [17] = {           LDTLBU,        },
203b8e80941Smrg        [18] = {                          WRTMUC },
204b8e80941Smrg        [19] = { THRSW,                   WRTMUC },
205b8e80941Smrg        [20] = {           LDVARY,        WRTMUC },
206b8e80941Smrg        [21] = { THRSW,    LDVARY,        WRTMUC },
207b8e80941Smrg        [22] = { UCB,                     },
208b8e80941Smrg        [23] = { ROT,                     },
209b8e80941Smrg        /* 24-30 reserved */
210b8e80941Smrg        [24] = {                   LDUNIFA},
211b8e80941Smrg        [25] = { LDUNIFARF                },
212b8e80941Smrg        [31] = { SMIMM,            LDTMU, },
213b8e80941Smrg};
214b8e80941Smrg
215b8e80941Smrgbool
216b8e80941Smrgv3d_qpu_sig_unpack(const struct v3d_device_info *devinfo,
217b8e80941Smrg                   uint32_t packed_sig,
218b8e80941Smrg                   struct v3d_qpu_sig *sig)
219b8e80941Smrg{
220b8e80941Smrg        if (packed_sig >= ARRAY_SIZE(v33_sig_map))
221b8e80941Smrg                return false;
222b8e80941Smrg
223b8e80941Smrg        if (devinfo->ver >= 41)
224b8e80941Smrg                *sig = v41_sig_map[packed_sig];
225b8e80941Smrg        else if (devinfo->ver == 40)
226b8e80941Smrg                *sig = v40_sig_map[packed_sig];
227b8e80941Smrg        else
228b8e80941Smrg                *sig = v33_sig_map[packed_sig];
229b8e80941Smrg
230b8e80941Smrg        /* Signals with zeroed unpacked contents after element 0 are reserved. */
231b8e80941Smrg        return (packed_sig == 0 ||
232b8e80941Smrg                memcmp(sig, &v33_sig_map[0], sizeof(*sig)) != 0);
233b8e80941Smrg}
234b8e80941Smrg
235b8e80941Smrgbool
236b8e80941Smrgv3d_qpu_sig_pack(const struct v3d_device_info *devinfo,
237b8e80941Smrg                 const struct v3d_qpu_sig *sig,
238b8e80941Smrg                 uint32_t *packed_sig)
239b8e80941Smrg{
240b8e80941Smrg        static const struct v3d_qpu_sig *map;
241b8e80941Smrg
242b8e80941Smrg        if (devinfo->ver >= 41)
243b8e80941Smrg                map = v41_sig_map;
244b8e80941Smrg        else if (devinfo->ver == 40)
245b8e80941Smrg                map = v40_sig_map;
246b8e80941Smrg        else
247b8e80941Smrg                map = v33_sig_map;
248b8e80941Smrg
249b8e80941Smrg        for (int i = 0; i < ARRAY_SIZE(v33_sig_map); i++) {
250b8e80941Smrg                if (memcmp(&map[i], sig, sizeof(*sig)) == 0) {
251b8e80941Smrg                        *packed_sig = i;
252b8e80941Smrg                        return true;
253b8e80941Smrg                }
254b8e80941Smrg        }
255b8e80941Smrg
256b8e80941Smrg        return false;
257b8e80941Smrg}
/* Returns the bit pattern of a float reinterpreted as an unsigned integer
 * (no numeric conversion).  Assumes float and unsigned have the same size.
 */
static inline unsigned
fui( float f )
{
        unsigned bits = 0;

        memcpy(&bits, &f, sizeof(f));
        return bits;
}
265b8e80941Smrg
/* Values expressible as a small immediate; the array index is the packed
 * encoding.  Negative integers are stored as their 32-bit two's complement
 * pattern, and the last 16 entries are the IEEE-754 single-precision bit
 * patterns of the powers of two noted beside them.
 */
static const uint32_t small_immediates[] = {
        /* 0..15 */
        0,  1,  2,  3,  4,  5,  6,  7,
        8,  9,  10, 11, 12, 13, 14, 15,

        /* -16..-1 */
        -16, -15, -14, -13, -12, -11, -10, -9,
        -8,  -7,  -6,  -5,  -4,  -3,  -2,  -1,

        /* 2.0^-8 .. 2.0^-1 */
        0x3b800000, 0x3c000000, 0x3c800000, 0x3d000000,
        0x3d800000, 0x3e000000, 0x3e800000, 0x3f000000,

        /* 2.0^0 .. 2.0^7 */
        0x3f800000, 0x40000000, 0x40800000, 0x41000000,
        0x41800000, 0x42000000, 0x42800000, 0x43000000,
};
292b8e80941Smrg
293b8e80941Smrgbool
294b8e80941Smrgv3d_qpu_small_imm_unpack(const struct v3d_device_info *devinfo,
295b8e80941Smrg                         uint32_t packed_small_immediate,
296b8e80941Smrg                         uint32_t *small_immediate)
297b8e80941Smrg{
298b8e80941Smrg        if (packed_small_immediate >= ARRAY_SIZE(small_immediates))
299b8e80941Smrg                return false;
300b8e80941Smrg
301b8e80941Smrg        *small_immediate = small_immediates[packed_small_immediate];
302b8e80941Smrg        return true;
303b8e80941Smrg}
304b8e80941Smrg
305b8e80941Smrgbool
306b8e80941Smrgv3d_qpu_small_imm_pack(const struct v3d_device_info *devinfo,
307b8e80941Smrg                       uint32_t value,
308b8e80941Smrg                       uint32_t *packed_small_immediate)
309b8e80941Smrg{
310b8e80941Smrg        STATIC_ASSERT(ARRAY_SIZE(small_immediates) == 48);
311b8e80941Smrg
312b8e80941Smrg        for (int i = 0; i < ARRAY_SIZE(small_immediates); i++) {
313b8e80941Smrg                if (small_immediates[i] == value) {
314b8e80941Smrg                        *packed_small_immediate = i;
315b8e80941Smrg                        return true;
316b8e80941Smrg                }
317b8e80941Smrg        }
318b8e80941Smrg
319b8e80941Smrg        return false;
320b8e80941Smrg}
321b8e80941Smrg
322b8e80941Smrgbool
323b8e80941Smrgv3d_qpu_flags_unpack(const struct v3d_device_info *devinfo,
324b8e80941Smrg                     uint32_t packed_cond,
325b8e80941Smrg                     struct v3d_qpu_flags *cond)
326b8e80941Smrg{
327b8e80941Smrg        static const enum v3d_qpu_cond cond_map[4] = {
328b8e80941Smrg                [0] = V3D_QPU_COND_IFA,
329b8e80941Smrg                [1] = V3D_QPU_COND_IFB,
330b8e80941Smrg                [2] = V3D_QPU_COND_IFNA,
331b8e80941Smrg                [3] = V3D_QPU_COND_IFNB,
332b8e80941Smrg        };
333b8e80941Smrg
334b8e80941Smrg        cond->ac = V3D_QPU_COND_NONE;
335b8e80941Smrg        cond->mc = V3D_QPU_COND_NONE;
336b8e80941Smrg        cond->apf = V3D_QPU_PF_NONE;
337b8e80941Smrg        cond->mpf = V3D_QPU_PF_NONE;
338b8e80941Smrg        cond->auf = V3D_QPU_UF_NONE;
339b8e80941Smrg        cond->muf = V3D_QPU_UF_NONE;
340b8e80941Smrg
341b8e80941Smrg        if (packed_cond == 0) {
342b8e80941Smrg                return true;
343b8e80941Smrg        } else if (packed_cond >> 2 == 0) {
344b8e80941Smrg                cond->apf = packed_cond & 0x3;
345b8e80941Smrg        } else if (packed_cond >> 4 == 0) {
346b8e80941Smrg                cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
347b8e80941Smrg        } else if (packed_cond == 0x10) {
348b8e80941Smrg                return false;
349b8e80941Smrg        } else if (packed_cond >> 2 == 0x4) {
350b8e80941Smrg                cond->mpf = packed_cond & 0x3;
351b8e80941Smrg        } else if (packed_cond >> 4 == 0x1) {
352b8e80941Smrg                cond->muf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
353b8e80941Smrg        } else if (packed_cond >> 4 == 0x2) {
354b8e80941Smrg                cond->ac = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA;
355b8e80941Smrg                cond->mpf = packed_cond & 0x3;
356b8e80941Smrg        } else if (packed_cond >> 4 == 0x3) {
357b8e80941Smrg                cond->mc = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA;
358b8e80941Smrg                cond->apf = packed_cond & 0x3;
359b8e80941Smrg        } else if (packed_cond >> 6) {
360b8e80941Smrg                cond->mc = cond_map[(packed_cond >> 4) & 0x3];
361b8e80941Smrg                if (((packed_cond >> 2) & 0x3) == 0) {
362b8e80941Smrg                        cond->ac = cond_map[packed_cond & 0x3];
363b8e80941Smrg                } else {
364b8e80941Smrg                        cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
365b8e80941Smrg                }
366b8e80941Smrg        }
367b8e80941Smrg
368b8e80941Smrg        return true;
369b8e80941Smrg}
370b8e80941Smrg
371b8e80941Smrgbool
372b8e80941Smrgv3d_qpu_flags_pack(const struct v3d_device_info *devinfo,
373b8e80941Smrg                   const struct v3d_qpu_flags *cond,
374b8e80941Smrg                   uint32_t *packed_cond)
375b8e80941Smrg{
376b8e80941Smrg#define AC (1 << 0)
377b8e80941Smrg#define MC (1 << 1)
378b8e80941Smrg#define APF (1 << 2)
379b8e80941Smrg#define MPF (1 << 3)
380b8e80941Smrg#define AUF (1 << 4)
381b8e80941Smrg#define MUF (1 << 5)
382b8e80941Smrg        static const struct {
383b8e80941Smrg                uint8_t flags_present;
384b8e80941Smrg                uint8_t bits;
385b8e80941Smrg        } flags_table[] = {
386b8e80941Smrg                { 0,        0 },
387b8e80941Smrg                { APF,      0 },
388b8e80941Smrg                { AUF,      0 },
389b8e80941Smrg                { MPF,      (1 << 4) },
390b8e80941Smrg                { MUF,      (1 << 4) },
391b8e80941Smrg                { AC,       (1 << 5) },
392b8e80941Smrg                { AC | MPF, (1 << 5) },
393b8e80941Smrg                { MC,       (1 << 5) | (1 << 4) },
394b8e80941Smrg                { MC | APF, (1 << 5) | (1 << 4) },
395b8e80941Smrg                { MC | AC,  (1 << 6) },
396b8e80941Smrg                { MC | AUF, (1 << 6) },
397b8e80941Smrg        };
398b8e80941Smrg
399b8e80941Smrg        uint8_t flags_present = 0;
400b8e80941Smrg        if (cond->ac != V3D_QPU_COND_NONE)
401b8e80941Smrg                flags_present |= AC;
402b8e80941Smrg        if (cond->mc != V3D_QPU_COND_NONE)
403b8e80941Smrg                flags_present |= MC;
404b8e80941Smrg        if (cond->apf != V3D_QPU_PF_NONE)
405b8e80941Smrg                flags_present |= APF;
406b8e80941Smrg        if (cond->mpf != V3D_QPU_PF_NONE)
407b8e80941Smrg                flags_present |= MPF;
408b8e80941Smrg        if (cond->auf != V3D_QPU_UF_NONE)
409b8e80941Smrg                flags_present |= AUF;
410b8e80941Smrg        if (cond->muf != V3D_QPU_UF_NONE)
411b8e80941Smrg                flags_present |= MUF;
412b8e80941Smrg
413b8e80941Smrg        for (int i = 0; i < ARRAY_SIZE(flags_table); i++) {
414b8e80941Smrg                if (flags_table[i].flags_present != flags_present)
415b8e80941Smrg                        continue;
416b8e80941Smrg
417b8e80941Smrg                *packed_cond = flags_table[i].bits;
418b8e80941Smrg
419b8e80941Smrg                *packed_cond |= cond->apf;
420b8e80941Smrg                *packed_cond |= cond->mpf;
421b8e80941Smrg
422b8e80941Smrg                if (flags_present & AUF)
423b8e80941Smrg                        *packed_cond |= cond->auf - V3D_QPU_UF_ANDZ + 4;
424b8e80941Smrg                if (flags_present & MUF)
425b8e80941Smrg                        *packed_cond |= cond->muf - V3D_QPU_UF_ANDZ + 4;
426b8e80941Smrg
427b8e80941Smrg                if (flags_present & AC)
428b8e80941Smrg                        *packed_cond |= (cond->ac - V3D_QPU_COND_IFA) << 2;
429b8e80941Smrg
430b8e80941Smrg                if (flags_present & MC) {
431b8e80941Smrg                        if (*packed_cond & (1 << 6))
432b8e80941Smrg                                *packed_cond |= (cond->mc -
433b8e80941Smrg                                                 V3D_QPU_COND_IFA) << 4;
434b8e80941Smrg                        else
435b8e80941Smrg                                *packed_cond |= (cond->mc -
436b8e80941Smrg                                                 V3D_QPU_COND_IFA) << 2;
437b8e80941Smrg                }
438b8e80941Smrg
439b8e80941Smrg                return true;
440b8e80941Smrg        }
441b8e80941Smrg
442b8e80941Smrg        return false;
443b8e80941Smrg}
444b8e80941Smrg
/* Make a mapping of the table of opcodes in the spec.  The opcode is
 * determined by a combination of the opcode field, and in the case of 0 or
 * 1-arg opcodes, the mux_b field as well.
 *
 * MUX_MASK(bot, top) yields a bitmask with bits bot..top (inclusive) set, one
 * bit per mux value.  Both arguments are parenthesized so that expression
 * arguments expand correctly.  ANYMUX accepts all eight mux values.
 */
#define MUX_MASK(bot, top) (((1 << ((top) + 1)) - 1) - ((1 << (bot)) - 1))
#define ANYMUX MUX_MASK(0, 7)
451b8e80941Smrg
/* One row of an opcode decode table (see add_ops and mul_ops). */
struct opcode_desc {
        /* Inclusive range of raw opcode field values this row covers. */
        uint8_t opcode_first;
        uint8_t opcode_last;

        /* Bitmasks of accepted mux values: bit n set means a mux field equal
         * to n matches (see MUX_MASK / ANYMUX).
         */
        uint8_t mux_b_mask;
        uint8_t mux_a_mask;

        /* Operation this row decodes to (a V3D_QPU_A_* or V3D_QPU_M_* value
         * in the tables in this file).
         */
        uint8_t op;

        /* 0 if it's the same across V3D versions, or a specific V3D version. */
        uint8_t ver;
};
461b8e80941Smrg
462b8e80941Smrgstatic const struct opcode_desc add_ops[] = {
463b8e80941Smrg        /* FADD is FADDNF depending on the order of the mux_a/mux_b. */
464b8e80941Smrg        { 0,   47,  ANYMUX, ANYMUX, V3D_QPU_A_FADD },
465b8e80941Smrg        { 0,   47,  ANYMUX, ANYMUX, V3D_QPU_A_FADDNF },
466b8e80941Smrg        { 53,  55,  ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
467b8e80941Smrg        { 56,  56,  ANYMUX, ANYMUX, V3D_QPU_A_ADD },
468b8e80941Smrg        { 57,  59,  ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
469b8e80941Smrg        { 60,  60,  ANYMUX, ANYMUX, V3D_QPU_A_SUB },
470b8e80941Smrg        { 61,  63,  ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
471b8e80941Smrg        { 64,  111, ANYMUX, ANYMUX, V3D_QPU_A_FSUB },
472b8e80941Smrg        { 120, 120, ANYMUX, ANYMUX, V3D_QPU_A_MIN },
473b8e80941Smrg        { 121, 121, ANYMUX, ANYMUX, V3D_QPU_A_MAX },
474b8e80941Smrg        { 122, 122, ANYMUX, ANYMUX, V3D_QPU_A_UMIN },
475b8e80941Smrg        { 123, 123, ANYMUX, ANYMUX, V3D_QPU_A_UMAX },
476b8e80941Smrg        { 124, 124, ANYMUX, ANYMUX, V3D_QPU_A_SHL },
477b8e80941Smrg        { 125, 125, ANYMUX, ANYMUX, V3D_QPU_A_SHR },
478b8e80941Smrg        { 126, 126, ANYMUX, ANYMUX, V3D_QPU_A_ASR },
479b8e80941Smrg        { 127, 127, ANYMUX, ANYMUX, V3D_QPU_A_ROR },
480b8e80941Smrg        /* FMIN is instead FMAX depending on the order of the mux_a/mux_b. */
481b8e80941Smrg        { 128, 175, ANYMUX, ANYMUX, V3D_QPU_A_FMIN },
482b8e80941Smrg        { 128, 175, ANYMUX, ANYMUX, V3D_QPU_A_FMAX },
483b8e80941Smrg        { 176, 180, ANYMUX, ANYMUX, V3D_QPU_A_VFMIN },
484b8e80941Smrg
485b8e80941Smrg        { 181, 181, ANYMUX, ANYMUX, V3D_QPU_A_AND },
486b8e80941Smrg        { 182, 182, ANYMUX, ANYMUX, V3D_QPU_A_OR },
487b8e80941Smrg        { 183, 183, ANYMUX, ANYMUX, V3D_QPU_A_XOR },
488b8e80941Smrg
489b8e80941Smrg        { 184, 184, ANYMUX, ANYMUX, V3D_QPU_A_VADD },
490b8e80941Smrg        { 185, 185, ANYMUX, ANYMUX, V3D_QPU_A_VSUB },
491b8e80941Smrg        { 186, 186, 1 << 0, ANYMUX, V3D_QPU_A_NOT },
492b8e80941Smrg        { 186, 186, 1 << 1, ANYMUX, V3D_QPU_A_NEG },
493b8e80941Smrg        { 186, 186, 1 << 2, ANYMUX, V3D_QPU_A_FLAPUSH },
494b8e80941Smrg        { 186, 186, 1 << 3, ANYMUX, V3D_QPU_A_FLBPUSH },
495b8e80941Smrg        { 186, 186, 1 << 4, ANYMUX, V3D_QPU_A_FLPOP },
496b8e80941Smrg        { 186, 186, 1 << 5, ANYMUX, V3D_QPU_A_RECIP },
497b8e80941Smrg        { 186, 186, 1 << 6, ANYMUX, V3D_QPU_A_SETMSF },
498b8e80941Smrg        { 186, 186, 1 << 7, ANYMUX, V3D_QPU_A_SETREVF },
499b8e80941Smrg        { 187, 187, 1 << 0, 1 << 0, V3D_QPU_A_NOP, 0 },
500b8e80941Smrg        { 187, 187, 1 << 0, 1 << 1, V3D_QPU_A_TIDX },
501b8e80941Smrg        { 187, 187, 1 << 0, 1 << 2, V3D_QPU_A_EIDX },
502b8e80941Smrg        { 187, 187, 1 << 0, 1 << 3, V3D_QPU_A_LR },
503b8e80941Smrg        { 187, 187, 1 << 0, 1 << 4, V3D_QPU_A_VFLA },
504b8e80941Smrg        { 187, 187, 1 << 0, 1 << 5, V3D_QPU_A_VFLNA },
505b8e80941Smrg        { 187, 187, 1 << 0, 1 << 6, V3D_QPU_A_VFLB },
506b8e80941Smrg        { 187, 187, 1 << 0, 1 << 7, V3D_QPU_A_VFLNB },
507b8e80941Smrg
508b8e80941Smrg        { 187, 187, 1 << 1, MUX_MASK(0, 2), V3D_QPU_A_FXCD },
509b8e80941Smrg        { 187, 187, 1 << 1, 1 << 3, V3D_QPU_A_XCD },
510b8e80941Smrg        { 187, 187, 1 << 1, MUX_MASK(4, 6), V3D_QPU_A_FYCD },
511b8e80941Smrg        { 187, 187, 1 << 1, 1 << 7, V3D_QPU_A_YCD },
512b8e80941Smrg
513b8e80941Smrg        { 187, 187, 1 << 2, 1 << 0, V3D_QPU_A_MSF },
514b8e80941Smrg        { 187, 187, 1 << 2, 1 << 1, V3D_QPU_A_REVF },
515b8e80941Smrg        { 187, 187, 1 << 2, 1 << 2, V3D_QPU_A_VDWWT, 33 },
516b8e80941Smrg        { 187, 187, 1 << 2, 1 << 2, V3D_QPU_A_IID, 40 },
517b8e80941Smrg        { 187, 187, 1 << 2, 1 << 3, V3D_QPU_A_SAMPID, 40 },
518b8e80941Smrg        { 187, 187, 1 << 2, 1 << 4, V3D_QPU_A_BARRIERID, 40 },
519b8e80941Smrg        { 187, 187, 1 << 2, 1 << 5, V3D_QPU_A_TMUWT },
520b8e80941Smrg        { 187, 187, 1 << 2, 1 << 6, V3D_QPU_A_VPMWT },
521b8e80941Smrg
522b8e80941Smrg        { 187, 187, 1 << 3, ANYMUX, V3D_QPU_A_VPMSETUP, 33 },
523b8e80941Smrg        { 188, 188, 1 << 0, ANYMUX, V3D_QPU_A_LDVPMV_IN, 40 },
524b8e80941Smrg        { 188, 188, 1 << 1, ANYMUX, V3D_QPU_A_LDVPMD_IN, 40 },
525b8e80941Smrg        { 188, 188, 1 << 2, ANYMUX, V3D_QPU_A_LDVPMP, 40 },
526b8e80941Smrg        { 188, 188, 1 << 3, ANYMUX, V3D_QPU_A_RSQRT, 41 },
527b8e80941Smrg        { 188, 188, 1 << 4, ANYMUX, V3D_QPU_A_EXP, 41 },
528b8e80941Smrg        { 188, 188, 1 << 5, ANYMUX, V3D_QPU_A_LOG, 41 },
529b8e80941Smrg        { 188, 188, 1 << 6, ANYMUX, V3D_QPU_A_SIN, 41 },
530b8e80941Smrg        { 188, 188, 1 << 7, ANYMUX, V3D_QPU_A_RSQRT2, 41 },
531b8e80941Smrg        { 189, 189, ANYMUX, ANYMUX, V3D_QPU_A_LDVPMG_IN, 40 },
532b8e80941Smrg
533b8e80941Smrg        /* FIXME: MORE COMPLICATED */
534b8e80941Smrg        /* { 190, 191, ANYMUX, ANYMUX, V3D_QPU_A_VFMOVABSNEGNAB }, */
535b8e80941Smrg
536b8e80941Smrg        { 192, 239, ANYMUX, ANYMUX, V3D_QPU_A_FCMP },
537b8e80941Smrg        { 240, 244, ANYMUX, ANYMUX, V3D_QPU_A_VFMAX },
538b8e80941Smrg
539b8e80941Smrg        { 245, 245, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FROUND },
540b8e80941Smrg        { 245, 245, 1 << 3, ANYMUX, V3D_QPU_A_FTOIN },
541b8e80941Smrg        { 245, 245, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FTRUNC },
542b8e80941Smrg        { 245, 245, 1 << 7, ANYMUX, V3D_QPU_A_FTOIZ },
543b8e80941Smrg        { 246, 246, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FFLOOR },
544b8e80941Smrg        { 246, 246, 1 << 3, ANYMUX, V3D_QPU_A_FTOUZ },
545b8e80941Smrg        { 246, 246, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FCEIL },
546b8e80941Smrg        { 246, 246, 1 << 7, ANYMUX, V3D_QPU_A_FTOC },
547b8e80941Smrg
548b8e80941Smrg        { 247, 247, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FDX },
549b8e80941Smrg        { 247, 247, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FDY },
550b8e80941Smrg
551b8e80941Smrg        /* The stvpms are distinguished by the waddr field. */
552b8e80941Smrg        { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMV },
553b8e80941Smrg        { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMD },
554b8e80941Smrg        { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMP },
555b8e80941Smrg
556b8e80941Smrg        { 252, 252, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_ITOF },
557b8e80941Smrg        { 252, 252, 1 << 3, ANYMUX, V3D_QPU_A_CLZ },
558b8e80941Smrg        { 252, 252, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_UTOF },
559b8e80941Smrg};
560b8e80941Smrg
561b8e80941Smrgstatic const struct opcode_desc mul_ops[] = {
562b8e80941Smrg        { 1, 1, ANYMUX, ANYMUX, V3D_QPU_M_ADD },
563b8e80941Smrg        { 2, 2, ANYMUX, ANYMUX, V3D_QPU_M_SUB },
564b8e80941Smrg        { 3, 3, ANYMUX, ANYMUX, V3D_QPU_M_UMUL24 },
565b8e80941Smrg        { 4, 8, ANYMUX, ANYMUX, V3D_QPU_M_VFMUL },
566b8e80941Smrg        { 9, 9, ANYMUX, ANYMUX, V3D_QPU_M_SMUL24 },
567b8e80941Smrg        { 10, 10, ANYMUX, ANYMUX, V3D_QPU_M_MULTOP },
568b8e80941Smrg        { 14, 14, ANYMUX, ANYMUX, V3D_QPU_M_FMOV },
569b8e80941Smrg        { 15, 15, MUX_MASK(0, 3), ANYMUX, V3D_QPU_M_FMOV },
570b8e80941Smrg        { 15, 15, 1 << 4, 1 << 0, V3D_QPU_M_NOP, 0 },
571b8e80941Smrg        { 15, 15, 1 << 7, ANYMUX, V3D_QPU_M_MOV },
572b8e80941Smrg        { 16, 63, ANYMUX, ANYMUX, V3D_QPU_M_FMUL },
573b8e80941Smrg};
574b8e80941Smrg
575b8e80941Smrgstatic const struct opcode_desc *
576b8e80941Smrglookup_opcode(const struct opcode_desc *opcodes, size_t num_opcodes,
577b8e80941Smrg              uint32_t opcode, uint32_t mux_a, uint32_t mux_b)
578b8e80941Smrg{
579b8e80941Smrg        for (int i = 0; i < num_opcodes; i++) {
580b8e80941Smrg                const struct opcode_desc *op_desc = &opcodes[i];
581b8e80941Smrg
582b8e80941Smrg                if (opcode < op_desc->opcode_first ||
583b8e80941Smrg                    opcode > op_desc->opcode_last)
584b8e80941Smrg                        continue;
585b8e80941Smrg
586b8e80941Smrg                if (!(op_desc->mux_b_mask & (1 << mux_b)))
587b8e80941Smrg                        continue;
588b8e80941Smrg
589b8e80941Smrg                if (!(op_desc->mux_a_mask & (1 << mux_a)))
590b8e80941Smrg                        continue;
591b8e80941Smrg
592b8e80941Smrg                return op_desc;
593b8e80941Smrg        }
594b8e80941Smrg
595b8e80941Smrg        return NULL;
596b8e80941Smrg}
597b8e80941Smrg
598b8e80941Smrgstatic bool
599b8e80941Smrgv3d_qpu_float32_unpack_unpack(uint32_t packed,
600b8e80941Smrg                              enum v3d_qpu_input_unpack *unpacked)
601b8e80941Smrg{
602b8e80941Smrg        switch (packed) {
603b8e80941Smrg        case 0:
604b8e80941Smrg                *unpacked = V3D_QPU_UNPACK_ABS;
605b8e80941Smrg                return true;
606b8e80941Smrg        case 1:
607b8e80941Smrg                *unpacked = V3D_QPU_UNPACK_NONE;
608b8e80941Smrg                return true;
609b8e80941Smrg        case 2:
610b8e80941Smrg                *unpacked = V3D_QPU_UNPACK_L;
611b8e80941Smrg                return true;
612b8e80941Smrg        case 3:
613b8e80941Smrg                *unpacked = V3D_QPU_UNPACK_H;
614b8e80941Smrg                return true;
615b8e80941Smrg        default:
616b8e80941Smrg                return false;
617b8e80941Smrg        }
618b8e80941Smrg}
619b8e80941Smrg
620b8e80941Smrgstatic bool
621b8e80941Smrgv3d_qpu_float32_unpack_pack(enum v3d_qpu_input_unpack unpacked,
622b8e80941Smrg                            uint32_t *packed)
623b8e80941Smrg{
624b8e80941Smrg        switch (unpacked) {
625b8e80941Smrg        case V3D_QPU_UNPACK_ABS:
626b8e80941Smrg                *packed = 0;
627b8e80941Smrg                return true;
628b8e80941Smrg        case V3D_QPU_UNPACK_NONE:
629b8e80941Smrg                *packed = 1;
630b8e80941Smrg                return true;
631b8e80941Smrg        case V3D_QPU_UNPACK_L:
632b8e80941Smrg                *packed = 2;
633b8e80941Smrg                return true;
634b8e80941Smrg        case V3D_QPU_UNPACK_H:
635b8e80941Smrg                *packed = 3;
636b8e80941Smrg                return true;
637b8e80941Smrg        default:
638b8e80941Smrg                return false;
639b8e80941Smrg        }
640b8e80941Smrg}
641b8e80941Smrg
642b8e80941Smrgstatic bool
643b8e80941Smrgv3d_qpu_float16_unpack_unpack(uint32_t packed,
644b8e80941Smrg                              enum v3d_qpu_input_unpack *unpacked)
645b8e80941Smrg{
646b8e80941Smrg        switch (packed) {
647b8e80941Smrg        case 0:
648b8e80941Smrg                *unpacked = V3D_QPU_UNPACK_NONE;
649b8e80941Smrg                return true;
650b8e80941Smrg        case 1:
651b8e80941Smrg                *unpacked = V3D_QPU_UNPACK_REPLICATE_32F_16;
652b8e80941Smrg                return true;
653b8e80941Smrg        case 2:
654b8e80941Smrg                *unpacked = V3D_QPU_UNPACK_REPLICATE_L_16;
655b8e80941Smrg                return true;
656b8e80941Smrg        case 3:
657b8e80941Smrg                *unpacked = V3D_QPU_UNPACK_REPLICATE_H_16;
658b8e80941Smrg                return true;
659b8e80941Smrg        case 4:
660b8e80941Smrg                *unpacked = V3D_QPU_UNPACK_SWAP_16;
661b8e80941Smrg                return true;
662b8e80941Smrg        default:
663b8e80941Smrg                return false;
664b8e80941Smrg        }
665b8e80941Smrg}
666b8e80941Smrg
667b8e80941Smrgstatic bool
668b8e80941Smrgv3d_qpu_float16_unpack_pack(enum v3d_qpu_input_unpack unpacked,
669b8e80941Smrg                            uint32_t *packed)
670b8e80941Smrg{
671b8e80941Smrg        switch (unpacked) {
672b8e80941Smrg        case V3D_QPU_UNPACK_NONE:
673b8e80941Smrg                *packed = 0;
674b8e80941Smrg                return true;
675b8e80941Smrg        case V3D_QPU_UNPACK_REPLICATE_32F_16:
676b8e80941Smrg                *packed = 1;
677b8e80941Smrg                return true;
678b8e80941Smrg        case V3D_QPU_UNPACK_REPLICATE_L_16:
679b8e80941Smrg                *packed = 2;
680b8e80941Smrg                return true;
681b8e80941Smrg        case V3D_QPU_UNPACK_REPLICATE_H_16:
682b8e80941Smrg                *packed = 3;
683b8e80941Smrg                return true;
684b8e80941Smrg        case V3D_QPU_UNPACK_SWAP_16:
685b8e80941Smrg                *packed = 4;
686b8e80941Smrg                return true;
687b8e80941Smrg        default:
688b8e80941Smrg                return false;
689b8e80941Smrg        }
690b8e80941Smrg}
691b8e80941Smrg
692b8e80941Smrgstatic bool
693b8e80941Smrgv3d_qpu_float32_pack_pack(enum v3d_qpu_input_unpack unpacked,
694b8e80941Smrg                          uint32_t *packed)
695b8e80941Smrg{
696b8e80941Smrg        switch (unpacked) {
697b8e80941Smrg        case V3D_QPU_PACK_NONE:
698b8e80941Smrg                *packed = 0;
699b8e80941Smrg                return true;
700b8e80941Smrg        case V3D_QPU_PACK_L:
701b8e80941Smrg                *packed = 1;
702b8e80941Smrg                return true;
703b8e80941Smrg        case V3D_QPU_PACK_H:
704b8e80941Smrg                *packed = 2;
705b8e80941Smrg                return true;
706b8e80941Smrg        default:
707b8e80941Smrg                return false;
708b8e80941Smrg        }
709b8e80941Smrg}
710b8e80941Smrg
711b8e80941Smrgstatic bool
712b8e80941Smrgv3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
713b8e80941Smrg                   struct v3d_qpu_instr *instr)
714b8e80941Smrg{
715b8e80941Smrg        uint32_t op = QPU_GET_FIELD(packed_inst, VC5_QPU_OP_ADD);
716b8e80941Smrg        uint32_t mux_a = QPU_GET_FIELD(packed_inst, VC5_QPU_ADD_A);
717b8e80941Smrg        uint32_t mux_b = QPU_GET_FIELD(packed_inst, VC5_QPU_ADD_B);
718b8e80941Smrg        uint32_t waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);
719b8e80941Smrg
720b8e80941Smrg        uint32_t map_op = op;
721b8e80941Smrg        /* Some big clusters of opcodes are replicated with unpack
722b8e80941Smrg         * flags
723b8e80941Smrg         */
724b8e80941Smrg        if (map_op >= 249 && map_op <= 251)
725b8e80941Smrg                map_op = (map_op - 249 + 245);
726b8e80941Smrg        if (map_op >= 253 && map_op <= 255)
727b8e80941Smrg                map_op = (map_op - 253 + 245);
728b8e80941Smrg
729b8e80941Smrg        const struct opcode_desc *desc =
730b8e80941Smrg                lookup_opcode(add_ops, ARRAY_SIZE(add_ops),
731b8e80941Smrg                              map_op, mux_a, mux_b);
732b8e80941Smrg        if (!desc)
733b8e80941Smrg                return false;
734b8e80941Smrg
735b8e80941Smrg        instr->alu.add.op = desc->op;
736b8e80941Smrg
737b8e80941Smrg        /* FADD/FADDNF and FMIN/FMAX are determined by the orders of the
738b8e80941Smrg         * operands.
739b8e80941Smrg         */
740b8e80941Smrg        if (((op >> 2) & 3) * 8 + mux_a > (op & 3) * 8 + mux_b) {
741b8e80941Smrg                if (instr->alu.add.op == V3D_QPU_A_FMIN)
742b8e80941Smrg                        instr->alu.add.op = V3D_QPU_A_FMAX;
743b8e80941Smrg                if (instr->alu.add.op == V3D_QPU_A_FADD)
744b8e80941Smrg                        instr->alu.add.op = V3D_QPU_A_FADDNF;
745b8e80941Smrg        }
746b8e80941Smrg
747b8e80941Smrg        /* Some QPU ops require a bit more than just basic opcode and mux a/b
748b8e80941Smrg         * comparisons to distinguish them.
749b8e80941Smrg         */
750b8e80941Smrg        switch (instr->alu.add.op) {
751b8e80941Smrg        case V3D_QPU_A_STVPMV:
752b8e80941Smrg        case V3D_QPU_A_STVPMD:
753b8e80941Smrg        case V3D_QPU_A_STVPMP:
754b8e80941Smrg                switch (waddr) {
755b8e80941Smrg                case 0:
756b8e80941Smrg                        instr->alu.add.op = V3D_QPU_A_STVPMV;
757b8e80941Smrg                        break;
758b8e80941Smrg                case 1:
759b8e80941Smrg                        instr->alu.add.op = V3D_QPU_A_STVPMD;
760b8e80941Smrg                        break;
761b8e80941Smrg                case 2:
762b8e80941Smrg                        instr->alu.add.op = V3D_QPU_A_STVPMP;
763b8e80941Smrg                        break;
764b8e80941Smrg                default:
765b8e80941Smrg                        return false;
766b8e80941Smrg                }
767b8e80941Smrg                break;
768b8e80941Smrg        default:
769b8e80941Smrg                break;
770b8e80941Smrg        }
771b8e80941Smrg
772b8e80941Smrg        switch (instr->alu.add.op) {
773b8e80941Smrg        case V3D_QPU_A_FADD:
774b8e80941Smrg        case V3D_QPU_A_FADDNF:
775b8e80941Smrg        case V3D_QPU_A_FSUB:
776b8e80941Smrg        case V3D_QPU_A_FMIN:
777b8e80941Smrg        case V3D_QPU_A_FMAX:
778b8e80941Smrg        case V3D_QPU_A_FCMP:
779b8e80941Smrg        case V3D_QPU_A_VFPACK:
780b8e80941Smrg                if (instr->alu.add.op != V3D_QPU_A_VFPACK)
781b8e80941Smrg                        instr->alu.add.output_pack = (op >> 4) & 0x3;
782b8e80941Smrg                else
783b8e80941Smrg                        instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
784b8e80941Smrg
785b8e80941Smrg                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
786b8e80941Smrg                                                   &instr->alu.add.a_unpack)) {
787b8e80941Smrg                        return false;
788b8e80941Smrg                }
789b8e80941Smrg
790b8e80941Smrg                if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3,
791b8e80941Smrg                                                   &instr->alu.add.b_unpack)) {
792b8e80941Smrg                        return false;
793b8e80941Smrg                }
794b8e80941Smrg                break;
795b8e80941Smrg
796b8e80941Smrg        case V3D_QPU_A_FFLOOR:
797b8e80941Smrg        case V3D_QPU_A_FROUND:
798b8e80941Smrg        case V3D_QPU_A_FTRUNC:
799b8e80941Smrg        case V3D_QPU_A_FCEIL:
800b8e80941Smrg        case V3D_QPU_A_FDX:
801b8e80941Smrg        case V3D_QPU_A_FDY:
802b8e80941Smrg                instr->alu.add.output_pack = mux_b & 0x3;
803b8e80941Smrg
804b8e80941Smrg                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
805b8e80941Smrg                                                   &instr->alu.add.a_unpack)) {
806b8e80941Smrg                        return false;
807b8e80941Smrg                }
808b8e80941Smrg                break;
809b8e80941Smrg
810b8e80941Smrg        case V3D_QPU_A_FTOIN:
811b8e80941Smrg        case V3D_QPU_A_FTOIZ:
812b8e80941Smrg        case V3D_QPU_A_FTOUZ:
813b8e80941Smrg        case V3D_QPU_A_FTOC:
814b8e80941Smrg                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
815b8e80941Smrg
816b8e80941Smrg                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
817b8e80941Smrg                                                   &instr->alu.add.a_unpack)) {
818b8e80941Smrg                        return false;
819b8e80941Smrg                }
820b8e80941Smrg                break;
821b8e80941Smrg
822b8e80941Smrg        case V3D_QPU_A_VFMIN:
823b8e80941Smrg        case V3D_QPU_A_VFMAX:
824b8e80941Smrg                if (!v3d_qpu_float16_unpack_unpack(op & 0x7,
825b8e80941Smrg                                                   &instr->alu.add.a_unpack)) {
826b8e80941Smrg                        return false;
827b8e80941Smrg                }
828b8e80941Smrg
829b8e80941Smrg                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
830b8e80941Smrg                instr->alu.add.b_unpack = V3D_QPU_UNPACK_NONE;
831b8e80941Smrg                break;
832b8e80941Smrg
833b8e80941Smrg        default:
834b8e80941Smrg                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
835b8e80941Smrg                instr->alu.add.a_unpack = V3D_QPU_UNPACK_NONE;
836b8e80941Smrg                instr->alu.add.b_unpack = V3D_QPU_UNPACK_NONE;
837b8e80941Smrg                break;
838b8e80941Smrg        }
839b8e80941Smrg
840b8e80941Smrg        instr->alu.add.a = mux_a;
841b8e80941Smrg        instr->alu.add.b = mux_b;
842b8e80941Smrg        instr->alu.add.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);
843b8e80941Smrg
844b8e80941Smrg        instr->alu.add.magic_write = false;
845b8e80941Smrg        if (packed_inst & VC5_QPU_MA) {
846b8e80941Smrg                switch (instr->alu.add.op) {
847b8e80941Smrg                case V3D_QPU_A_LDVPMV_IN:
848b8e80941Smrg                        instr->alu.add.op = V3D_QPU_A_LDVPMV_OUT;
849b8e80941Smrg                        break;
850b8e80941Smrg                case V3D_QPU_A_LDVPMD_IN:
851b8e80941Smrg                        instr->alu.add.op = V3D_QPU_A_LDVPMD_OUT;
852b8e80941Smrg                        break;
853b8e80941Smrg                case V3D_QPU_A_LDVPMG_IN:
854b8e80941Smrg                        instr->alu.add.op = V3D_QPU_A_LDVPMG_OUT;
855b8e80941Smrg                        break;
856b8e80941Smrg                default:
857b8e80941Smrg                        instr->alu.add.magic_write = true;
858b8e80941Smrg                        break;
859b8e80941Smrg                }
860b8e80941Smrg        }
861b8e80941Smrg
862b8e80941Smrg        return true;
863b8e80941Smrg}
864b8e80941Smrg
865b8e80941Smrgstatic bool
866b8e80941Smrgv3d_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
867b8e80941Smrg                   struct v3d_qpu_instr *instr)
868b8e80941Smrg{
869b8e80941Smrg        uint32_t op = QPU_GET_FIELD(packed_inst, VC5_QPU_OP_MUL);
870b8e80941Smrg        uint32_t mux_a = QPU_GET_FIELD(packed_inst, VC5_QPU_MUL_A);
871b8e80941Smrg        uint32_t mux_b = QPU_GET_FIELD(packed_inst, VC5_QPU_MUL_B);
872b8e80941Smrg
873b8e80941Smrg        {
874b8e80941Smrg                const struct opcode_desc *desc =
875b8e80941Smrg                        lookup_opcode(mul_ops, ARRAY_SIZE(mul_ops),
876b8e80941Smrg                                      op, mux_a, mux_b);
877b8e80941Smrg                if (!desc)
878b8e80941Smrg                        return false;
879b8e80941Smrg
880b8e80941Smrg                instr->alu.mul.op = desc->op;
881b8e80941Smrg        }
882b8e80941Smrg
883b8e80941Smrg        switch (instr->alu.mul.op) {
884b8e80941Smrg        case V3D_QPU_M_FMUL:
885b8e80941Smrg                instr->alu.mul.output_pack = ((op >> 4) & 0x3) - 1;
886b8e80941Smrg
887b8e80941Smrg                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
888b8e80941Smrg                                                   &instr->alu.mul.a_unpack)) {
889b8e80941Smrg                        return false;
890b8e80941Smrg                }
891b8e80941Smrg
892b8e80941Smrg                if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3,
893b8e80941Smrg                                                   &instr->alu.mul.b_unpack)) {
894b8e80941Smrg                        return false;
895b8e80941Smrg                }
896b8e80941Smrg
897b8e80941Smrg                break;
898b8e80941Smrg
899b8e80941Smrg        case V3D_QPU_M_FMOV:
900b8e80941Smrg                instr->alu.mul.output_pack = (((op & 1) << 1) +
901b8e80941Smrg                                              ((mux_b >> 2) & 1));
902b8e80941Smrg
903b8e80941Smrg                if (!v3d_qpu_float32_unpack_unpack(mux_b & 0x3,
904b8e80941Smrg                                                   &instr->alu.mul.a_unpack)) {
905b8e80941Smrg                        return false;
906b8e80941Smrg                }
907b8e80941Smrg
908b8e80941Smrg                break;
909b8e80941Smrg
910b8e80941Smrg        case V3D_QPU_M_VFMUL:
911b8e80941Smrg                instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;
912b8e80941Smrg
913b8e80941Smrg                if (!v3d_qpu_float16_unpack_unpack(((op & 0x7) - 4) & 7,
914b8e80941Smrg                                                   &instr->alu.mul.a_unpack)) {
915b8e80941Smrg                        return false;
916b8e80941Smrg                }
917b8e80941Smrg
918b8e80941Smrg                instr->alu.mul.b_unpack = V3D_QPU_UNPACK_NONE;
919b8e80941Smrg
920b8e80941Smrg                break;
921b8e80941Smrg
922b8e80941Smrg        default:
923b8e80941Smrg                instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;
924b8e80941Smrg                instr->alu.mul.a_unpack = V3D_QPU_UNPACK_NONE;
925b8e80941Smrg                instr->alu.mul.b_unpack = V3D_QPU_UNPACK_NONE;
926b8e80941Smrg                break;
927b8e80941Smrg        }
928b8e80941Smrg
929b8e80941Smrg        instr->alu.mul.a = mux_a;
930b8e80941Smrg        instr->alu.mul.b = mux_b;
931b8e80941Smrg        instr->alu.mul.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_M);
932b8e80941Smrg        instr->alu.mul.magic_write = packed_inst & VC5_QPU_MM;
933b8e80941Smrg
934b8e80941Smrg        return true;
935b8e80941Smrg}
936b8e80941Smrg
937b8e80941Smrgstatic bool
938b8e80941Smrgv3d_qpu_add_pack(const struct v3d_device_info *devinfo,
939b8e80941Smrg                 const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
940b8e80941Smrg{
941b8e80941Smrg        uint32_t waddr = instr->alu.add.waddr;
942b8e80941Smrg        uint32_t mux_a = instr->alu.add.a;
943b8e80941Smrg        uint32_t mux_b = instr->alu.add.b;
944b8e80941Smrg        int nsrc = v3d_qpu_add_op_num_src(instr->alu.add.op);
945b8e80941Smrg        const struct opcode_desc *desc;
946b8e80941Smrg
947b8e80941Smrg        int opcode;
948b8e80941Smrg        for (desc = add_ops; desc != &add_ops[ARRAY_SIZE(add_ops)];
949b8e80941Smrg             desc++) {
950b8e80941Smrg                if (desc->op == instr->alu.add.op)
951b8e80941Smrg                        break;
952b8e80941Smrg        }
953b8e80941Smrg        if (desc == &add_ops[ARRAY_SIZE(add_ops)])
954b8e80941Smrg                return false;
955b8e80941Smrg
956b8e80941Smrg        opcode = desc->opcode_first;
957b8e80941Smrg
958b8e80941Smrg        /* If an operation doesn't use an arg, its mux values may be used to
959b8e80941Smrg         * identify the operation type.
960b8e80941Smrg         */
961b8e80941Smrg        if (nsrc < 2)
962b8e80941Smrg                mux_b = ffs(desc->mux_b_mask) - 1;
963b8e80941Smrg
964b8e80941Smrg        if (nsrc < 1)
965b8e80941Smrg                mux_a = ffs(desc->mux_a_mask) - 1;
966b8e80941Smrg
967b8e80941Smrg        bool no_magic_write = false;
968b8e80941Smrg
969b8e80941Smrg        switch (instr->alu.add.op) {
970b8e80941Smrg        case V3D_QPU_A_STVPMV:
971b8e80941Smrg                waddr = 0;
972b8e80941Smrg                no_magic_write = true;
973b8e80941Smrg                break;
974b8e80941Smrg        case V3D_QPU_A_STVPMD:
975b8e80941Smrg                waddr = 1;
976b8e80941Smrg                no_magic_write = true;
977b8e80941Smrg                break;
978b8e80941Smrg        case V3D_QPU_A_STVPMP:
979b8e80941Smrg                waddr = 2;
980b8e80941Smrg                no_magic_write = true;
981b8e80941Smrg                break;
982b8e80941Smrg
983b8e80941Smrg        case V3D_QPU_A_LDVPMV_IN:
984b8e80941Smrg        case V3D_QPU_A_LDVPMD_IN:
985b8e80941Smrg        case V3D_QPU_A_LDVPMP:
986b8e80941Smrg        case V3D_QPU_A_LDVPMG_IN:
987b8e80941Smrg                assert(!instr->alu.add.magic_write);
988b8e80941Smrg                break;
989b8e80941Smrg
990b8e80941Smrg        case V3D_QPU_A_LDVPMV_OUT:
991b8e80941Smrg        case V3D_QPU_A_LDVPMD_OUT:
992b8e80941Smrg        case V3D_QPU_A_LDVPMG_OUT:
993b8e80941Smrg                assert(!instr->alu.add.magic_write);
994b8e80941Smrg                *packed_instr |= VC5_QPU_MA;
995b8e80941Smrg                break;
996b8e80941Smrg
997b8e80941Smrg        default:
998b8e80941Smrg                break;
999b8e80941Smrg        }
1000b8e80941Smrg
1001b8e80941Smrg        switch (instr->alu.add.op) {
1002b8e80941Smrg        case V3D_QPU_A_FADD:
1003b8e80941Smrg        case V3D_QPU_A_FADDNF:
1004b8e80941Smrg        case V3D_QPU_A_FSUB:
1005b8e80941Smrg        case V3D_QPU_A_FMIN:
1006b8e80941Smrg        case V3D_QPU_A_FMAX:
1007b8e80941Smrg        case V3D_QPU_A_FCMP: {
1008b8e80941Smrg                uint32_t output_pack;
1009b8e80941Smrg                uint32_t a_unpack;
1010b8e80941Smrg                uint32_t b_unpack;
1011b8e80941Smrg
1012b8e80941Smrg                if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
1013b8e80941Smrg                                               &output_pack)) {
1014b8e80941Smrg                        return false;
1015b8e80941Smrg                }
1016b8e80941Smrg                opcode |= output_pack << 4;
1017b8e80941Smrg
1018b8e80941Smrg                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
1019b8e80941Smrg                                                 &a_unpack)) {
1020b8e80941Smrg                        return false;
1021b8e80941Smrg                }
1022b8e80941Smrg
1023b8e80941Smrg                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b_unpack,
1024b8e80941Smrg                                                 &b_unpack)) {
1025b8e80941Smrg                        return false;
1026b8e80941Smrg                }
1027b8e80941Smrg
1028b8e80941Smrg                /* These operations with commutative operands are
1029b8e80941Smrg                 * distinguished by which order their operands come in.
1030b8e80941Smrg                 */
1031b8e80941Smrg                bool ordering = a_unpack * 8 + mux_a > b_unpack * 8 + mux_b;
1032b8e80941Smrg                if (((instr->alu.add.op == V3D_QPU_A_FMIN ||
1033b8e80941Smrg                      instr->alu.add.op == V3D_QPU_A_FADD) && ordering) ||
1034b8e80941Smrg                    ((instr->alu.add.op == V3D_QPU_A_FMAX ||
1035b8e80941Smrg                      instr->alu.add.op == V3D_QPU_A_FADDNF) && !ordering)) {
1036b8e80941Smrg                        uint32_t temp;
1037b8e80941Smrg
1038b8e80941Smrg                        temp = a_unpack;
1039b8e80941Smrg                        a_unpack = b_unpack;
1040b8e80941Smrg                        b_unpack = temp;
1041b8e80941Smrg
1042b8e80941Smrg                        temp = mux_a;
1043b8e80941Smrg                        mux_a = mux_b;
1044b8e80941Smrg                        mux_b = temp;
1045b8e80941Smrg                }
1046b8e80941Smrg
1047b8e80941Smrg                opcode |= a_unpack << 2;
1048b8e80941Smrg                opcode |= b_unpack << 0;
1049b8e80941Smrg
1050b8e80941Smrg                break;
1051b8e80941Smrg        }
1052b8e80941Smrg
1053b8e80941Smrg        case V3D_QPU_A_VFPACK: {
1054b8e80941Smrg                uint32_t a_unpack;
1055b8e80941Smrg                uint32_t b_unpack;
1056b8e80941Smrg
1057b8e80941Smrg                if (instr->alu.add.a_unpack == V3D_QPU_UNPACK_ABS ||
1058b8e80941Smrg                    instr->alu.add.b_unpack == V3D_QPU_UNPACK_ABS) {
1059b8e80941Smrg                        return false;
1060b8e80941Smrg                }
1061b8e80941Smrg
1062b8e80941Smrg                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
1063b8e80941Smrg                                                 &a_unpack)) {
1064b8e80941Smrg                        return false;
1065b8e80941Smrg                }
1066b8e80941Smrg
1067b8e80941Smrg                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b_unpack,
1068b8e80941Smrg                                                 &b_unpack)) {
1069b8e80941Smrg                        return false;
1070b8e80941Smrg                }
1071b8e80941Smrg
1072b8e80941Smrg                opcode = (opcode & ~(1 << 2)) | (a_unpack << 2);
1073b8e80941Smrg                opcode = (opcode & ~(1 << 0)) | (b_unpack << 0);
1074b8e80941Smrg
1075b8e80941Smrg                break;
1076b8e80941Smrg        }
1077b8e80941Smrg
1078b8e80941Smrg        case V3D_QPU_A_FFLOOR:
1079b8e80941Smrg        case V3D_QPU_A_FROUND:
1080b8e80941Smrg        case V3D_QPU_A_FTRUNC:
1081b8e80941Smrg        case V3D_QPU_A_FCEIL:
1082b8e80941Smrg        case V3D_QPU_A_FDX:
1083b8e80941Smrg        case V3D_QPU_A_FDY: {
1084b8e80941Smrg                uint32_t packed;
1085b8e80941Smrg
1086b8e80941Smrg                if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
1087b8e80941Smrg                                               &packed)) {
1088b8e80941Smrg                        return false;
1089b8e80941Smrg                }
1090b8e80941Smrg                mux_b |= packed;
1091b8e80941Smrg
1092b8e80941Smrg                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
1093b8e80941Smrg                                                 &packed)) {
1094b8e80941Smrg                        return false;
1095b8e80941Smrg                }
1096b8e80941Smrg                if (packed == 0)
1097b8e80941Smrg                        return false;
1098b8e80941Smrg                opcode = (opcode & ~(1 << 2)) | packed << 2;
1099b8e80941Smrg                break;
1100b8e80941Smrg        }
1101b8e80941Smrg
1102b8e80941Smrg        case V3D_QPU_A_FTOIN:
1103b8e80941Smrg        case V3D_QPU_A_FTOIZ:
1104b8e80941Smrg        case V3D_QPU_A_FTOUZ:
1105b8e80941Smrg        case V3D_QPU_A_FTOC:
1106b8e80941Smrg                if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE)
1107b8e80941Smrg                        return false;
1108b8e80941Smrg
1109b8e80941Smrg                uint32_t packed;
1110b8e80941Smrg                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
1111b8e80941Smrg                                                 &packed)) {
1112b8e80941Smrg                        return false;
1113b8e80941Smrg                }
1114b8e80941Smrg                if (packed == 0)
1115b8e80941Smrg                        return false;
1116b8e80941Smrg                opcode |= packed << 2;
1117b8e80941Smrg
1118b8e80941Smrg                break;
1119b8e80941Smrg
1120b8e80941Smrg        case V3D_QPU_A_VFMIN:
1121b8e80941Smrg        case V3D_QPU_A_VFMAX:
1122b8e80941Smrg                if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
1123b8e80941Smrg                    instr->alu.add.b_unpack != V3D_QPU_UNPACK_NONE) {
1124b8e80941Smrg                        return false;
1125b8e80941Smrg                }
1126b8e80941Smrg
1127b8e80941Smrg                if (!v3d_qpu_float16_unpack_pack(instr->alu.add.a_unpack,
1128b8e80941Smrg                                                 &packed)) {
1129b8e80941Smrg                        return false;
1130b8e80941Smrg                }
1131b8e80941Smrg                opcode |= packed;
1132b8e80941Smrg                break;
1133b8e80941Smrg
1134b8e80941Smrg        default:
1135b8e80941Smrg                if (instr->alu.add.op != V3D_QPU_A_NOP &&
1136b8e80941Smrg                    (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
1137b8e80941Smrg                     instr->alu.add.a_unpack != V3D_QPU_UNPACK_NONE ||
1138b8e80941Smrg                     instr->alu.add.b_unpack != V3D_QPU_UNPACK_NONE)) {
1139b8e80941Smrg                        return false;
1140b8e80941Smrg                }
1141b8e80941Smrg                break;
1142b8e80941Smrg        }
1143b8e80941Smrg
1144b8e80941Smrg        *packed_instr |= QPU_SET_FIELD(mux_a, VC5_QPU_ADD_A);
1145b8e80941Smrg        *packed_instr |= QPU_SET_FIELD(mux_b, VC5_QPU_ADD_B);
1146b8e80941Smrg        *packed_instr |= QPU_SET_FIELD(opcode, VC5_QPU_OP_ADD);
1147b8e80941Smrg        *packed_instr |= QPU_SET_FIELD(waddr, V3D_QPU_WADDR_A);
1148b8e80941Smrg        if (instr->alu.add.magic_write && !no_magic_write)
1149b8e80941Smrg                *packed_instr |= VC5_QPU_MA;
1150b8e80941Smrg
1151b8e80941Smrg        return true;
1152b8e80941Smrg}
1153b8e80941Smrg
1154b8e80941Smrgstatic bool
1155b8e80941Smrgv3d_qpu_mul_pack(const struct v3d_device_info *devinfo,
1156b8e80941Smrg                 const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
1157b8e80941Smrg{
1158b8e80941Smrg        uint32_t mux_a = instr->alu.mul.a;
1159b8e80941Smrg        uint32_t mux_b = instr->alu.mul.b;
1160b8e80941Smrg        int nsrc = v3d_qpu_mul_op_num_src(instr->alu.mul.op);
1161b8e80941Smrg        const struct opcode_desc *desc;
1162b8e80941Smrg
1163b8e80941Smrg        for (desc = mul_ops; desc != &mul_ops[ARRAY_SIZE(mul_ops)];
1164b8e80941Smrg             desc++) {
1165b8e80941Smrg                if (desc->op == instr->alu.mul.op)
1166b8e80941Smrg                        break;
1167b8e80941Smrg        }
1168b8e80941Smrg        if (desc == &mul_ops[ARRAY_SIZE(mul_ops)])
1169b8e80941Smrg                return false;
1170b8e80941Smrg
1171b8e80941Smrg        uint32_t opcode = desc->opcode_first;
1172b8e80941Smrg
1173b8e80941Smrg        /* Some opcodes have a single valid value for their mux a/b, so set
1174b8e80941Smrg         * that here.  If mux a/b determine packing, it will be set below.
1175b8e80941Smrg         */
1176b8e80941Smrg        if (nsrc < 2)
1177b8e80941Smrg                mux_b = ffs(desc->mux_b_mask) - 1;
1178b8e80941Smrg
1179b8e80941Smrg        if (nsrc < 1)
1180b8e80941Smrg                mux_a = ffs(desc->mux_a_mask) - 1;
1181b8e80941Smrg
1182b8e80941Smrg        switch (instr->alu.mul.op) {
1183b8e80941Smrg        case V3D_QPU_M_FMUL: {
1184b8e80941Smrg                uint32_t packed;
1185b8e80941Smrg
1186b8e80941Smrg                if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
1187b8e80941Smrg                                               &packed)) {
1188b8e80941Smrg                        return false;
1189b8e80941Smrg                }
1190b8e80941Smrg                /* No need for a +1 because desc->opcode_first has a 1 in this
1191b8e80941Smrg                 * field.
1192b8e80941Smrg                 */
1193b8e80941Smrg                opcode += packed << 4;
1194b8e80941Smrg
1195b8e80941Smrg                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a_unpack,
1196b8e80941Smrg                                                 &packed)) {
1197b8e80941Smrg                        return false;
1198b8e80941Smrg                }
1199b8e80941Smrg                opcode |= packed << 2;
1200b8e80941Smrg
1201b8e80941Smrg                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.b_unpack,
1202b8e80941Smrg                                                 &packed)) {
1203b8e80941Smrg                        return false;
1204b8e80941Smrg                }
1205b8e80941Smrg                opcode |= packed << 0;
1206b8e80941Smrg                break;
1207b8e80941Smrg        }
1208b8e80941Smrg
1209b8e80941Smrg        case V3D_QPU_M_FMOV: {
1210b8e80941Smrg                uint32_t packed;
1211b8e80941Smrg
1212b8e80941Smrg                if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
1213b8e80941Smrg                                               &packed)) {
1214b8e80941Smrg                        return false;
1215b8e80941Smrg                }
1216b8e80941Smrg                opcode |= (packed >> 1) & 1;
1217b8e80941Smrg                mux_b = (packed & 1) << 2;
1218b8e80941Smrg
1219b8e80941Smrg                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a_unpack,
1220b8e80941Smrg                                                 &packed)) {
1221b8e80941Smrg                        return false;
1222b8e80941Smrg                }
1223b8e80941Smrg                mux_b |= packed;
1224b8e80941Smrg                break;
1225b8e80941Smrg        }
1226b8e80941Smrg
1227b8e80941Smrg        case V3D_QPU_M_VFMUL: {
1228b8e80941Smrg                uint32_t packed;
1229b8e80941Smrg
1230b8e80941Smrg                if (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE)
1231b8e80941Smrg                        return false;
1232b8e80941Smrg
1233b8e80941Smrg                if (!v3d_qpu_float16_unpack_pack(instr->alu.mul.a_unpack,
1234b8e80941Smrg                                                 &packed)) {
1235b8e80941Smrg                        return false;
1236b8e80941Smrg                }
1237b8e80941Smrg                if (instr->alu.mul.a_unpack == V3D_QPU_UNPACK_SWAP_16)
1238b8e80941Smrg                        opcode = 8;
1239b8e80941Smrg                else
1240b8e80941Smrg                        opcode |= (packed + 4) & 7;
1241b8e80941Smrg
1242b8e80941Smrg                if (instr->alu.mul.b_unpack != V3D_QPU_UNPACK_NONE)
1243b8e80941Smrg                        return false;
1244b8e80941Smrg
1245b8e80941Smrg                break;
1246b8e80941Smrg        }
1247b8e80941Smrg
1248b8e80941Smrg        default:
1249b8e80941Smrg                break;
1250b8e80941Smrg        }
1251b8e80941Smrg
1252b8e80941Smrg        *packed_instr |= QPU_SET_FIELD(mux_a, VC5_QPU_MUL_A);
1253b8e80941Smrg        *packed_instr |= QPU_SET_FIELD(mux_b, VC5_QPU_MUL_B);
1254b8e80941Smrg
1255b8e80941Smrg        *packed_instr |= QPU_SET_FIELD(opcode, VC5_QPU_OP_MUL);
1256b8e80941Smrg        *packed_instr |= QPU_SET_FIELD(instr->alu.mul.waddr, V3D_QPU_WADDR_M);
1257b8e80941Smrg        if (instr->alu.mul.magic_write)
1258b8e80941Smrg                *packed_instr |= VC5_QPU_MM;
1259b8e80941Smrg
1260b8e80941Smrg        return true;
1261b8e80941Smrg}
1262b8e80941Smrg
1263b8e80941Smrgstatic bool
1264b8e80941Smrgv3d_qpu_instr_unpack_alu(const struct v3d_device_info *devinfo,
1265b8e80941Smrg                         uint64_t packed_instr,
1266b8e80941Smrg                         struct v3d_qpu_instr *instr)
1267b8e80941Smrg{
1268b8e80941Smrg        instr->type = V3D_QPU_INSTR_TYPE_ALU;
1269b8e80941Smrg
1270b8e80941Smrg        if (!v3d_qpu_sig_unpack(devinfo,
1271b8e80941Smrg                                QPU_GET_FIELD(packed_instr, VC5_QPU_SIG),
1272b8e80941Smrg                                &instr->sig))
1273b8e80941Smrg                return false;
1274b8e80941Smrg
1275b8e80941Smrg        uint32_t packed_cond = QPU_GET_FIELD(packed_instr, VC5_QPU_COND);
1276b8e80941Smrg        if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) {
1277b8e80941Smrg                instr->sig_addr = packed_cond & ~VC5_QPU_COND_SIG_MAGIC_ADDR;
1278b8e80941Smrg                instr->sig_magic = packed_cond & VC5_QPU_COND_SIG_MAGIC_ADDR;
1279b8e80941Smrg
1280b8e80941Smrg                instr->flags.ac = V3D_QPU_COND_NONE;
1281b8e80941Smrg                instr->flags.mc = V3D_QPU_COND_NONE;
1282b8e80941Smrg                instr->flags.apf = V3D_QPU_PF_NONE;
1283b8e80941Smrg                instr->flags.mpf = V3D_QPU_PF_NONE;
1284b8e80941Smrg                instr->flags.auf = V3D_QPU_UF_NONE;
1285b8e80941Smrg                instr->flags.muf = V3D_QPU_UF_NONE;
1286b8e80941Smrg        } else {
1287b8e80941Smrg                if (!v3d_qpu_flags_unpack(devinfo, packed_cond, &instr->flags))
1288b8e80941Smrg                        return false;
1289b8e80941Smrg        }
1290b8e80941Smrg
1291b8e80941Smrg        instr->raddr_a = QPU_GET_FIELD(packed_instr, VC5_QPU_RADDR_A);
1292b8e80941Smrg        instr->raddr_b = QPU_GET_FIELD(packed_instr, VC5_QPU_RADDR_B);
1293b8e80941Smrg
1294b8e80941Smrg        if (!v3d_qpu_add_unpack(devinfo, packed_instr, instr))
1295b8e80941Smrg                return false;
1296b8e80941Smrg
1297b8e80941Smrg        if (!v3d_qpu_mul_unpack(devinfo, packed_instr, instr))
1298b8e80941Smrg                return false;
1299b8e80941Smrg
1300b8e80941Smrg        return true;
1301b8e80941Smrg}
1302b8e80941Smrg
1303b8e80941Smrgstatic bool
1304b8e80941Smrgv3d_qpu_instr_unpack_branch(const struct v3d_device_info *devinfo,
1305b8e80941Smrg                            uint64_t packed_instr,
1306b8e80941Smrg                            struct v3d_qpu_instr *instr)
1307b8e80941Smrg{
1308b8e80941Smrg        instr->type = V3D_QPU_INSTR_TYPE_BRANCH;
1309b8e80941Smrg
1310b8e80941Smrg        uint32_t cond = QPU_GET_FIELD(packed_instr, VC5_QPU_BRANCH_COND);
1311b8e80941Smrg        if (cond == 0)
1312b8e80941Smrg                instr->branch.cond = V3D_QPU_BRANCH_COND_ALWAYS;
1313b8e80941Smrg        else if (V3D_QPU_BRANCH_COND_A0 + (cond - 2) <=
1314b8e80941Smrg                 V3D_QPU_BRANCH_COND_ALLNA)
1315b8e80941Smrg                instr->branch.cond = V3D_QPU_BRANCH_COND_A0 + (cond - 2);
1316b8e80941Smrg        else
1317b8e80941Smrg                return false;
1318b8e80941Smrg
1319b8e80941Smrg        uint32_t msfign = QPU_GET_FIELD(packed_instr, VC5_QPU_BRANCH_MSFIGN);
1320b8e80941Smrg        if (msfign == 3)
1321b8e80941Smrg                return false;
1322b8e80941Smrg        instr->branch.msfign = msfign;
1323b8e80941Smrg
1324b8e80941Smrg        instr->branch.bdi = QPU_GET_FIELD(packed_instr, VC5_QPU_BRANCH_BDI);
1325b8e80941Smrg
1326b8e80941Smrg        instr->branch.ub = packed_instr & VC5_QPU_BRANCH_UB;
1327b8e80941Smrg        if (instr->branch.ub) {
1328b8e80941Smrg                instr->branch.bdu = QPU_GET_FIELD(packed_instr,
1329b8e80941Smrg                                                  VC5_QPU_BRANCH_BDU);
1330b8e80941Smrg        }
1331b8e80941Smrg
1332b8e80941Smrg        instr->branch.raddr_a = QPU_GET_FIELD(packed_instr,
1333b8e80941Smrg                                              VC5_QPU_RADDR_A);
1334b8e80941Smrg
1335b8e80941Smrg        instr->branch.offset = 0;
1336b8e80941Smrg
1337b8e80941Smrg        instr->branch.offset +=
1338b8e80941Smrg                QPU_GET_FIELD(packed_instr,
1339b8e80941Smrg                              VC5_QPU_BRANCH_ADDR_LOW) << 3;
1340b8e80941Smrg
1341b8e80941Smrg        instr->branch.offset +=
1342b8e80941Smrg                QPU_GET_FIELD(packed_instr,
1343b8e80941Smrg                              VC5_QPU_BRANCH_ADDR_HIGH) << 24;
1344b8e80941Smrg
1345b8e80941Smrg        return true;
1346b8e80941Smrg}
1347b8e80941Smrg
1348b8e80941Smrgbool
1349b8e80941Smrgv3d_qpu_instr_unpack(const struct v3d_device_info *devinfo,
1350b8e80941Smrg                     uint64_t packed_instr,
1351b8e80941Smrg                     struct v3d_qpu_instr *instr)
1352b8e80941Smrg{
1353b8e80941Smrg        if (QPU_GET_FIELD(packed_instr, VC5_QPU_OP_MUL) != 0) {
1354b8e80941Smrg                return v3d_qpu_instr_unpack_alu(devinfo, packed_instr, instr);
1355b8e80941Smrg        } else {
1356b8e80941Smrg                uint32_t sig = QPU_GET_FIELD(packed_instr, VC5_QPU_SIG);
1357b8e80941Smrg
1358b8e80941Smrg                if ((sig & 24) == 16) {
1359b8e80941Smrg                        return v3d_qpu_instr_unpack_branch(devinfo, packed_instr,
1360b8e80941Smrg                                                           instr);
1361b8e80941Smrg                } else {
1362b8e80941Smrg                        return false;
1363b8e80941Smrg                }
1364b8e80941Smrg        }
1365b8e80941Smrg}
1366b8e80941Smrg
1367b8e80941Smrgstatic bool
1368b8e80941Smrgv3d_qpu_instr_pack_alu(const struct v3d_device_info *devinfo,
1369b8e80941Smrg                       const struct v3d_qpu_instr *instr,
1370b8e80941Smrg                       uint64_t *packed_instr)
1371b8e80941Smrg{
1372b8e80941Smrg        uint32_t sig;
1373b8e80941Smrg        if (!v3d_qpu_sig_pack(devinfo, &instr->sig, &sig))
1374b8e80941Smrg                return false;
1375b8e80941Smrg        *packed_instr |= QPU_SET_FIELD(sig, VC5_QPU_SIG);
1376b8e80941Smrg
1377b8e80941Smrg        if (instr->type == V3D_QPU_INSTR_TYPE_ALU) {
1378b8e80941Smrg                *packed_instr |= QPU_SET_FIELD(instr->raddr_a, VC5_QPU_RADDR_A);
1379b8e80941Smrg                *packed_instr |= QPU_SET_FIELD(instr->raddr_b, VC5_QPU_RADDR_B);
1380b8e80941Smrg
1381b8e80941Smrg                if (!v3d_qpu_add_pack(devinfo, instr, packed_instr))
1382b8e80941Smrg                        return false;
1383b8e80941Smrg                if (!v3d_qpu_mul_pack(devinfo, instr, packed_instr))
1384b8e80941Smrg                        return false;
1385b8e80941Smrg
1386b8e80941Smrg                uint32_t flags;
1387b8e80941Smrg                if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) {
1388b8e80941Smrg                        if (instr->flags.ac != V3D_QPU_COND_NONE ||
1389b8e80941Smrg                            instr->flags.mc != V3D_QPU_COND_NONE ||
1390b8e80941Smrg                            instr->flags.apf != V3D_QPU_PF_NONE ||
1391b8e80941Smrg                            instr->flags.mpf != V3D_QPU_PF_NONE ||
1392b8e80941Smrg                            instr->flags.auf != V3D_QPU_UF_NONE ||
1393b8e80941Smrg                            instr->flags.muf != V3D_QPU_UF_NONE) {
1394b8e80941Smrg                                return false;
1395b8e80941Smrg                        }
1396b8e80941Smrg
1397b8e80941Smrg                        flags = instr->sig_addr;
1398b8e80941Smrg                        if (instr->sig_magic)
1399b8e80941Smrg                                flags |= VC5_QPU_COND_SIG_MAGIC_ADDR;
1400b8e80941Smrg                } else {
1401b8e80941Smrg                        if (!v3d_qpu_flags_pack(devinfo, &instr->flags, &flags))
1402b8e80941Smrg                                return false;
1403b8e80941Smrg                }
1404b8e80941Smrg
1405b8e80941Smrg                *packed_instr |= QPU_SET_FIELD(flags, VC5_QPU_COND);
1406b8e80941Smrg        } else {
1407b8e80941Smrg                if (v3d_qpu_sig_writes_address(devinfo, &instr->sig))
1408b8e80941Smrg                        return false;
1409b8e80941Smrg        }
1410b8e80941Smrg
1411b8e80941Smrg        return true;
1412b8e80941Smrg}
1413b8e80941Smrg
1414b8e80941Smrgstatic bool
1415b8e80941Smrgv3d_qpu_instr_pack_branch(const struct v3d_device_info *devinfo,
1416b8e80941Smrg                          const struct v3d_qpu_instr *instr,
1417b8e80941Smrg                          uint64_t *packed_instr)
1418b8e80941Smrg{
1419b8e80941Smrg        *packed_instr |= QPU_SET_FIELD(16, VC5_QPU_SIG);
1420b8e80941Smrg
1421b8e80941Smrg        if (instr->branch.cond != V3D_QPU_BRANCH_COND_ALWAYS) {
1422b8e80941Smrg                *packed_instr |= QPU_SET_FIELD(2 + (instr->branch.cond -
1423b8e80941Smrg                                                    V3D_QPU_BRANCH_COND_A0),
1424b8e80941Smrg                                               VC5_QPU_BRANCH_COND);
1425b8e80941Smrg        }
1426b8e80941Smrg
1427b8e80941Smrg        *packed_instr |= QPU_SET_FIELD(instr->branch.msfign,
1428b8e80941Smrg                                       VC5_QPU_BRANCH_MSFIGN);
1429b8e80941Smrg
1430b8e80941Smrg        *packed_instr |= QPU_SET_FIELD(instr->branch.bdi,
1431b8e80941Smrg                                       VC5_QPU_BRANCH_BDI);
1432b8e80941Smrg
1433b8e80941Smrg        if (instr->branch.ub) {
1434b8e80941Smrg                *packed_instr |= VC5_QPU_BRANCH_UB;
1435b8e80941Smrg                *packed_instr |= QPU_SET_FIELD(instr->branch.bdu,
1436b8e80941Smrg                                               VC5_QPU_BRANCH_BDU);
1437b8e80941Smrg        }
1438b8e80941Smrg
1439b8e80941Smrg        switch (instr->branch.bdi) {
1440b8e80941Smrg        case V3D_QPU_BRANCH_DEST_ABS:
1441b8e80941Smrg        case V3D_QPU_BRANCH_DEST_REL:
1442b8e80941Smrg                *packed_instr |= QPU_SET_FIELD(instr->branch.msfign,
1443b8e80941Smrg                                               VC5_QPU_BRANCH_MSFIGN);
1444b8e80941Smrg
1445b8e80941Smrg                *packed_instr |= QPU_SET_FIELD((instr->branch.offset &
1446b8e80941Smrg                                                ~0xff000000) >> 3,
1447b8e80941Smrg                                               VC5_QPU_BRANCH_ADDR_LOW);
1448b8e80941Smrg
1449b8e80941Smrg                *packed_instr |= QPU_SET_FIELD(instr->branch.offset >> 24,
1450b8e80941Smrg                                               VC5_QPU_BRANCH_ADDR_HIGH);
1451b8e80941Smrg
1452b8e80941Smrg        case V3D_QPU_BRANCH_DEST_REGFILE:
1453b8e80941Smrg                *packed_instr |= QPU_SET_FIELD(instr->branch.raddr_a,
1454b8e80941Smrg                                               VC5_QPU_RADDR_A);
1455b8e80941Smrg                break;
1456b8e80941Smrg
1457b8e80941Smrg        default:
1458b8e80941Smrg                break;
1459b8e80941Smrg        }
1460b8e80941Smrg
1461b8e80941Smrg        return true;
1462b8e80941Smrg}
1463b8e80941Smrg
1464b8e80941Smrgbool
1465b8e80941Smrgv3d_qpu_instr_pack(const struct v3d_device_info *devinfo,
1466b8e80941Smrg                   const struct v3d_qpu_instr *instr,
1467b8e80941Smrg                   uint64_t *packed_instr)
1468b8e80941Smrg{
1469b8e80941Smrg        *packed_instr = 0;
1470b8e80941Smrg
1471b8e80941Smrg        switch (instr->type) {
1472b8e80941Smrg        case V3D_QPU_INSTR_TYPE_ALU:
1473b8e80941Smrg                return v3d_qpu_instr_pack_alu(devinfo, instr, packed_instr);
1474b8e80941Smrg        case V3D_QPU_INSTR_TYPE_BRANCH:
1475b8e80941Smrg                return v3d_qpu_instr_pack_branch(devinfo, instr, packed_instr);
1476b8e80941Smrg        default:
1477b8e80941Smrg                return false;
1478b8e80941Smrg        }
1479b8e80941Smrg}
1480