1/*
2 * Copyright © 2016 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include <string.h>
25#include "util/macros.h"
26
27#include "broadcom/common/v3d_device_info.h"
28#include "qpu_instr.h"
29
#ifndef QPU_MASK
/* Bitmask covering the inclusive bit range [high, low] of a 64-bit
 * instruction word.
 */
#define QPU_MASK(high, low) ((((uint64_t)1<<((high)-(low)+1))-1)<<(low))
/* Using the GNU statement expression extension */
/* Shifts `value` into `field`'s bit position, asserting that it fits
 * within the field's mask.
 */
#define QPU_SET_FIELD(value, field)                                       \
        ({                                                                \
                uint64_t fieldval = (uint64_t)(value) << field ## _SHIFT; \
                assert((fieldval & ~ field ## _MASK) == 0);               \
                fieldval & field ## _MASK;                                \
         })

/* Extracts `field` from a packed 64-bit instruction word. */
#define QPU_GET_FIELD(word, field) ((uint32_t)(((word)  & field ## _MASK) >> field ## _SHIFT))

/* Replaces `field` within `inst` with `value`, leaving all other bits
 * untouched.
 */
#define QPU_UPDATE_FIELD(inst, value, field)                              \
        (((inst) & ~(field ## _MASK)) | QPU_SET_FIELD(value, field))
#endif /* QPU_MASK */
45
/* Bit positions of the fields within the packed 64-bit QPU instruction
 * word.  ALU and branch instructions share the word, so some of the
 * branch-specific ranges below overlap the ALU ones (e.g. BRANCH_ADDR_LOW
 * overlaps the SIG and WADDR fields).
 */
#define VC5_QPU_OP_MUL_SHIFT                58
#define VC5_QPU_OP_MUL_MASK                 QPU_MASK(63, 58)

#define VC5_QPU_SIG_SHIFT                   53
#define VC5_QPU_SIG_MASK                    QPU_MASK(57, 53)

#define VC5_QPU_COND_SHIFT                  46
#define VC5_QPU_COND_MASK                   QPU_MASK(52, 46)
#define VC5_QPU_COND_SIG_MAGIC_ADDR         (1 << 6)

/* Magic-write bits for the mul (MM) and add (MA) ALU write addresses. */
#define VC5_QPU_MM                          QPU_MASK(45, 45)
#define VC5_QPU_MA                          QPU_MASK(44, 44)

#define V3D_QPU_WADDR_M_SHIFT               38
#define V3D_QPU_WADDR_M_MASK                QPU_MASK(43, 38)

#define VC5_QPU_BRANCH_ADDR_LOW_SHIFT       35
#define VC5_QPU_BRANCH_ADDR_LOW_MASK        QPU_MASK(55, 35)

#define V3D_QPU_WADDR_A_SHIFT               32
#define V3D_QPU_WADDR_A_MASK                QPU_MASK(37, 32)

#define VC5_QPU_BRANCH_COND_SHIFT           32
#define VC5_QPU_BRANCH_COND_MASK            QPU_MASK(34, 32)

#define VC5_QPU_BRANCH_ADDR_HIGH_SHIFT      24
#define VC5_QPU_BRANCH_ADDR_HIGH_MASK       QPU_MASK(31, 24)

#define VC5_QPU_OP_ADD_SHIFT                24
#define VC5_QPU_OP_ADD_MASK                 QPU_MASK(31, 24)

#define VC5_QPU_MUL_B_SHIFT                 21
#define VC5_QPU_MUL_B_MASK                  QPU_MASK(23, 21)

#define VC5_QPU_BRANCH_MSFIGN_SHIFT         21
#define VC5_QPU_BRANCH_MSFIGN_MASK          QPU_MASK(22, 21)

#define VC5_QPU_MUL_A_SHIFT                 18
#define VC5_QPU_MUL_A_MASK                  QPU_MASK(20, 18)

#define VC5_QPU_ADD_B_SHIFT                 15
#define VC5_QPU_ADD_B_MASK                  QPU_MASK(17, 15)

#define VC5_QPU_BRANCH_BDU_SHIFT            15
#define VC5_QPU_BRANCH_BDU_MASK             QPU_MASK(17, 15)

#define VC5_QPU_BRANCH_UB                   QPU_MASK(14, 14)

#define VC5_QPU_ADD_A_SHIFT                 12
#define VC5_QPU_ADD_A_MASK                  QPU_MASK(14, 12)

#define VC5_QPU_BRANCH_BDI_SHIFT            12
#define VC5_QPU_BRANCH_BDI_MASK             QPU_MASK(13, 12)

#define VC5_QPU_RADDR_A_SHIFT               6
#define VC5_QPU_RADDR_A_MASK                QPU_MASK(11, 6)

#define VC5_QPU_RADDR_B_SHIFT               0
#define VC5_QPU_RADDR_B_MASK                QPU_MASK(5, 0)
105
/* Designated-initializer shorthands that keep the v3d_qpu_sig tables below
 * readable: each expands to setting one boolean member of the struct.
 */
#define THRSW .thrsw = true
#define LDUNIF .ldunif = true
#define LDUNIFRF .ldunifrf = true
#define LDUNIFA .ldunifa = true
#define LDUNIFARF .ldunifarf = true
#define LDTMU .ldtmu = true
#define LDVARY .ldvary = true
#define LDVPM .ldvpm = true
#define SMIMM .small_imm = true
#define LDTLB .ldtlb = true
#define LDTLBU .ldtlbu = true
#define UCB .ucb = true
#define ROT .rotate = true
#define WRTMUC .wrtmuc = true
120
/* Mapping from the packed 5-bit signal field to its unpacked form for V3D
 * 3.3.  Indices not listed are reserved encodings (their entries stay
 * zero-initialized, which v3d_qpu_sig_unpack() uses to reject them).
 */
static const struct v3d_qpu_sig v33_sig_map[] = {
        /*      MISC   R3       R4      R5 */
        [0]  = {                               },
        [1]  = { THRSW,                        },
        [2]  = {                        LDUNIF },
        [3]  = { THRSW,                 LDUNIF },
        [4]  = {                LDTMU,         },
        [5]  = { THRSW,         LDTMU,         },
        [6]  = {                LDTMU,  LDUNIF },
        [7]  = { THRSW,         LDTMU,  LDUNIF },
        [8]  = {        LDVARY,                },
        [9]  = { THRSW, LDVARY,                },
        [10] = {        LDVARY,         LDUNIF },
        [11] = { THRSW, LDVARY,         LDUNIF },
        [12] = {        LDVARY, LDTMU,         },
        [13] = { THRSW, LDVARY, LDTMU,         },
        [14] = { SMIMM, LDVARY,                },
        [15] = { SMIMM,                        },
        [16] = {        LDTLB,                 },
        [17] = {        LDTLBU,                },
        /* 18-21 reserved */
        [22] = { UCB,                          },
        [23] = { ROT,                          },
        [24] = {        LDVPM,                 },
        [25] = { THRSW, LDVPM,                 },
        [26] = {        LDVPM,          LDUNIF },
        [27] = { THRSW, LDVPM,          LDUNIF },
        [28] = {        LDVPM, LDTMU,          },
        [29] = { THRSW, LDVPM, LDTMU,          },
        [30] = { SMIMM, LDVPM,                 },
        [31] = { SMIMM,                        },
};
153
/* Packed-signal mapping for V3D 4.0: drops the LDVPM signals, adds WRTMUC,
 * and repurposes encoding 31.  Unlisted indices are reserved.
 */
static const struct v3d_qpu_sig v40_sig_map[] = {
        /*      MISC    R3      R4      R5 */
        [0]  = {                               },
        [1]  = { THRSW,                        },
        [2]  = {                        LDUNIF },
        [3]  = { THRSW,                 LDUNIF },
        [4]  = {                LDTMU,         },
        [5]  = { THRSW,         LDTMU,         },
        [6]  = {                LDTMU,  LDUNIF },
        [7]  = { THRSW,         LDTMU,  LDUNIF },
        [8]  = {        LDVARY,                },
        [9]  = { THRSW, LDVARY,                },
        [10] = {        LDVARY,         LDUNIF },
        [11] = { THRSW, LDVARY,         LDUNIF },
        /* 12-13 reserved */
        [14] = { SMIMM, LDVARY,                },
        [15] = { SMIMM,                        },
        [16] = {        LDTLB,                 },
        [17] = {        LDTLBU,                },
        [18] = {                        WRTMUC },
        [19] = { THRSW,                 WRTMUC },
        [20] = {        LDVARY,         WRTMUC },
        [21] = { THRSW, LDVARY,         WRTMUC },
        [22] = { UCB,                          },
        [23] = { ROT,                          },
        /* 24-30 reserved */
        [31] = { SMIMM,         LDTMU,         },
};
182
/* Packed-signal mapping for V3D 4.1+: adds the register-file unif loads
 * (LDUNIFRF/LDUNIFARF) and LDUNIFA.  Unlisted indices are reserved.
 */
static const struct v3d_qpu_sig v41_sig_map[] = {
        /*      MISC       phys    R5 */
        [0]  = {                          },
        [1]  = { THRSW,                   },
        [2]  = {                   LDUNIF },
        [3]  = { THRSW,            LDUNIF },
        [4]  = {           LDTMU,         },
        [5]  = { THRSW,    LDTMU,         },
        [6]  = {           LDTMU,  LDUNIF },
        [7]  = { THRSW,    LDTMU,  LDUNIF },
        [8]  = {           LDVARY,        },
        [9]  = { THRSW,    LDVARY,        },
        [10] = {           LDVARY, LDUNIF },
        [11] = { THRSW,    LDVARY, LDUNIF },
        [12] = { LDUNIFRF                 },
        [13] = { THRSW,    LDUNIFRF       },
        [14] = { SMIMM,    LDVARY,        },
        [15] = { SMIMM,                   },
        [16] = {           LDTLB,         },
        [17] = {           LDTLBU,        },
        [18] = {                          WRTMUC },
        [19] = { THRSW,                   WRTMUC },
        [20] = {           LDVARY,        WRTMUC },
        [21] = { THRSW,    LDVARY,        WRTMUC },
        [22] = { UCB,                     },
        [23] = { ROT,                     },
        /* 26-30 reserved ([24] and [25] are defined just below) */
        [24] = {                   LDUNIFA},
        [25] = { LDUNIFARF                },
        [31] = { SMIMM,            LDTMU, },
};
214
215bool
216v3d_qpu_sig_unpack(const struct v3d_device_info *devinfo,
217                   uint32_t packed_sig,
218                   struct v3d_qpu_sig *sig)
219{
220        if (packed_sig >= ARRAY_SIZE(v33_sig_map))
221                return false;
222
223        if (devinfo->ver >= 41)
224                *sig = v41_sig_map[packed_sig];
225        else if (devinfo->ver == 40)
226                *sig = v40_sig_map[packed_sig];
227        else
228                *sig = v33_sig_map[packed_sig];
229
230        /* Signals with zeroed unpacked contents after element 0 are reserved. */
231        return (packed_sig == 0 ||
232                memcmp(sig, &v33_sig_map[0], sizeof(*sig)) != 0);
233}
234
235bool
236v3d_qpu_sig_pack(const struct v3d_device_info *devinfo,
237                 const struct v3d_qpu_sig *sig,
238                 uint32_t *packed_sig)
239{
240        static const struct v3d_qpu_sig *map;
241
242        if (devinfo->ver >= 41)
243                map = v41_sig_map;
244        else if (devinfo->ver == 40)
245                map = v40_sig_map;
246        else
247                map = v33_sig_map;
248
249        for (int i = 0; i < ARRAY_SIZE(v33_sig_map); i++) {
250                if (memcmp(&map[i], sig, sizeof(*sig)) == 0) {
251                        *packed_sig = i;
252                        return true;
253                }
254        }
255
256        return false;
257}
/**
 * Returns the IEEE-754 bit pattern of a 32-bit float as an unsigned int
 * (e.g. fui(1.0f) == 0x3f800000).
 *
 * Uses memcpy rather than a pointer cast or union so the type pun is
 * valid under strict aliasing in both C and C++; also fixes the original
 * version's inconsistent indentation.
 */
static inline unsigned
fui(float f)
{
        unsigned ui;
        memcpy(&ui, &f, sizeof(ui));
        return ui;
}
265
/* The 48 "small immediate" values, indexed by their packed encoding:
 * integers 0..15, then -16..-1 (the negative initializers wrap to their
 * two's-complement uint32_t bit patterns), then IEEE-754 bit patterns for
 * the float powers of two 2^-8 .. 2^7.
 */
static const uint32_t small_immediates[] = {
        0, 1, 2, 3,
        4, 5, 6, 7,
        8, 9, 10, 11,
        12, 13, 14, 15,
        -16, -15, -14, -13,
        -12, -11, -10, -9,
        -8, -7, -6, -5,
        -4, -3, -2, -1,
        0x3b800000, /* 2.0^-8 */
        0x3c000000, /* 2.0^-7 */
        0x3c800000, /* 2.0^-6 */
        0x3d000000, /* 2.0^-5 */
        0x3d800000, /* 2.0^-4 */
        0x3e000000, /* 2.0^-3 */
        0x3e800000, /* 2.0^-2 */
        0x3f000000, /* 2.0^-1 */
        0x3f800000, /* 2.0^0 */
        0x40000000, /* 2.0^1 */
        0x40800000, /* 2.0^2 */
        0x41000000, /* 2.0^3 */
        0x41800000, /* 2.0^4 */
        0x42000000, /* 2.0^5 */
        0x42800000, /* 2.0^6 */
        0x43000000, /* 2.0^7 */
};
292
293bool
294v3d_qpu_small_imm_unpack(const struct v3d_device_info *devinfo,
295                         uint32_t packed_small_immediate,
296                         uint32_t *small_immediate)
297{
298        if (packed_small_immediate >= ARRAY_SIZE(small_immediates))
299                return false;
300
301        *small_immediate = small_immediates[packed_small_immediate];
302        return true;
303}
304
305bool
306v3d_qpu_small_imm_pack(const struct v3d_device_info *devinfo,
307                       uint32_t value,
308                       uint32_t *packed_small_immediate)
309{
310        STATIC_ASSERT(ARRAY_SIZE(small_immediates) == 48);
311
312        for (int i = 0; i < ARRAY_SIZE(small_immediates); i++) {
313                if (small_immediates[i] == value) {
314                        *packed_small_immediate = i;
315                        return true;
316                }
317        }
318
319        return false;
320}
321
/**
 * Unpacks the 7-bit instruction condition field into add/mul ALU
 * condition, pushed-flag, and update-flag state.
 *
 * Returns false only for the reserved encoding 0x10; every other in-range
 * value decodes to one of the combinations below.
 */
bool
v3d_qpu_flags_unpack(const struct v3d_device_info *devinfo,
                     uint32_t packed_cond,
                     struct v3d_qpu_flags *cond)
{
        /* The 2-bit condition sub-field used when a condition is packed
         * alongside other flag state.
         */
        static const enum v3d_qpu_cond cond_map[4] = {
                [0] = V3D_QPU_COND_IFA,
                [1] = V3D_QPU_COND_IFB,
                [2] = V3D_QPU_COND_IFNA,
                [3] = V3D_QPU_COND_IFNB,
        };

        /* Start from "no conditions, no flag writes" and fill in whatever
         * the encoding carries.
         */
        cond->ac = V3D_QPU_COND_NONE;
        cond->mc = V3D_QPU_COND_NONE;
        cond->apf = V3D_QPU_PF_NONE;
        cond->mpf = V3D_QPU_PF_NONE;
        cond->auf = V3D_QPU_UF_NONE;
        cond->muf = V3D_QPU_UF_NONE;

        if (packed_cond == 0) {
                /* No flags at all. */
                return true;
        } else if (packed_cond >> 2 == 0) {
                /* 0x01-0x03: add ALU pushed-flag.  Assumes the hardware
                 * encoding matches enum v3d_qpu_pf directly -- TODO confirm
                 * against qpu_instr.h.
                 */
                cond->apf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0) {
                /* 0x04-0x0f: add ALU update-flag, biased so encoding 4 maps
                 * to V3D_QPU_UF_ANDZ.
                 */
                cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
        } else if (packed_cond == 0x10) {
                /* Reserved encoding. */
                return false;
        } else if (packed_cond >> 2 == 0x4) {
                /* 0x11-0x13: mul ALU pushed-flag (0x10 was rejected above). */
                cond->mpf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0x1) {
                /* 0x14-0x1f: mul ALU update-flag, same bias as auf. */
                cond->muf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
        } else if (packed_cond >> 4 == 0x2) {
                /* 0x20-0x2f: add condition in bits 3:2 plus mul pushed-flag
                 * in bits 1:0.
                 */
                cond->ac = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA;
                cond->mpf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0x3) {
                /* 0x30-0x3f: mul condition in bits 3:2 plus add pushed-flag
                 * in bits 1:0.
                 */
                cond->mc = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA;
                cond->apf = packed_cond & 0x3;
        } else if (packed_cond >> 6) {
                /* 0x40-0x7f: mul condition in bits 5:4, combined with either
                 * an add condition (when bits 3:2 are zero) or an add
                 * update-flag (otherwise).
                 */
                cond->mc = cond_map[(packed_cond >> 4) & 0x3];
                if (((packed_cond >> 2) & 0x3) == 0) {
                        cond->ac = cond_map[packed_cond & 0x3];
                } else {
                        cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
                }
        }

        return true;
}
370
/**
 * Packs add/mul ALU condition and flag state back into the 7-bit
 * condition field.
 *
 * Only certain combinations of fields can be encoded together; returns
 * false when the given combination has no packed representation.
 */
bool
v3d_qpu_flags_pack(const struct v3d_device_info *devinfo,
                   const struct v3d_qpu_flags *cond,
                   uint32_t *packed_cond)
{
/* One bit per flag field, used to describe which fields an encoding can
 * carry.  NOTE(review): these macros are not #undef'd after the function;
 * presumably nothing later in the file collides -- confirm.
 */
#define AC (1 << 0)
#define MC (1 << 1)
#define APF (1 << 2)
#define MPF (1 << 3)
#define AUF (1 << 4)
#define MUF (1 << 5)
        /* Every legal combination of present fields, paired with the fixed
         * upper bits of its packed encoding; the field values themselves
         * are OR'd into the low bits below.
         */
        static const struct {
                uint8_t flags_present;
                uint8_t bits;
        } flags_table[] = {
                { 0,        0 },
                { APF,      0 },
                { AUF,      0 },
                { MPF,      (1 << 4) },
                { MUF,      (1 << 4) },
                { AC,       (1 << 5) },
                { AC | MPF, (1 << 5) },
                { MC,       (1 << 5) | (1 << 4) },
                { MC | APF, (1 << 5) | (1 << 4) },
                { MC | AC,  (1 << 6) },
                { MC | AUF, (1 << 6) },
        };

        /* Work out which fields are actually set on the input. */
        uint8_t flags_present = 0;
        if (cond->ac != V3D_QPU_COND_NONE)
                flags_present |= AC;
        if (cond->mc != V3D_QPU_COND_NONE)
                flags_present |= MC;
        if (cond->apf != V3D_QPU_PF_NONE)
                flags_present |= APF;
        if (cond->mpf != V3D_QPU_PF_NONE)
                flags_present |= MPF;
        if (cond->auf != V3D_QPU_UF_NONE)
                flags_present |= AUF;
        if (cond->muf != V3D_QPU_UF_NONE)
                flags_present |= MUF;

        for (int i = 0; i < ARRAY_SIZE(flags_table); i++) {
                if (flags_table[i].flags_present != flags_present)
                        continue;

                *packed_cond = flags_table[i].bits;

                /* Pushed-flag values occupy the low bits directly; at most
                 * one of apf/mpf is set for any entry in flags_table, and
                 * the NONE value is 0, so OR-ing both is safe.  Assumes the
                 * PF enum matches the hardware encoding -- TODO confirm.
                 */
                *packed_cond |= cond->apf;
                *packed_cond |= cond->mpf;

                /* Update-flags are biased so V3D_QPU_UF_ANDZ packs as 4
                 * (mirror of the unpack side).
                 */
                if (flags_present & AUF)
                        *packed_cond |= cond->auf - V3D_QPU_UF_ANDZ + 4;
                if (flags_present & MUF)
                        *packed_cond |= cond->muf - V3D_QPU_UF_ANDZ + 4;

                /* The add condition goes in bits 3:2. */
                if (flags_present & AC)
                        *packed_cond |= (cond->ac - V3D_QPU_COND_IFA) << 2;

                /* The mul condition goes in bits 5:4 for the (1 << 6)
                 * encodings, and in bits 3:2 otherwise.
                 */
                if (flags_present & MC) {
                        if (*packed_cond & (1 << 6))
                                *packed_cond |= (cond->mc -
                                                 V3D_QPU_COND_IFA) << 4;
                        else
                                *packed_cond |= (cond->mc -
                                                 V3D_QPU_COND_IFA) << 2;
                }

                return true;
        }

        return false;
}
444
/* Make a mapping of the table of opcodes in the spec.  The opcode is
 * determined by a combination of the opcode field, and in the case of 0 or
 * 1-arg opcodes, the mux_b field as well.
 */

/* Bitmask accepting the mux encodings from `bot` to `top`, inclusive. */
#define MUX_MASK(bot, top) (((1 << (top + 1)) - 1) - ((1 << (bot)) - 1))
/* Accept any of the 8 possible mux encodings. */
#define ANYMUX MUX_MASK(0, 7)

struct opcode_desc {
        uint8_t opcode_first;  /* first raw opcode covered (inclusive) */
        uint8_t opcode_last;   /* last raw opcode covered (inclusive) */
        uint8_t mux_b_mask;    /* bitmask of matching mux_b encodings */
        uint8_t mux_a_mask;    /* bitmask of matching mux_a encodings */
        uint8_t op;            /* V3D_QPU_A_* / V3D_QPU_M_* operation */
        /* 0 if it's the same across V3D versions, or a specific V3D version. */
        uint8_t ver;
};
461
/* Decode table for the add ALU.  Entries are matched in order by
 * lookup_opcode(); some opcode ranges appear more than once because the
 * actual operation also depends on operand order or other instruction
 * fields, which the unpack code resolves afterwards.
 */
static const struct opcode_desc add_ops[] = {
        /* FADD is FADDNF depending on the order of the mux_a/mux_b. */
        { 0,   47,  ANYMUX, ANYMUX, V3D_QPU_A_FADD },
        { 0,   47,  ANYMUX, ANYMUX, V3D_QPU_A_FADDNF },
        { 53,  55,  ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
        { 56,  56,  ANYMUX, ANYMUX, V3D_QPU_A_ADD },
        { 57,  59,  ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
        { 60,  60,  ANYMUX, ANYMUX, V3D_QPU_A_SUB },
        { 61,  63,  ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
        { 64,  111, ANYMUX, ANYMUX, V3D_QPU_A_FSUB },
        { 120, 120, ANYMUX, ANYMUX, V3D_QPU_A_MIN },
        { 121, 121, ANYMUX, ANYMUX, V3D_QPU_A_MAX },
        { 122, 122, ANYMUX, ANYMUX, V3D_QPU_A_UMIN },
        { 123, 123, ANYMUX, ANYMUX, V3D_QPU_A_UMAX },
        { 124, 124, ANYMUX, ANYMUX, V3D_QPU_A_SHL },
        { 125, 125, ANYMUX, ANYMUX, V3D_QPU_A_SHR },
        { 126, 126, ANYMUX, ANYMUX, V3D_QPU_A_ASR },
        { 127, 127, ANYMUX, ANYMUX, V3D_QPU_A_ROR },
        /* FMIN is instead FMAX depending on the order of the mux_a/mux_b. */
        { 128, 175, ANYMUX, ANYMUX, V3D_QPU_A_FMIN },
        { 128, 175, ANYMUX, ANYMUX, V3D_QPU_A_FMAX },
        { 176, 180, ANYMUX, ANYMUX, V3D_QPU_A_VFMIN },

        { 181, 181, ANYMUX, ANYMUX, V3D_QPU_A_AND },
        { 182, 182, ANYMUX, ANYMUX, V3D_QPU_A_OR },
        { 183, 183, ANYMUX, ANYMUX, V3D_QPU_A_XOR },

        { 184, 184, ANYMUX, ANYMUX, V3D_QPU_A_VADD },
        { 185, 185, ANYMUX, ANYMUX, V3D_QPU_A_VSUB },
        /* Opcode 186: 1-arg ops distinguished by mux_b. */
        { 186, 186, 1 << 0, ANYMUX, V3D_QPU_A_NOT },
        { 186, 186, 1 << 1, ANYMUX, V3D_QPU_A_NEG },
        { 186, 186, 1 << 2, ANYMUX, V3D_QPU_A_FLAPUSH },
        { 186, 186, 1 << 3, ANYMUX, V3D_QPU_A_FLBPUSH },
        { 186, 186, 1 << 4, ANYMUX, V3D_QPU_A_FLPOP },
        { 186, 186, 1 << 5, ANYMUX, V3D_QPU_A_RECIP },
        { 186, 186, 1 << 6, ANYMUX, V3D_QPU_A_SETMSF },
        { 186, 186, 1 << 7, ANYMUX, V3D_QPU_A_SETREVF },
        /* Opcode 187: 0-arg ops distinguished by mux_b and mux_a. */
        { 187, 187, 1 << 0, 1 << 0, V3D_QPU_A_NOP, 0 },
        { 187, 187, 1 << 0, 1 << 1, V3D_QPU_A_TIDX },
        { 187, 187, 1 << 0, 1 << 2, V3D_QPU_A_EIDX },
        { 187, 187, 1 << 0, 1 << 3, V3D_QPU_A_LR },
        { 187, 187, 1 << 0, 1 << 4, V3D_QPU_A_VFLA },
        { 187, 187, 1 << 0, 1 << 5, V3D_QPU_A_VFLNA },
        { 187, 187, 1 << 0, 1 << 6, V3D_QPU_A_VFLB },
        { 187, 187, 1 << 0, 1 << 7, V3D_QPU_A_VFLNB },

        { 187, 187, 1 << 1, MUX_MASK(0, 2), V3D_QPU_A_FXCD },
        { 187, 187, 1 << 1, 1 << 3, V3D_QPU_A_XCD },
        { 187, 187, 1 << 1, MUX_MASK(4, 6), V3D_QPU_A_FYCD },
        { 187, 187, 1 << 1, 1 << 7, V3D_QPU_A_YCD },

        { 187, 187, 1 << 2, 1 << 0, V3D_QPU_A_MSF },
        { 187, 187, 1 << 2, 1 << 1, V3D_QPU_A_REVF },
        { 187, 187, 1 << 2, 1 << 2, V3D_QPU_A_VDWWT, 33 },
        { 187, 187, 1 << 2, 1 << 2, V3D_QPU_A_IID, 40 },
        { 187, 187, 1 << 2, 1 << 3, V3D_QPU_A_SAMPID, 40 },
        { 187, 187, 1 << 2, 1 << 4, V3D_QPU_A_BARRIERID, 40 },
        { 187, 187, 1 << 2, 1 << 5, V3D_QPU_A_TMUWT },
        { 187, 187, 1 << 2, 1 << 6, V3D_QPU_A_VPMWT },

        { 187, 187, 1 << 3, ANYMUX, V3D_QPU_A_VPMSETUP, 33 },
        { 188, 188, 1 << 0, ANYMUX, V3D_QPU_A_LDVPMV_IN, 40 },
        { 188, 188, 1 << 1, ANYMUX, V3D_QPU_A_LDVPMD_IN, 40 },
        { 188, 188, 1 << 2, ANYMUX, V3D_QPU_A_LDVPMP, 40 },
        { 188, 188, 1 << 3, ANYMUX, V3D_QPU_A_RSQRT, 41 },
        { 188, 188, 1 << 4, ANYMUX, V3D_QPU_A_EXP, 41 },
        { 188, 188, 1 << 5, ANYMUX, V3D_QPU_A_LOG, 41 },
        { 188, 188, 1 << 6, ANYMUX, V3D_QPU_A_SIN, 41 },
        { 188, 188, 1 << 7, ANYMUX, V3D_QPU_A_RSQRT2, 41 },
        { 189, 189, ANYMUX, ANYMUX, V3D_QPU_A_LDVPMG_IN, 40 },

        /* FIXME: MORE COMPLICATED */
        /* { 190, 191, ANYMUX, ANYMUX, V3D_QPU_A_VFMOVABSNEGNAB }, */

        { 192, 239, ANYMUX, ANYMUX, V3D_QPU_A_FCMP },
        { 240, 244, ANYMUX, ANYMUX, V3D_QPU_A_VFMAX },

        { 245, 245, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FROUND },
        { 245, 245, 1 << 3, ANYMUX, V3D_QPU_A_FTOIN },
        { 245, 245, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FTRUNC },
        { 245, 245, 1 << 7, ANYMUX, V3D_QPU_A_FTOIZ },
        { 246, 246, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FFLOOR },
        { 246, 246, 1 << 3, ANYMUX, V3D_QPU_A_FTOUZ },
        { 246, 246, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FCEIL },
        { 246, 246, 1 << 7, ANYMUX, V3D_QPU_A_FTOC },

        { 247, 247, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FDX },
        { 247, 247, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FDY },

        /* The stvpms are distinguished by the waddr field. */
        { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMV },
        { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMD },
        { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMP },

        { 252, 252, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_ITOF },
        { 252, 252, 1 << 3, ANYMUX, V3D_QPU_A_CLZ },
        { 252, 252, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_UTOF },
};
560
/* Decode table for the mul ALU; matched with the same rules as add_ops. */
static const struct opcode_desc mul_ops[] = {
        { 1, 1, ANYMUX, ANYMUX, V3D_QPU_M_ADD },
        { 2, 2, ANYMUX, ANYMUX, V3D_QPU_M_SUB },
        { 3, 3, ANYMUX, ANYMUX, V3D_QPU_M_UMUL24 },
        { 4, 8, ANYMUX, ANYMUX, V3D_QPU_M_VFMUL },
        { 9, 9, ANYMUX, ANYMUX, V3D_QPU_M_SMUL24 },
        { 10, 10, ANYMUX, ANYMUX, V3D_QPU_M_MULTOP },
        { 14, 14, ANYMUX, ANYMUX, V3D_QPU_M_FMOV },
        { 15, 15, MUX_MASK(0, 3), ANYMUX, V3D_QPU_M_FMOV },
        { 15, 15, 1 << 4, 1 << 0, V3D_QPU_M_NOP, 0 },
        { 15, 15, 1 << 7, ANYMUX, V3D_QPU_M_MOV },
        { 16, 63, ANYMUX, ANYMUX, V3D_QPU_M_FMUL },
};
574
575static const struct opcode_desc *
576lookup_opcode(const struct opcode_desc *opcodes, size_t num_opcodes,
577              uint32_t opcode, uint32_t mux_a, uint32_t mux_b)
578{
579        for (int i = 0; i < num_opcodes; i++) {
580                const struct opcode_desc *op_desc = &opcodes[i];
581
582                if (opcode < op_desc->opcode_first ||
583                    opcode > op_desc->opcode_last)
584                        continue;
585
586                if (!(op_desc->mux_b_mask & (1 << mux_b)))
587                        continue;
588
589                if (!(op_desc->mux_a_mask & (1 << mux_a)))
590                        continue;
591
592                return op_desc;
593        }
594
595        return NULL;
596}
597
598static bool
599v3d_qpu_float32_unpack_unpack(uint32_t packed,
600                              enum v3d_qpu_input_unpack *unpacked)
601{
602        switch (packed) {
603        case 0:
604                *unpacked = V3D_QPU_UNPACK_ABS;
605                return true;
606        case 1:
607                *unpacked = V3D_QPU_UNPACK_NONE;
608                return true;
609        case 2:
610                *unpacked = V3D_QPU_UNPACK_L;
611                return true;
612        case 3:
613                *unpacked = V3D_QPU_UNPACK_H;
614                return true;
615        default:
616                return false;
617        }
618}
619
620static bool
621v3d_qpu_float32_unpack_pack(enum v3d_qpu_input_unpack unpacked,
622                            uint32_t *packed)
623{
624        switch (unpacked) {
625        case V3D_QPU_UNPACK_ABS:
626                *packed = 0;
627                return true;
628        case V3D_QPU_UNPACK_NONE:
629                *packed = 1;
630                return true;
631        case V3D_QPU_UNPACK_L:
632                *packed = 2;
633                return true;
634        case V3D_QPU_UNPACK_H:
635                *packed = 3;
636                return true;
637        default:
638                return false;
639        }
640}
641
642static bool
643v3d_qpu_float16_unpack_unpack(uint32_t packed,
644                              enum v3d_qpu_input_unpack *unpacked)
645{
646        switch (packed) {
647        case 0:
648                *unpacked = V3D_QPU_UNPACK_NONE;
649                return true;
650        case 1:
651                *unpacked = V3D_QPU_UNPACK_REPLICATE_32F_16;
652                return true;
653        case 2:
654                *unpacked = V3D_QPU_UNPACK_REPLICATE_L_16;
655                return true;
656        case 3:
657                *unpacked = V3D_QPU_UNPACK_REPLICATE_H_16;
658                return true;
659        case 4:
660                *unpacked = V3D_QPU_UNPACK_SWAP_16;
661                return true;
662        default:
663                return false;
664        }
665}
666
667static bool
668v3d_qpu_float16_unpack_pack(enum v3d_qpu_input_unpack unpacked,
669                            uint32_t *packed)
670{
671        switch (unpacked) {
672        case V3D_QPU_UNPACK_NONE:
673                *packed = 0;
674                return true;
675        case V3D_QPU_UNPACK_REPLICATE_32F_16:
676                *packed = 1;
677                return true;
678        case V3D_QPU_UNPACK_REPLICATE_L_16:
679                *packed = 2;
680                return true;
681        case V3D_QPU_UNPACK_REPLICATE_H_16:
682                *packed = 3;
683                return true;
684        case V3D_QPU_UNPACK_SWAP_16:
685                *packed = 4;
686                return true;
687        default:
688                return false;
689        }
690}
691
692static bool
693v3d_qpu_float32_pack_pack(enum v3d_qpu_input_unpack unpacked,
694                          uint32_t *packed)
695{
696        switch (unpacked) {
697        case V3D_QPU_PACK_NONE:
698                *packed = 0;
699                return true;
700        case V3D_QPU_PACK_L:
701                *packed = 1;
702                return true;
703        case V3D_QPU_PACK_H:
704                *packed = 2;
705                return true;
706        default:
707                return false;
708        }
709}
710
711static bool
712v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
713                   struct v3d_qpu_instr *instr)
714{
715        uint32_t op = QPU_GET_FIELD(packed_inst, VC5_QPU_OP_ADD);
716        uint32_t mux_a = QPU_GET_FIELD(packed_inst, VC5_QPU_ADD_A);
717        uint32_t mux_b = QPU_GET_FIELD(packed_inst, VC5_QPU_ADD_B);
718        uint32_t waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);
719
720        uint32_t map_op = op;
721        /* Some big clusters of opcodes are replicated with unpack
722         * flags
723         */
724        if (map_op >= 249 && map_op <= 251)
725                map_op = (map_op - 249 + 245);
726        if (map_op >= 253 && map_op <= 255)
727                map_op = (map_op - 253 + 245);
728
729        const struct opcode_desc *desc =
730                lookup_opcode(add_ops, ARRAY_SIZE(add_ops),
731                              map_op, mux_a, mux_b);
732        if (!desc)
733                return false;
734
735        instr->alu.add.op = desc->op;
736
737        /* FADD/FADDNF and FMIN/FMAX are determined by the orders of the
738         * operands.
739         */
740        if (((op >> 2) & 3) * 8 + mux_a > (op & 3) * 8 + mux_b) {
741                if (instr->alu.add.op == V3D_QPU_A_FMIN)
742                        instr->alu.add.op = V3D_QPU_A_FMAX;
743                if (instr->alu.add.op == V3D_QPU_A_FADD)
744                        instr->alu.add.op = V3D_QPU_A_FADDNF;
745        }
746
747        /* Some QPU ops require a bit more than just basic opcode and mux a/b
748         * comparisons to distinguish them.
749         */
750        switch (instr->alu.add.op) {
751        case V3D_QPU_A_STVPMV:
752        case V3D_QPU_A_STVPMD:
753        case V3D_QPU_A_STVPMP:
754                switch (waddr) {
755                case 0:
756                        instr->alu.add.op = V3D_QPU_A_STVPMV;
757                        break;
758                case 1:
759                        instr->alu.add.op = V3D_QPU_A_STVPMD;
760                        break;
761                case 2:
762                        instr->alu.add.op = V3D_QPU_A_STVPMP;
763                        break;
764                default:
765                        return false;
766                }
767                break;
768        default:
769                break;
770        }
771
772        switch (instr->alu.add.op) {
773        case V3D_QPU_A_FADD:
774        case V3D_QPU_A_FADDNF:
775        case V3D_QPU_A_FSUB:
776        case V3D_QPU_A_FMIN:
777        case V3D_QPU_A_FMAX:
778        case V3D_QPU_A_FCMP:
779        case V3D_QPU_A_VFPACK:
780                if (instr->alu.add.op != V3D_QPU_A_VFPACK)
781                        instr->alu.add.output_pack = (op >> 4) & 0x3;
782                else
783                        instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
784
785                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
786                                                   &instr->alu.add.a_unpack)) {
787                        return false;
788                }
789
790                if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3,
791                                                   &instr->alu.add.b_unpack)) {
792                        return false;
793                }
794                break;
795
796        case V3D_QPU_A_FFLOOR:
797        case V3D_QPU_A_FROUND:
798        case V3D_QPU_A_FTRUNC:
799        case V3D_QPU_A_FCEIL:
800        case V3D_QPU_A_FDX:
801        case V3D_QPU_A_FDY:
802                instr->alu.add.output_pack = mux_b & 0x3;
803
804                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
805                                                   &instr->alu.add.a_unpack)) {
806                        return false;
807                }
808                break;
809
810        case V3D_QPU_A_FTOIN:
811        case V3D_QPU_A_FTOIZ:
812        case V3D_QPU_A_FTOUZ:
813        case V3D_QPU_A_FTOC:
814                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
815
816                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
817                                                   &instr->alu.add.a_unpack)) {
818                        return false;
819                }
820                break;
821
822        case V3D_QPU_A_VFMIN:
823        case V3D_QPU_A_VFMAX:
824                if (!v3d_qpu_float16_unpack_unpack(op & 0x7,
825                                                   &instr->alu.add.a_unpack)) {
826                        return false;
827                }
828
829                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
830                instr->alu.add.b_unpack = V3D_QPU_UNPACK_NONE;
831                break;
832
833        default:
834                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
835                instr->alu.add.a_unpack = V3D_QPU_UNPACK_NONE;
836                instr->alu.add.b_unpack = V3D_QPU_UNPACK_NONE;
837                break;
838        }
839
840        instr->alu.add.a = mux_a;
841        instr->alu.add.b = mux_b;
842        instr->alu.add.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);
843
844        instr->alu.add.magic_write = false;
845        if (packed_inst & VC5_QPU_MA) {
846                switch (instr->alu.add.op) {
847                case V3D_QPU_A_LDVPMV_IN:
848                        instr->alu.add.op = V3D_QPU_A_LDVPMV_OUT;
849                        break;
850                case V3D_QPU_A_LDVPMD_IN:
851                        instr->alu.add.op = V3D_QPU_A_LDVPMD_OUT;
852                        break;
853                case V3D_QPU_A_LDVPMG_IN:
854                        instr->alu.add.op = V3D_QPU_A_LDVPMG_OUT;
855                        break;
856                default:
857                        instr->alu.add.magic_write = true;
858                        break;
859                }
860        }
861
862        return true;
863}
864
865static bool
866v3d_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
867                   struct v3d_qpu_instr *instr)
868{
869        uint32_t op = QPU_GET_FIELD(packed_inst, VC5_QPU_OP_MUL);
870        uint32_t mux_a = QPU_GET_FIELD(packed_inst, VC5_QPU_MUL_A);
871        uint32_t mux_b = QPU_GET_FIELD(packed_inst, VC5_QPU_MUL_B);
872
873        {
874                const struct opcode_desc *desc =
875                        lookup_opcode(mul_ops, ARRAY_SIZE(mul_ops),
876                                      op, mux_a, mux_b);
877                if (!desc)
878                        return false;
879
880                instr->alu.mul.op = desc->op;
881        }
882
883        switch (instr->alu.mul.op) {
884        case V3D_QPU_M_FMUL:
885                instr->alu.mul.output_pack = ((op >> 4) & 0x3) - 1;
886
887                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
888                                                   &instr->alu.mul.a_unpack)) {
889                        return false;
890                }
891
892                if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3,
893                                                   &instr->alu.mul.b_unpack)) {
894                        return false;
895                }
896
897                break;
898
899        case V3D_QPU_M_FMOV:
900                instr->alu.mul.output_pack = (((op & 1) << 1) +
901                                              ((mux_b >> 2) & 1));
902
903                if (!v3d_qpu_float32_unpack_unpack(mux_b & 0x3,
904                                                   &instr->alu.mul.a_unpack)) {
905                        return false;
906                }
907
908                break;
909
910        case V3D_QPU_M_VFMUL:
911                instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;
912
913                if (!v3d_qpu_float16_unpack_unpack(((op & 0x7) - 4) & 7,
914                                                   &instr->alu.mul.a_unpack)) {
915                        return false;
916                }
917
918                instr->alu.mul.b_unpack = V3D_QPU_UNPACK_NONE;
919
920                break;
921
922        default:
923                instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;
924                instr->alu.mul.a_unpack = V3D_QPU_UNPACK_NONE;
925                instr->alu.mul.b_unpack = V3D_QPU_UNPACK_NONE;
926                break;
927        }
928
929        instr->alu.mul.a = mux_a;
930        instr->alu.mul.b = mux_b;
931        instr->alu.mul.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_M);
932        instr->alu.mul.magic_write = packed_inst & VC5_QPU_MM;
933
934        return true;
935}
936
/* Packs the add-ALU half of an instruction: opcode, input muxes, pack/unpack
 * modifiers, write address and magic-write bit.  Returns false when the
 * requested combination is not encodable.
 */
static bool
v3d_qpu_add_pack(const struct v3d_device_info *devinfo,
                 const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
{
        uint32_t waddr = instr->alu.add.waddr;
        uint32_t mux_a = instr->alu.add.a;
        uint32_t mux_b = instr->alu.add.b;
        int nsrc = v3d_qpu_add_op_num_src(instr->alu.add.op);
        const struct opcode_desc *desc;

        int opcode;
        /* Linear search for the descriptor matching the abstract add op. */
        for (desc = add_ops; desc != &add_ops[ARRAY_SIZE(add_ops)];
             desc++) {
                if (desc->op == instr->alu.add.op)
                        break;
        }
        if (desc == &add_ops[ARRAY_SIZE(add_ops)])
                return false;

        opcode = desc->opcode_first;

        /* If an operation doesn't use an arg, its mux values may be used to
         * identify the operation type.
         */
        if (nsrc < 2)
                mux_b = ffs(desc->mux_b_mask) - 1;

        if (nsrc < 1)
                mux_a = ffs(desc->mux_a_mask) - 1;

        bool no_magic_write = false;

        switch (instr->alu.add.op) {
        /* The STVPM* ops are distinguished by waddr, so force the encoding
         * and suppress the magic-write bit below.
         */
        case V3D_QPU_A_STVPMV:
                waddr = 0;
                no_magic_write = true;
                break;
        case V3D_QPU_A_STVPMD:
                waddr = 1;
                no_magic_write = true;
                break;
        case V3D_QPU_A_STVPMP:
                waddr = 2;
                no_magic_write = true;
                break;

        case V3D_QPU_A_LDVPMV_IN:
        case V3D_QPU_A_LDVPMD_IN:
        case V3D_QPU_A_LDVPMP:
        case V3D_QPU_A_LDVPMG_IN:
                assert(!instr->alu.add.magic_write);
                break;

        /* The LDVPM*_OUT variants encode as the _IN opcode plus the MA bit
         * (mirrors the decode in v3d_qpu_add_unpack).
         */
        case V3D_QPU_A_LDVPMV_OUT:
        case V3D_QPU_A_LDVPMD_OUT:
        case V3D_QPU_A_LDVPMG_OUT:
                assert(!instr->alu.add.magic_write);
                *packed_instr |= VC5_QPU_MA;
                break;

        default:
                break;
        }

        switch (instr->alu.add.op) {
        case V3D_QPU_A_FADD:
        case V3D_QPU_A_FADDNF:
        case V3D_QPU_A_FSUB:
        case V3D_QPU_A_FMIN:
        case V3D_QPU_A_FMAX:
        case V3D_QPU_A_FCMP: {
                uint32_t output_pack;
                uint32_t a_unpack;
                uint32_t b_unpack;

                if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
                                               &output_pack)) {
                        return false;
                }
                /* Output pack goes in opcode bits 4-5. */
                opcode |= output_pack << 4;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b_unpack,
                                                 &b_unpack)) {
                        return false;
                }

                /* These operations with commutative operands are
                 * distinguished by which order their operands come in.
                 */
                bool ordering = a_unpack * 8 + mux_a > b_unpack * 8 + mux_b;
                if (((instr->alu.add.op == V3D_QPU_A_FMIN ||
                      instr->alu.add.op == V3D_QPU_A_FADD) && ordering) ||
                    ((instr->alu.add.op == V3D_QPU_A_FMAX ||
                      instr->alu.add.op == V3D_QPU_A_FADDNF) && !ordering)) {
                        uint32_t temp;

                        temp = a_unpack;
                        a_unpack = b_unpack;
                        b_unpack = temp;

                        temp = mux_a;
                        mux_a = mux_b;
                        mux_b = temp;
                }

                /* a/b unpack modes occupy opcode bits 2-3 and 0-1. */
                opcode |= a_unpack << 2;
                opcode |= b_unpack << 0;

                break;
        }

        case V3D_QPU_A_VFPACK: {
                uint32_t a_unpack;
                uint32_t b_unpack;

                /* ABS is not representable in VFPACK's single-bit unpack
                 * fields below.
                 */
                if (instr->alu.add.a_unpack == V3D_QPU_UNPACK_ABS ||
                    instr->alu.add.b_unpack == V3D_QPU_UNPACK_ABS) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b_unpack,
                                                 &b_unpack)) {
                        return false;
                }

                /* Replace (rather than OR) bits 2 and 0, which may already
                 * be set in desc->opcode_first.
                 */
                opcode = (opcode & ~(1 << 2)) | (a_unpack << 2);
                opcode = (opcode & ~(1 << 0)) | (b_unpack << 0);

                break;
        }

        case V3D_QPU_A_FFLOOR:
        case V3D_QPU_A_FROUND:
        case V3D_QPU_A_FTRUNC:
        case V3D_QPU_A_FCEIL:
        case V3D_QPU_A_FDX:
        case V3D_QPU_A_FDY: {
                uint32_t packed;

                /* For these single-source ops the output pack rides in the
                 * (otherwise unused) mux_b field.
                 */
                if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
                                               &packed)) {
                        return false;
                }
                mux_b |= packed;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &packed)) {
                        return false;
                }
                /* Unpack encoding 0 is not valid here. */
                if (packed == 0)
                        return false;
                opcode = (opcode & ~(1 << 2)) | packed << 2;
                break;
        }

        case V3D_QPU_A_FTOIN:
        case V3D_QPU_A_FTOIZ:
        case V3D_QPU_A_FTOUZ:
        case V3D_QPU_A_FTOC:
                if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE)
                        return false;

                /* Note: this declaration's scope runs to the end of the
                 * switch; the VFMIN/VFMAX case below reuses it.
                 */
                uint32_t packed;
                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &packed)) {
                        return false;
                }
                /* Unpack encoding 0 is not valid here. */
                if (packed == 0)
                        return false;
                opcode |= packed << 2;

                break;

        case V3D_QPU_A_VFMIN:
        case V3D_QPU_A_VFMAX:
                if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
                    instr->alu.add.b_unpack != V3D_QPU_UNPACK_NONE) {
                        return false;
                }

                if (!v3d_qpu_float16_unpack_pack(instr->alu.add.a_unpack,
                                                 &packed)) {
                        return false;
                }
                opcode |= packed;
                break;

        default:
                /* Remaining ops (other than NOP) carry no pack/unpack
                 * fields, so reject any modifiers.
                 */
                if (instr->alu.add.op != V3D_QPU_A_NOP &&
                    (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
                     instr->alu.add.a_unpack != V3D_QPU_UNPACK_NONE ||
                     instr->alu.add.b_unpack != V3D_QPU_UNPACK_NONE)) {
                        return false;
                }
                break;
        }

        *packed_instr |= QPU_SET_FIELD(mux_a, VC5_QPU_ADD_A);
        *packed_instr |= QPU_SET_FIELD(mux_b, VC5_QPU_ADD_B);
        *packed_instr |= QPU_SET_FIELD(opcode, VC5_QPU_OP_ADD);
        *packed_instr |= QPU_SET_FIELD(waddr, V3D_QPU_WADDR_A);
        if (instr->alu.add.magic_write && !no_magic_write)
                *packed_instr |= VC5_QPU_MA;

        return true;
}
1153
1154static bool
1155v3d_qpu_mul_pack(const struct v3d_device_info *devinfo,
1156                 const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
1157{
1158        uint32_t mux_a = instr->alu.mul.a;
1159        uint32_t mux_b = instr->alu.mul.b;
1160        int nsrc = v3d_qpu_mul_op_num_src(instr->alu.mul.op);
1161        const struct opcode_desc *desc;
1162
1163        for (desc = mul_ops; desc != &mul_ops[ARRAY_SIZE(mul_ops)];
1164             desc++) {
1165                if (desc->op == instr->alu.mul.op)
1166                        break;
1167        }
1168        if (desc == &mul_ops[ARRAY_SIZE(mul_ops)])
1169                return false;
1170
1171        uint32_t opcode = desc->opcode_first;
1172
1173        /* Some opcodes have a single valid value for their mux a/b, so set
1174         * that here.  If mux a/b determine packing, it will be set below.
1175         */
1176        if (nsrc < 2)
1177                mux_b = ffs(desc->mux_b_mask) - 1;
1178
1179        if (nsrc < 1)
1180                mux_a = ffs(desc->mux_a_mask) - 1;
1181
1182        switch (instr->alu.mul.op) {
1183        case V3D_QPU_M_FMUL: {
1184                uint32_t packed;
1185
1186                if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
1187                                               &packed)) {
1188                        return false;
1189                }
1190                /* No need for a +1 because desc->opcode_first has a 1 in this
1191                 * field.
1192                 */
1193                opcode += packed << 4;
1194
1195                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a_unpack,
1196                                                 &packed)) {
1197                        return false;
1198                }
1199                opcode |= packed << 2;
1200
1201                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.b_unpack,
1202                                                 &packed)) {
1203                        return false;
1204                }
1205                opcode |= packed << 0;
1206                break;
1207        }
1208
1209        case V3D_QPU_M_FMOV: {
1210                uint32_t packed;
1211
1212                if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
1213                                               &packed)) {
1214                        return false;
1215                }
1216                opcode |= (packed >> 1) & 1;
1217                mux_b = (packed & 1) << 2;
1218
1219                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a_unpack,
1220                                                 &packed)) {
1221                        return false;
1222                }
1223                mux_b |= packed;
1224                break;
1225        }
1226
1227        case V3D_QPU_M_VFMUL: {
1228                uint32_t packed;
1229
1230                if (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE)
1231                        return false;
1232
1233                if (!v3d_qpu_float16_unpack_pack(instr->alu.mul.a_unpack,
1234                                                 &packed)) {
1235                        return false;
1236                }
1237                if (instr->alu.mul.a_unpack == V3D_QPU_UNPACK_SWAP_16)
1238                        opcode = 8;
1239                else
1240                        opcode |= (packed + 4) & 7;
1241
1242                if (instr->alu.mul.b_unpack != V3D_QPU_UNPACK_NONE)
1243                        return false;
1244
1245                break;
1246        }
1247
1248        default:
1249                break;
1250        }
1251
1252        *packed_instr |= QPU_SET_FIELD(mux_a, VC5_QPU_MUL_A);
1253        *packed_instr |= QPU_SET_FIELD(mux_b, VC5_QPU_MUL_B);
1254
1255        *packed_instr |= QPU_SET_FIELD(opcode, VC5_QPU_OP_MUL);
1256        *packed_instr |= QPU_SET_FIELD(instr->alu.mul.waddr, V3D_QPU_WADDR_M);
1257        if (instr->alu.mul.magic_write)
1258                *packed_instr |= VC5_QPU_MM;
1259
1260        return true;
1261}
1262
1263static bool
1264v3d_qpu_instr_unpack_alu(const struct v3d_device_info *devinfo,
1265                         uint64_t packed_instr,
1266                         struct v3d_qpu_instr *instr)
1267{
1268        instr->type = V3D_QPU_INSTR_TYPE_ALU;
1269
1270        if (!v3d_qpu_sig_unpack(devinfo,
1271                                QPU_GET_FIELD(packed_instr, VC5_QPU_SIG),
1272                                &instr->sig))
1273                return false;
1274
1275        uint32_t packed_cond = QPU_GET_FIELD(packed_instr, VC5_QPU_COND);
1276        if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) {
1277                instr->sig_addr = packed_cond & ~VC5_QPU_COND_SIG_MAGIC_ADDR;
1278                instr->sig_magic = packed_cond & VC5_QPU_COND_SIG_MAGIC_ADDR;
1279
1280                instr->flags.ac = V3D_QPU_COND_NONE;
1281                instr->flags.mc = V3D_QPU_COND_NONE;
1282                instr->flags.apf = V3D_QPU_PF_NONE;
1283                instr->flags.mpf = V3D_QPU_PF_NONE;
1284                instr->flags.auf = V3D_QPU_UF_NONE;
1285                instr->flags.muf = V3D_QPU_UF_NONE;
1286        } else {
1287                if (!v3d_qpu_flags_unpack(devinfo, packed_cond, &instr->flags))
1288                        return false;
1289        }
1290
1291        instr->raddr_a = QPU_GET_FIELD(packed_instr, VC5_QPU_RADDR_A);
1292        instr->raddr_b = QPU_GET_FIELD(packed_instr, VC5_QPU_RADDR_B);
1293
1294        if (!v3d_qpu_add_unpack(devinfo, packed_instr, instr))
1295                return false;
1296
1297        if (!v3d_qpu_mul_unpack(devinfo, packed_instr, instr))
1298                return false;
1299
1300        return true;
1301}
1302
1303static bool
1304v3d_qpu_instr_unpack_branch(const struct v3d_device_info *devinfo,
1305                            uint64_t packed_instr,
1306                            struct v3d_qpu_instr *instr)
1307{
1308        instr->type = V3D_QPU_INSTR_TYPE_BRANCH;
1309
1310        uint32_t cond = QPU_GET_FIELD(packed_instr, VC5_QPU_BRANCH_COND);
1311        if (cond == 0)
1312                instr->branch.cond = V3D_QPU_BRANCH_COND_ALWAYS;
1313        else if (V3D_QPU_BRANCH_COND_A0 + (cond - 2) <=
1314                 V3D_QPU_BRANCH_COND_ALLNA)
1315                instr->branch.cond = V3D_QPU_BRANCH_COND_A0 + (cond - 2);
1316        else
1317                return false;
1318
1319        uint32_t msfign = QPU_GET_FIELD(packed_instr, VC5_QPU_BRANCH_MSFIGN);
1320        if (msfign == 3)
1321                return false;
1322        instr->branch.msfign = msfign;
1323
1324        instr->branch.bdi = QPU_GET_FIELD(packed_instr, VC5_QPU_BRANCH_BDI);
1325
1326        instr->branch.ub = packed_instr & VC5_QPU_BRANCH_UB;
1327        if (instr->branch.ub) {
1328                instr->branch.bdu = QPU_GET_FIELD(packed_instr,
1329                                                  VC5_QPU_BRANCH_BDU);
1330        }
1331
1332        instr->branch.raddr_a = QPU_GET_FIELD(packed_instr,
1333                                              VC5_QPU_RADDR_A);
1334
1335        instr->branch.offset = 0;
1336
1337        instr->branch.offset +=
1338                QPU_GET_FIELD(packed_instr,
1339                              VC5_QPU_BRANCH_ADDR_LOW) << 3;
1340
1341        instr->branch.offset +=
1342                QPU_GET_FIELD(packed_instr,
1343                              VC5_QPU_BRANCH_ADDR_HIGH) << 24;
1344
1345        return true;
1346}
1347
1348bool
1349v3d_qpu_instr_unpack(const struct v3d_device_info *devinfo,
1350                     uint64_t packed_instr,
1351                     struct v3d_qpu_instr *instr)
1352{
1353        if (QPU_GET_FIELD(packed_instr, VC5_QPU_OP_MUL) != 0) {
1354                return v3d_qpu_instr_unpack_alu(devinfo, packed_instr, instr);
1355        } else {
1356                uint32_t sig = QPU_GET_FIELD(packed_instr, VC5_QPU_SIG);
1357
1358                if ((sig & 24) == 16) {
1359                        return v3d_qpu_instr_unpack_branch(devinfo, packed_instr,
1360                                                           instr);
1361                } else {
1362                        return false;
1363                }
1364        }
1365}
1366
1367static bool
1368v3d_qpu_instr_pack_alu(const struct v3d_device_info *devinfo,
1369                       const struct v3d_qpu_instr *instr,
1370                       uint64_t *packed_instr)
1371{
1372        uint32_t sig;
1373        if (!v3d_qpu_sig_pack(devinfo, &instr->sig, &sig))
1374                return false;
1375        *packed_instr |= QPU_SET_FIELD(sig, VC5_QPU_SIG);
1376
1377        if (instr->type == V3D_QPU_INSTR_TYPE_ALU) {
1378                *packed_instr |= QPU_SET_FIELD(instr->raddr_a, VC5_QPU_RADDR_A);
1379                *packed_instr |= QPU_SET_FIELD(instr->raddr_b, VC5_QPU_RADDR_B);
1380
1381                if (!v3d_qpu_add_pack(devinfo, instr, packed_instr))
1382                        return false;
1383                if (!v3d_qpu_mul_pack(devinfo, instr, packed_instr))
1384                        return false;
1385
1386                uint32_t flags;
1387                if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) {
1388                        if (instr->flags.ac != V3D_QPU_COND_NONE ||
1389                            instr->flags.mc != V3D_QPU_COND_NONE ||
1390                            instr->flags.apf != V3D_QPU_PF_NONE ||
1391                            instr->flags.mpf != V3D_QPU_PF_NONE ||
1392                            instr->flags.auf != V3D_QPU_UF_NONE ||
1393                            instr->flags.muf != V3D_QPU_UF_NONE) {
1394                                return false;
1395                        }
1396
1397                        flags = instr->sig_addr;
1398                        if (instr->sig_magic)
1399                                flags |= VC5_QPU_COND_SIG_MAGIC_ADDR;
1400                } else {
1401                        if (!v3d_qpu_flags_pack(devinfo, &instr->flags, &flags))
1402                                return false;
1403                }
1404
1405                *packed_instr |= QPU_SET_FIELD(flags, VC5_QPU_COND);
1406        } else {
1407                if (v3d_qpu_sig_writes_address(devinfo, &instr->sig))
1408                        return false;
1409        }
1410
1411        return true;
1412}
1413
1414static bool
1415v3d_qpu_instr_pack_branch(const struct v3d_device_info *devinfo,
1416                          const struct v3d_qpu_instr *instr,
1417                          uint64_t *packed_instr)
1418{
1419        *packed_instr |= QPU_SET_FIELD(16, VC5_QPU_SIG);
1420
1421        if (instr->branch.cond != V3D_QPU_BRANCH_COND_ALWAYS) {
1422                *packed_instr |= QPU_SET_FIELD(2 + (instr->branch.cond -
1423                                                    V3D_QPU_BRANCH_COND_A0),
1424                                               VC5_QPU_BRANCH_COND);
1425        }
1426
1427        *packed_instr |= QPU_SET_FIELD(instr->branch.msfign,
1428                                       VC5_QPU_BRANCH_MSFIGN);
1429
1430        *packed_instr |= QPU_SET_FIELD(instr->branch.bdi,
1431                                       VC5_QPU_BRANCH_BDI);
1432
1433        if (instr->branch.ub) {
1434                *packed_instr |= VC5_QPU_BRANCH_UB;
1435                *packed_instr |= QPU_SET_FIELD(instr->branch.bdu,
1436                                               VC5_QPU_BRANCH_BDU);
1437        }
1438
1439        switch (instr->branch.bdi) {
1440        case V3D_QPU_BRANCH_DEST_ABS:
1441        case V3D_QPU_BRANCH_DEST_REL:
1442                *packed_instr |= QPU_SET_FIELD(instr->branch.msfign,
1443                                               VC5_QPU_BRANCH_MSFIGN);
1444
1445                *packed_instr |= QPU_SET_FIELD((instr->branch.offset &
1446                                                ~0xff000000) >> 3,
1447                                               VC5_QPU_BRANCH_ADDR_LOW);
1448
1449                *packed_instr |= QPU_SET_FIELD(instr->branch.offset >> 24,
1450                                               VC5_QPU_BRANCH_ADDR_HIGH);
1451
1452        case V3D_QPU_BRANCH_DEST_REGFILE:
1453                *packed_instr |= QPU_SET_FIELD(instr->branch.raddr_a,
1454                                               VC5_QPU_RADDR_A);
1455                break;
1456
1457        default:
1458                break;
1459        }
1460
1461        return true;
1462}
1463
1464bool
1465v3d_qpu_instr_pack(const struct v3d_device_info *devinfo,
1466                   const struct v3d_qpu_instr *instr,
1467                   uint64_t *packed_instr)
1468{
1469        *packed_instr = 0;
1470
1471        switch (instr->type) {
1472        case V3D_QPU_INSTR_TYPE_ALU:
1473                return v3d_qpu_instr_pack_alu(devinfo, instr, packed_instr);
1474        case V3D_QPU_INSTR_TYPE_BRANCH:
1475                return v3d_qpu_instr_pack_branch(devinfo, instr, packed_instr);
1476        default:
1477                return false;
1478        }
1479}
1480