1/*
2 * Copyright © 2016 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include <string.h>
25#include "util/macros.h"
26#include "util/bitscan.h"
27
28#include "broadcom/common/v3d_device_info.h"
29#include "qpu_instr.h"
30
#ifndef QPU_MASK
/* Builds a 64-bit mask covering instruction bits [high..low], inclusive. */
#define QPU_MASK(high, low) ((((uint64_t)1<<((high)-(low)+1))-1)<<(low))
/* Using the GNU statement expression extension */
/* Shifts `value` into position for `field` (named by its _SHIFT/_MASK
 * defines below) and asserts that the value fits within the field.
 */
#define QPU_SET_FIELD(value, field)                                       \
        ({                                                                \
                uint64_t fieldval = (uint64_t)(value) << field ## _SHIFT; \
                assert((fieldval & ~ field ## _MASK) == 0);               \
                fieldval & field ## _MASK;                                \
         })

/* Extracts `field` from a packed instruction word. */
#define QPU_GET_FIELD(word, field) ((uint32_t)(((word)  & field ## _MASK) >> field ## _SHIFT))

/* Replaces the current contents of `field` in `inst` with `value`. */
#define QPU_UPDATE_FIELD(inst, value, field)                              \
        (((inst) & ~(field ## _MASK)) | QPU_SET_FIELD(value, field))
#endif /* QPU_MASK */
46
/* Bit positions of the fields in the 64-bit QPU instruction encoding.
 * ALU and branch instructions overlap in several of these ranges (e.g.
 * the branch address fields share bits with the ALU waddr/mux fields).
 */
#define V3D_QPU_OP_MUL_SHIFT                58
#define V3D_QPU_OP_MUL_MASK                 QPU_MASK(63, 58)

#define V3D_QPU_SIG_SHIFT                   53
#define V3D_QPU_SIG_MASK                    QPU_MASK(57, 53)

#define V3D_QPU_COND_SHIFT                  46
#define V3D_QPU_COND_MASK                   QPU_MASK(52, 46)
#define V3D_QPU_COND_SIG_MAGIC_ADDR         (1 << 6)

#define V3D_QPU_MM                          QPU_MASK(45, 45)
#define V3D_QPU_MA                          QPU_MASK(44, 44)

#define V3D_QPU_WADDR_M_SHIFT               38
#define V3D_QPU_WADDR_M_MASK                QPU_MASK(43, 38)

#define V3D_QPU_BRANCH_ADDR_LOW_SHIFT       35
#define V3D_QPU_BRANCH_ADDR_LOW_MASK        QPU_MASK(55, 35)

#define V3D_QPU_WADDR_A_SHIFT               32
#define V3D_QPU_WADDR_A_MASK                QPU_MASK(37, 32)

#define V3D_QPU_BRANCH_COND_SHIFT           32
#define V3D_QPU_BRANCH_COND_MASK            QPU_MASK(34, 32)

#define V3D_QPU_BRANCH_ADDR_HIGH_SHIFT      24
#define V3D_QPU_BRANCH_ADDR_HIGH_MASK       QPU_MASK(31, 24)

#define V3D_QPU_OP_ADD_SHIFT                24
#define V3D_QPU_OP_ADD_MASK                 QPU_MASK(31, 24)

#define V3D_QPU_MUL_B_SHIFT                 21
#define V3D_QPU_MUL_B_MASK                  QPU_MASK(23, 21)

#define V3D_QPU_BRANCH_MSFIGN_SHIFT         21
#define V3D_QPU_BRANCH_MSFIGN_MASK          QPU_MASK(22, 21)

#define V3D_QPU_MUL_A_SHIFT                 18
#define V3D_QPU_MUL_A_MASK                  QPU_MASK(20, 18)

#define V3D_QPU_ADD_B_SHIFT                 15
#define V3D_QPU_ADD_B_MASK                  QPU_MASK(17, 15)

#define V3D_QPU_BRANCH_BDU_SHIFT            15
#define V3D_QPU_BRANCH_BDU_MASK             QPU_MASK(17, 15)

#define V3D_QPU_BRANCH_UB                   QPU_MASK(14, 14)

#define V3D_QPU_ADD_A_SHIFT                 12
#define V3D_QPU_ADD_A_MASK                  QPU_MASK(14, 12)

#define V3D_QPU_BRANCH_BDI_SHIFT            12
#define V3D_QPU_BRANCH_BDI_MASK             QPU_MASK(13, 12)

#define V3D_QPU_RADDR_A_SHIFT               6
#define V3D_QPU_RADDR_A_MASK                QPU_MASK(11, 6)

#define V3D_QPU_RADDR_B_SHIFT               0
#define V3D_QPU_RADDR_B_MASK                QPU_MASK(5, 0)

/* Shorthand designated initializers for the struct v3d_qpu_sig tables
 * below, so each row reads as the list of signals it sets.
 */
#define THRSW .thrsw = true
#define LDUNIF .ldunif = true
#define LDUNIFRF .ldunifrf = true
#define LDUNIFA .ldunifa = true
#define LDUNIFARF .ldunifarf = true
#define LDTMU .ldtmu = true
#define LDVARY .ldvary = true
#define LDVPM .ldvpm = true
#define SMIMM .small_imm = true
#define LDTLB .ldtlb = true
#define LDTLBU .ldtlbu = true
#define UCB .ucb = true
#define ROT .rotate = true
#define WRTMUC .wrtmuc = true
121
/* Decode table for the 5-bit signal field on V3D 3.3: each packed signal
 * encoding maps to the set of signal flags it requests.  Indices left out
 * ([18]..[21]) are reserved encodings and unpack as all-zero entries,
 * which v3d_qpu_sig_unpack() rejects.
 */
static const struct v3d_qpu_sig v33_sig_map[] = {
        /*      MISC   R3       R4      R5 */
        [0]  = {                               },
        [1]  = { THRSW,                        },
        [2]  = {                        LDUNIF },
        [3]  = { THRSW,                 LDUNIF },
        [4]  = {                LDTMU,         },
        [5]  = { THRSW,         LDTMU,         },
        [6]  = {                LDTMU,  LDUNIF },
        [7]  = { THRSW,         LDTMU,  LDUNIF },
        [8]  = {        LDVARY,                },
        [9]  = { THRSW, LDVARY,                },
        [10] = {        LDVARY,         LDUNIF },
        [11] = { THRSW, LDVARY,         LDUNIF },
        [12] = {        LDVARY, LDTMU,         },
        [13] = { THRSW, LDVARY, LDTMU,         },
        [14] = { SMIMM, LDVARY,                },
        [15] = { SMIMM,                        },
        [16] = {        LDTLB,                 },
        [17] = {        LDTLBU,                },
        /* 18-21 reserved */
        [22] = { UCB,                          },
        [23] = { ROT,                          },
        [24] = {        LDVPM,                 },
        [25] = { THRSW, LDVPM,                 },
        [26] = {        LDVPM,          LDUNIF },
        [27] = { THRSW, LDVPM,          LDUNIF },
        [28] = {        LDVPM, LDTMU,          },
        [29] = { THRSW, LDVPM, LDTMU,          },
        [30] = { SMIMM, LDVPM,                 },
        [31] = { SMIMM,                        },
};
154
/* Signal decode table for V3D 4.0.  Relative to v33_sig_map, the LDVPM
 * combinations at 12-13 and 24-30 are gone (reserved) and WRTMUC
 * encodings appear at 18-21.
 */
static const struct v3d_qpu_sig v40_sig_map[] = {
        /*      MISC    R3      R4      R5 */
        [0]  = {                               },
        [1]  = { THRSW,                        },
        [2]  = {                        LDUNIF },
        [3]  = { THRSW,                 LDUNIF },
        [4]  = {                LDTMU,         },
        [5]  = { THRSW,         LDTMU,         },
        [6]  = {                LDTMU,  LDUNIF },
        [7]  = { THRSW,         LDTMU,  LDUNIF },
        [8]  = {        LDVARY,                },
        [9]  = { THRSW, LDVARY,                },
        [10] = {        LDVARY,         LDUNIF },
        [11] = { THRSW, LDVARY,         LDUNIF },
        /* 12-13 reserved */
        [14] = { SMIMM, LDVARY,                },
        [15] = { SMIMM,                        },
        [16] = {        LDTLB,                 },
        [17] = {        LDTLBU,                },
        [18] = {                        WRTMUC },
        [19] = { THRSW,                 WRTMUC },
        [20] = {        LDVARY,         WRTMUC },
        [21] = { THRSW, LDVARY,         WRTMUC },
        [22] = { UCB,                          },
        [23] = { ROT,                          },
        /* 24-30 reserved */
        [31] = { SMIMM,         LDTMU,         },
};
183
/* Signal decode table for V3D 4.1+.  Adds LDUNIFRF (12-13), LDUNIFA (24)
 * and LDUNIFARF (25) relative to v40_sig_map.
 */
static const struct v3d_qpu_sig v41_sig_map[] = {
        /*      MISC       phys    R5 */
        [0]  = {                          },
        [1]  = { THRSW,                   },
        [2]  = {                   LDUNIF },
        [3]  = { THRSW,            LDUNIF },
        [4]  = {           LDTMU,         },
        [5]  = { THRSW,    LDTMU,         },
        [6]  = {           LDTMU,  LDUNIF },
        [7]  = { THRSW,    LDTMU,  LDUNIF },
        [8]  = {           LDVARY,        },
        [9]  = { THRSW,    LDVARY,        },
        [10] = {           LDVARY, LDUNIF },
        [11] = { THRSW,    LDVARY, LDUNIF },
        [12] = { LDUNIFRF                 },
        [13] = { THRSW,    LDUNIFRF       },
        [14] = { SMIMM,    LDVARY,        },
        [15] = { SMIMM,                   },
        [16] = {           LDTLB,         },
        [17] = {           LDTLBU,        },
        [18] = {                          WRTMUC },
        [19] = { THRSW,                   WRTMUC },
        [20] = {           LDVARY,        WRTMUC },
        [21] = { THRSW,    LDVARY,        WRTMUC },
        [22] = { UCB,                     },
        [23] = { ROT,                     },
        [24] = {                   LDUNIFA},
        [25] = { LDUNIFARF                },
        /* 26-30 reserved */
        [31] = { SMIMM,            LDTMU, },
};
215
216bool
217v3d_qpu_sig_unpack(const struct v3d_device_info *devinfo,
218                   uint32_t packed_sig,
219                   struct v3d_qpu_sig *sig)
220{
221        if (packed_sig >= ARRAY_SIZE(v33_sig_map))
222                return false;
223
224        if (devinfo->ver >= 41)
225                *sig = v41_sig_map[packed_sig];
226        else if (devinfo->ver == 40)
227                *sig = v40_sig_map[packed_sig];
228        else
229                *sig = v33_sig_map[packed_sig];
230
231        /* Signals with zeroed unpacked contents after element 0 are reserved. */
232        return (packed_sig == 0 ||
233                memcmp(sig, &v33_sig_map[0], sizeof(*sig)) != 0);
234}
235
236bool
237v3d_qpu_sig_pack(const struct v3d_device_info *devinfo,
238                 const struct v3d_qpu_sig *sig,
239                 uint32_t *packed_sig)
240{
241        static const struct v3d_qpu_sig *map;
242
243        if (devinfo->ver >= 41)
244                map = v41_sig_map;
245        else if (devinfo->ver == 40)
246                map = v40_sig_map;
247        else
248                map = v33_sig_map;
249
250        for (int i = 0; i < ARRAY_SIZE(v33_sig_map); i++) {
251                if (memcmp(&map[i], sig, sizeof(*sig)) == 0) {
252                        *packed_sig = i;
253                        return true;
254                }
255        }
256
257        return false;
258}
/* Reinterprets a float's bit pattern as an unsigned integer (type-pun via
 * union, which is well-defined in C).
 */
static inline unsigned
fui(float f)
{
        union {
                float f;
                unsigned ui;
        } bits = { .f = f };

        return bits.ui;
}
266
/* The 48 "small immediate" values, indexed by their packed encoding.
 * Entries 0-15 are the integers 0..15; entries 16-31 are -16..-1 stored
 * as 32-bit two's-complement bit patterns; entries 32-47 are the IEEE
 * bit patterns of the float powers of two 2^-8 .. 2^7.
 */
static const uint32_t small_immediates[] = {
        0, 1, 2, 3,
        4, 5, 6, 7,
        8, 9, 10, 11,
        12, 13, 14, 15,
        -16, -15, -14, -13,
        -12, -11, -10, -9,
        -8, -7, -6, -5,
        -4, -3, -2, -1,
        0x3b800000, /* 2.0^-8 */
        0x3c000000, /* 2.0^-7 */
        0x3c800000, /* 2.0^-6 */
        0x3d000000, /* 2.0^-5 */
        0x3d800000, /* 2.0^-4 */
        0x3e000000, /* 2.0^-3 */
        0x3e800000, /* 2.0^-2 */
        0x3f000000, /* 2.0^-1 */
        0x3f800000, /* 2.0^0 */
        0x40000000, /* 2.0^1 */
        0x40800000, /* 2.0^2 */
        0x41000000, /* 2.0^3 */
        0x41800000, /* 2.0^4 */
        0x42000000, /* 2.0^5 */
        0x42800000, /* 2.0^6 */
        0x43000000, /* 2.0^7 */
};
293
294bool
295v3d_qpu_small_imm_unpack(const struct v3d_device_info *devinfo,
296                         uint32_t packed_small_immediate,
297                         uint32_t *small_immediate)
298{
299        if (packed_small_immediate >= ARRAY_SIZE(small_immediates))
300                return false;
301
302        *small_immediate = small_immediates[packed_small_immediate];
303        return true;
304}
305
306bool
307v3d_qpu_small_imm_pack(const struct v3d_device_info *devinfo,
308                       uint32_t value,
309                       uint32_t *packed_small_immediate)
310{
311        STATIC_ASSERT(ARRAY_SIZE(small_immediates) == 48);
312
313        for (int i = 0; i < ARRAY_SIZE(small_immediates); i++) {
314                if (small_immediates[i] == value) {
315                        *packed_small_immediate = i;
316                        return true;
317                }
318        }
319
320        return false;
321}
322
/* Decodes the 7-bit condition field into the per-ALU condition and
 * pushed/updated flag selections.  Returns false only for the reserved
 * encoding 0x10; branch order below mirrors the encoding ranges.
 */
bool
v3d_qpu_flags_unpack(const struct v3d_device_info *devinfo,
                     uint32_t packed_cond,
                     struct v3d_qpu_flags *cond)
{
        /* Maps the 2-bit condition sub-field used by the combined
         * encodings (>= 0x40) to the condition enum.
         */
        static const enum v3d_qpu_cond cond_map[4] = {
                [0] = V3D_QPU_COND_IFA,
                [1] = V3D_QPU_COND_IFB,
                [2] = V3D_QPU_COND_IFNA,
                [3] = V3D_QPU_COND_IFNB,
        };

        /* Start from "nothing conditional, no flag writes". */
        cond->ac = V3D_QPU_COND_NONE;
        cond->mc = V3D_QPU_COND_NONE;
        cond->apf = V3D_QPU_PF_NONE;
        cond->mpf = V3D_QPU_PF_NONE;
        cond->auf = V3D_QPU_UF_NONE;
        cond->muf = V3D_QPU_UF_NONE;

        if (packed_cond == 0) {
                /* 0: no condition or flag update. */
                return true;
        } else if (packed_cond >> 2 == 0) {
                /* 1..3: add-ALU flag push. */
                cond->apf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0) {
                /* 4..15: add-ALU flag update, biased by 4 from the enum. */
                cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
        } else if (packed_cond == 0x10) {
                /* 0x10 is reserved. */
                return false;
        } else if (packed_cond >> 2 == 0x4) {
                /* 0x11..0x13: mul-ALU flag push. */
                cond->mpf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0x1) {
                /* 0x14..0x1f: mul-ALU flag update, biased by 4. */
                cond->muf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
        } else if (packed_cond >> 4 == 0x2) {
                /* 0x20..0x2f: add-ALU condition plus mul-ALU flag push. */
                cond->ac = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA;
                cond->mpf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0x3) {
                /* 0x30..0x3f: mul-ALU condition plus add-ALU flag push. */
                cond->mc = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA;
                cond->apf = packed_cond & 0x3;
        } else if (packed_cond >> 6) {
                /* 0x40..0x7f: mul-ALU condition, plus either an add-ALU
                 * condition (bits 3:2 == 0) or an add-ALU flag update.
                 */
                cond->mc = cond_map[(packed_cond >> 4) & 0x3];
                if (((packed_cond >> 2) & 0x3) == 0) {
                        cond->ac = cond_map[packed_cond & 0x3];
                } else {
                        cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
                }
        }

        return true;
}
371
/* Packs a set of conditions/flag selections back into the 7-bit condition
 * field.  The table lists every legal combination of present fields and
 * the fixed high bits of its encoding; the variable low bits are OR'd in
 * afterwards.  Returns false for combinations with no encoding.
 */
bool
v3d_qpu_flags_pack(const struct v3d_device_info *devinfo,
                   const struct v3d_qpu_flags *cond,
                   uint32_t *packed_cond)
{
/* Bitmask of which struct v3d_qpu_flags fields are in use. */
#define AC (1 << 0)
#define MC (1 << 1)
#define APF (1 << 2)
#define MPF (1 << 3)
#define AUF (1 << 4)
#define MUF (1 << 5)
        static const struct {
                uint8_t flags_present; /* combination of the masks above */
                uint8_t bits;          /* fixed bits of the encoding */
        } flags_table[] = {
                { 0,        0 },
                { APF,      0 },
                { AUF,      0 },
                { MPF,      (1 << 4) },
                { MUF,      (1 << 4) },
                { AC,       (1 << 5) },
                { AC | MPF, (1 << 5) },
                { MC,       (1 << 5) | (1 << 4) },
                { MC | APF, (1 << 5) | (1 << 4) },
                { MC | AC,  (1 << 6) },
                { MC | AUF, (1 << 6) },
        };

        /* Work out which fields are actually set. */
        uint8_t flags_present = 0;
        if (cond->ac != V3D_QPU_COND_NONE)
                flags_present |= AC;
        if (cond->mc != V3D_QPU_COND_NONE)
                flags_present |= MC;
        if (cond->apf != V3D_QPU_PF_NONE)
                flags_present |= APF;
        if (cond->mpf != V3D_QPU_PF_NONE)
                flags_present |= MPF;
        if (cond->auf != V3D_QPU_UF_NONE)
                flags_present |= AUF;
        if (cond->muf != V3D_QPU_UF_NONE)
                flags_present |= MUF;

        for (int i = 0; i < ARRAY_SIZE(flags_table); i++) {
                if (flags_table[i].flags_present != flags_present)
                        continue;

                *packed_cond = flags_table[i].bits;

                /* Push-flag fields occupy the low bits directly (the
                 * *_NONE values contribute 0 when absent).
                 */
                *packed_cond |= cond->apf;
                *packed_cond |= cond->mpf;

                /* Update-flag fields are biased by 4, mirroring unpack. */
                if (flags_present & AUF)
                        *packed_cond |= cond->auf - V3D_QPU_UF_ANDZ + 4;
                if (flags_present & MUF)
                        *packed_cond |= cond->muf - V3D_QPU_UF_ANDZ + 4;

                if (flags_present & AC)
                        *packed_cond |= (cond->ac - V3D_QPU_COND_IFA) << 2;

                /* The mul condition lands at bits 5:4 in the >= 0x40
                 * encodings, at bits 3:2 otherwise.
                 */
                if (flags_present & MC) {
                        if (*packed_cond & (1 << 6))
                                *packed_cond |= (cond->mc -
                                                 V3D_QPU_COND_IFA) << 4;
                        else
                                *packed_cond |= (cond->mc -
                                                 V3D_QPU_COND_IFA) << 2;
                }

                return true;
        }

        return false;
}
445
/* Make a mapping of the table of opcodes in the spec.  The opcode is
 * determined by a combination of the opcode field, and in the case of 0 or
 * 1-arg opcodes, the mux_b field as well.
 */
/* Bitmask with mux values bot..top (inclusive) set. */
#define MUX_MASK(bot, top) (((1 << (top + 1)) - 1) - ((1 << (bot)) - 1))
/* Accept any of the 8 mux encodings. */
#define ANYMUX MUX_MASK(0, 7)

/* One row of the add_ops/mul_ops decode tables: an opcode range, the
 * mux_a/mux_b encodings it matches (as bitmasks), the decoded op enum,
 * and the V3D version range it applies to.
 */
struct opcode_desc {
        uint8_t opcode_first;
        uint8_t opcode_last;
        uint8_t mux_b_mask;
        uint8_t mux_a_mask;
        uint8_t op;

        /* first_ver == 0 if it's the same across all V3D versions.
         * first_ver == X, last_ver == 0 if it's the same for all V3D versions
         *   starting from X
         * first_ver == X, last_ver == Y if it's the same for all V3D versions
         *   on the range X through Y
         */
        uint8_t first_ver;
        uint8_t last_ver;
};
469
/* Decode table for the add-ALU opcode space.  Rows are matched in order
 * by lookup_opcode_from_packed(): opcode range first, then V3D version,
 * then the mux_b/mux_a masks.  Rows sharing the same range (FADD/FADDNF,
 * FMIN/FMAX, the STVPMs) are disambiguated afterwards by the unpack code
 * using operand order or waddr.
 */
static const struct opcode_desc add_ops[] = {
        /* FADD is FADDNF depending on the order of the mux_a/mux_b. */
        { 0,   47,  ANYMUX, ANYMUX, V3D_QPU_A_FADD },
        { 0,   47,  ANYMUX, ANYMUX, V3D_QPU_A_FADDNF },
        { 53,  55,  ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
        { 56,  56,  ANYMUX, ANYMUX, V3D_QPU_A_ADD },
        { 57,  59,  ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
        { 60,  60,  ANYMUX, ANYMUX, V3D_QPU_A_SUB },
        { 61,  63,  ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
        { 64,  111, ANYMUX, ANYMUX, V3D_QPU_A_FSUB },
        { 120, 120, ANYMUX, ANYMUX, V3D_QPU_A_MIN },
        { 121, 121, ANYMUX, ANYMUX, V3D_QPU_A_MAX },
        { 122, 122, ANYMUX, ANYMUX, V3D_QPU_A_UMIN },
        { 123, 123, ANYMUX, ANYMUX, V3D_QPU_A_UMAX },
        { 124, 124, ANYMUX, ANYMUX, V3D_QPU_A_SHL },
        { 125, 125, ANYMUX, ANYMUX, V3D_QPU_A_SHR },
        { 126, 126, ANYMUX, ANYMUX, V3D_QPU_A_ASR },
        { 127, 127, ANYMUX, ANYMUX, V3D_QPU_A_ROR },
        /* FMIN is instead FMAX depending on the order of the mux_a/mux_b. */
        { 128, 175, ANYMUX, ANYMUX, V3D_QPU_A_FMIN },
        { 128, 175, ANYMUX, ANYMUX, V3D_QPU_A_FMAX },
        { 176, 180, ANYMUX, ANYMUX, V3D_QPU_A_VFMIN },

        { 181, 181, ANYMUX, ANYMUX, V3D_QPU_A_AND },
        { 182, 182, ANYMUX, ANYMUX, V3D_QPU_A_OR },
        { 183, 183, ANYMUX, ANYMUX, V3D_QPU_A_XOR },

        { 184, 184, ANYMUX, ANYMUX, V3D_QPU_A_VADD },
        { 185, 185, ANYMUX, ANYMUX, V3D_QPU_A_VSUB },
        /* Opcode 186: one-operand ops selected by the mux_b encoding. */
        { 186, 186, 1 << 0, ANYMUX, V3D_QPU_A_NOT },
        { 186, 186, 1 << 1, ANYMUX, V3D_QPU_A_NEG },
        { 186, 186, 1 << 2, ANYMUX, V3D_QPU_A_FLAPUSH },
        { 186, 186, 1 << 3, ANYMUX, V3D_QPU_A_FLBPUSH },
        { 186, 186, 1 << 4, ANYMUX, V3D_QPU_A_FLPOP },
        { 186, 186, 1 << 5, ANYMUX, V3D_QPU_A_RECIP },
        { 186, 186, 1 << 6, ANYMUX, V3D_QPU_A_SETMSF },
        { 186, 186, 1 << 7, ANYMUX, V3D_QPU_A_SETREVF },
        /* Opcode 187: zero-operand ops selected by mux_b and mux_a. */
        { 187, 187, 1 << 0, 1 << 0, V3D_QPU_A_NOP, 0 },
        { 187, 187, 1 << 0, 1 << 1, V3D_QPU_A_TIDX },
        { 187, 187, 1 << 0, 1 << 2, V3D_QPU_A_EIDX },
        { 187, 187, 1 << 0, 1 << 3, V3D_QPU_A_LR },
        { 187, 187, 1 << 0, 1 << 4, V3D_QPU_A_VFLA },
        { 187, 187, 1 << 0, 1 << 5, V3D_QPU_A_VFLNA },
        { 187, 187, 1 << 0, 1 << 6, V3D_QPU_A_VFLB },
        { 187, 187, 1 << 0, 1 << 7, V3D_QPU_A_VFLNB },

        { 187, 187, 1 << 1, MUX_MASK(0, 2), V3D_QPU_A_FXCD },
        { 187, 187, 1 << 1, 1 << 3, V3D_QPU_A_XCD },
        { 187, 187, 1 << 1, MUX_MASK(4, 6), V3D_QPU_A_FYCD },
        { 187, 187, 1 << 1, 1 << 7, V3D_QPU_A_YCD },

        { 187, 187, 1 << 2, 1 << 0, V3D_QPU_A_MSF },
        { 187, 187, 1 << 2, 1 << 1, V3D_QPU_A_REVF },
        { 187, 187, 1 << 2, 1 << 2, V3D_QPU_A_VDWWT, 33 },
        { 187, 187, 1 << 2, 1 << 2, V3D_QPU_A_IID, 40 },
        { 187, 187, 1 << 2, 1 << 3, V3D_QPU_A_SAMPID, 40 },
        { 187, 187, 1 << 2, 1 << 4, V3D_QPU_A_BARRIERID, 40 },
        { 187, 187, 1 << 2, 1 << 5, V3D_QPU_A_TMUWT },
        { 187, 187, 1 << 2, 1 << 6, V3D_QPU_A_VPMWT },
        { 187, 187, 1 << 2, 1 << 7, V3D_QPU_A_FLAFIRST, 41 },
        { 187, 187, 1 << 3, 1 << 0, V3D_QPU_A_FLNAFIRST, 41 },
        { 187, 187, 1 << 3, ANYMUX, V3D_QPU_A_VPMSETUP, 33 },

        { 188, 188, 1 << 0, ANYMUX, V3D_QPU_A_LDVPMV_IN, 40 },
        { 188, 188, 1 << 0, ANYMUX, V3D_QPU_A_LDVPMV_OUT, 40 },
        { 188, 188, 1 << 1, ANYMUX, V3D_QPU_A_LDVPMD_IN, 40 },
        { 188, 188, 1 << 1, ANYMUX, V3D_QPU_A_LDVPMD_OUT, 40 },
        { 188, 188, 1 << 2, ANYMUX, V3D_QPU_A_LDVPMP, 40 },
        { 188, 188, 1 << 3, ANYMUX, V3D_QPU_A_RSQRT, 41 },
        { 188, 188, 1 << 4, ANYMUX, V3D_QPU_A_EXP, 41 },
        { 188, 188, 1 << 5, ANYMUX, V3D_QPU_A_LOG, 41 },
        { 188, 188, 1 << 6, ANYMUX, V3D_QPU_A_SIN, 41 },
        { 188, 188, 1 << 7, ANYMUX, V3D_QPU_A_RSQRT2, 41 },
        { 189, 189, ANYMUX, ANYMUX, V3D_QPU_A_LDVPMG_IN, 40 },
        { 189, 189, ANYMUX, ANYMUX, V3D_QPU_A_LDVPMG_OUT, 40 },

        /* FIXME: MORE COMPLICATED */
        /* { 190, 191, ANYMUX, ANYMUX, V3D_QPU_A_VFMOVABSNEGNAB }, */

        { 192, 239, ANYMUX, ANYMUX, V3D_QPU_A_FCMP },
        { 240, 244, ANYMUX, ANYMUX, V3D_QPU_A_VFMAX },

        { 245, 245, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FROUND },
        { 245, 245, 1 << 3, ANYMUX, V3D_QPU_A_FTOIN },
        { 245, 245, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FTRUNC },
        { 245, 245, 1 << 7, ANYMUX, V3D_QPU_A_FTOIZ },
        { 246, 246, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FFLOOR },
        { 246, 246, 1 << 3, ANYMUX, V3D_QPU_A_FTOUZ },
        { 246, 246, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FCEIL },
        { 246, 246, 1 << 7, ANYMUX, V3D_QPU_A_FTOC },

        { 247, 247, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FDX },
        { 247, 247, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FDY },

        /* The stvpms are distinguished by the waddr field. */
        { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMV },
        { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMD },
        { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMP },

        { 252, 252, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_ITOF },
        { 252, 252, 1 << 3, ANYMUX, V3D_QPU_A_CLZ },
        { 252, 252, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_UTOF },
};
573
/* Decode table for the mul-ALU opcode space; matched with the same rules
 * as add_ops (opcode range, then version, then mux masks, in row order).
 */
static const struct opcode_desc mul_ops[] = {
        { 1, 1, ANYMUX, ANYMUX, V3D_QPU_M_ADD },
        { 2, 2, ANYMUX, ANYMUX, V3D_QPU_M_SUB },
        { 3, 3, ANYMUX, ANYMUX, V3D_QPU_M_UMUL24 },
        { 4, 8, ANYMUX, ANYMUX, V3D_QPU_M_VFMUL },
        { 9, 9, ANYMUX, ANYMUX, V3D_QPU_M_SMUL24 },
        { 10, 10, ANYMUX, ANYMUX, V3D_QPU_M_MULTOP },
        { 14, 14, ANYMUX, ANYMUX, V3D_QPU_M_FMOV },
        { 15, 15, MUX_MASK(0, 3), ANYMUX, V3D_QPU_M_FMOV },
        { 15, 15, 1 << 4, 1 << 0, V3D_QPU_M_NOP, 0 },
        { 15, 15, 1 << 7, ANYMUX, V3D_QPU_M_MOV },
        { 16, 63, ANYMUX, ANYMUX, V3D_QPU_M_FMUL },
};
587
588/* Returns true if op_desc should be filtered out based on devinfo->ver
589 * against op_desc->first_ver and op_desc->last_ver. Check notes about
590 * first_ver/last_ver on struct opcode_desc comments.
591 */
592static bool
593opcode_invalid_in_version(const struct v3d_device_info *devinfo,
594                          const struct opcode_desc *op_desc)
595{
596        return (op_desc->first_ver != 0 && devinfo->ver < op_desc->first_ver) ||
597                (op_desc->last_ver != 0  && devinfo->ver > op_desc->last_ver);
598}
599
600static const struct opcode_desc *
601lookup_opcode_from_packed(const struct v3d_device_info *devinfo,
602                          const struct opcode_desc *opcodes,
603                          size_t num_opcodes, uint32_t opcode,
604                          uint32_t mux_a, uint32_t mux_b)
605{
606        for (int i = 0; i < num_opcodes; i++) {
607                const struct opcode_desc *op_desc = &opcodes[i];
608
609                if (opcode < op_desc->opcode_first ||
610                    opcode > op_desc->opcode_last)
611                        continue;
612
613                if (opcode_invalid_in_version(devinfo, op_desc))
614                        continue;
615
616                if (!(op_desc->mux_b_mask & (1 << mux_b)))
617                        continue;
618
619                if (!(op_desc->mux_a_mask & (1 << mux_a)))
620                        continue;
621
622                return op_desc;
623        }
624
625        return NULL;
626}
627
628static bool
629v3d_qpu_float32_unpack_unpack(uint32_t packed,
630                              enum v3d_qpu_input_unpack *unpacked)
631{
632        switch (packed) {
633        case 0:
634                *unpacked = V3D_QPU_UNPACK_ABS;
635                return true;
636        case 1:
637                *unpacked = V3D_QPU_UNPACK_NONE;
638                return true;
639        case 2:
640                *unpacked = V3D_QPU_UNPACK_L;
641                return true;
642        case 3:
643                *unpacked = V3D_QPU_UNPACK_H;
644                return true;
645        default:
646                return false;
647        }
648}
649
650static bool
651v3d_qpu_float32_unpack_pack(enum v3d_qpu_input_unpack unpacked,
652                            uint32_t *packed)
653{
654        switch (unpacked) {
655        case V3D_QPU_UNPACK_ABS:
656                *packed = 0;
657                return true;
658        case V3D_QPU_UNPACK_NONE:
659                *packed = 1;
660                return true;
661        case V3D_QPU_UNPACK_L:
662                *packed = 2;
663                return true;
664        case V3D_QPU_UNPACK_H:
665                *packed = 3;
666                return true;
667        default:
668                return false;
669        }
670}
671
672static bool
673v3d_qpu_float16_unpack_unpack(uint32_t packed,
674                              enum v3d_qpu_input_unpack *unpacked)
675{
676        switch (packed) {
677        case 0:
678                *unpacked = V3D_QPU_UNPACK_NONE;
679                return true;
680        case 1:
681                *unpacked = V3D_QPU_UNPACK_REPLICATE_32F_16;
682                return true;
683        case 2:
684                *unpacked = V3D_QPU_UNPACK_REPLICATE_L_16;
685                return true;
686        case 3:
687                *unpacked = V3D_QPU_UNPACK_REPLICATE_H_16;
688                return true;
689        case 4:
690                *unpacked = V3D_QPU_UNPACK_SWAP_16;
691                return true;
692        default:
693                return false;
694        }
695}
696
697static bool
698v3d_qpu_float16_unpack_pack(enum v3d_qpu_input_unpack unpacked,
699                            uint32_t *packed)
700{
701        switch (unpacked) {
702        case V3D_QPU_UNPACK_NONE:
703                *packed = 0;
704                return true;
705        case V3D_QPU_UNPACK_REPLICATE_32F_16:
706                *packed = 1;
707                return true;
708        case V3D_QPU_UNPACK_REPLICATE_L_16:
709                *packed = 2;
710                return true;
711        case V3D_QPU_UNPACK_REPLICATE_H_16:
712                *packed = 3;
713                return true;
714        case V3D_QPU_UNPACK_SWAP_16:
715                *packed = 4;
716                return true;
717        default:
718                return false;
719        }
720}
721
722static bool
723v3d_qpu_float32_pack_pack(enum v3d_qpu_input_unpack unpacked,
724                          uint32_t *packed)
725{
726        switch (unpacked) {
727        case V3D_QPU_PACK_NONE:
728                *packed = 0;
729                return true;
730        case V3D_QPU_PACK_L:
731                *packed = 1;
732                return true;
733        case V3D_QPU_PACK_H:
734                *packed = 2;
735                return true;
736        default:
737                return false;
738        }
739}
740
741static bool
742v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
743                   struct v3d_qpu_instr *instr)
744{
745        uint32_t op = QPU_GET_FIELD(packed_inst, V3D_QPU_OP_ADD);
746        uint32_t mux_a = QPU_GET_FIELD(packed_inst, V3D_QPU_ADD_A);
747        uint32_t mux_b = QPU_GET_FIELD(packed_inst, V3D_QPU_ADD_B);
748        uint32_t waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);
749
750        uint32_t map_op = op;
751        /* Some big clusters of opcodes are replicated with unpack
752         * flags
753         */
754        if (map_op >= 249 && map_op <= 251)
755                map_op = (map_op - 249 + 245);
756        if (map_op >= 253 && map_op <= 255)
757                map_op = (map_op - 253 + 245);
758
759        const struct opcode_desc *desc =
760                lookup_opcode_from_packed(devinfo, add_ops, ARRAY_SIZE(add_ops),
761                                          map_op, mux_a, mux_b);
762
763        if (!desc)
764                return false;
765
766        instr->alu.add.op = desc->op;
767
768        /* FADD/FADDNF and FMIN/FMAX are determined by the orders of the
769         * operands.
770         */
771        if (((op >> 2) & 3) * 8 + mux_a > (op & 3) * 8 + mux_b) {
772                if (instr->alu.add.op == V3D_QPU_A_FMIN)
773                        instr->alu.add.op = V3D_QPU_A_FMAX;
774                if (instr->alu.add.op == V3D_QPU_A_FADD)
775                        instr->alu.add.op = V3D_QPU_A_FADDNF;
776        }
777
778        /* Some QPU ops require a bit more than just basic opcode and mux a/b
779         * comparisons to distinguish them.
780         */
781        switch (instr->alu.add.op) {
782        case V3D_QPU_A_STVPMV:
783        case V3D_QPU_A_STVPMD:
784        case V3D_QPU_A_STVPMP:
785                switch (waddr) {
786                case 0:
787                        instr->alu.add.op = V3D_QPU_A_STVPMV;
788                        break;
789                case 1:
790                        instr->alu.add.op = V3D_QPU_A_STVPMD;
791                        break;
792                case 2:
793                        instr->alu.add.op = V3D_QPU_A_STVPMP;
794                        break;
795                default:
796                        return false;
797                }
798                break;
799        default:
800                break;
801        }
802
803        switch (instr->alu.add.op) {
804        case V3D_QPU_A_FADD:
805        case V3D_QPU_A_FADDNF:
806        case V3D_QPU_A_FSUB:
807        case V3D_QPU_A_FMIN:
808        case V3D_QPU_A_FMAX:
809        case V3D_QPU_A_FCMP:
810        case V3D_QPU_A_VFPACK:
811                if (instr->alu.add.op != V3D_QPU_A_VFPACK)
812                        instr->alu.add.output_pack = (op >> 4) & 0x3;
813                else
814                        instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
815
816                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
817                                                   &instr->alu.add.a_unpack)) {
818                        return false;
819                }
820
821                if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3,
822                                                   &instr->alu.add.b_unpack)) {
823                        return false;
824                }
825                break;
826
827        case V3D_QPU_A_FFLOOR:
828        case V3D_QPU_A_FROUND:
829        case V3D_QPU_A_FTRUNC:
830        case V3D_QPU_A_FCEIL:
831        case V3D_QPU_A_FDX:
832        case V3D_QPU_A_FDY:
833                instr->alu.add.output_pack = mux_b & 0x3;
834
835                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
836                                                   &instr->alu.add.a_unpack)) {
837                        return false;
838                }
839                break;
840
841        case V3D_QPU_A_FTOIN:
842        case V3D_QPU_A_FTOIZ:
843        case V3D_QPU_A_FTOUZ:
844        case V3D_QPU_A_FTOC:
845                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
846
847                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
848                                                   &instr->alu.add.a_unpack)) {
849                        return false;
850                }
851                break;
852
853        case V3D_QPU_A_VFMIN:
854        case V3D_QPU_A_VFMAX:
855                if (!v3d_qpu_float16_unpack_unpack(op & 0x7,
856                                                   &instr->alu.add.a_unpack)) {
857                        return false;
858                }
859
860                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
861                instr->alu.add.b_unpack = V3D_QPU_UNPACK_NONE;
862                break;
863
864        default:
865                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
866                instr->alu.add.a_unpack = V3D_QPU_UNPACK_NONE;
867                instr->alu.add.b_unpack = V3D_QPU_UNPACK_NONE;
868                break;
869        }
870
871        instr->alu.add.a = mux_a;
872        instr->alu.add.b = mux_b;
873        instr->alu.add.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);
874
875        instr->alu.add.magic_write = false;
876        if (packed_inst & V3D_QPU_MA) {
877                switch (instr->alu.add.op) {
878                case V3D_QPU_A_LDVPMV_IN:
879                        instr->alu.add.op = V3D_QPU_A_LDVPMV_OUT;
880                        break;
881                case V3D_QPU_A_LDVPMD_IN:
882                        instr->alu.add.op = V3D_QPU_A_LDVPMD_OUT;
883                        break;
884                case V3D_QPU_A_LDVPMG_IN:
885                        instr->alu.add.op = V3D_QPU_A_LDVPMG_OUT;
886                        break;
887                default:
888                        instr->alu.add.magic_write = true;
889                        break;
890                }
891        }
892
893        return true;
894}
895
/* Unpacks the mul-ALU half of a 64-bit QPU instruction into
 * instr->alu.mul: opcode, input muxes, output-pack and input-unpack
 * modifiers, write address and magic-write flag.
 *
 * Returns false when the opcode/mux combination matches no known mul
 * operation for this device, or when an unpack encoding is invalid.
 */
static bool
v3d_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
                   struct v3d_qpu_instr *instr)
{
        uint32_t op = QPU_GET_FIELD(packed_inst, V3D_QPU_OP_MUL);
        uint32_t mux_a = QPU_GET_FIELD(packed_inst, V3D_QPU_MUL_A);
        uint32_t mux_b = QPU_GET_FIELD(packed_inst, V3D_QPU_MUL_B);

        {
                const struct opcode_desc *desc =
                        lookup_opcode_from_packed(devinfo, mul_ops,
                                                  ARRAY_SIZE(mul_ops),
                                                  op, mux_a, mux_b);
                if (!desc)
                        return false;

                instr->alu.mul.op = desc->op;
        }

        switch (instr->alu.mul.op) {
        case V3D_QPU_M_FMUL:
                /* Output pack is in op bits 5:4 with a +1 bias; the pack
                 * path relies on desc->opcode_first carrying a 1 in this
                 * field, so -1 removes the bias here.
                 */
                instr->alu.mul.output_pack = ((op >> 4) & 0x3) - 1;

                /* Input unpacks are two 2-bit fields in op bits 3:2 (a)
                 * and 1:0 (b).
                 */
                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
                                                   &instr->alu.mul.a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3,
                                                   &instr->alu.mul.b_unpack)) {
                        return false;
                }

                break;

        case V3D_QPU_M_FMOV:
                /* Output pack is split: high bit in op bit 0, low bit in
                 * mux_b bit 2.  The input unpack is in mux_b bits 1:0.
                 */
                instr->alu.mul.output_pack = (((op & 1) << 1) +
                                              ((mux_b >> 2) & 1));

                if (!v3d_qpu_float32_unpack_unpack(mux_b & 0x3,
                                                   &instr->alu.mul.a_unpack)) {
                        return false;
                }

                break;

        case V3D_QPU_M_VFMUL:
                instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;

                /* f16 unpack is encoded in op bits 2:0 with a +4 bias
                 * (mod 8); mirrors "(packed + 4) & 7" in the pack path.
                 */
                if (!v3d_qpu_float16_unpack_unpack(((op & 0x7) - 4) & 7,
                                                   &instr->alu.mul.a_unpack)) {
                        return false;
                }

                instr->alu.mul.b_unpack = V3D_QPU_UNPACK_NONE;

                break;

        default:
                /* Remaining mul ops carry no pack/unpack modifiers. */
                instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;
                instr->alu.mul.a_unpack = V3D_QPU_UNPACK_NONE;
                instr->alu.mul.b_unpack = V3D_QPU_UNPACK_NONE;
                break;
        }

        instr->alu.mul.a = mux_a;
        instr->alu.mul.b = mux_b;
        instr->alu.mul.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_M);
        instr->alu.mul.magic_write = packed_inst & V3D_QPU_MM;

        return true;
}
968
969static const struct opcode_desc *
970lookup_opcode_from_instr(const struct v3d_device_info *devinfo,
971                         const struct opcode_desc *opcodes, size_t num_opcodes,
972                         uint8_t op)
973{
974        for (int i = 0; i < num_opcodes; i++) {
975                const struct opcode_desc *op_desc = &opcodes[i];
976
977                if (op_desc->op != op)
978                        continue;
979
980                if (opcode_invalid_in_version(devinfo, op_desc))
981                        continue;
982
983                return op_desc;
984        }
985
986        return NULL;
987}
988
/* Packs the add-ALU half of an instruction (opcode, input muxes,
 * pack/unpack modifiers, write address and magic-write bit) into
 * *packed_instr.
 *
 * Returns false when the op is not available on this device or the
 * requested pack/unpack modifiers have no encoding for it.
 */
static bool
v3d_qpu_add_pack(const struct v3d_device_info *devinfo,
                 const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
{
        uint32_t waddr = instr->alu.add.waddr;
        uint32_t mux_a = instr->alu.add.a;
        uint32_t mux_b = instr->alu.add.b;
        int nsrc = v3d_qpu_add_op_num_src(instr->alu.add.op);
        const struct opcode_desc *desc =
                lookup_opcode_from_instr(devinfo, add_ops, ARRAY_SIZE(add_ops),
                                         instr->alu.add.op);

        if (!desc)
                return false;

        uint32_t opcode = desc->opcode_first;

        /* If an operation doesn't use an arg, its mux values may be used to
         * identify the operation type.
         */
        if (nsrc < 2)
                mux_b = ffs(desc->mux_b_mask) - 1;

        if (nsrc < 1)
                mux_a = ffs(desc->mux_a_mask) - 1;

        bool no_magic_write = false;

        switch (instr->alu.add.op) {
        /* The STVPM* variants share an opcode and are distinguished by
         * waddr (see the matching unpack path), so waddr is repurposed
         * here and must not be treated as a magic write.
         */
        case V3D_QPU_A_STVPMV:
                waddr = 0;
                no_magic_write = true;
                break;
        case V3D_QPU_A_STVPMD:
                waddr = 1;
                no_magic_write = true;
                break;
        case V3D_QPU_A_STVPMP:
                waddr = 2;
                no_magic_write = true;
                break;

        case V3D_QPU_A_LDVPMV_IN:
        case V3D_QPU_A_LDVPMD_IN:
        case V3D_QPU_A_LDVPMP:
        case V3D_QPU_A_LDVPMG_IN:
                assert(!instr->alu.add.magic_write);
                break;

        /* The _OUT LDVPM variants are the _IN opcodes with the MA bit
         * set, mirroring the unpack path.
         */
        case V3D_QPU_A_LDVPMV_OUT:
        case V3D_QPU_A_LDVPMD_OUT:
        case V3D_QPU_A_LDVPMG_OUT:
                assert(!instr->alu.add.magic_write);
                *packed_instr |= V3D_QPU_MA;
                break;

        default:
                break;
        }

        switch (instr->alu.add.op) {
        case V3D_QPU_A_FADD:
        case V3D_QPU_A_FADDNF:
        case V3D_QPU_A_FSUB:
        case V3D_QPU_A_FMIN:
        case V3D_QPU_A_FMAX:
        case V3D_QPU_A_FCMP: {
                uint32_t output_pack;
                uint32_t a_unpack;
                uint32_t b_unpack;

                if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
                                               &output_pack)) {
                        return false;
                }
                /* Output pack goes in opcode bits 5:4. */
                opcode |= output_pack << 4;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b_unpack,
                                                 &b_unpack)) {
                        return false;
                }

                /* These operations with commutative operands are
                 * distinguished by which order their operands come in.
                 */
                bool ordering = a_unpack * 8 + mux_a > b_unpack * 8 + mux_b;
                if (((instr->alu.add.op == V3D_QPU_A_FMIN ||
                      instr->alu.add.op == V3D_QPU_A_FADD) && ordering) ||
                    ((instr->alu.add.op == V3D_QPU_A_FMAX ||
                      instr->alu.add.op == V3D_QPU_A_FADDNF) && !ordering)) {
                        uint32_t temp;

                        temp = a_unpack;
                        a_unpack = b_unpack;
                        b_unpack = temp;

                        temp = mux_a;
                        mux_a = mux_b;
                        mux_b = temp;
                }

                opcode |= a_unpack << 2;
                opcode |= b_unpack << 0;

                break;
        }

        case V3D_QPU_A_VFPACK: {
                uint32_t a_unpack;
                uint32_t b_unpack;

                /* VFPACK rejects ABS on either operand. */
                if (instr->alu.add.a_unpack == V3D_QPU_UNPACK_ABS ||
                    instr->alu.add.b_unpack == V3D_QPU_UNPACK_ABS) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b_unpack,
                                                 &b_unpack)) {
                        return false;
                }

                /* Clear the opcode bit for each operand before OR-ing in
                 * the unpack value (opcode_first may have it set).
                 */
                opcode = (opcode & ~(1 << 2)) | (a_unpack << 2);
                opcode = (opcode & ~(1 << 0)) | (b_unpack << 0);

                break;
        }

        case V3D_QPU_A_FFLOOR:
        case V3D_QPU_A_FROUND:
        case V3D_QPU_A_FTRUNC:
        case V3D_QPU_A_FCEIL:
        case V3D_QPU_A_FDX:
        case V3D_QPU_A_FDY: {
                uint32_t packed;

                /* For these ops the output pack rides in the low bits of
                 * mux_b (see the unpack path).
                 */
                if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
                                               &packed)) {
                        return false;
                }
                mux_b |= packed;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &packed)) {
                        return false;
                }
                /* Unpack encoding 0 is rejected for these ops. */
                if (packed == 0)
                        return false;
                opcode = (opcode & ~(1 << 2)) | packed << 2;
                break;
        }

        case V3D_QPU_A_FTOIN:
        case V3D_QPU_A_FTOIZ:
        case V3D_QPU_A_FTOUZ:
        case V3D_QPU_A_FTOC:
                if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE)
                        return false;

                /* NOTE(review): this declaration is also used by the
                 * VFMIN/VFMAX case below, which jumps past it and assigns
                 * before use.  Legal C for a non-VLA local, but fragile
                 * if cases are reordered.
                 */
                uint32_t packed;
                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &packed)) {
                        return false;
                }
                /* Unpack encoding 0 is rejected for these ops. */
                if (packed == 0)
                        return false;
                opcode |= packed << 2;

                break;

        case V3D_QPU_A_VFMIN:
        case V3D_QPU_A_VFMAX:
                if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
                    instr->alu.add.b_unpack != V3D_QPU_UNPACK_NONE) {
                        return false;
                }

                if (!v3d_qpu_float16_unpack_pack(instr->alu.add.a_unpack,
                                                 &packed)) {
                        return false;
                }
                opcode |= packed;
                break;

        default:
                /* All other ops accept no pack/unpack modifiers; NOP is
                 * exempt from the check.
                 */
                if (instr->alu.add.op != V3D_QPU_A_NOP &&
                    (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
                     instr->alu.add.a_unpack != V3D_QPU_UNPACK_NONE ||
                     instr->alu.add.b_unpack != V3D_QPU_UNPACK_NONE)) {
                        return false;
                }
                break;
        }

        *packed_instr |= QPU_SET_FIELD(mux_a, V3D_QPU_ADD_A);
        *packed_instr |= QPU_SET_FIELD(mux_b, V3D_QPU_ADD_B);
        *packed_instr |= QPU_SET_FIELD(opcode, V3D_QPU_OP_ADD);
        *packed_instr |= QPU_SET_FIELD(waddr, V3D_QPU_WADDR_A);
        if (instr->alu.add.magic_write && !no_magic_write)
                *packed_instr |= V3D_QPU_MA;

        return true;
}
1201
/* Packs the mul-ALU half of an instruction (opcode, input muxes,
 * pack/unpack modifiers, write address and magic-write bit) into
 * *packed_instr.
 *
 * Returns false when the op is unknown for this device or the requested
 * pack/unpack modifiers can't be encoded for it.
 */
static bool
v3d_qpu_mul_pack(const struct v3d_device_info *devinfo,
                 const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
{
        uint32_t mux_a = instr->alu.mul.a;
        uint32_t mux_b = instr->alu.mul.b;
        int nsrc = v3d_qpu_mul_op_num_src(instr->alu.mul.op);

        const struct opcode_desc *desc =
                lookup_opcode_from_instr(devinfo, mul_ops, ARRAY_SIZE(mul_ops),
                                         instr->alu.mul.op);

        if (!desc)
                return false;

        uint32_t opcode = desc->opcode_first;

        /* Some opcodes have a single valid value for their mux a/b, so set
         * that here.  If mux a/b determine packing, it will be set below.
         */
        if (nsrc < 2)
                mux_b = ffs(desc->mux_b_mask) - 1;

        if (nsrc < 1)
                mux_a = ffs(desc->mux_a_mask) - 1;

        switch (instr->alu.mul.op) {
        case V3D_QPU_M_FMUL: {
                uint32_t packed;

                if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
                                               &packed)) {
                        return false;
                }
                /* No need for a +1 because desc->opcode_first has a 1 in this
                 * field.
                 */
                opcode += packed << 4;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a_unpack,
                                                 &packed)) {
                        return false;
                }
                opcode |= packed << 2;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.b_unpack,
                                                 &packed)) {
                        return false;
                }
                opcode |= packed << 0;
                break;
        }

        case V3D_QPU_M_FMOV: {
                uint32_t packed;

                /* Output pack is split: its high bit goes in opcode bit 0
                 * and its low bit in mux_b bit 2; a_unpack goes in mux_b
                 * bits 1:0.  Mirrors the unpack path.
                 */
                if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
                                               &packed)) {
                        return false;
                }
                opcode |= (packed >> 1) & 1;
                mux_b = (packed & 1) << 2;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a_unpack,
                                                 &packed)) {
                        return false;
                }
                mux_b |= packed;
                break;
        }

        case V3D_QPU_M_VFMUL: {
                uint32_t packed;

                if (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE)
                        return false;

                if (!v3d_qpu_float16_unpack_pack(instr->alu.mul.a_unpack,
                                                 &packed)) {
                        return false;
                }
                /* SWAP_16 has a dedicated opcode; the rest are encoded
                 * with a +4 bias mod 8 (matching the "- 4" in the unpack
                 * path).
                 */
                if (instr->alu.mul.a_unpack == V3D_QPU_UNPACK_SWAP_16)
                        opcode = 8;
                else
                        opcode |= (packed + 4) & 7;

                if (instr->alu.mul.b_unpack != V3D_QPU_UNPACK_NONE)
                        return false;

                break;
        }

        default:
                /* NOTE(review): unlike v3d_qpu_add_pack, other mul ops do
                 * not reject stray pack/unpack modifiers here — confirm
                 * that is intentional.
                 */
                break;
        }

        *packed_instr |= QPU_SET_FIELD(mux_a, V3D_QPU_MUL_A);
        *packed_instr |= QPU_SET_FIELD(mux_b, V3D_QPU_MUL_B);

        *packed_instr |= QPU_SET_FIELD(opcode, V3D_QPU_OP_MUL);
        *packed_instr |= QPU_SET_FIELD(instr->alu.mul.waddr, V3D_QPU_WADDR_M);
        if (instr->alu.mul.magic_write)
                *packed_instr |= V3D_QPU_MM;

        return true;
}
1308
1309static bool
1310v3d_qpu_instr_unpack_alu(const struct v3d_device_info *devinfo,
1311                         uint64_t packed_instr,
1312                         struct v3d_qpu_instr *instr)
1313{
1314        instr->type = V3D_QPU_INSTR_TYPE_ALU;
1315
1316        if (!v3d_qpu_sig_unpack(devinfo,
1317                                QPU_GET_FIELD(packed_instr, V3D_QPU_SIG),
1318                                &instr->sig))
1319                return false;
1320
1321        uint32_t packed_cond = QPU_GET_FIELD(packed_instr, V3D_QPU_COND);
1322        if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) {
1323                instr->sig_addr = packed_cond & ~V3D_QPU_COND_SIG_MAGIC_ADDR;
1324                instr->sig_magic = packed_cond & V3D_QPU_COND_SIG_MAGIC_ADDR;
1325
1326                instr->flags.ac = V3D_QPU_COND_NONE;
1327                instr->flags.mc = V3D_QPU_COND_NONE;
1328                instr->flags.apf = V3D_QPU_PF_NONE;
1329                instr->flags.mpf = V3D_QPU_PF_NONE;
1330                instr->flags.auf = V3D_QPU_UF_NONE;
1331                instr->flags.muf = V3D_QPU_UF_NONE;
1332        } else {
1333                if (!v3d_qpu_flags_unpack(devinfo, packed_cond, &instr->flags))
1334                        return false;
1335        }
1336
1337        instr->raddr_a = QPU_GET_FIELD(packed_instr, V3D_QPU_RADDR_A);
1338        instr->raddr_b = QPU_GET_FIELD(packed_instr, V3D_QPU_RADDR_B);
1339
1340        if (!v3d_qpu_add_unpack(devinfo, packed_instr, instr))
1341                return false;
1342
1343        if (!v3d_qpu_mul_unpack(devinfo, packed_instr, instr))
1344                return false;
1345
1346        return true;
1347}
1348
1349static bool
1350v3d_qpu_instr_unpack_branch(const struct v3d_device_info *devinfo,
1351                            uint64_t packed_instr,
1352                            struct v3d_qpu_instr *instr)
1353{
1354        instr->type = V3D_QPU_INSTR_TYPE_BRANCH;
1355
1356        uint32_t cond = QPU_GET_FIELD(packed_instr, V3D_QPU_BRANCH_COND);
1357        if (cond == 0)
1358                instr->branch.cond = V3D_QPU_BRANCH_COND_ALWAYS;
1359        else if (V3D_QPU_BRANCH_COND_A0 + (cond - 2) <=
1360                 V3D_QPU_BRANCH_COND_ALLNA)
1361                instr->branch.cond = V3D_QPU_BRANCH_COND_A0 + (cond - 2);
1362        else
1363                return false;
1364
1365        uint32_t msfign = QPU_GET_FIELD(packed_instr, V3D_QPU_BRANCH_MSFIGN);
1366        if (msfign == 3)
1367                return false;
1368        instr->branch.msfign = msfign;
1369
1370        instr->branch.bdi = QPU_GET_FIELD(packed_instr, V3D_QPU_BRANCH_BDI);
1371
1372        instr->branch.ub = packed_instr & V3D_QPU_BRANCH_UB;
1373        if (instr->branch.ub) {
1374                instr->branch.bdu = QPU_GET_FIELD(packed_instr,
1375                                                  V3D_QPU_BRANCH_BDU);
1376        }
1377
1378        instr->branch.raddr_a = QPU_GET_FIELD(packed_instr,
1379                                              V3D_QPU_RADDR_A);
1380
1381        instr->branch.offset = 0;
1382
1383        instr->branch.offset +=
1384                QPU_GET_FIELD(packed_instr,
1385                              V3D_QPU_BRANCH_ADDR_LOW) << 3;
1386
1387        instr->branch.offset +=
1388                QPU_GET_FIELD(packed_instr,
1389                              V3D_QPU_BRANCH_ADDR_HIGH) << 24;
1390
1391        return true;
1392}
1393
1394bool
1395v3d_qpu_instr_unpack(const struct v3d_device_info *devinfo,
1396                     uint64_t packed_instr,
1397                     struct v3d_qpu_instr *instr)
1398{
1399        if (QPU_GET_FIELD(packed_instr, V3D_QPU_OP_MUL) != 0) {
1400                return v3d_qpu_instr_unpack_alu(devinfo, packed_instr, instr);
1401        } else {
1402                uint32_t sig = QPU_GET_FIELD(packed_instr, V3D_QPU_SIG);
1403
1404                if ((sig & 24) == 16) {
1405                        return v3d_qpu_instr_unpack_branch(devinfo, packed_instr,
1406                                                           instr);
1407                } else {
1408                        return false;
1409                }
1410        }
1411}
1412
/* Packs an ALU instruction's shared fields: signal, read addresses, both
 * ALU halves and the condition/flags field.
 *
 * When the signal writes a register, the condition field carries the
 * signal's destination address instead, so all flags must be NONE.
 */
static bool
v3d_qpu_instr_pack_alu(const struct v3d_device_info *devinfo,
                       const struct v3d_qpu_instr *instr,
                       uint64_t *packed_instr)
{
        uint32_t sig;
        if (!v3d_qpu_sig_pack(devinfo, &instr->sig, &sig))
                return false;
        *packed_instr |= QPU_SET_FIELD(sig, V3D_QPU_SIG);

        if (instr->type == V3D_QPU_INSTR_TYPE_ALU) {
                *packed_instr |= QPU_SET_FIELD(instr->raddr_a, V3D_QPU_RADDR_A);
                *packed_instr |= QPU_SET_FIELD(instr->raddr_b, V3D_QPU_RADDR_B);

                if (!v3d_qpu_add_pack(devinfo, instr, packed_instr))
                        return false;
                if (!v3d_qpu_mul_pack(devinfo, instr, packed_instr))
                        return false;

                uint32_t flags;
                if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) {
                        /* The cond field is repurposed for the signal's
                         * write address, so reject any set flags.
                         */
                        if (instr->flags.ac != V3D_QPU_COND_NONE ||
                            instr->flags.mc != V3D_QPU_COND_NONE ||
                            instr->flags.apf != V3D_QPU_PF_NONE ||
                            instr->flags.mpf != V3D_QPU_PF_NONE ||
                            instr->flags.auf != V3D_QPU_UF_NONE ||
                            instr->flags.muf != V3D_QPU_UF_NONE) {
                                return false;
                        }

                        flags = instr->sig_addr;
                        if (instr->sig_magic)
                                flags |= V3D_QPU_COND_SIG_MAGIC_ADDR;
                } else {
                        if (!v3d_qpu_flags_pack(devinfo, &instr->flags, &flags))
                                return false;
                }

                *packed_instr |= QPU_SET_FIELD(flags, V3D_QPU_COND);
        } else {
                /* NOTE(review): only reached when called with a non-ALU
                 * instr; the caller in this file passes ALU only.  An
                 * address-writing signal can't be expressed here.
                 */
                if (v3d_qpu_sig_writes_address(devinfo, &instr->sig))
                        return false;
        }

        return true;
}
1459
/* Packs a branch instruction: branch signal, condition, msfign, bdi/bdu,
 * split target offset and (for register-file destinations) raddr_a.
 */
static bool
v3d_qpu_instr_pack_branch(const struct v3d_device_info *devinfo,
                          const struct v3d_qpu_instr *instr,
                          uint64_t *packed_instr)
{
        /* Branches are identified by a signal field value of 16. */
        *packed_instr |= QPU_SET_FIELD(16, V3D_QPU_SIG);

        /* COND_ALWAYS is encoded as 0; other conditions start at 2
         * (mirroring the unpack path).
         */
        if (instr->branch.cond != V3D_QPU_BRANCH_COND_ALWAYS) {
                *packed_instr |= QPU_SET_FIELD(2 + (instr->branch.cond -
                                                    V3D_QPU_BRANCH_COND_A0),
                                               V3D_QPU_BRANCH_COND);
        }

        *packed_instr |= QPU_SET_FIELD(instr->branch.msfign,
                                       V3D_QPU_BRANCH_MSFIGN);

        *packed_instr |= QPU_SET_FIELD(instr->branch.bdi,
                                       V3D_QPU_BRANCH_BDI);

        if (instr->branch.ub) {
                *packed_instr |= V3D_QPU_BRANCH_UB;
                *packed_instr |= QPU_SET_FIELD(instr->branch.bdu,
                                               V3D_QPU_BRANCH_BDU);
        }

        switch (instr->branch.bdi) {
        case V3D_QPU_BRANCH_DEST_ABS:
        case V3D_QPU_BRANCH_DEST_REL:
                /* NOTE(review): msfign was already packed unconditionally
                 * above; this second OR of the same value is redundant but
                 * harmless.
                 */
                *packed_instr |= QPU_SET_FIELD(instr->branch.msfign,
                                               V3D_QPU_BRANCH_MSFIGN);

                /* The offset is split: everything below bit 24 (shifted
                 * down by 3) goes in ADDR_LOW, the top byte in ADDR_HIGH.
                 */
                *packed_instr |= QPU_SET_FIELD((instr->branch.offset &
                                                ~0xff000000) >> 3,
                                               V3D_QPU_BRANCH_ADDR_LOW);

                *packed_instr |= QPU_SET_FIELD(instr->branch.offset >> 24,
                                               V3D_QPU_BRANCH_ADDR_HIGH);
                break;
        default:
                break;
        }

        if (instr->branch.bdi == V3D_QPU_BRANCH_DEST_REGFILE ||
            instr->branch.bdu == V3D_QPU_BRANCH_DEST_REGFILE) {
                *packed_instr |= QPU_SET_FIELD(instr->branch.raddr_a,
                                               V3D_QPU_RADDR_A);
        }

        return true;
}
1510
1511bool
1512v3d_qpu_instr_pack(const struct v3d_device_info *devinfo,
1513                   const struct v3d_qpu_instr *instr,
1514                   uint64_t *packed_instr)
1515{
1516        *packed_instr = 0;
1517
1518        switch (instr->type) {
1519        case V3D_QPU_INSTR_TYPE_ALU:
1520                return v3d_qpu_instr_pack_alu(devinfo, instr, packed_instr);
1521        case V3D_QPU_INSTR_TYPE_BRANCH:
1522                return v3d_qpu_instr_pack_branch(devinfo, instr, packed_instr);
1523        default:
1524                return false;
1525        }
1526}
1527