1/*
2 * Copyright (C) 2020 Collabora Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24#include "compiler.h"
25#include "bi_builder.h"
26
27/* Not all 8-bit and 16-bit instructions support all swizzles on all sources.
28 * These passes, intended to run after NIR->BIR but before scheduling/RA, lower
29 * away swizzles that cannot be represented. In the future, we should try to
30 * recombine swizzles where we can as an optimization.
31 */
32
33static void
34bi_lower_swizzle_16(bi_context *ctx, bi_instr *ins, unsigned src)
35{
36        /* Identity is ok */
37        if (ins->src[src].swizzle == BI_SWIZZLE_H01)
38                return;
39
40        /* TODO: Use the opcode table and be a lot more methodical about this... */
41        switch (ins->op) {
42        /* Some instructions used with 16-bit data never have swizzles */
43        case BI_OPCODE_CSEL_V2F16:
44        case BI_OPCODE_CSEL_V2I16:
45        case BI_OPCODE_CSEL_V2S16:
46        case BI_OPCODE_CSEL_V2U16:
47
48        /* Despite ostensibly being 32-bit instructions, CLPER does not
49         * inherently interpret the data, so it can be used for v2f16
50         * derivatives, which might require swizzle lowering */
51        case BI_OPCODE_CLPER_I32:
52        case BI_OPCODE_CLPER_V6_I32:
53
54        /* Similarly, CSEL.i32 consumes a boolean as a 32-bit argument. If the
55         * boolean is implemented as a 16-bit integer, the swizzle is needed
56         * for correct operation if the instruction producing the 16-bit
57         * boolean does not replicate to both halves of the containing 32-bit
58         * register. As such, we may need to lower a swizzle.
59         *
60         * This is a silly hack. Ideally, code gen would be smart enough to
61         * avoid this case (by replicating). In practice, silly hardware design
62         * decisions force our hand here.
63         */
64        case BI_OPCODE_MUX_I32:
65        case BI_OPCODE_CSEL_I32:
66            break;
67
68        case BI_OPCODE_IADD_V2S16:
69        case BI_OPCODE_IADD_V2U16:
70        case BI_OPCODE_ISUB_V2S16:
71        case BI_OPCODE_ISUB_V2U16:
72            if (src == 0 && ins->src[src].swizzle != BI_SWIZZLE_H10)
73                    break;
74            else
75                    return;
76        case BI_OPCODE_LSHIFT_AND_V2I16:
77        case BI_OPCODE_LSHIFT_OR_V2I16:
78        case BI_OPCODE_LSHIFT_XOR_V2I16:
79        case BI_OPCODE_RSHIFT_AND_V2I16:
80        case BI_OPCODE_RSHIFT_OR_V2I16:
81        case BI_OPCODE_RSHIFT_XOR_V2I16:
82            if (src == 2)
83                    return;
84            else
85                    break;
86
87        /* We don't want to deal with reswizzling logic in modifier prop. Move
88         * the swizzle outside, it's easier for clamp propagation. */
89        case BI_OPCODE_FCLAMP_V2F16:
90        {
91                bi_builder b = bi_init_builder(ctx, bi_after_instr(ins));
92                bi_index dest = ins->dest[0];
93                bi_index tmp = bi_temp(ctx);
94
95                ins->dest[0] = tmp;
96                bi_swz_v2i16_to(&b, dest, bi_replace_index(ins->src[0], tmp));
97                return;
98        }
99
100        default:
101            return;
102        }
103
104        /* If the instruction is scalar we can ignore the other component */
105        if (ins->dest[0].swizzle == BI_SWIZZLE_H00 &&
106                        ins->src[src].swizzle == BI_SWIZZLE_H00)
107        {
108                ins->src[src].swizzle = BI_SWIZZLE_H01;
109                return;
110        }
111
112        /* Lower it away */
113        bi_builder b = bi_init_builder(ctx, bi_before_instr(ins));
114        ins->src[src] = bi_replace_index(ins->src[src],
115                        bi_swz_v2i16(&b, ins->src[src]));
116        ins->src[src].swizzle = BI_SWIZZLE_H01;
117}
118
119void
120bi_lower_swizzle(bi_context *ctx)
121{
122        bi_foreach_instr_global_safe(ctx, ins) {
123                bi_foreach_src(ins, s) {
124                        if (!bi_is_null(ins->src[s]))
125                                bi_lower_swizzle_16(ctx, ins, s);
126                }
127        }
128}
129