1/* 2 * Copyright (C) 2020 Collabora Ltd. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 */ 23 24#include "compiler.h" 25#include "bi_builder.h" 26 27/* Not all 8-bit and 16-bit instructions support all swizzles on all sources. 28 * These passes, intended to run after NIR->BIR but before scheduling/RA, lower 29 * away swizzles that cannot be represented. In the future, we should try to 30 * recombine swizzles where we can as an optimization. 31 */ 32 33static void 34bi_lower_swizzle_16(bi_context *ctx, bi_instr *ins, unsigned src) 35{ 36 /* Identity is ok */ 37 if (ins->src[src].swizzle == BI_SWIZZLE_H01) 38 return; 39 40 /* TODO: Use the opcode table and be a lot more methodical about this... */ 41 switch (ins->op) { 42 /* Some instructions used with 16-bit data never have swizzles */ 43 case BI_OPCODE_CSEL_V2F16: 44 case BI_OPCODE_CSEL_V2I16: 45 case BI_OPCODE_CSEL_V2S16: 46 case BI_OPCODE_CSEL_V2U16: 47 48 /* Despite ostensibly being 32-bit instructions, CLPER does not 49 * inherently interpret the data, so it can be used for v2f16 50 * derivatives, which might require swizzle lowering */ 51 case BI_OPCODE_CLPER_I32: 52 case BI_OPCODE_CLPER_V6_I32: 53 54 /* Similarly, CSEL.i32 consumes a boolean as a 32-bit argument. If the 55 * boolean is implemented as a 16-bit integer, the swizzle is needed 56 * for correct operation if the instruction producing the 16-bit 57 * boolean does not replicate to both halves of the containing 32-bit 58 * register. As such, we may need to lower a swizzle. 59 * 60 * This is a silly hack. Ideally, code gen would be smart enough to 61 * avoid this case (by replicating). In practice, silly hardware design 62 * decisions force our hand here. 63 */ 64 case BI_OPCODE_MUX_I32: 65 case BI_OPCODE_CSEL_I32: 66 break; 67 68 case BI_OPCODE_IADD_V2S16: 69 case BI_OPCODE_IADD_V2U16: 70 case BI_OPCODE_ISUB_V2S16: 71 case BI_OPCODE_ISUB_V2U16: 72 if (src == 0 && ins->src[src].swizzle != BI_SWIZZLE_H10) 73 break; 74 else 75 return; 76 case BI_OPCODE_LSHIFT_AND_V2I16: 77 case BI_OPCODE_LSHIFT_OR_V2I16: 78 case BI_OPCODE_LSHIFT_XOR_V2I16: 79 case BI_OPCODE_RSHIFT_AND_V2I16: 80 case BI_OPCODE_RSHIFT_OR_V2I16: 81 case BI_OPCODE_RSHIFT_XOR_V2I16: 82 if (src == 2) 83 return; 84 else 85 break; 86 87 /* We don't want to deal with reswizzling logic in modifier prop. Move 88 * the swizzle outside, it's easier for clamp propagation. */ 89 case BI_OPCODE_FCLAMP_V2F16: 90 { 91 bi_builder b = bi_init_builder(ctx, bi_after_instr(ins)); 92 bi_index dest = ins->dest[0]; 93 bi_index tmp = bi_temp(ctx); 94 95 ins->dest[0] = tmp; 96 bi_swz_v2i16_to(&b, dest, bi_replace_index(ins->src[0], tmp)); 97 return; 98 } 99 100 default: 101 return; 102 } 103 104 /* If the instruction is scalar we can ignore the other component */ 105 if (ins->dest[0].swizzle == BI_SWIZZLE_H00 && 106 ins->src[src].swizzle == BI_SWIZZLE_H00) 107 { 108 ins->src[src].swizzle = BI_SWIZZLE_H01; 109 return; 110 } 111 112 /* Lower it away */ 113 bi_builder b = bi_init_builder(ctx, bi_before_instr(ins)); 114 ins->src[src] = bi_replace_index(ins->src[src], 115 bi_swz_v2i16(&b, ins->src[src])); 116 ins->src[src].swizzle = BI_SWIZZLE_H01; 117} 118 119void 120bi_lower_swizzle(bi_context *ctx) 121{ 122 bi_foreach_instr_global_safe(ctx, ins) { 123 bi_foreach_src(ins, s) { 124 if (!bi_is_null(ins->src[s])) 125 bi_lower_swizzle_16(ctx, ins, s); 126 } 127 } 128} 129