196c5ddc4Srjs/* 296c5ddc4Srjs * Copyright (C) 2020 Collabora, Ltd. 396c5ddc4Srjs * 496c5ddc4Srjs * Permission is hereby granted, free of charge, to any person obtaining a 596c5ddc4Srjs * copy of this software and associated documentation files (the "Software"), 696c5ddc4Srjs * to deal in the Software without restriction, including without limitation 796c5ddc4Srjs * the rights to use, copy, modify, merge, publish, distribute, sublicense, 896c5ddc4Srjs * and/or sell copies of the Software, and to permit persons to whom the 996c5ddc4Srjs * Software is furnished to do so, subject to the following conditions: 1096c5ddc4Srjs * 1196c5ddc4Srjs * The above copyright notice and this permission notice (including the next 1296c5ddc4Srjs * paragraph) shall be included in all copies or substantial portions of the 1396c5ddc4Srjs * Software. 1496c5ddc4Srjs * 1596c5ddc4Srjs * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1696c5ddc4Srjs * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1796c5ddc4Srjs * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1896c5ddc4Srjs * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 1996c5ddc4Srjs * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 2096c5ddc4Srjs * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 2196c5ddc4Srjs * SOFTWARE. 
2296c5ddc4Srjs */ 2396c5ddc4Srjs 2496c5ddc4Srjs/* Autogenerated file, do not edit */ 2596c5ddc4Srjs 2696c5ddc4Srjs#include "compiler.h" 2796c5ddc4Srjsstatic inline unsigned 2896c5ddc4Srjsbi_pack_fma_arshift_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 2996c5ddc4Srjs{ 3096c5ddc4Srjs assert((1 << src1) & 0x8); 3196c5ddc4Srjs static uint8_t lane2_table[] = { ~0, ~0, ~0, ~0, 0, 1, 2, 3, ~0, ~0, ~0, ~0, ~0 }; 3296c5ddc4Srjs assert(I->src[2].swizzle < 13); 3396c5ddc4Srjs unsigned lane2 = lane2_table[I->src[2].swizzle]; 3496c5ddc4Srjs assert(lane2 < 4); 3596c5ddc4Srjs return 0x335018 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (lane2 << 9); 3696c5ddc4Srjs} 3796c5ddc4Srjs 3896c5ddc4Srjsstatic inline unsigned 3996c5ddc4Srjsbi_pack_fma_arshift_v2i16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 4096c5ddc4Srjs{ 4196c5ddc4Srjs assert((1 << src1) & 0x8); 4296c5ddc4Srjs static uint8_t lanes2_table[] = { ~0, ~0, ~0, ~0, 0, 1, 2, 3, 4, 5, ~0, ~0, 6 }; 4396c5ddc4Srjs assert(I->src[2].swizzle < 13); 4496c5ddc4Srjs unsigned lanes2 = lanes2_table[I->src[2].swizzle]; 4596c5ddc4Srjs assert(lanes2 < 8); 4696c5ddc4Srjs if ((lanes2 == 0) || (lanes2 == 1) || (lanes2 == 2) || (lanes2 == 3)) { 4796c5ddc4Srjs unsigned derived_9 = 0; 4896c5ddc4Srjs if (lanes2 == 0) derived_9 = 0; 4996c5ddc4Srjs else if (lanes2 == 1) derived_9 = 1; 5096c5ddc4Srjs else if (lanes2 == 2) derived_9 = 2; 5196c5ddc4Srjs else if (lanes2 == 3) derived_9 = 3; 5296c5ddc4Srjs else unreachable("No pattern match at pos 9"); 5396c5ddc4Srjs 5496c5ddc4Srjs return 0x334818 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_9 << 9); 5596c5ddc4Srjs } else if ((lanes2 == 4) || (lanes2 == 5) || (lanes2 == 6)) { 5696c5ddc4Srjs unsigned derived_9 = 0; 5796c5ddc4Srjs if (lanes2 == 4) derived_9 = 1; 5896c5ddc4Srjs else if (lanes2 == 5) derived_9 = 2; 
5996c5ddc4Srjs else if (lanes2 == 6) derived_9 = 3; 6096c5ddc4Srjs else unreachable("No pattern match at pos 9"); 6196c5ddc4Srjs 6296c5ddc4Srjs return 0x335818 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_9 << 9); 6396c5ddc4Srjs } else { 6496c5ddc4Srjs unreachable("No matching state found in fma_arshift_v2i16"); 6596c5ddc4Srjs } 6696c5ddc4Srjs} 6796c5ddc4Srjs 6896c5ddc4Srjsstatic inline unsigned 6996c5ddc4Srjsbi_pack_fma_arshift_v4i8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 7096c5ddc4Srjs{ 7196c5ddc4Srjs assert((1 << src1) & 0x8); 7296c5ddc4Srjs static uint8_t lanes2_table[] = { ~0, 0, ~0, ~0, 1, 2, 3, 4, ~0, ~0, ~0, ~0, ~0 }; 7396c5ddc4Srjs assert(I->src[2].swizzle < 13); 7496c5ddc4Srjs unsigned lanes2 = lanes2_table[I->src[2].swizzle]; 7596c5ddc4Srjs assert(lanes2 < 8); 7696c5ddc4Srjs if (lanes2 != 0) { 7796c5ddc4Srjs unsigned derived_9 = 0; 7896c5ddc4Srjs if (lanes2 == 1) derived_9 = 0; 7996c5ddc4Srjs else if (lanes2 == 2) derived_9 = 1; 8096c5ddc4Srjs else if (lanes2 == 3) derived_9 = 2; 8196c5ddc4Srjs else if (lanes2 == 4) derived_9 = 3; 8296c5ddc4Srjs else unreachable("No pattern match at pos 9"); 8396c5ddc4Srjs 8496c5ddc4Srjs return 0x334018 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_9 << 9); 8596c5ddc4Srjs } else if (lanes2 == 0) { 8696c5ddc4Srjs return 0x335818 | (src0 << 0) | (src1 << 3) | (src2 << 6); 8796c5ddc4Srjs } else { 8896c5ddc4Srjs unreachable("No matching state found in fma_arshift_v4i8"); 8996c5ddc4Srjs } 9096c5ddc4Srjs} 9196c5ddc4Srjs 9296c5ddc4Srjsstatic inline unsigned 9396c5ddc4Srjsbi_pack_fma_arshift_double_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 9496c5ddc4Srjs{ 9596c5ddc4Srjs unsigned bytes2 = I->bytes2; 9696c5ddc4Srjs assert(bytes2 < 2); 9796c5ddc4Srjs static uint8_t lane2_table[] = { ~0, ~0, ~0, ~0, 0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0 
}; 9896c5ddc4Srjs assert(I->src[2].swizzle < 13); 9996c5ddc4Srjs unsigned lane2 = lane2_table[I->src[2].swizzle]; 10096c5ddc4Srjs assert(lane2 < 2); 10196c5ddc4Srjs unsigned result_word = I->result_word; 10296c5ddc4Srjs assert(result_word < 2); 10396c5ddc4Srjs return 0x33e000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (bytes2 << 9) | (lane2 << 10) | (result_word << 11); 10496c5ddc4Srjs} 10596c5ddc4Srjs 10696c5ddc4Srjsstatic inline unsigned 10796c5ddc4Srjsbi_pack_fma_atom_c_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 10896c5ddc4Srjs{ 10996c5ddc4Srjs assert((1 << src0) & 0xf3); 11096c5ddc4Srjs assert((1 << src1) & 0xf3); 11196c5ddc4Srjs assert((1 << src2) & 0xf7); 11296c5ddc4Srjs static uint8_t atom_opc_table[] = { 0, 2, 8, 9, 10, 11, 12, 13, 14, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 11396c5ddc4Srjs assert(I->atom_opc < 16); 11496c5ddc4Srjs unsigned atom_opc = atom_opc_table[I->atom_opc]; 11596c5ddc4Srjs assert(atom_opc < 16); 11696c5ddc4Srjs return 0x2f4000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (atom_opc << 9); 11796c5ddc4Srjs} 11896c5ddc4Srjs 11996c5ddc4Srjsstatic inline unsigned 12096c5ddc4Srjsbi_pack_fma_atom_c_i64(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 12196c5ddc4Srjs{ 12296c5ddc4Srjs assert((1 << src0) & 0xf3); 12396c5ddc4Srjs assert((1 << src1) & 0xf3); 12496c5ddc4Srjs assert((1 << src2) & 0xf7); 12596c5ddc4Srjs static uint8_t atom_opc_table[] = { 3, 2, 8, 9, 10, 11, 12, 13, 14, 0, 1, ~0, ~0, ~0, ~0, ~0 }; 12696c5ddc4Srjs assert(I->atom_opc < 16); 12796c5ddc4Srjs unsigned atom_opc = atom_opc_table[I->atom_opc]; 12896c5ddc4Srjs assert(atom_opc < 16); 12996c5ddc4Srjs return 0x2f0000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (atom_opc << 9); 13096c5ddc4Srjs} 13196c5ddc4Srjs 13296c5ddc4Srjsstatic inline unsigned 13396c5ddc4Srjsbi_pack_fma_atom_c1_i32(bi_instr *I, enum 
bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 13496c5ddc4Srjs{ 13596c5ddc4Srjs assert((1 << src0) & 0xf3); 13696c5ddc4Srjs assert((1 << src1) & 0xf3); 13796c5ddc4Srjs static uint8_t atom_opc_table[] = { 5, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, 0, 1, 2, 3, 4 }; 13896c5ddc4Srjs assert(I->atom_opc < 16); 13996c5ddc4Srjs unsigned atom_opc = atom_opc_table[I->atom_opc]; 14096c5ddc4Srjs assert(atom_opc < 8); 14196c5ddc4Srjs return 0x2f5e00 | (src0 << 0) | (src1 << 3) | (atom_opc << 6); 14296c5ddc4Srjs} 14396c5ddc4Srjs 14496c5ddc4Srjsstatic inline unsigned 14596c5ddc4Srjsbi_pack_fma_atom_c1_i64(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 14696c5ddc4Srjs{ 14796c5ddc4Srjs assert((1 << src0) & 0xf3); 14896c5ddc4Srjs assert((1 << src1) & 0xf3); 14996c5ddc4Srjs static uint8_t atom_opc_table[] = { 5, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, 0, 1, 2, 3, 4 }; 15096c5ddc4Srjs assert(I->atom_opc < 16); 15196c5ddc4Srjs unsigned atom_opc = atom_opc_table[I->atom_opc]; 15296c5ddc4Srjs assert(atom_opc < 8); 15396c5ddc4Srjs return 0x2f1e00 | (src0 << 0) | (src1 << 3) | (atom_opc << 6); 15496c5ddc4Srjs} 15596c5ddc4Srjs 15696c5ddc4Srjsstatic inline unsigned 15796c5ddc4Srjsbi_pack_fma_atom_c1_return_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 15896c5ddc4Srjs{ 15996c5ddc4Srjs assert((1 << src0) & 0xf3); 16096c5ddc4Srjs assert((1 << src1) & 0xf3); 16196c5ddc4Srjs static uint8_t atom_opc_table[] = { 5, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, 0, 1, 2, 3, 4 }; 16296c5ddc4Srjs assert(I->atom_opc < 16); 16396c5ddc4Srjs unsigned atom_opc = atom_opc_table[I->atom_opc]; 16496c5ddc4Srjs assert(atom_opc < 8); 16596c5ddc4Srjs return 0x2f7e00 | (src0 << 0) | (src1 << 3) | (atom_opc << 6); 16696c5ddc4Srjs} 16796c5ddc4Srjs 16896c5ddc4Srjsstatic inline 
unsigned 16996c5ddc4Srjsbi_pack_fma_atom_c1_return_i64(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 17096c5ddc4Srjs{ 17196c5ddc4Srjs assert((1 << src0) & 0xf3); 17296c5ddc4Srjs assert((1 << src1) & 0xf3); 17396c5ddc4Srjs static uint8_t atom_opc_table[] = { 5, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, 0, 1, 2, 3, 4 }; 17496c5ddc4Srjs assert(I->atom_opc < 16); 17596c5ddc4Srjs unsigned atom_opc = atom_opc_table[I->atom_opc]; 17696c5ddc4Srjs assert(atom_opc < 8); 17796c5ddc4Srjs return 0x2f3e00 | (src0 << 0) | (src1 << 3) | (atom_opc << 6); 17896c5ddc4Srjs} 17996c5ddc4Srjs 18096c5ddc4Srjsstatic inline unsigned 18196c5ddc4Srjsbi_pack_fma_atom_c_return_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 18296c5ddc4Srjs{ 18396c5ddc4Srjs assert((1 << src0) & 0xf3); 18496c5ddc4Srjs assert((1 << src1) & 0xf3); 18596c5ddc4Srjs assert((1 << src2) & 0xf7); 18696c5ddc4Srjs static uint8_t atom_opc_table[] = { 0, 2, 8, 9, 10, 11, 12, 13, 14, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 18796c5ddc4Srjs assert(I->atom_opc < 16); 18896c5ddc4Srjs unsigned atom_opc = atom_opc_table[I->atom_opc]; 18996c5ddc4Srjs assert(atom_opc < 16); 19096c5ddc4Srjs return 0x2f6000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (atom_opc << 9); 19196c5ddc4Srjs} 19296c5ddc4Srjs 19396c5ddc4Srjsstatic inline unsigned 19496c5ddc4Srjsbi_pack_fma_atom_c_return_i64(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 19596c5ddc4Srjs{ 19696c5ddc4Srjs assert((1 << src0) & 0xf3); 19796c5ddc4Srjs assert((1 << src1) & 0xf3); 19896c5ddc4Srjs assert((1 << src2) & 0xf7); 19996c5ddc4Srjs static uint8_t atom_opc_table[] = { 3, 2, 8, 9, 10, 11, 12, 13, 14, 0, 1, ~0, ~0, ~0, ~0, ~0 }; 20096c5ddc4Srjs assert(I->atom_opc < 16); 20196c5ddc4Srjs unsigned atom_opc = atom_opc_table[I->atom_opc]; 
20296c5ddc4Srjs assert(atom_opc < 16); 20396c5ddc4Srjs return 0x2f2000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (atom_opc << 9); 20496c5ddc4Srjs} 20596c5ddc4Srjs 20696c5ddc4Srjsstatic inline unsigned 20796c5ddc4Srjsbi_pack_fma_atom_post_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 20896c5ddc4Srjs{ 20996c5ddc4Srjs static uint8_t atom_opc_table[] = { 0, 2, 8, 9, 10, 11, 12, 13, 14, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 21096c5ddc4Srjs assert(I->atom_opc < 16); 21196c5ddc4Srjs unsigned atom_opc = atom_opc_table[I->atom_opc]; 21296c5ddc4Srjs assert(atom_opc < 16); 21396c5ddc4Srjs return 0x6ee400 | (src0 << 0) | (src1 << 3) | (atom_opc << 6); 21496c5ddc4Srjs} 21596c5ddc4Srjs 21696c5ddc4Srjsstatic inline unsigned 21796c5ddc4Srjsbi_pack_fma_atom_post_i64(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 21896c5ddc4Srjs{ 21996c5ddc4Srjs static uint8_t atom_opc_table[] = { 3, 2, 8, 9, 10, 11, 12, 13, 14, 0, 1, ~0, ~0, ~0, ~0, ~0 }; 22096c5ddc4Srjs assert(I->atom_opc < 16); 22196c5ddc4Srjs unsigned atom_opc = atom_opc_table[I->atom_opc]; 22296c5ddc4Srjs assert(atom_opc < 16); 22396c5ddc4Srjs return 0x6ee000 | (src0 << 0) | (src1 << 3) | (atom_opc << 6); 22496c5ddc4Srjs} 22596c5ddc4Srjs 22696c5ddc4Srjsstatic inline unsigned 22796c5ddc4Srjsbi_pack_fma_atom_pre_i64(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 22896c5ddc4Srjs{ 22996c5ddc4Srjs static uint8_t atom_opc_table[] = { 3, 2, 8, 9, 10, 11, 12, 13, 14, 0, 1, ~0, ~0, ~0, ~0, ~0 }; 23096c5ddc4Srjs assert(I->atom_opc < 16); 23196c5ddc4Srjs unsigned atom_opc = atom_opc_table[I->atom_opc]; 23296c5ddc4Srjs assert(atom_opc < 16); 23396c5ddc4Srjs return 0x6ec000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (atom_opc << 9); 23496c5ddc4Srjs} 23596c5ddc4Srjs 23696c5ddc4Srjsstatic 
inline unsigned 23796c5ddc4Srjsbi_pack_fma_bitrev_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 23896c5ddc4Srjs{ 23996c5ddc4Srjs 24096c5ddc4Srjs return 0x701fc0 | (src0 << 0); 24196c5ddc4Srjs} 24296c5ddc4Srjs 24396c5ddc4Srjsstatic inline unsigned 24496c5ddc4Srjsbi_pack_fma_clz_u32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 24596c5ddc4Srjs{ 24696c5ddc4Srjs unsigned mask = I->mask; 24796c5ddc4Srjs assert(mask < 2); 24896c5ddc4Srjs return 0x701fd0 | (src0 << 0) | (mask << 3); 24996c5ddc4Srjs} 25096c5ddc4Srjs 25196c5ddc4Srjsstatic inline unsigned 25296c5ddc4Srjsbi_pack_fma_clz_v2u16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 25396c5ddc4Srjs{ 25496c5ddc4Srjs unsigned mask = I->mask; 25596c5ddc4Srjs assert(mask < 2); 25696c5ddc4Srjs static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 25796c5ddc4Srjs assert(I->src[0].swizzle < 13); 25896c5ddc4Srjs unsigned swz0 = swz0_table[I->src[0].swizzle]; 25996c5ddc4Srjs assert(swz0 < 4); 26096c5ddc4Srjs return 0x701ec0 | (src0 << 0) | (mask << 3) | (swz0 << 4); 26196c5ddc4Srjs} 26296c5ddc4Srjs 26396c5ddc4Srjsstatic inline unsigned 26496c5ddc4Srjsbi_pack_fma_clz_v4u8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 26596c5ddc4Srjs{ 26696c5ddc4Srjs unsigned mask = I->mask; 26796c5ddc4Srjs assert(mask < 2); 26896c5ddc4Srjs return 0x701f90 | (src0 << 0) | (mask << 3); 26996c5ddc4Srjs} 27096c5ddc4Srjs 27196c5ddc4Srjsstatic inline unsigned 27296c5ddc4Srjsbi_pack_fma_csel_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 27396c5ddc4Srjs{ 27496c5ddc4Srjs unsigned cmpf = 
I->cmpf; 27596c5ddc4Srjs assert(cmpf < 8); 27696c5ddc4Srjs if ((cmpf == 4) || (cmpf == 5)) { 27796c5ddc4Srjs { unsigned temp = src0; src0 = src1; src1 = temp; } 27896c5ddc4Srjs if (cmpf == 4) cmpf = 1; 27996c5ddc4Srjs else if (cmpf == 5) cmpf = 2; 28096c5ddc4Srjs } 28196c5ddc4Srjs 28296c5ddc4Srjs if (cmpf == 3) { 28396c5ddc4Srjs { unsigned temp = src2; src2 = src3; src3 = temp; } 28496c5ddc4Srjs if (cmpf == 3) cmpf = 0; 28596c5ddc4Srjs } 28696c5ddc4Srjs 28796c5ddc4Srjs unsigned derived_12 = 0; 28896c5ddc4Srjs if (cmpf == 0) derived_12 = 0; 28996c5ddc4Srjs else if (cmpf == 1) derived_12 = 1; 29096c5ddc4Srjs else if (cmpf == 2) derived_12 = 2; 29196c5ddc4Srjs else unreachable("No pattern match at pos 12"); 29296c5ddc4Srjs 29396c5ddc4Srjs return 0x2e0000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (derived_12 << 12); 29496c5ddc4Srjs} 29596c5ddc4Srjs 29696c5ddc4Srjsstatic inline unsigned 29796c5ddc4Srjsbi_pack_fma_csel_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 29896c5ddc4Srjs{ 29996c5ddc4Srjs static uint8_t cmpf_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0 }; 30096c5ddc4Srjs assert(I->cmpf < 9); 30196c5ddc4Srjs unsigned cmpf = cmpf_table[I->cmpf]; 30296c5ddc4Srjs assert(cmpf < 2); 30396c5ddc4Srjs if (cmpf == 1) { 30496c5ddc4Srjs { unsigned temp = src2; src2 = src3; src3 = temp; } 30596c5ddc4Srjs if (cmpf == 1) cmpf = 0; 30696c5ddc4Srjs } 30796c5ddc4Srjs 30896c5ddc4Srjs unsigned derived_12 = 0; 30996c5ddc4Srjs if (cmpf == 0) derived_12 = 3; 31096c5ddc4Srjs else unreachable("No pattern match at pos 12"); 31196c5ddc4Srjs 31296c5ddc4Srjs return 0x2e0000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (derived_12 << 12); 31396c5ddc4Srjs} 31496c5ddc4Srjs 31596c5ddc4Srjsstatic inline unsigned 31696c5ddc4Srjsbi_pack_fma_csel_s32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 
31796c5ddc4Srjs{ 31896c5ddc4Srjs static uint8_t cmpf_table[] = { ~0, 0, 1, ~0, 2, 3, ~0, ~0, ~0 }; 31996c5ddc4Srjs assert(I->cmpf < 9); 32096c5ddc4Srjs unsigned cmpf = cmpf_table[I->cmpf]; 32196c5ddc4Srjs assert(cmpf < 4); 32296c5ddc4Srjs if ((cmpf == 2) || (cmpf == 3)) { 32396c5ddc4Srjs { unsigned temp = src0; src0 = src1; src1 = temp; } 32496c5ddc4Srjs if (cmpf == 2) cmpf = 0; 32596c5ddc4Srjs else if (cmpf == 3) cmpf = 1; 32696c5ddc4Srjs } 32796c5ddc4Srjs 32896c5ddc4Srjs unsigned derived_12 = 0; 32996c5ddc4Srjs if (cmpf == 0) derived_12 = 0; 33096c5ddc4Srjs else if (cmpf == 1) derived_12 = 1; 33196c5ddc4Srjs else unreachable("No pattern match at pos 12"); 33296c5ddc4Srjs 33396c5ddc4Srjs return 0x2e4000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (derived_12 << 12); 33496c5ddc4Srjs} 33596c5ddc4Srjs 33696c5ddc4Srjsstatic inline unsigned 33796c5ddc4Srjsbi_pack_fma_csel_u32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 33896c5ddc4Srjs{ 33996c5ddc4Srjs static uint8_t cmpf_table[] = { ~0, 0, 1, ~0, 2, 3, ~0, ~0, ~0 }; 34096c5ddc4Srjs assert(I->cmpf < 9); 34196c5ddc4Srjs unsigned cmpf = cmpf_table[I->cmpf]; 34296c5ddc4Srjs assert(cmpf < 4); 34396c5ddc4Srjs if ((cmpf == 2) || (cmpf == 3)) { 34496c5ddc4Srjs { unsigned temp = src0; src0 = src1; src1 = temp; } 34596c5ddc4Srjs if (cmpf == 2) cmpf = 0; 34696c5ddc4Srjs else if (cmpf == 3) cmpf = 1; 34796c5ddc4Srjs } 34896c5ddc4Srjs 34996c5ddc4Srjs unsigned derived_12 = 0; 35096c5ddc4Srjs if (cmpf == 0) derived_12 = 0; 35196c5ddc4Srjs else if (cmpf == 1) derived_12 = 1; 35296c5ddc4Srjs else unreachable("No pattern match at pos 12"); 35396c5ddc4Srjs 35496c5ddc4Srjs return 0x2e6000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (derived_12 << 12); 35596c5ddc4Srjs} 35696c5ddc4Srjs 35796c5ddc4Srjsstatic inline unsigned 35896c5ddc4Srjsbi_pack_fma_csel_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src 
src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 35996c5ddc4Srjs{ 36096c5ddc4Srjs unsigned cmpf = I->cmpf; 36196c5ddc4Srjs assert(cmpf < 8); 36296c5ddc4Srjs if ((cmpf == 4) || (cmpf == 5)) { 36396c5ddc4Srjs { unsigned temp = src0; src0 = src1; src1 = temp; } 36496c5ddc4Srjs if (cmpf == 4) cmpf = 1; 36596c5ddc4Srjs else if (cmpf == 5) cmpf = 2; 36696c5ddc4Srjs } 36796c5ddc4Srjs 36896c5ddc4Srjs if (cmpf == 3) { 36996c5ddc4Srjs { unsigned temp = src2; src2 = src3; src3 = temp; } 37096c5ddc4Srjs if (cmpf == 3) cmpf = 0; 37196c5ddc4Srjs } 37296c5ddc4Srjs 37396c5ddc4Srjs unsigned derived_12 = 0; 37496c5ddc4Srjs if (cmpf == 0) derived_12 = 0; 37596c5ddc4Srjs else if (cmpf == 1) derived_12 = 1; 37696c5ddc4Srjs else if (cmpf == 2) derived_12 = 2; 37796c5ddc4Srjs else unreachable("No pattern match at pos 12"); 37896c5ddc4Srjs 37996c5ddc4Srjs return 0x6e0000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (derived_12 << 12); 38096c5ddc4Srjs} 38196c5ddc4Srjs 38296c5ddc4Srjsstatic inline unsigned 38396c5ddc4Srjsbi_pack_fma_csel_v2i16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 38496c5ddc4Srjs{ 38596c5ddc4Srjs static uint8_t cmpf_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0 }; 38696c5ddc4Srjs assert(I->cmpf < 9); 38796c5ddc4Srjs unsigned cmpf = cmpf_table[I->cmpf]; 38896c5ddc4Srjs assert(cmpf < 2); 38996c5ddc4Srjs if (cmpf == 1) { 39096c5ddc4Srjs { unsigned temp = src2; src2 = src3; src3 = temp; } 39196c5ddc4Srjs if (cmpf == 1) cmpf = 0; 39296c5ddc4Srjs } 39396c5ddc4Srjs 39496c5ddc4Srjs unsigned derived_12 = 0; 39596c5ddc4Srjs if (cmpf == 0) derived_12 = 3; 39696c5ddc4Srjs else unreachable("No pattern match at pos 12"); 39796c5ddc4Srjs 39896c5ddc4Srjs return 0x6e0000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (derived_12 << 12); 39996c5ddc4Srjs} 40096c5ddc4Srjs 40196c5ddc4Srjsstatic inline unsigned 40296c5ddc4Srjsbi_pack_fma_csel_v2s16(bi_instr *I, 
enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 40396c5ddc4Srjs{ 40496c5ddc4Srjs static uint8_t cmpf_table[] = { ~0, 0, 1, ~0, 2, 3, ~0, ~0, ~0 }; 40596c5ddc4Srjs assert(I->cmpf < 9); 40696c5ddc4Srjs unsigned cmpf = cmpf_table[I->cmpf]; 40796c5ddc4Srjs assert(cmpf < 4); 40896c5ddc4Srjs if ((cmpf == 2) || (cmpf == 3)) { 40996c5ddc4Srjs { unsigned temp = src0; src0 = src1; src1 = temp; } 41096c5ddc4Srjs if (cmpf == 2) cmpf = 0; 41196c5ddc4Srjs else if (cmpf == 3) cmpf = 1; 41296c5ddc4Srjs } 41396c5ddc4Srjs 41496c5ddc4Srjs unsigned derived_12 = 0; 41596c5ddc4Srjs if (cmpf == 0) derived_12 = 0; 41696c5ddc4Srjs else if (cmpf == 1) derived_12 = 1; 41796c5ddc4Srjs else unreachable("No pattern match at pos 12"); 41896c5ddc4Srjs 41996c5ddc4Srjs return 0x6e4000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (derived_12 << 12); 42096c5ddc4Srjs} 42196c5ddc4Srjs 42296c5ddc4Srjsstatic inline unsigned 42396c5ddc4Srjsbi_pack_fma_csel_v2u16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 42496c5ddc4Srjs{ 42596c5ddc4Srjs static uint8_t cmpf_table[] = { ~0, 0, 1, ~0, 2, 3, ~0, ~0, ~0 }; 42696c5ddc4Srjs assert(I->cmpf < 9); 42796c5ddc4Srjs unsigned cmpf = cmpf_table[I->cmpf]; 42896c5ddc4Srjs assert(cmpf < 4); 42996c5ddc4Srjs if ((cmpf == 2) || (cmpf == 3)) { 43096c5ddc4Srjs { unsigned temp = src0; src0 = src1; src1 = temp; } 43196c5ddc4Srjs if (cmpf == 2) cmpf = 0; 43296c5ddc4Srjs else if (cmpf == 3) cmpf = 1; 43396c5ddc4Srjs } 43496c5ddc4Srjs 43596c5ddc4Srjs unsigned derived_12 = 0; 43696c5ddc4Srjs if (cmpf == 0) derived_12 = 0; 43796c5ddc4Srjs else if (cmpf == 1) derived_12 = 1; 43896c5ddc4Srjs else unreachable("No pattern match at pos 12"); 43996c5ddc4Srjs 44096c5ddc4Srjs return 0x6e6000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (derived_12 << 12); 44196c5ddc4Srjs} 44296c5ddc4Srjs 
44396c5ddc4Srjsstatic inline unsigned 44496c5ddc4Srjsbi_pack_fma_cubeface1(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 44596c5ddc4Srjs{ 44696c5ddc4Srjs unsigned neg0 = I->src[0].neg; 44796c5ddc4Srjs assert(neg0 < 2); 44896c5ddc4Srjs unsigned neg1 = I->src[1].neg; 44996c5ddc4Srjs assert(neg1 < 2); 45096c5ddc4Srjs unsigned neg2 = I->src[2].neg; 45196c5ddc4Srjs assert(neg2 < 2); 45296c5ddc4Srjs unsigned derived_9 = 0; 45396c5ddc4Srjs if ((neg0 == 0) && (neg1 == 0) && (neg2 == 0)) derived_9 = 0; 45496c5ddc4Srjs else if ((neg0 == 1) && (neg1 == 1) && (neg2 == 1)) derived_9 = 1; 45596c5ddc4Srjs else unreachable("No pattern match at pos 9"); 45696c5ddc4Srjs 45796c5ddc4Srjs return 0x706800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_9 << 9); 45896c5ddc4Srjs} 45996c5ddc4Srjs 46096c5ddc4Srjsstatic inline unsigned 46196c5ddc4Srjsbi_pack_fma_dtsel_imm(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 46296c5ddc4Srjs{ 46396c5ddc4Srjs static uint8_t table_table[] = { 2, 1, 0, 3 }; 46496c5ddc4Srjs assert(I->table < 4); 46596c5ddc4Srjs unsigned table = table_table[I->table]; 46696c5ddc4Srjs assert(table < 4); 46796c5ddc4Srjs return 0x70f3e0 | (src0 << 0) | (table << 3); 46896c5ddc4Srjs} 46996c5ddc4Srjs 47096c5ddc4Srjsstatic inline unsigned 47196c5ddc4Srjsbi_pack_fma_f16_to_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 47296c5ddc4Srjs{ 47396c5ddc4Srjs static uint8_t lane0_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 47496c5ddc4Srjs assert(I->src[0].swizzle < 13); 47596c5ddc4Srjs unsigned lane0 = lane0_table[I->src[0].swizzle]; 47696c5ddc4Srjs assert(lane0 < 2); 47796c5ddc4Srjs return 0x700d10 | (src0 << 0) | (lane0 << 3); 47896c5ddc4Srjs} 47996c5ddc4Srjs 48096c5ddc4Srjsstatic inline unsigned 
48196c5ddc4Srjsbi_pack_fma_fadd_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 48296c5ddc4Srjs{ 48396c5ddc4Srjs unsigned abs1 = I->src[1].abs; 48496c5ddc4Srjs assert(abs1 < 2); 48596c5ddc4Srjs unsigned neg0 = I->src[0].neg; 48696c5ddc4Srjs assert(neg0 < 2); 48796c5ddc4Srjs unsigned neg1 = I->src[1].neg; 48896c5ddc4Srjs assert(neg1 < 2); 48996c5ddc4Srjs unsigned abs0 = I->src[0].abs; 49096c5ddc4Srjs assert(abs0 < 2); 49196c5ddc4Srjs unsigned round = I->round; 49296c5ddc4Srjs assert(round < 4); 49396c5ddc4Srjs unsigned clamp = I->clamp; 49496c5ddc4Srjs assert(clamp < 4); 49596c5ddc4Srjs static uint8_t widen0_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 49696c5ddc4Srjs assert(I->src[0].swizzle < 13); 49796c5ddc4Srjs unsigned widen0 = widen0_table[I->src[0].swizzle]; 49896c5ddc4Srjs assert(widen0 < 4); 49996c5ddc4Srjs static uint8_t widen1_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 50096c5ddc4Srjs assert(I->src[1].swizzle < 13); 50196c5ddc4Srjs unsigned widen1 = widen1_table[I->src[1].swizzle]; 50296c5ddc4Srjs assert(widen1 < 4); 50396c5ddc4Srjs if ((widen0 == 2) && (widen1 == 1)) { 50496c5ddc4Srjs { unsigned temp = src0; src0 = src1; src1 = temp; } 50596c5ddc4Srjs { unsigned temp = neg0; neg0 = neg1; neg1 = temp; } 50696c5ddc4Srjs { unsigned temp = abs0; abs0 = abs1; abs1 = temp; } 50796c5ddc4Srjs { unsigned temp = widen0; widen0 = widen1; widen1 = temp; } 50896c5ddc4Srjs } 50996c5ddc4Srjs 51096c5ddc4Srjs unsigned derived_9 = 0; 51196c5ddc4Srjs if ((widen0 == 0) && (widen1 == 0)) derived_9 = 0; 51296c5ddc4Srjs else if ((widen0 == 0) && (widen1 == 1)) derived_9 = 1; 51396c5ddc4Srjs else if ((widen0 == 0) && (widen1 == 2)) derived_9 = 2; 51496c5ddc4Srjs else if ((widen0 == 1) && (widen1 == 1)) derived_9 = 3; 51596c5ddc4Srjs else if ((widen0 == 1) && (widen1 == 2)) derived_9 = 4; 51696c5ddc4Srjs else if ((widen0 == 2) && (widen1 == 2)) derived_9 
= 5; 51796c5ddc4Srjs else if ((widen0 == 1) && (widen1 == 0)) derived_9 = 6; 51896c5ddc4Srjs else if ((widen0 == 2) && (widen1 == 0)) derived_9 = 7; 51996c5ddc4Srjs else unreachable("No pattern match at pos 9"); 52096c5ddc4Srjs 52196c5ddc4Srjs return 0x2c0000 | (src0 << 0) | (src1 << 3) | (abs1 << 6) | (neg0 << 7) | (neg1 << 8) | (abs0 << 12) | (round << 13) | (clamp << 15) | (derived_9 << 9); 52296c5ddc4Srjs} 52396c5ddc4Srjs 52496c5ddc4Srjsstatic inline unsigned 52596c5ddc4Srjsbi_pack_fma_fadd_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 52696c5ddc4Srjs{ 52796c5ddc4Srjs unsigned abs0 = I->src[0].abs; 52896c5ddc4Srjs assert(abs0 < 2); 52996c5ddc4Srjs unsigned abs1 = I->src[1].abs; 53096c5ddc4Srjs assert(abs1 < 2); 53196c5ddc4Srjs unsigned neg0 = I->src[0].neg; 53296c5ddc4Srjs assert(neg0 < 2); 53396c5ddc4Srjs unsigned neg1 = I->src[1].neg; 53496c5ddc4Srjs assert(neg1 < 2); 53596c5ddc4Srjs static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 53696c5ddc4Srjs assert(I->src[0].swizzle < 13); 53796c5ddc4Srjs unsigned swz0 = swz0_table[I->src[0].swizzle]; 53896c5ddc4Srjs assert(swz0 < 4); 53996c5ddc4Srjs static uint8_t swz1_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 54096c5ddc4Srjs assert(I->src[1].swizzle < 13); 54196c5ddc4Srjs unsigned swz1 = swz1_table[I->src[1].swizzle]; 54296c5ddc4Srjs assert(swz1 < 4); 54396c5ddc4Srjs unsigned round = I->round; 54496c5ddc4Srjs assert(round < 4); 54596c5ddc4Srjs unsigned clamp = I->clamp; 54696c5ddc4Srjs assert(clamp < 4); 54796c5ddc4Srjs if (((abs0 == 0) && (src0 > src1)) || ((abs1 == 1) && (src0 <= src1))) { 54896c5ddc4Srjs { unsigned temp = src0; src0 = src1; src1 = temp; } 54996c5ddc4Srjs { unsigned temp = abs0; abs0 = abs1; abs1 = temp; } 55096c5ddc4Srjs { unsigned temp = neg0; neg0 = neg1; neg1 = temp; } 55196c5ddc4Srjs { unsigned temp = swz0; swz0 = swz1; swz1 = temp; } 55296c5ddc4Srjs 
} 55396c5ddc4Srjs 55496c5ddc4Srjs unsigned derived_6 = 0; 55596c5ddc4Srjs if (((abs0 == 1) && (abs1 == 0) && (src0 > src1)) || ((abs0 == 0) && (abs1 == 0) && (src0 <= src1))) derived_6 = 0; 55696c5ddc4Srjs else if (((abs0 == 1) && (abs1 == 1) && (src0 > src1)) || ((abs0 == 1) && (abs1 == 0) && (src0 <= src1))) derived_6 = 1; 55796c5ddc4Srjs else unreachable("No pattern match at pos 6"); 55896c5ddc4Srjs 55996c5ddc4Srjs return 0x6c0000 | (src0 << 0) | (src1 << 3) | (neg0 << 7) | (neg1 << 8) | (swz0 << 9) | (swz1 << 11) | (round << 13) | (clamp << 15) | (derived_6 << 6); 56096c5ddc4Srjs} 56196c5ddc4Srjs 56296c5ddc4Srjsstatic inline unsigned 56396c5ddc4Srjsbi_pack_fma_fadd_lscale_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 56496c5ddc4Srjs{ 56596c5ddc4Srjs unsigned abs0 = I->src[0].abs; 56696c5ddc4Srjs assert(abs0 < 2); 56796c5ddc4Srjs unsigned neg0 = I->src[0].neg; 56896c5ddc4Srjs assert(neg0 < 2); 56996c5ddc4Srjs unsigned abs1 = I->src[1].abs; 57096c5ddc4Srjs assert(abs1 < 2); 57196c5ddc4Srjs unsigned neg1 = I->src[1].neg; 57296c5ddc4Srjs assert(neg1 < 2); 57396c5ddc4Srjs return 0x70f400 | (src0 << 0) | (src1 << 3) | (abs0 << 6) | (neg0 << 7) | (abs1 << 8) | (neg1 << 9); 57496c5ddc4Srjs} 57596c5ddc4Srjs 57696c5ddc4Srjsstatic inline unsigned 57796c5ddc4Srjsbi_pack_fma_fcmp_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 57896c5ddc4Srjs{ 57996c5ddc4Srjs static uint8_t widen0_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 58096c5ddc4Srjs assert(I->src[0].swizzle < 13); 58196c5ddc4Srjs unsigned widen0 = widen0_table[I->src[0].swizzle]; 58296c5ddc4Srjs assert(widen0 < 4); 58396c5ddc4Srjs static uint8_t widen1_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 58496c5ddc4Srjs assert(I->src[1].swizzle < 13); 58596c5ddc4Srjs unsigned widen1 = 
widen1_table[I->src[1].swizzle]; 58696c5ddc4Srjs assert(widen1 < 4); 58796c5ddc4Srjs unsigned abs1 = I->src[1].abs; 58896c5ddc4Srjs assert(abs1 < 2); 58996c5ddc4Srjs unsigned neg0 = I->src[0].neg; 59096c5ddc4Srjs assert(neg0 < 2); 59196c5ddc4Srjs unsigned neg1 = I->src[1].neg; 59296c5ddc4Srjs assert(neg1 < 2); 59396c5ddc4Srjs unsigned abs0 = I->src[0].abs; 59496c5ddc4Srjs assert(abs0 < 2); 59596c5ddc4Srjs static uint8_t cmpf_table[] = { 0, 1, 2, 3, 4, 5, ~0, 6, 7 }; 59696c5ddc4Srjs assert(I->cmpf < 9); 59796c5ddc4Srjs unsigned cmpf = cmpf_table[I->cmpf]; 59896c5ddc4Srjs assert(cmpf < 8); 59996c5ddc4Srjs unsigned result_type = I->result_type; 60096c5ddc4Srjs assert(result_type < 4); 60196c5ddc4Srjs if ((widen0 == 2) && (widen1 == 1)) { 60296c5ddc4Srjs { unsigned temp = src0; src0 = src1; src1 = temp; } 60396c5ddc4Srjs { unsigned temp = widen0; widen0 = widen1; widen1 = temp; } 60496c5ddc4Srjs { unsigned temp = neg0; neg0 = neg1; neg1 = temp; } 60596c5ddc4Srjs { unsigned temp = abs0; abs0 = abs1; abs1 = temp; } 60696c5ddc4Srjs if (cmpf == 4) cmpf = 1; 60796c5ddc4Srjs else if (cmpf == 5) cmpf = 2; 60896c5ddc4Srjs else if (cmpf == 1) cmpf = 4; 60996c5ddc4Srjs else if (cmpf == 2) cmpf = 5; 61096c5ddc4Srjs } 61196c5ddc4Srjs 61296c5ddc4Srjs unsigned derived_9 = 0; 61396c5ddc4Srjs if ((widen0 == 0) && (widen1 == 0)) derived_9 = 0; 61496c5ddc4Srjs else if ((widen0 == 0) && (widen1 == 1)) derived_9 = 1; 61596c5ddc4Srjs else if ((widen0 == 0) && (widen1 == 2)) derived_9 = 2; 61696c5ddc4Srjs else if ((widen0 == 1) && (widen1 == 1)) derived_9 = 3; 61796c5ddc4Srjs else if ((widen0 == 1) && (widen1 == 2)) derived_9 = 4; 61896c5ddc4Srjs else if ((widen0 == 2) && (widen1 == 2)) derived_9 = 5; 61996c5ddc4Srjs else if ((widen0 == 1) && (widen1 == 0)) derived_9 = 6; 62096c5ddc4Srjs else if ((widen0 == 2) && (widen1 == 0)) derived_9 = 7; 62196c5ddc4Srjs else unreachable("No pattern match at pos 9"); 62296c5ddc4Srjs 62396c5ddc4Srjs return 0x240000 | (src0 << 0) | (src1 << 3) | (abs1 << 
6) | (neg0 << 7) | (neg1 << 8) | (abs0 << 12) | (cmpf << 13) | (result_type << 16) | (derived_9 << 9); 62496c5ddc4Srjs} 62596c5ddc4Srjs 62696c5ddc4Srjsstatic inline unsigned 62796c5ddc4Srjsbi_pack_fma_fcmp_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 62896c5ddc4Srjs{ 62996c5ddc4Srjs unsigned abs0 = I->src[0].abs; 63096c5ddc4Srjs assert(abs0 < 2); 63196c5ddc4Srjs unsigned abs1 = I->src[1].abs; 63296c5ddc4Srjs assert(abs1 < 2); 63396c5ddc4Srjs static uint8_t cmpf_table[] = { 0, 1, 2, 3, 4, 5, ~0, 6, 7 }; 63496c5ddc4Srjs assert(I->cmpf < 9); 63596c5ddc4Srjs unsigned cmpf = cmpf_table[I->cmpf]; 63696c5ddc4Srjs assert(cmpf < 8); 63796c5ddc4Srjs unsigned neg0 = I->src[0].neg; 63896c5ddc4Srjs assert(neg0 < 2); 63996c5ddc4Srjs unsigned neg1 = I->src[1].neg; 64096c5ddc4Srjs assert(neg1 < 2); 64196c5ddc4Srjs static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 64296c5ddc4Srjs assert(I->src[0].swizzle < 13); 64396c5ddc4Srjs unsigned swz0 = swz0_table[I->src[0].swizzle]; 64496c5ddc4Srjs assert(swz0 < 4); 64596c5ddc4Srjs static uint8_t swz1_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 64696c5ddc4Srjs assert(I->src[1].swizzle < 13); 64796c5ddc4Srjs unsigned swz1 = swz1_table[I->src[1].swizzle]; 64896c5ddc4Srjs assert(swz1 < 4); 64996c5ddc4Srjs unsigned result_type = I->result_type; 65096c5ddc4Srjs assert(result_type < 4); 65196c5ddc4Srjs if (((abs0 == 0) && (src0 > src1)) || ((abs1 == 1) && (src0 <= src1))) { 65296c5ddc4Srjs { unsigned temp = src0; src0 = src1; src1 = temp; } 65396c5ddc4Srjs { unsigned temp = abs0; abs0 = abs1; abs1 = temp; } 65496c5ddc4Srjs { unsigned temp = neg0; neg0 = neg1; neg1 = temp; } 65596c5ddc4Srjs { unsigned temp = swz0; swz0 = swz1; swz1 = temp; } 65696c5ddc4Srjs if (cmpf == 4) cmpf = 1; 65796c5ddc4Srjs else if (cmpf == 5) cmpf = 2; 65896c5ddc4Srjs else if (cmpf == 1) cmpf = 4; 65996c5ddc4Srjs else if (cmpf == 2) 
cmpf = 5; 66096c5ddc4Srjs } 66196c5ddc4Srjs 66296c5ddc4Srjs unsigned derived_6 = 0; 66396c5ddc4Srjs if (((abs0 == 1) && (abs1 == 0) && (src0 > src1)) || ((abs0 == 0) && (abs1 == 0) && (src0 <= src1))) derived_6 = 0; 66496c5ddc4Srjs else if (((abs0 == 1) && (abs1 == 1) && (src0 > src1)) || ((abs0 == 1) && (abs1 == 0) && (src0 <= src1))) derived_6 = 1; 66596c5ddc4Srjs else unreachable("No pattern match at pos 6"); 66696c5ddc4Srjs 66796c5ddc4Srjs unsigned derived_13 = 0; 66896c5ddc4Srjs if (cmpf == 0) derived_13 = 0; 66996c5ddc4Srjs else if (cmpf == 1) derived_13 = 1; 67096c5ddc4Srjs else if (cmpf == 2) derived_13 = 2; 67196c5ddc4Srjs else if (cmpf == 3) derived_13 = 3; 67296c5ddc4Srjs else if (cmpf == 4) derived_13 = 4; 67396c5ddc4Srjs else if (cmpf == 5) derived_13 = 5; 67496c5ddc4Srjs else if (cmpf == 6) derived_13 = 6; 67596c5ddc4Srjs else if ((cmpf == 7) && (abs0 == 0) && (abs1 == 0)) derived_13 = 7; 67696c5ddc4Srjs else unreachable("No pattern match at pos 13"); 67796c5ddc4Srjs 67896c5ddc4Srjs return 0x640000 | (src0 << 0) | (src1 << 3) | (neg0 << 7) | (neg1 << 8) | (swz0 << 9) | (swz1 << 11) | (result_type << 16) | (derived_6 << 6) | (derived_13 << 13); 67996c5ddc4Srjs} 68096c5ddc4Srjs 68196c5ddc4Srjsstatic inline unsigned 68296c5ddc4Srjsbi_pack_fma_flshift_double_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 68396c5ddc4Srjs{ 68496c5ddc4Srjs unsigned bytes2 = I->bytes2; 68596c5ddc4Srjs assert(bytes2 < 2); 68696c5ddc4Srjs static uint8_t lane2_table[] = { ~0, ~0, ~0, ~0, 0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0 }; 68796c5ddc4Srjs assert(I->src[2].swizzle < 13); 68896c5ddc4Srjs unsigned lane2 = lane2_table[I->src[2].swizzle]; 68996c5ddc4Srjs assert(lane2 < 2); 69096c5ddc4Srjs return 0x33f800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (bytes2 << 9) | (lane2 << 10); 69196c5ddc4Srjs} 69296c5ddc4Srjs 69396c5ddc4Srjsstatic inline unsigned 69496c5ddc4Srjsbi_pack_fma_fma_f32(bi_instr *I, 
enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 69596c5ddc4Srjs{ 69696c5ddc4Srjs static uint8_t widen0_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 69796c5ddc4Srjs assert(I->src[0].swizzle < 13); 69896c5ddc4Srjs unsigned widen0 = widen0_table[I->src[0].swizzle]; 69996c5ddc4Srjs assert(widen0 < 4); 70096c5ddc4Srjs static uint8_t widen1_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 70196c5ddc4Srjs assert(I->src[1].swizzle < 13); 70296c5ddc4Srjs unsigned widen1 = widen1_table[I->src[1].swizzle]; 70396c5ddc4Srjs assert(widen1 < 4); 70496c5ddc4Srjs unsigned neg0 = I->src[0].neg; 70596c5ddc4Srjs assert(neg0 < 2); 70696c5ddc4Srjs unsigned neg1 = I->src[1].neg; 70796c5ddc4Srjs assert(neg1 < 2); 70896c5ddc4Srjs unsigned abs0 = I->src[0].abs; 70996c5ddc4Srjs assert(abs0 < 2); 71096c5ddc4Srjs unsigned round = I->round; 71196c5ddc4Srjs assert(round < 4); 71296c5ddc4Srjs unsigned clamp = I->clamp; 71396c5ddc4Srjs assert(clamp < 4); 71496c5ddc4Srjs unsigned abs1 = I->src[1].abs; 71596c5ddc4Srjs assert(abs1 < 2); 71696c5ddc4Srjs unsigned neg2 = I->src[2].neg; 71796c5ddc4Srjs assert(neg2 < 2); 71896c5ddc4Srjs unsigned abs2 = I->src[2].abs; 71996c5ddc4Srjs assert(abs2 < 2); 72096c5ddc4Srjs if ((widen0 == 2) && (widen1 == 1)) { 72196c5ddc4Srjs { unsigned temp = src0; src0 = src1; src1 = temp; } 72296c5ddc4Srjs { unsigned temp = widen0; widen0 = widen1; widen1 = temp; } 72396c5ddc4Srjs { unsigned temp = neg0; neg0 = neg1; neg1 = temp; } 72496c5ddc4Srjs { unsigned temp = abs0; abs0 = abs1; abs1 = temp; } 72596c5ddc4Srjs } 72696c5ddc4Srjs 72796c5ddc4Srjs unsigned derived_9 = 0; 72896c5ddc4Srjs if ((widen0 == 0) && (widen1 == 0)) derived_9 = 0; 72996c5ddc4Srjs else if ((widen0 == 0) && (widen1 == 1)) derived_9 = 1; 73096c5ddc4Srjs else if ((widen0 == 0) && (widen1 == 2)) derived_9 = 2; 73196c5ddc4Srjs else if ((widen0 == 1) && (widen1 == 1)) derived_9 = 3; 73296c5ddc4Srjs else if 
((widen0 == 1) && (widen1 == 2)) derived_9 = 4; 73396c5ddc4Srjs else if ((widen0 == 2) && (widen1 == 2)) derived_9 = 5; 73496c5ddc4Srjs else if ((widen0 == 1) && (widen1 == 0)) derived_9 = 6; 73596c5ddc4Srjs else if ((widen0 == 2) && (widen1 == 0)) derived_9 = 7; 73696c5ddc4Srjs else unreachable("No pattern match at pos 9"); 73796c5ddc4Srjs 73896c5ddc4Srjs unsigned derived_17 = 0; 73996c5ddc4Srjs if (((neg0 == 0) && (neg1 == 0)) || ((neg0 == 1) && (neg1 == 1))) derived_17 = 0; 74096c5ddc4Srjs else if (((neg0 == 0) && (neg1 == 1)) || ((neg0 == 1) && (neg1 == 0))) derived_17 = 1; 74196c5ddc4Srjs else unreachable("No pattern match at pos 17"); 74296c5ddc4Srjs 74396c5ddc4Srjs return 0x0 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (abs0 << 12) | (round << 13) | (clamp << 15) | (abs1 << 19) | (neg2 << 18) | (abs2 << 20) | (derived_9 << 9) | (derived_17 << 17); 74496c5ddc4Srjs} 74596c5ddc4Srjs 74696c5ddc4Srjsstatic inline unsigned 74796c5ddc4Srjsbi_pack_fma_fma_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 74896c5ddc4Srjs{ 74996c5ddc4Srjs unsigned neg0 = I->src[0].neg; 75096c5ddc4Srjs assert(neg0 < 2); 75196c5ddc4Srjs unsigned neg1 = I->src[1].neg; 75296c5ddc4Srjs assert(neg1 < 2); 75396c5ddc4Srjs static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 75496c5ddc4Srjs assert(I->src[0].swizzle < 13); 75596c5ddc4Srjs unsigned swz0 = swz0_table[I->src[0].swizzle]; 75696c5ddc4Srjs assert(swz0 < 4); 75796c5ddc4Srjs static uint8_t swz1_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 75896c5ddc4Srjs assert(I->src[1].swizzle < 13); 75996c5ddc4Srjs unsigned swz1 = swz1_table[I->src[1].swizzle]; 76096c5ddc4Srjs assert(swz1 < 4); 76196c5ddc4Srjs unsigned round = I->round; 76296c5ddc4Srjs assert(round < 4); 76396c5ddc4Srjs unsigned clamp = I->clamp; 76496c5ddc4Srjs assert(clamp < 4); 76596c5ddc4Srjs unsigned neg2 = I->src[2].neg; 76696c5ddc4Srjs 
assert(neg2 < 2); 76796c5ddc4Srjs static uint8_t swz2_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 76896c5ddc4Srjs assert(I->src[2].swizzle < 13); 76996c5ddc4Srjs unsigned swz2 = swz2_table[I->src[2].swizzle]; 77096c5ddc4Srjs assert(swz2 < 4); 77196c5ddc4Srjs unsigned derived_17 = 0; 77296c5ddc4Srjs if (((neg0 == 0) && (neg1 == 0)) || ((neg0 == 1) && (neg1 == 1))) derived_17 = 0; 77396c5ddc4Srjs else if (((neg0 == 0) && (neg1 == 1)) || ((neg0 == 1) && (neg1 == 0))) derived_17 = 1; 77496c5ddc4Srjs else unreachable("No pattern match at pos 17"); 77596c5ddc4Srjs 77696c5ddc4Srjs return 0x400000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (swz0 << 9) | (swz1 << 11) | (round << 13) | (clamp << 15) | (neg2 << 18) | (swz2 << 19) | (derived_17 << 17); 77796c5ddc4Srjs} 77896c5ddc4Srjs 77996c5ddc4Srjsstatic inline unsigned 78096c5ddc4Srjsbi_pack_fma_fma_rscale_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 78196c5ddc4Srjs{ 78296c5ddc4Srjs static uint8_t round_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0 }; 78396c5ddc4Srjs assert(I->round < 9); 78496c5ddc4Srjs unsigned round = round_table[I->round]; 78596c5ddc4Srjs assert(round < 2); 78696c5ddc4Srjs unsigned clamp = I->clamp; 78796c5ddc4Srjs assert(clamp < 4); 78896c5ddc4Srjs unsigned neg0 = I->src[0].neg; 78996c5ddc4Srjs assert(neg0 < 2); 79096c5ddc4Srjs unsigned neg1 = I->src[1].neg; 79196c5ddc4Srjs assert(neg1 < 2); 79296c5ddc4Srjs unsigned abs0 = I->src[0].abs; 79396c5ddc4Srjs assert(abs0 < 2); 79496c5ddc4Srjs unsigned neg2 = I->src[2].neg; 79596c5ddc4Srjs assert(neg2 < 2); 79696c5ddc4Srjs unsigned special = I->special; 79796c5ddc4Srjs assert(special < 4); 79896c5ddc4Srjs unsigned derived_16 = 0; 79996c5ddc4Srjs if (((neg0 == 0) && (neg1 == 0)) || ((neg0 == 1) && (neg1 == 1))) derived_16 = 0; 80096c5ddc4Srjs else if (((neg0 == 0) && (neg1 == 1)) || ((neg0 == 1) && (neg1 == 0))) derived_16 = 1; 80196c5ddc4Srjs else 
unreachable("No pattern match at pos 16"); 80296c5ddc4Srjs 80396c5ddc4Srjs unsigned derived_12 = 0; 80496c5ddc4Srjs if ((clamp == 0) && (special == 0) && (round == 0)) derived_12 = 0; 80596c5ddc4Srjs else if ((clamp == 1) && (special == 0) && (round == 0)) derived_12 = 1; 80696c5ddc4Srjs else if ((clamp == 2) && (special == 0) && (round == 0)) derived_12 = 2; 80796c5ddc4Srjs else if ((clamp == 3) && (special == 0) && (round == 0)) derived_12 = 3; 80896c5ddc4Srjs else if ((clamp == 0) && (special == 1) && (round == 0)) derived_12 = 4; 80996c5ddc4Srjs else if ((clamp == 0) && (special == 1) && (round == 1)) derived_12 = 5; 81096c5ddc4Srjs else if ((clamp == 0) && (special == 3) && (round == 0)) derived_12 = 6; 81196c5ddc4Srjs else if ((clamp == 0) && (special == 2) && (round == 0)) derived_12 = 7; 81296c5ddc4Srjs else unreachable("No pattern match at pos 12"); 81396c5ddc4Srjs 81496c5ddc4Srjs return 0x280000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (abs0 << 15) | (neg2 << 17) | (derived_16 << 16) | (derived_12 << 12); 81596c5ddc4Srjs} 81696c5ddc4Srjs 81796c5ddc4Srjsstatic inline unsigned 81896c5ddc4Srjsbi_pack_fma_fma_rscale_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 81996c5ddc4Srjs{ 82096c5ddc4Srjs static uint8_t round_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0 }; 82196c5ddc4Srjs assert(I->round < 9); 82296c5ddc4Srjs unsigned round = round_table[I->round]; 82396c5ddc4Srjs assert(round < 2); 82496c5ddc4Srjs unsigned clamp = I->clamp; 82596c5ddc4Srjs assert(clamp < 4); 82696c5ddc4Srjs unsigned neg0 = I->src[0].neg; 82796c5ddc4Srjs assert(neg0 < 2); 82896c5ddc4Srjs unsigned neg1 = I->src[1].neg; 82996c5ddc4Srjs assert(neg1 < 2); 83096c5ddc4Srjs unsigned abs0 = I->src[0].abs; 83196c5ddc4Srjs assert(abs0 < 2); 83296c5ddc4Srjs unsigned neg2 = I->src[2].neg; 83396c5ddc4Srjs assert(neg2 < 2); 83496c5ddc4Srjs unsigned special = I->special; 83596c5ddc4Srjs 
assert(special < 4); 83696c5ddc4Srjs unsigned derived_16 = 0; 83796c5ddc4Srjs if (((neg0 == 0) && (neg1 == 0)) || ((neg0 == 1) && (neg1 == 1))) derived_16 = 0; 83896c5ddc4Srjs else if (((neg0 == 0) && (neg1 == 1)) || ((neg0 == 1) && (neg1 == 0))) derived_16 = 1; 83996c5ddc4Srjs else unreachable("No pattern match at pos 16"); 84096c5ddc4Srjs 84196c5ddc4Srjs unsigned derived_12 = 0; 84296c5ddc4Srjs if ((clamp == 0) && (special == 0) && (round == 0)) derived_12 = 0; 84396c5ddc4Srjs else if ((clamp == 1) && (special == 0) && (round == 0)) derived_12 = 1; 84496c5ddc4Srjs else if ((clamp == 2) && (special == 0) && (round == 0)) derived_12 = 2; 84596c5ddc4Srjs else if ((clamp == 3) && (special == 0) && (round == 0)) derived_12 = 3; 84696c5ddc4Srjs else if ((clamp == 0) && (special == 1) && (round == 0)) derived_12 = 4; 84796c5ddc4Srjs else if ((clamp == 0) && (special == 1) && (round == 1)) derived_12 = 5; 84896c5ddc4Srjs else if ((clamp == 0) && (special == 2) && (round == 0)) derived_12 = 7; 84996c5ddc4Srjs else unreachable("No pattern match at pos 12"); 85096c5ddc4Srjs 85196c5ddc4Srjs return 0x680000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (abs0 << 15) | (neg2 << 17) | (derived_16 << 16) | (derived_12 << 12); 85296c5ddc4Srjs} 85396c5ddc4Srjs 85496c5ddc4Srjsstatic inline unsigned 85596c5ddc4Srjsbi_pack_fma_fmul_cslice(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 85696c5ddc4Srjs{ 85796c5ddc4Srjs static uint8_t lane0_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 85896c5ddc4Srjs assert(I->src[0].swizzle < 13); 85996c5ddc4Srjs unsigned lane0 = lane0_table[I->src[0].swizzle]; 86096c5ddc4Srjs assert(lane0 < 2); 86196c5ddc4Srjs unsigned abs0 = I->src[0].abs; 86296c5ddc4Srjs assert(abs0 < 2); 86396c5ddc4Srjs unsigned neg0 = I->src[0].neg; 86496c5ddc4Srjs assert(neg0 < 2); 86596c5ddc4Srjs return 0x70d000 | (src0 << 0) | (src1 << 3) | (lane0 << 6) | (abs0 << 
7) | (neg0 << 8); 86696c5ddc4Srjs} 86796c5ddc4Srjs 86896c5ddc4Srjsstatic inline unsigned 86996c5ddc4Srjsbi_pack_fma_fmul_slice_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 87096c5ddc4Srjs{ 87196c5ddc4Srjs 87296c5ddc4Srjs return 0x70cb40 | (src0 << 0) | (src1 << 3); 87396c5ddc4Srjs} 87496c5ddc4Srjs 87596c5ddc4Srjsstatic inline unsigned 87696c5ddc4Srjsbi_pack_fma_frexpe_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 87796c5ddc4Srjs{ 87896c5ddc4Srjs unsigned neg0 = I->src[0].neg; 87996c5ddc4Srjs assert(neg0 < 2); 88096c5ddc4Srjs unsigned sqrt = I->sqrt; 88196c5ddc4Srjs assert(sqrt < 2); 88296c5ddc4Srjs unsigned log = I->log; 88396c5ddc4Srjs assert(log < 2); 88496c5ddc4Srjs static uint8_t widen0_table[] = { 2, 1, ~0, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 88596c5ddc4Srjs assert(I->src[0].swizzle < 13); 88696c5ddc4Srjs unsigned widen0 = widen0_table[I->src[0].swizzle]; 88796c5ddc4Srjs assert(widen0 < 4); 88896c5ddc4Srjs if (log == 0) { 88996c5ddc4Srjs return 0x701c20 | (src0 << 0) | (neg0 << 6) | (sqrt << 8) | (widen0 << 3); 89096c5ddc4Srjs } else if ((log == 1) && (sqrt == 0) && (neg0 == 0)) { 89196c5ddc4Srjs return 0x701e20 | (src0 << 0) | (widen0 << 3); 89296c5ddc4Srjs } else { 89396c5ddc4Srjs unreachable("No matching state found in fma_frexpe_f32"); 89496c5ddc4Srjs } 89596c5ddc4Srjs} 89696c5ddc4Srjs 89796c5ddc4Srjsstatic inline unsigned 89896c5ddc4Srjsbi_pack_fma_frexpe_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 89996c5ddc4Srjs{ 90096c5ddc4Srjs unsigned neg0 = I->src[0].neg; 90196c5ddc4Srjs assert(neg0 < 2); 90296c5ddc4Srjs unsigned sqrt = I->sqrt; 90396c5ddc4Srjs assert(sqrt < 2); 90496c5ddc4Srjs unsigned log = I->log; 90596c5ddc4Srjs assert(log < 2); 90696c5ddc4Srjs static uint8_t 
swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 90796c5ddc4Srjs assert(I->src[0].swizzle < 13); 90896c5ddc4Srjs unsigned swz0 = swz0_table[I->src[0].swizzle]; 90996c5ddc4Srjs assert(swz0 < 4); 91096c5ddc4Srjs if (log == 0) { 91196c5ddc4Srjs return 0x701c00 | (src0 << 0) | (neg0 << 6) | (sqrt << 8) | (swz0 << 3); 91296c5ddc4Srjs } else if ((log == 1) && (sqrt == 0) && (neg0 == 0)) { 91396c5ddc4Srjs return 0x701e00 | (src0 << 0) | (swz0 << 3); 91496c5ddc4Srjs } else { 91596c5ddc4Srjs unreachable("No matching state found in fma_frexpe_v2f16"); 91696c5ddc4Srjs } 91796c5ddc4Srjs} 91896c5ddc4Srjs 91996c5ddc4Srjsstatic inline unsigned 92096c5ddc4Srjsbi_pack_fma_frexpm_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 92196c5ddc4Srjs{ 92296c5ddc4Srjs unsigned abs0 = I->src[0].abs; 92396c5ddc4Srjs assert(abs0 < 2); 92496c5ddc4Srjs unsigned sqrt = I->sqrt; 92596c5ddc4Srjs assert(sqrt < 2); 92696c5ddc4Srjs unsigned log = I->log; 92796c5ddc4Srjs assert(log < 2); 92896c5ddc4Srjs static uint8_t widen0_table[] = { 2, 1, ~0, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 92996c5ddc4Srjs assert(I->src[0].swizzle < 13); 93096c5ddc4Srjs unsigned widen0 = widen0_table[I->src[0].swizzle]; 93196c5ddc4Srjs assert(widen0 < 4); 93296c5ddc4Srjs unsigned neg0 = I->src[0].neg; 93396c5ddc4Srjs assert(neg0 < 2); 93496c5ddc4Srjs if ((log == 0) && (neg0 == 0)) { 93596c5ddc4Srjs return 0x701b20 | (src0 << 0) | (abs0 << 6) | (sqrt << 7) | (widen0 << 3); 93696c5ddc4Srjs } else if ((log == 1) && (sqrt == 0)) { 93796c5ddc4Srjs return 0x701a20 | (src0 << 0) | (abs0 << 6) | (widen0 << 3) | (neg0 << 7); 93896c5ddc4Srjs } else { 93996c5ddc4Srjs unreachable("No matching state found in fma_frexpm_f32"); 94096c5ddc4Srjs } 94196c5ddc4Srjs} 94296c5ddc4Srjs 94396c5ddc4Srjsstatic inline unsigned 94496c5ddc4Srjsbi_pack_fma_frexpm_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum 
bifrost_packed_src src2, enum bifrost_packed_src src3) 94596c5ddc4Srjs{ 94696c5ddc4Srjs unsigned abs0 = I->src[0].abs; 94796c5ddc4Srjs assert(abs0 < 2); 94896c5ddc4Srjs unsigned sqrt = I->sqrt; 94996c5ddc4Srjs assert(sqrt < 2); 95096c5ddc4Srjs unsigned log = I->log; 95196c5ddc4Srjs assert(log < 2); 95296c5ddc4Srjs static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 95396c5ddc4Srjs assert(I->src[0].swizzle < 13); 95496c5ddc4Srjs unsigned swz0 = swz0_table[I->src[0].swizzle]; 95596c5ddc4Srjs assert(swz0 < 4); 95696c5ddc4Srjs unsigned neg0 = I->src[0].neg; 95796c5ddc4Srjs assert(neg0 < 2); 95896c5ddc4Srjs if ((log == 0) && (neg0 == 0)) { 95996c5ddc4Srjs return 0x701b00 | (src0 << 0) | (abs0 << 6) | (sqrt << 7) | (swz0 << 3); 96096c5ddc4Srjs } else if ((log == 1) && (sqrt == 0)) { 96196c5ddc4Srjs return 0x701a00 | (src0 << 0) | (abs0 << 6) | (swz0 << 3) | (neg0 << 7); 96296c5ddc4Srjs } else { 96396c5ddc4Srjs unreachable("No matching state found in fma_frexpm_v2f16"); 96496c5ddc4Srjs } 96596c5ddc4Srjs} 96696c5ddc4Srjs 96796c5ddc4Srjsstatic inline unsigned 96896c5ddc4Srjsbi_pack_fma_fround_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 96996c5ddc4Srjs{ 97096c5ddc4Srjs unsigned abs0 = I->src[0].abs; 97196c5ddc4Srjs assert(abs0 < 2); 97296c5ddc4Srjs unsigned neg0 = I->src[0].neg; 97396c5ddc4Srjs assert(neg0 < 2); 97496c5ddc4Srjs static uint8_t widen0_table[] = { 2, 1, ~0, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 97596c5ddc4Srjs assert(I->src[0].swizzle < 13); 97696c5ddc4Srjs unsigned widen0 = widen0_table[I->src[0].swizzle]; 97796c5ddc4Srjs assert(widen0 < 4); 97896c5ddc4Srjs unsigned round = I->round; 97996c5ddc4Srjs assert(round < 8); 98096c5ddc4Srjs if (round != 4) { 98196c5ddc4Srjs unsigned derived_9 = 0; 98296c5ddc4Srjs if (round == 0) derived_9 = 0; 98396c5ddc4Srjs else if (round == 1) derived_9 = 1; 98496c5ddc4Srjs else if (round == 2) derived_9 = 
2; 98596c5ddc4Srjs else if (round == 3) derived_9 = 3; 98696c5ddc4Srjs else unreachable("No pattern match at pos 9"); 98796c5ddc4Srjs 98896c5ddc4Srjs return 0x70c020 | (src0 << 0) | (abs0 << 7) | (neg0 << 8) | (widen0 << 3) | (derived_9 << 9); 98996c5ddc4Srjs } else if (round == 4) { 99096c5ddc4Srjs return 0x707620 | (src0 << 0) | (abs0 << 7) | (neg0 << 8) | (widen0 << 3); 99196c5ddc4Srjs } else { 99296c5ddc4Srjs unreachable("No matching state found in fma_fround_f32"); 99396c5ddc4Srjs } 99496c5ddc4Srjs} 99596c5ddc4Srjs 99696c5ddc4Srjsstatic inline unsigned 99796c5ddc4Srjsbi_pack_fma_fround_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 99896c5ddc4Srjs{ 99996c5ddc4Srjs unsigned abs0 = I->src[0].abs; 100096c5ddc4Srjs assert(abs0 < 2); 100196c5ddc4Srjs unsigned neg0 = I->src[0].neg; 100296c5ddc4Srjs assert(neg0 < 2); 100396c5ddc4Srjs static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 100496c5ddc4Srjs assert(I->src[0].swizzle < 13); 100596c5ddc4Srjs unsigned swz0 = swz0_table[I->src[0].swizzle]; 100696c5ddc4Srjs assert(swz0 < 4); 100796c5ddc4Srjs unsigned round = I->round; 100896c5ddc4Srjs assert(round < 8); 100996c5ddc4Srjs if (round != 4) { 101096c5ddc4Srjs unsigned derived_9 = 0; 101196c5ddc4Srjs if (round == 0) derived_9 = 0; 101296c5ddc4Srjs else if (round == 1) derived_9 = 1; 101396c5ddc4Srjs else if (round == 2) derived_9 = 2; 101496c5ddc4Srjs else if (round == 3) derived_9 = 3; 101596c5ddc4Srjs else unreachable("No pattern match at pos 9"); 101696c5ddc4Srjs 101796c5ddc4Srjs return 0x70c000 | (src0 << 0) | (abs0 << 7) | (neg0 << 8) | (swz0 << 3) | (derived_9 << 9); 101896c5ddc4Srjs } else if (round == 4) { 101996c5ddc4Srjs return 0x707600 | (src0 << 0) | (abs0 << 7) | (neg0 << 8) | (swz0 << 3); 102096c5ddc4Srjs } else { 102196c5ddc4Srjs unreachable("No matching state found in fma_fround_v2f16"); 102296c5ddc4Srjs } 102396c5ddc4Srjs} 
102496c5ddc4Srjs 102596c5ddc4Srjsstatic inline unsigned 102696c5ddc4Srjsbi_pack_fma_frshift_double_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 102796c5ddc4Srjs{ 102896c5ddc4Srjs unsigned bytes2 = I->bytes2; 102996c5ddc4Srjs assert(bytes2 < 2); 103096c5ddc4Srjs static uint8_t lane2_table[] = { ~0, ~0, ~0, ~0, 0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0 }; 103196c5ddc4Srjs assert(I->src[2].swizzle < 13); 103296c5ddc4Srjs unsigned lane2 = lane2_table[I->src[2].swizzle]; 103396c5ddc4Srjs assert(lane2 < 2); 103496c5ddc4Srjs return 0x33f000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (bytes2 << 9) | (lane2 << 10); 103596c5ddc4Srjs} 103696c5ddc4Srjs 103796c5ddc4Srjsstatic inline unsigned 103896c5ddc4Srjsbi_pack_fma_iaddc_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 103996c5ddc4Srjs{ 104096c5ddc4Srjs 104196c5ddc4Srjs return 0x27fc00 | (src0 << 0) | (src1 << 3) | (src2 << 6); 104296c5ddc4Srjs} 104396c5ddc4Srjs 104496c5ddc4Srjsstatic inline unsigned 104596c5ddc4Srjsbi_pack_fma_idp_v4i8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 104696c5ddc4Srjs{ 104796c5ddc4Srjs unsigned sign0 = I->src[0].abs; 104896c5ddc4Srjs assert(sign0 < 2); 104996c5ddc4Srjs unsigned sign1 = I->src[1].abs; 105096c5ddc4Srjs assert(sign1 < 2); 105196c5ddc4Srjs return 0x73e8c0 | (src0 << 0) | (src1 << 3) | (sign0 << 9) | (sign1 << 10); 105296c5ddc4Srjs} 105396c5ddc4Srjs 105496c5ddc4Srjsstatic inline unsigned 105596c5ddc4Srjsbi_pack_fma_imul_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 105696c5ddc4Srjs{ 105796c5ddc4Srjs static uint8_t widen1_table[] = { 1, 0, ~0, 2, 3, 4, 5, 6, ~0, ~0, ~0, ~0, ~0 }; 105896c5ddc4Srjs assert(I->src[1].swizzle < 13); 
105996c5ddc4Srjs unsigned widen1 = widen1_table[I->src[1].swizzle]; 106096c5ddc4Srjs assert(widen1 < 8); 106196c5ddc4Srjs unsigned extend = I->extend; 106296c5ddc4Srjs assert(extend < 4); 106396c5ddc4Srjs if ((extend == 0) && (widen1 == 0)) { 106496c5ddc4Srjs return 0x73c0c0 | (src0 << 0) | (src1 << 3); 106596c5ddc4Srjs } else if ((extend != 0) && ((widen1 == 1) || (widen1 == 2))) { 106696c5ddc4Srjs unsigned derived_9 = 0; 106796c5ddc4Srjs if (widen1 == 1) derived_9 = 0; 106896c5ddc4Srjs else if (widen1 == 2) derived_9 = 1; 106996c5ddc4Srjs else unreachable("No pattern match at pos 9"); 107096c5ddc4Srjs 107196c5ddc4Srjs unsigned derived_10 = 0; 107296c5ddc4Srjs if (extend == 2) derived_10 = 0; 107396c5ddc4Srjs else if (extend == 1) derived_10 = 1; 107496c5ddc4Srjs else unreachable("No pattern match at pos 10"); 107596c5ddc4Srjs 107696c5ddc4Srjs return 0x73c8c0 | (src0 << 0) | (src1 << 3) | (derived_9 << 9) | (derived_10 << 10); 107796c5ddc4Srjs } else if ((extend != 0) && ((widen1 == 3) || (widen1 == 4) || (widen1 == 5) || (widen1 == 6))) { 107896c5ddc4Srjs unsigned derived_9 = 0; 107996c5ddc4Srjs if (widen1 == 3) derived_9 = 0; 108096c5ddc4Srjs else if (widen1 == 4) derived_9 = 1; 108196c5ddc4Srjs else if (widen1 == 5) derived_9 = 2; 108296c5ddc4Srjs else if (widen1 == 6) derived_9 = 3; 108396c5ddc4Srjs else unreachable("No pattern match at pos 9"); 108496c5ddc4Srjs 108596c5ddc4Srjs unsigned derived_11 = 0; 108696c5ddc4Srjs if (extend == 2) derived_11 = 0; 108796c5ddc4Srjs else if (extend == 1) derived_11 = 1; 108896c5ddc4Srjs else unreachable("No pattern match at pos 11"); 108996c5ddc4Srjs 109096c5ddc4Srjs return 0x73b0c0 | (src0 << 0) | (src1 << 3) | (derived_9 << 9) | (derived_11 << 11); 109196c5ddc4Srjs } else { 109296c5ddc4Srjs unreachable("No matching state found in fma_imul_i32"); 109396c5ddc4Srjs } 109496c5ddc4Srjs} 109596c5ddc4Srjs 109696c5ddc4Srjsstatic inline unsigned 109796c5ddc4Srjsbi_pack_fma_imul_v2i16(bi_instr *I, enum bifrost_packed_src src0, enum 
bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 109896c5ddc4Srjs{ 109996c5ddc4Srjs static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 110096c5ddc4Srjs assert(I->src[0].swizzle < 13); 110196c5ddc4Srjs unsigned swz0 = swz0_table[I->src[0].swizzle]; 110296c5ddc4Srjs assert(swz0 < 4); 110396c5ddc4Srjs static uint8_t swz1_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 110496c5ddc4Srjs assert(I->src[1].swizzle < 13); 110596c5ddc4Srjs unsigned swz1 = swz1_table[I->src[1].swizzle]; 110696c5ddc4Srjs assert(swz1 < 4); 110796c5ddc4Srjs return 0x7240c0 | (src0 << 0) | (src1 << 3) | (swz0 << 9) | (swz1 << 11); 110896c5ddc4Srjs} 110996c5ddc4Srjs 111096c5ddc4Srjsstatic inline unsigned 111196c5ddc4Srjsbi_pack_fma_imul_v4i8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 111296c5ddc4Srjs{ 111396c5ddc4Srjs static uint8_t replicate0_table[] = { ~0, 0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 111496c5ddc4Srjs assert(I->src[0].swizzle < 13); 111596c5ddc4Srjs unsigned replicate0 = replicate0_table[I->src[0].swizzle]; 111696c5ddc4Srjs assert(replicate0 < 8); 111796c5ddc4Srjs static uint8_t replicate1_table[] = { ~0, 0, ~0, ~0, 1, 2, 3, 4, ~0, ~0, ~0, ~0, ~0 }; 111896c5ddc4Srjs assert(I->src[1].swizzle < 13); 111996c5ddc4Srjs unsigned replicate1 = replicate1_table[I->src[1].swizzle]; 112096c5ddc4Srjs assert(replicate1 < 8); 112196c5ddc4Srjs if ((replicate0 == 0) && (replicate1 == 0)) { 112296c5ddc4Srjs return 0x73e0c0 | (src0 << 0) | (src1 << 3); 112396c5ddc4Srjs } else if ((replicate0 == 0) && (replicate1 != 0)) { 112496c5ddc4Srjs unsigned derived_9 = 0; 112596c5ddc4Srjs if (replicate1 == 1) derived_9 = 0; 112696c5ddc4Srjs else if (replicate1 == 2) derived_9 = 1; 112796c5ddc4Srjs else if (replicate1 == 3) derived_9 = 2; 112896c5ddc4Srjs else if (replicate1 == 4) derived_9 = 3; 112996c5ddc4Srjs else unreachable("No 
pattern match at pos 9"); 113096c5ddc4Srjs 113196c5ddc4Srjs return 0x7380c0 | (src0 << 0) | (src1 << 3) | (derived_9 << 9); 113296c5ddc4Srjs } else { 113396c5ddc4Srjs unreachable("No matching state found in fma_imul_v4i8"); 113496c5ddc4Srjs } 113596c5ddc4Srjs} 113696c5ddc4Srjs 113796c5ddc4Srjsstatic inline unsigned 113896c5ddc4Srjsbi_pack_fma_imuld(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 113996c5ddc4Srjs{ 114096c5ddc4Srjs assert((1 << src0) & 0x33); 114196c5ddc4Srjs assert((1 << src1) & 0x33); 114296c5ddc4Srjs unsigned threads = I->threads; 114396c5ddc4Srjs assert(threads < 2); 114496c5ddc4Srjs return 0x70f100 | (src0 << 0) | (src1 << 3) | (threads << 6); 114596c5ddc4Srjs} 114696c5ddc4Srjs 114796c5ddc4Srjsstatic inline unsigned 114896c5ddc4Srjsbi_pack_fma_isubb_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 114996c5ddc4Srjs{ 115096c5ddc4Srjs 115196c5ddc4Srjs return 0x27fe00 | (src0 << 0) | (src1 << 3) | (src2 << 6); 115296c5ddc4Srjs} 115396c5ddc4Srjs 115496c5ddc4Srjsstatic inline unsigned 115596c5ddc4Srjsbi_pack_fma_jump_ex(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 115696c5ddc4Srjs{ 115796c5ddc4Srjs unsigned test_mode = I->test_mode; 115896c5ddc4Srjs assert(test_mode < 2); 115996c5ddc4Srjs unsigned stack_mode = I->stack_mode; 116096c5ddc4Srjs assert(stack_mode < 4); 116196c5ddc4Srjs return 0x2eb000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (test_mode << 9) | (stack_mode << 10); 116296c5ddc4Srjs} 116396c5ddc4Srjs 116496c5ddc4Srjsstatic inline unsigned 116596c5ddc4Srjsbi_pack_fma_lrot_double_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 116696c5ddc4Srjs{ 116796c5ddc4Srjs unsigned bytes2 = I->bytes2; 
116896c5ddc4Srjs assert(bytes2 < 2); 116996c5ddc4Srjs static uint8_t lane2_table[] = { ~0, ~0, ~0, ~0, 0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0 }; 117096c5ddc4Srjs assert(I->src[2].swizzle < 13); 117196c5ddc4Srjs unsigned lane2 = lane2_table[I->src[2].swizzle]; 117296c5ddc4Srjs assert(lane2 < 2); 117396c5ddc4Srjs unsigned result_word = I->result_word; 117496c5ddc4Srjs assert(result_word < 2); 117596c5ddc4Srjs return 0x33b000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (bytes2 << 9) | (lane2 << 10) | (result_word << 11); 117696c5ddc4Srjs} 117796c5ddc4Srjs

/*
 * Build the packed word for fma_lshift_and_i32 on top of 0x311000.
 * Fields: src0/src1/src2 at bits 0/3/6, source 2 lane selector at bit 9,
 * I->src[1].neg at bit 14, and the flipped I->not_result at bit 15.
 */
static inline unsigned
bi_pack_fma_lshift_and_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    /* Indexed by I->src[2].swizzle; a ~0 entry trips the range assert below. */
    static uint8_t swz_to_lane[] = { ~0, ~0, ~0, ~0, 0, 1, 2, 3, ~0, ~0, ~0, ~0, ~0 };
    assert(I->src[2].swizzle < 13);
    const unsigned lane_sel = swz_to_lane[I->src[2].swizzle];
    assert(lane_sel < 4);

    const unsigned invert_src1 = I->src[1].neg;
    assert(invert_src1 < 2);

    /* Bit 15 carries the opposite of I->not_result. */
    static uint8_t flip[] = { 1, 0 };
    const unsigned result_bit = flip[I->not_result];
    assert(result_bit < 2);

    return 0x311000
         | (src0 << 0) | (src1 << 3) | (src2 << 6)
         | (lane_sel << 9)
         | (invert_src1 << 14)
         | (result_bit << 15);
}

/*
 * Build the packed word for fma_lshift_and_v2i16.
 * The source 2 selector picks one of two base words: selectors 0-3 use
 * 0x310800 and are stored as-is at bit 9; selectors 4-6 use 0x311800 and are
 * stored as 1-3 at bit 9.
 */
static inline unsigned
bi_pack_fma_lshift_and_v2i16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    /* Indexed by I->src[2].swizzle; a ~0 entry trips the range assert below. */
    static uint8_t swz_to_lanes[] = { ~0, ~0, ~0, ~0, 0, 1, 2, 3, 4, 5, ~0, ~0, 6 };
    assert(I->src[2].swizzle < 13);
    const unsigned lanes_sel = swz_to_lanes[I->src[2].swizzle];
    assert(lanes_sel < 8);

    const unsigned invert_src1 = I->src[1].neg;
    assert(invert_src1 < 2);

    /* Bit 15 carries the opposite of I->not_result. */
    static uint8_t flip[] = { 1, 0 };
    const unsigned result_bit = flip[I->not_result];
    assert(result_bit < 2);

    /* Everything except the base word and the bit-9 field is shared. */
    const unsigned common = (src0 << 0) | (src1 << 3) | (src2 << 6)
                          | (invert_src1 << 14) | (result_bit << 15);

    switch (lanes_sel) {
    case 0: case 1: case 2: case 3:
        return 0x310800 | common | (lanes_sel << 9);
    case 4: case 5: case 6:
        return 0x311800 | common | ((lanes_sel - 3) << 9);
    default:
        unreachable("No matching state found in fma_lshift_and_v2i16");
    }
}

/*
 * Build the packed word for fma_lshift_and_v4i8.
 * Selector 0 uses base 0x311800 with nothing at bit 9; selectors 1-4 use
 * base 0x310000 and are stored as 0-3 at bit 9.
 */
static inline unsigned
bi_pack_fma_lshift_and_v4i8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    /* Indexed by I->src[2].swizzle; a ~0 entry trips the range assert below. */
    static uint8_t swz_to_lanes[] = { ~0, 0, ~0, ~0, 1, 2, 3, 4, ~0, ~0, ~0, ~0, ~0 };
    assert(I->src[2].swizzle < 13);
    const unsigned lanes_sel = swz_to_lanes[I->src[2].swizzle];
    assert(lanes_sel < 8);

    const unsigned invert_src1 = I->src[1].neg;
    assert(invert_src1 < 2);

    /* Bit 15 carries the opposite of I->not_result. */
    static uint8_t flip[] = { 1, 0 };
    const unsigned result_bit = flip[I->not_result];
    assert(result_bit < 2);

    const unsigned common = (src0 << 0) | (src1 << 3) | (src2 << 6)
                          | (invert_src1 << 14) | (result_bit << 15);

    switch (lanes_sel) {
    case 0:
        return 0x311800 | common;
    case 1: case 2: case 3: case 4:
        return 0x310000 | common | ((lanes_sel - 1) << 9);
    default:
        unreachable("No pattern match at pos 9");
    }
}

/*
 * Build the packed word for fma_lshift_double_i32 on top of 0x33c000.
 * Fields: src0/src1/src2 at bits 0/3/6, I->bytes2 at bit 9, source 2 lane
 * selector at bit 10, I->result_word at bit 11.
 */
static inline unsigned
bi_pack_fma_lshift_double_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    const unsigned bytes_flag = I->bytes2;
    assert(bytes_flag < 2);

    /* Indexed by I->src[2].swizzle; a ~0 entry trips the range assert below. */
    static uint8_t swz_to_lane[] = { ~0, ~0, ~0, ~0, 0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0 };
    assert(I->src[2].swizzle < 13);
    const unsigned lane_sel = swz_to_lane[I->src[2].swizzle];
    assert(lane_sel < 2);

    const unsigned word_sel = I->result_word;
    assert(word_sel < 2);

    return 0x33c000
         | (src0 << 0) | (src1 << 3) | (src2 << 6)
         | (bytes_flag << 9)
         | (lane_sel << 10)
         | (word_sel << 11);
}

/*
 * Build the packed word for fma_lshift_or_i32 on top of 0x313000.
 * Fields: src0/src1/src2 at bits 0/3/6, source 2 lane selector at bit 9,
 * the flipped I->src[1].neg at bit 14, and I->not_result at bit 15.
 */
static inline unsigned
bi_pack_fma_lshift_or_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    /* Indexed by I->src[2].swizzle; a ~0 entry trips the range assert below. */
    static uint8_t swz_to_lane[] = { ~0, ~0, ~0, ~0, 0, 1, 2, 3, ~0, ~0, ~0, ~0, ~0 };
    assert(I->src[2].swizzle < 13);
    const unsigned lane_sel = swz_to_lane[I->src[2].swizzle];
    assert(lane_sel < 4);

    /* Bit 14 carries the opposite of I->src[1].neg. */
    static uint8_t flip[] = { 1, 0 };
    const unsigned src1_bit = flip[I->src[1].neg];
    assert(src1_bit < 2);

    const unsigned result_bit = I->not_result;
    assert(result_bit < 2);

    return 0x313000
         | (src0 << 0) | (src1 << 3) | (src2 << 6)
         | (lane_sel << 9)
         | (src1_bit << 14)
         | (result_bit << 15);
}

/*
 * Build the packed word for fma_lshift_or_v2i16.
 * Selectors 0-3 use base 0x312800 and are stored as-is at bit 9; selectors
 * 4-6 use base 0x313800 and are stored as 1-3 at bit 9.
 */
static inline unsigned
bi_pack_fma_lshift_or_v2i16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    /* Indexed by I->src[2].swizzle; a ~0 entry trips the range assert below. */
    static uint8_t swz_to_lanes[] = { ~0, ~0, ~0, ~0, 0, 1, 2, 3, 4, 5, ~0, ~0, 6 };
    assert(I->src[2].swizzle < 13);
    const unsigned lanes_sel = swz_to_lanes[I->src[2].swizzle];
    assert(lanes_sel < 8);

    /* Bit 14 carries the opposite of I->src[1].neg. */
    static uint8_t flip[] = { 1, 0 };
    const unsigned src1_bit = flip[I->src[1].neg];
    assert(src1_bit < 2);

    const unsigned result_bit = I->not_result;
    assert(result_bit < 2);

    const unsigned common = (src0 << 0) | (src1 << 3) | (src2 << 6)
                          | (src1_bit << 14) | (result_bit << 15);

    switch (lanes_sel) {
    case 0: case 1: case 2: case 3:
        return 0x312800 | common | (lanes_sel << 9);
    case 4: case 5: case 6:
        return 0x313800 | common | ((lanes_sel - 3) << 9);
    default:
        unreachable("No matching state found in fma_lshift_or_v2i16");
    }
}

/*
 * Build the packed word for fma_lshift_or_v4i8.
 * Selector 0 uses base 0x313800 with nothing at bit 9; selectors 1-4 use
 * base 0x312000 and are stored as 0-3 at bit 9.
 */
static inline unsigned
bi_pack_fma_lshift_or_v4i8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    /* Indexed by I->src[2].swizzle; a ~0 entry trips the range assert below. */
    static uint8_t swz_to_lanes[] = { ~0, 0, ~0, ~0, 1, 2, 3, 4, ~0, ~0, ~0, ~0, ~0 };
    assert(I->src[2].swizzle < 13);
    const unsigned lanes_sel = swz_to_lanes[I->src[2].swizzle];
    assert(lanes_sel < 8);

    /* Bit 14 carries the opposite of I->src[1].neg. */
    static uint8_t flip[] = { 1, 0 };
    const unsigned src1_bit = flip[I->src[1].neg];
    assert(src1_bit < 2);

    const unsigned result_bit = I->not_result;
    assert(result_bit < 2);

    const unsigned common = (src0 << 0) | (src1 << 3) | (src2 << 6)
                          | (src1_bit << 14) | (result_bit << 15);

    switch (lanes_sel) {
    case 0:
        return 0x313800 | common;
    case 1: case 2: case 3: case 4:
        return 0x312000 | common | ((lanes_sel - 1) << 9);
    default:
        unreachable("No pattern match at pos 9");
    }
}

/*
 * Build the packed word for fma_lshift_xor_i32 on top of 0x325000.
 * Fields: src0/src1/src2 at bits 0/3/6, source 2 lane selector at bit 9,
 * I->not_result at bit 13.
 */
static inline unsigned
bi_pack_fma_lshift_xor_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    /* Indexed by I->src[2].swizzle; a ~0 entry trips the range assert below. */
    static uint8_t swz_to_lane[] = { ~0, ~0, ~0, ~0, 0, 1, 2, 3, ~0, ~0, ~0, ~0, ~0 };
    assert(I->src[2].swizzle < 13);
    const unsigned lane_sel = swz_to_lane[I->src[2].swizzle];
    assert(lane_sel < 4);

    const unsigned result_bit = I->not_result;
    assert(result_bit < 2);

    return 0x325000
         | (src0 << 0) | (src1 << 3) | (src2 << 6)
         | (lane_sel << 9)
         | (result_bit << 13);
}

/*
 * Build the packed word for fma_lshift_xor_v2i16.
 * Selectors 0-3 use base 0x324800 and are stored as-is at bit 9; selectors
 * 4-6 use base 0x325800 and are stored as 1-3 at bit 9.
 */
static inline unsigned
bi_pack_fma_lshift_xor_v2i16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    /* Indexed by I->src[2].swizzle; a ~0 entry trips the range assert below. */
    static uint8_t swz_to_lanes[] = { ~0, ~0, ~0, ~0, 0, 1, 2, 3, 4, 5, ~0, ~0, 6 };
    assert(I->src[2].swizzle < 13);
    const unsigned lanes_sel = swz_to_lanes[I->src[2].swizzle];
    assert(lanes_sel < 8);

    const unsigned result_bit = I->not_result;
    assert(result_bit < 2);

    const unsigned common = (src0 << 0) | (src1 << 3) | (src2 << 6)
                          | (result_bit << 13);

    switch (lanes_sel) {
    case 0: case 1: case 2: case 3:
        return 0x324800 | common | (lanes_sel << 9);
    case 4: case 5: case 6:
        return 0x325800 | common | ((lanes_sel - 3) << 9);
    default:
        unreachable("No matching state found in fma_lshift_xor_v2i16");
    }
}

/*
 * Build the packed word for fma_lshift_xor_v4i8.
 * Selector 0 uses base 0x325800 with nothing at bit 9; selectors 1-4 use
 * base 0x324000 and are stored as 0-3 at bit 9.
 */
static inline unsigned
bi_pack_fma_lshift_xor_v4i8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    /* Indexed by I->src[2].swizzle; a ~0 entry trips the range assert below. */
    static uint8_t swz_to_lanes[] = { ~0, 0, ~0, ~0, 1, 2, 3, 4, ~0, ~0, ~0, ~0, ~0 };
    assert(I->src[2].swizzle < 13);
    const unsigned lanes_sel = swz_to_lanes[I->src[2].swizzle];
    assert(lanes_sel < 8);

    const unsigned result_bit = I->not_result;
    assert(result_bit < 2);

    const unsigned common = (src0 << 0) | (src1 << 3) | (src2 << 6)
                          | (result_bit << 13);

    switch (lanes_sel) {
    case 0:
        return 0x325800 | common;
    case 1: case 2: case 3: case 4:
        return 0x324000 | common | ((lanes_sel - 1) << 9);
    default:
        unreachable("No pattern match at pos 9");
    }
}

/*
 * Build the packed word for fma_mkvec_v2i16 on top of 0x70f000.
 * Fields: src0/src1 at bits 0/3, lane selectors for sources 0 and 1 at
 * bits 6 and 7.
 */
static inline unsigned
bi_pack_fma_mkvec_v2i16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    /* Indexed by I->src[0].swizzle; a ~0 entry trips the range assert below. */
    static uint8_t swz_to_lane_a[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
    assert(I->src[0].swizzle < 13);
    const unsigned lane_a = swz_to_lane_a[I->src[0].swizzle];
    assert(lane_a < 2);

    /* Same mapping, indexed by I->src[1].swizzle. */
    static uint8_t swz_to_lane_b[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
    assert(I->src[1].swizzle < 13);
    const unsigned lane_b = swz_to_lane_b[I->src[1].swizzle];
    assert(lane_b < 2);

    return 0x70f000
         | (src0 << 0) | (src1 << 3)
         | (lane_a << 6)
         | (lane_b << 7);
}

/*
 * Build the packed word for fma_mkvec_v4i8 on top of 0x710000.
 * Fields: src0..src3 at bits 0/3/6/9, one lane selector per source at
 * bits 12-15.
 */
static inline unsigned
bi_pack_fma_mkvec_v4i8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    /* Each table is indexed by that source's swizzle; ~0 trips the assert. */
    static uint8_t swz_to_lane_a[] = { ~0, ~0, ~0, ~0, 0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0 };
    assert(I->src[0].swizzle < 13);
    const unsigned lane_a = swz_to_lane_a[I->src[0].swizzle];
    assert(lane_a < 2);

    static uint8_t swz_to_lane_b[] = { ~0, ~0, ~0, ~0, 0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0 };
    assert(I->src[1].swizzle < 13);
    const unsigned lane_b = swz_to_lane_b[I->src[1].swizzle];
    assert(lane_b < 2);

    static uint8_t swz_to_lane_c[] = { ~0, ~0, ~0, ~0, 0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0 };
    assert(I->src[2].swizzle < 13);
    const unsigned lane_c = swz_to_lane_c[I->src[2].swizzle];
    assert(lane_c < 2);

    static uint8_t swz_to_lane_d[] = { ~0, ~0, ~0, ~0, 0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0 };
    assert(I->src[3].swizzle < 13);
    const unsigned lane_d = swz_to_lane_d[I->src[3].swizzle];
    assert(lane_d < 2);

    return 0x710000
         | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9)
         | (lane_a << 12)
         | (lane_b << 13)
         | (lane_c << 14)
         | (lane_d << 15);
}

/* Packed word for fma_mov_i32: 0x701968 with src0 ORed in at bit 0. */
static inline unsigned
bi_pack_fma_mov_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    const unsigned base = 0x701968;
    return base | (src0 << 0);
}

/* Packed word for fma_nop: the fixed value 0x701963, no operand fields. */
static inline unsigned
bi_pack_fma_nop(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    const unsigned base = 0x701963;
    return base;
}

/* Packed word for fma_popcount_i32: 0x73c6d8 with src0 ORed in at bit 0. */
static inline unsigned
bi_pack_fma_popcount_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    const unsigned base = 0x73c6d8;
    return base | (src0 << 0);
}

/* Packed word for fma_quiet_f32: 0x701970 with src0 ORed in at bit 0. */
static inline unsigned
bi_pack_fma_quiet_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    const unsigned base = 0x701970;
    return base | (src0 << 0);
}

/*
 * Build the packed word for fma_quiet_v2f16 on top of 0x701900.
 * Fields: src0 at bit 0, remapped source 0 swizzle at bit 4.
 */
static inline unsigned
bi_pack_fma_quiet_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    /* Indexed by I->src[0].swizzle; a ~0 entry trips the range assert below. */
    static uint8_t swz_remap[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
    assert(I->src[0].swizzle < 13);
    const unsigned swz_bits = swz_remap[I->src[0].swizzle];
    assert(swz_bits < 4);

    return 0x701900 | (src0 << 0) | (swz_bits << 4);
}

/*
 * Build the packed word for fma_rrot_double_i32 on top of 0x33a000.
 * Fields: src0/src1/src2 at bits 0/3/6, I->bytes2 at bit 9, source 2 lane
 * selector at bit 10, I->result_word at bit 11.
 */
static inline unsigned
bi_pack_fma_rrot_double_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    const unsigned bytes_flag = I->bytes2;
    assert(bytes_flag < 2);

    /* Indexed by I->src[2].swizzle; a ~0 entry trips the range assert below. */
    static uint8_t swz_to_lane[] = { ~0, ~0, ~0, ~0, 0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0 };
    assert(I->src[2].swizzle < 13);
    const unsigned lane_sel = swz_to_lane[I->src[2].swizzle];
    assert(lane_sel < 2);

    const unsigned word_sel = I->result_word;
    assert(word_sel < 2);

    return 0x33a000
         | (src0 << 0) | (src1 << 3) | (src2 << 6)
         | (bytes_flag << 9)
         | (lane_sel << 10)
         | (word_sel << 11);
}

/*
 * Build the packed word for fma_rshift_and_i32 on top of 0x301000.
 * Fields: src0/src1/src2 at bits 0/3/6, source 2 lane selector at bit 9,
 * I->src[1].neg at bit 14, and the flipped I->not_result at bit 15.
 */
static inline unsigned
bi_pack_fma_rshift_and_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    /* Indexed by I->src[2].swizzle; a ~0 entry trips the range assert below. */
    static uint8_t swz_to_lane[] = { ~0, ~0, ~0, ~0, 0, 1, 2, 3, ~0, ~0, ~0, ~0, ~0 };
    assert(I->src[2].swizzle < 13);
    const unsigned lane_sel = swz_to_lane[I->src[2].swizzle];
    assert(lane_sel < 4);

    const unsigned invert_src1 = I->src[1].neg;
    assert(invert_src1 < 2);

    /* Bit 15 carries the opposite of I->not_result. */
    static uint8_t flip[] = { 1, 0 };
    const unsigned result_bit = flip[I->not_result];
    assert(result_bit < 2);

    return 0x301000
         | (src0 << 0) | (src1 << 3) | (src2 << 6)
         | (lane_sel << 9)
         | (invert_src1 << 14)
         | (result_bit << 15);
}
151796c5ddc4Srjsstatic inline unsigned 151896c5ddc4Srjsbi_pack_fma_rshift_and_v2i16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 151996c5ddc4Srjs{ 152096c5ddc4Srjs static uint8_t lanes2_table[] = { ~0, ~0, ~0, ~0, 0, 1, 2, 3, 4, 5, ~0, ~0, 6 }; 152196c5ddc4Srjs assert(I->src[2].swizzle < 13); 152296c5ddc4Srjs unsigned lanes2 = lanes2_table[I->src[2].swizzle]; 152396c5ddc4Srjs assert(lanes2 < 8); 152496c5ddc4Srjs unsigned not1 = I->src[1].neg; 152596c5ddc4Srjs assert(not1 < 2); 152696c5ddc4Srjs static uint8_t not_result_table[] = { 1, 0 }; 152796c5ddc4Srjs unsigned not_result = not_result_table[I->not_result]; 152896c5ddc4Srjs assert(not_result < 2); 152996c5ddc4Srjs if ((lanes2 == 0) || (lanes2 == 1) || (lanes2 == 2) || (lanes2 == 3)) { 153096c5ddc4Srjs unsigned derived_9 = 0; 153196c5ddc4Srjs if (lanes2 == 0) derived_9 = 0; 153296c5ddc4Srjs else if (lanes2 == 1) derived_9 = 1; 153396c5ddc4Srjs else if (lanes2 == 2) derived_9 = 2; 153496c5ddc4Srjs else if (lanes2 == 3) derived_9 = 3; 153596c5ddc4Srjs else unreachable("No pattern match at pos 9"); 153696c5ddc4Srjs 153796c5ddc4Srjs return 0x300800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9); 153896c5ddc4Srjs } else if ((lanes2 == 4) || (lanes2 == 5) || (lanes2 == 6)) { 153996c5ddc4Srjs unsigned derived_9 = 0; 154096c5ddc4Srjs if (lanes2 == 4) derived_9 = 1; 154196c5ddc4Srjs else if (lanes2 == 5) derived_9 = 2; 154296c5ddc4Srjs else if (lanes2 == 6) derived_9 = 3; 154396c5ddc4Srjs else unreachable("No pattern match at pos 9"); 154496c5ddc4Srjs 154596c5ddc4Srjs return 0x301800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9); 154696c5ddc4Srjs } else { 154796c5ddc4Srjs unreachable("No matching state found in fma_rshift_and_v2i16"); 154896c5ddc4Srjs } 154996c5ddc4Srjs} 155096c5ddc4Srjs 155196c5ddc4Srjsstatic inline unsigned 
155296c5ddc4Srjsbi_pack_fma_rshift_and_v4i8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 155396c5ddc4Srjs{ 155496c5ddc4Srjs static uint8_t lanes2_table[] = { ~0, 0, ~0, ~0, 1, 2, 3, 4, ~0, ~0, ~0, ~0, ~0 }; 155596c5ddc4Srjs assert(I->src[2].swizzle < 13); 155696c5ddc4Srjs unsigned lanes2 = lanes2_table[I->src[2].swizzle]; 155796c5ddc4Srjs assert(lanes2 < 8); 155896c5ddc4Srjs unsigned not1 = I->src[1].neg; 155996c5ddc4Srjs assert(not1 < 2); 156096c5ddc4Srjs static uint8_t not_result_table[] = { 1, 0 }; 156196c5ddc4Srjs unsigned not_result = not_result_table[I->not_result]; 156296c5ddc4Srjs assert(not_result < 2); 156396c5ddc4Srjs if (lanes2 != 0) { 156496c5ddc4Srjs unsigned derived_9 = 0; 156596c5ddc4Srjs if (lanes2 == 1) derived_9 = 0; 156696c5ddc4Srjs else if (lanes2 == 2) derived_9 = 1; 156796c5ddc4Srjs else if (lanes2 == 3) derived_9 = 2; 156896c5ddc4Srjs else if (lanes2 == 4) derived_9 = 3; 156996c5ddc4Srjs else unreachable("No pattern match at pos 9"); 157096c5ddc4Srjs 157196c5ddc4Srjs return 0x300000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9); 157296c5ddc4Srjs } else if (lanes2 == 0) { 157396c5ddc4Srjs return 0x301800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15); 157496c5ddc4Srjs } else { 157596c5ddc4Srjs unreachable("No matching state found in fma_rshift_and_v4i8"); 157696c5ddc4Srjs } 157796c5ddc4Srjs} 157896c5ddc4Srjs 157996c5ddc4Srjsstatic inline unsigned 158096c5ddc4Srjsbi_pack_fma_rshift_double_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 158196c5ddc4Srjs{ 158296c5ddc4Srjs unsigned bytes2 = I->bytes2; 158396c5ddc4Srjs assert(bytes2 < 2); 158496c5ddc4Srjs static uint8_t lane2_table[] = { ~0, ~0, ~0, ~0, 0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0 }; 158596c5ddc4Srjs assert(I->src[2].swizzle < 13); 
158696c5ddc4Srjs unsigned lane2 = lane2_table[I->src[2].swizzle]; 158796c5ddc4Srjs assert(lane2 < 2); 158896c5ddc4Srjs unsigned result_word = I->result_word; 158996c5ddc4Srjs assert(result_word < 2); 159096c5ddc4Srjs return 0x33d000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (bytes2 << 9) | (lane2 << 10) | (result_word << 11); 159196c5ddc4Srjs} 159296c5ddc4Srjs 159396c5ddc4Srjsstatic inline unsigned 159496c5ddc4Srjsbi_pack_fma_rshift_or_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 159596c5ddc4Srjs{ 159696c5ddc4Srjs static uint8_t lane2_table[] = { ~0, ~0, ~0, ~0, 0, 1, 2, 3, ~0, ~0, ~0, ~0, ~0 }; 159796c5ddc4Srjs assert(I->src[2].swizzle < 13); 159896c5ddc4Srjs unsigned lane2 = lane2_table[I->src[2].swizzle]; 159996c5ddc4Srjs assert(lane2 < 4); 160096c5ddc4Srjs static uint8_t not1_table[] = { 1, 0 }; 160196c5ddc4Srjs unsigned not1 = not1_table[I->src[1].neg]; 160296c5ddc4Srjs assert(not1 < 2); 160396c5ddc4Srjs unsigned not_result = I->not_result; 160496c5ddc4Srjs assert(not_result < 2); 160596c5ddc4Srjs return 0x303000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (lane2 << 9) | (not1 << 14) | (not_result << 15); 160696c5ddc4Srjs} 160796c5ddc4Srjs 160896c5ddc4Srjsstatic inline unsigned 160996c5ddc4Srjsbi_pack_fma_rshift_or_v2i16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 161096c5ddc4Srjs{ 161196c5ddc4Srjs static uint8_t lanes2_table[] = { ~0, ~0, ~0, ~0, 0, 1, 2, 3, 4, 5, ~0, ~0, 6 }; 161296c5ddc4Srjs assert(I->src[2].swizzle < 13); 161396c5ddc4Srjs unsigned lanes2 = lanes2_table[I->src[2].swizzle]; 161496c5ddc4Srjs assert(lanes2 < 8); 161596c5ddc4Srjs static uint8_t not1_table[] = { 1, 0 }; 161696c5ddc4Srjs unsigned not1 = not1_table[I->src[1].neg]; 161796c5ddc4Srjs assert(not1 < 2); 161896c5ddc4Srjs unsigned not_result = I->not_result; 161996c5ddc4Srjs assert(not_result < 2); 
162096c5ddc4Srjs if ((lanes2 == 0) || (lanes2 == 1) || (lanes2 == 2) || (lanes2 == 3)) { 162196c5ddc4Srjs unsigned derived_9 = 0; 162296c5ddc4Srjs if (lanes2 == 0) derived_9 = 0; 162396c5ddc4Srjs else if (lanes2 == 1) derived_9 = 1; 162496c5ddc4Srjs else if (lanes2 == 2) derived_9 = 2; 162596c5ddc4Srjs else if (lanes2 == 3) derived_9 = 3; 162696c5ddc4Srjs else unreachable("No pattern match at pos 9"); 162796c5ddc4Srjs 162896c5ddc4Srjs return 0x302800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9); 162996c5ddc4Srjs } else if ((lanes2 == 4) || (lanes2 == 5) || (lanes2 == 6)) { 163096c5ddc4Srjs unsigned derived_9 = 0; 163196c5ddc4Srjs if (lanes2 == 4) derived_9 = 1; 163296c5ddc4Srjs else if (lanes2 == 5) derived_9 = 2; 163396c5ddc4Srjs else if (lanes2 == 6) derived_9 = 3; 163496c5ddc4Srjs else unreachable("No pattern match at pos 9"); 163596c5ddc4Srjs 163696c5ddc4Srjs return 0x303800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9); 163796c5ddc4Srjs } else { 163896c5ddc4Srjs unreachable("No matching state found in fma_rshift_or_v2i16"); 163996c5ddc4Srjs } 164096c5ddc4Srjs} 164196c5ddc4Srjs 164296c5ddc4Srjsstatic inline unsigned 164396c5ddc4Srjsbi_pack_fma_rshift_or_v4i8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 164496c5ddc4Srjs{ 164596c5ddc4Srjs static uint8_t lanes2_table[] = { ~0, 0, ~0, ~0, 1, 2, 3, 4, ~0, ~0, ~0, ~0, ~0 }; 164696c5ddc4Srjs assert(I->src[2].swizzle < 13); 164796c5ddc4Srjs unsigned lanes2 = lanes2_table[I->src[2].swizzle]; 164896c5ddc4Srjs assert(lanes2 < 8); 164996c5ddc4Srjs static uint8_t not1_table[] = { 1, 0 }; 165096c5ddc4Srjs unsigned not1 = not1_table[I->src[1].neg]; 165196c5ddc4Srjs assert(not1 < 2); 165296c5ddc4Srjs unsigned not_result = I->not_result; 165396c5ddc4Srjs assert(not_result < 2); 165496c5ddc4Srjs if (lanes2 != 0) { 165596c5ddc4Srjs unsigned 
derived_9 = 0; 165696c5ddc4Srjs if (lanes2 == 1) derived_9 = 0; 165796c5ddc4Srjs else if (lanes2 == 2) derived_9 = 1; 165896c5ddc4Srjs else if (lanes2 == 3) derived_9 = 2; 165996c5ddc4Srjs else if (lanes2 == 4) derived_9 = 3; 166096c5ddc4Srjs else unreachable("No pattern match at pos 9"); 166196c5ddc4Srjs 166296c5ddc4Srjs return 0x302000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9); 166396c5ddc4Srjs } else if (lanes2 == 0) { 166496c5ddc4Srjs return 0x303800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15); 166596c5ddc4Srjs } else { 166696c5ddc4Srjs unreachable("No matching state found in fma_rshift_or_v4i8"); 166796c5ddc4Srjs } 166896c5ddc4Srjs} 166996c5ddc4Srjs 167096c5ddc4Srjsstatic inline unsigned 167196c5ddc4Srjsbi_pack_fma_rshift_xor_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 167296c5ddc4Srjs{ 167396c5ddc4Srjs static uint8_t lane2_table[] = { ~0, ~0, ~0, ~0, 0, 1, 2, 3, ~0, ~0, ~0, ~0, ~0 }; 167496c5ddc4Srjs assert(I->src[2].swizzle < 13); 167596c5ddc4Srjs unsigned lane2 = lane2_table[I->src[2].swizzle]; 167696c5ddc4Srjs assert(lane2 < 4); 167796c5ddc4Srjs unsigned not_result = I->not_result; 167896c5ddc4Srjs assert(not_result < 2); 167996c5ddc4Srjs return 0x321000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (lane2 << 9) | (not_result << 13); 168096c5ddc4Srjs} 168196c5ddc4Srjs 168296c5ddc4Srjsstatic inline unsigned 168396c5ddc4Srjsbi_pack_fma_rshift_xor_v2i16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 168496c5ddc4Srjs{ 168596c5ddc4Srjs static uint8_t lanes2_table[] = { ~0, ~0, ~0, ~0, 0, 1, 2, 3, 4, 5, ~0, ~0, 6 }; 168696c5ddc4Srjs assert(I->src[2].swizzle < 13); 168796c5ddc4Srjs unsigned lanes2 = lanes2_table[I->src[2].swizzle]; 168896c5ddc4Srjs assert(lanes2 < 8); 168996c5ddc4Srjs unsigned 
not_result = I->not_result; 169096c5ddc4Srjs assert(not_result < 2); 169196c5ddc4Srjs if ((lanes2 == 0) || (lanes2 == 1) || (lanes2 == 2) || (lanes2 == 3)) { 169296c5ddc4Srjs unsigned derived_9 = 0; 169396c5ddc4Srjs if (lanes2 == 0) derived_9 = 0; 169496c5ddc4Srjs else if (lanes2 == 1) derived_9 = 1; 169596c5ddc4Srjs else if (lanes2 == 2) derived_9 = 2; 169696c5ddc4Srjs else if (lanes2 == 3) derived_9 = 3; 169796c5ddc4Srjs else unreachable("No pattern match at pos 9"); 169896c5ddc4Srjs 169996c5ddc4Srjs return 0x320800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not_result << 13) | (derived_9 << 9); 170096c5ddc4Srjs } else if ((lanes2 == 4) || (lanes2 == 5) || (lanes2 == 6)) { 170196c5ddc4Srjs unsigned derived_9 = 0; 170296c5ddc4Srjs if (lanes2 == 4) derived_9 = 1; 170396c5ddc4Srjs else if (lanes2 == 5) derived_9 = 2; 170496c5ddc4Srjs else if (lanes2 == 6) derived_9 = 3; 170596c5ddc4Srjs else unreachable("No pattern match at pos 9"); 170696c5ddc4Srjs 170796c5ddc4Srjs return 0x321800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not_result << 13) | (derived_9 << 9); 170896c5ddc4Srjs } else { 170996c5ddc4Srjs unreachable("No matching state found in fma_rshift_xor_v2i16"); 171096c5ddc4Srjs } 171196c5ddc4Srjs} 171296c5ddc4Srjs 171396c5ddc4Srjsstatic inline unsigned 171496c5ddc4Srjsbi_pack_fma_rshift_xor_v4i8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 171596c5ddc4Srjs{ 171696c5ddc4Srjs static uint8_t lanes2_table[] = { ~0, 0, ~0, ~0, 1, 2, 3, 4, ~0, ~0, ~0, ~0, ~0 }; 171796c5ddc4Srjs assert(I->src[2].swizzle < 13); 171896c5ddc4Srjs unsigned lanes2 = lanes2_table[I->src[2].swizzle]; 171996c5ddc4Srjs assert(lanes2 < 8); 172096c5ddc4Srjs unsigned not_result = I->not_result; 172196c5ddc4Srjs assert(not_result < 2); 172296c5ddc4Srjs if (lanes2 != 0) { 172396c5ddc4Srjs unsigned derived_9 = 0; 172496c5ddc4Srjs if (lanes2 == 1) derived_9 = 0; 172596c5ddc4Srjs else if (lanes2 == 2) derived_9 
= 1; 172696c5ddc4Srjs else if (lanes2 == 3) derived_9 = 2; 172796c5ddc4Srjs else if (lanes2 == 4) derived_9 = 3; 172896c5ddc4Srjs else unreachable("No pattern match at pos 9"); 172996c5ddc4Srjs 173096c5ddc4Srjs return 0x320000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not_result << 13) | (derived_9 << 9); 173196c5ddc4Srjs } else if (lanes2 == 0) { 173296c5ddc4Srjs return 0x321800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not_result << 13); 173396c5ddc4Srjs } else { 173496c5ddc4Srjs unreachable("No matching state found in fma_rshift_xor_v4i8"); 173596c5ddc4Srjs } 173696c5ddc4Srjs} 173796c5ddc4Srjs 173896c5ddc4Srjsstatic inline unsigned 173996c5ddc4Srjsbi_pack_fma_s16_to_s32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 174096c5ddc4Srjs{ 174196c5ddc4Srjs static uint8_t lane0_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 174296c5ddc4Srjs assert(I->src[0].swizzle < 13); 174396c5ddc4Srjs unsigned lane0 = lane0_table[I->src[0].swizzle]; 174496c5ddc4Srjs assert(lane0 < 2); 174596c5ddc4Srjs return 0x700cc0 | (src0 << 0) | (lane0 << 4); 174696c5ddc4Srjs} 174796c5ddc4Srjs 174896c5ddc4Srjsstatic inline unsigned 174996c5ddc4Srjsbi_pack_fma_s8_to_s32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 175096c5ddc4Srjs{ 175196c5ddc4Srjs static uint8_t lane0_table[] = { ~0, ~0, ~0, ~0, 0, 1, 2, 3, ~0, ~0, ~0, ~0, ~0 }; 175296c5ddc4Srjs assert(I->src[0].swizzle < 13); 175396c5ddc4Srjs unsigned lane0 = lane0_table[I->src[0].swizzle]; 175496c5ddc4Srjs assert(lane0 < 4); 175596c5ddc4Srjs return 0x700b40 | (src0 << 0) | (lane0 << 4); 175696c5ddc4Srjs} 175796c5ddc4Srjs 175896c5ddc4Srjsstatic inline unsigned 175996c5ddc4Srjsbi_pack_fma_seg_add(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 176096c5ddc4Srjs{ 
176196c5ddc4Srjs static uint8_t seg_table[] = { ~0, 2, 0, ~0, ~0, 7 }; 176296c5ddc4Srjs assert(I->seg < 6); 176396c5ddc4Srjs unsigned seg = seg_table[I->seg]; 176496c5ddc4Srjs assert(seg < 8); 176596c5ddc4Srjs unsigned preserve_null = I->preserve_null; 176696c5ddc4Srjs assert(preserve_null < 2); 176796c5ddc4Srjs return 0x701500 | (src0 << 0) | (seg << 3) | (preserve_null << 7); 176896c5ddc4Srjs} 176996c5ddc4Srjs 177096c5ddc4Srjsstatic inline unsigned 177196c5ddc4Srjsbi_pack_fma_shaddxl_i64(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 177296c5ddc4Srjs{ 177396c5ddc4Srjs unsigned shift = I->shift; 177496c5ddc4Srjs assert(shift < 0x8); 177596c5ddc4Srjs return 0x70e600 | (src0 << 0) | (src1 << 3) | (shift << 6); 177696c5ddc4Srjs} 177796c5ddc4Srjs 177896c5ddc4Srjsstatic inline unsigned 177996c5ddc4Srjsbi_pack_fma_shaddxl_s32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 178096c5ddc4Srjs{ 178196c5ddc4Srjs static uint8_t lane1_table[] = { 0, 2, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 178296c5ddc4Srjs assert(I->src[1].swizzle < 13); 178396c5ddc4Srjs unsigned lane1 = lane1_table[I->src[1].swizzle]; 178496c5ddc4Srjs assert(lane1 < 4); 178596c5ddc4Srjs unsigned shift = I->shift; 178696c5ddc4Srjs assert(shift < 0x8); 178796c5ddc4Srjs return 0x70e800 | (src0 << 0) | (src1 << 3) | (lane1 << 9) | (shift << 6); 178896c5ddc4Srjs} 178996c5ddc4Srjs 179096c5ddc4Srjsstatic inline unsigned 179196c5ddc4Srjsbi_pack_fma_shaddxl_u32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 179296c5ddc4Srjs{ 179396c5ddc4Srjs static uint8_t lane1_table[] = { 0, 2, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 179496c5ddc4Srjs assert(I->src[1].swizzle < 13); 179596c5ddc4Srjs unsigned lane1 = lane1_table[I->src[1].swizzle]; 179696c5ddc4Srjs 
assert(lane1 < 4); 179796c5ddc4Srjs unsigned shift = I->shift; 179896c5ddc4Srjs assert(shift < 0x8); 179996c5ddc4Srjs return 0x70e000 | (src0 << 0) | (src1 << 3) | (lane1 << 9) | (shift << 6); 180096c5ddc4Srjs} 180196c5ddc4Srjs 180296c5ddc4Srjsstatic inline unsigned 180396c5ddc4Srjsbi_pack_fma_u16_to_u32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 180496c5ddc4Srjs{ 180596c5ddc4Srjs static uint8_t lane0_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 180696c5ddc4Srjs assert(I->src[0].swizzle < 13); 180796c5ddc4Srjs unsigned lane0 = lane0_table[I->src[0].swizzle]; 180896c5ddc4Srjs assert(lane0 < 2); 180996c5ddc4Srjs return 0x700cc8 | (src0 << 0) | (lane0 << 4); 181096c5ddc4Srjs} 181196c5ddc4Srjs 181296c5ddc4Srjsstatic inline unsigned 181396c5ddc4Srjsbi_pack_fma_u8_to_u32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 181496c5ddc4Srjs{ 181596c5ddc4Srjs static uint8_t lane0_table[] = { ~0, ~0, ~0, ~0, 0, 1, 2, 3, ~0, ~0, ~0, ~0, ~0 }; 181696c5ddc4Srjs assert(I->src[0].swizzle < 13); 181796c5ddc4Srjs unsigned lane0 = lane0_table[I->src[0].swizzle]; 181896c5ddc4Srjs assert(lane0 < 4); 181996c5ddc4Srjs return 0x700b48 | (src0 << 0) | (lane0 << 4); 182096c5ddc4Srjs} 182196c5ddc4Srjs 182296c5ddc4Srjsstatic inline unsigned 182396c5ddc4Srjsbi_pack_fma_v2f32_to_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 182496c5ddc4Srjs{ 182596c5ddc4Srjs unsigned abs0 = I->src[0].abs; 182696c5ddc4Srjs assert(abs0 < 2); 182796c5ddc4Srjs unsigned abs1 = I->src[1].abs; 182896c5ddc4Srjs assert(abs1 < 2); 182996c5ddc4Srjs unsigned neg0 = I->src[0].neg; 183096c5ddc4Srjs assert(neg0 < 2); 183196c5ddc4Srjs unsigned neg1 = I->src[1].neg; 183296c5ddc4Srjs assert(neg1 < 2); 183396c5ddc4Srjs unsigned clamp = I->clamp; 
183496c5ddc4Srjs assert(clamp < 4); 183596c5ddc4Srjs unsigned round = I->round; 183696c5ddc4Srjs assert(round < 8); 183796c5ddc4Srjs unsigned derived_6 = 0; 183896c5ddc4Srjs if ((abs0 == 0) && (abs1 == 0)) derived_6 = 0; 183996c5ddc4Srjs else if ((abs0 == 1) && (abs1 == 1)) derived_6 = 1; 184096c5ddc4Srjs else unreachable("No pattern match at pos 6"); 184196c5ddc4Srjs 184296c5ddc4Srjs unsigned derived_7 = 0; 184396c5ddc4Srjs if ((neg0 == 0) && (neg1 == 0)) derived_7 = 0; 184496c5ddc4Srjs else if ((neg0 == 1) && (neg1 == 1)) derived_7 = 1; 184596c5ddc4Srjs else unreachable("No pattern match at pos 7"); 184696c5ddc4Srjs 184796c5ddc4Srjs return 0x6e8000 | (src0 << 0) | (src1 << 3) | (clamp << 8) | (round << 10) | (derived_6 << 6) | (derived_7 << 7); 184896c5ddc4Srjs} 184996c5ddc4Srjs

/*
 * Encodes bi_pack_fma_vn_asst1_f16: base word 0x6eb000 with src0/src1/src2 at
 * bits 0/3/6 and the I->h, I->l and I->src[2].neg flags at bits 9, 10 and 11.
 * Each flag is asserted to be 0 or 1. src3 is not used.
 */
static inline unsigned
bi_pack_fma_vn_asst1_f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    unsigned h = I->h;
    assert(h < 2);
    unsigned l = I->l;
    assert(l < 2);
    unsigned neg2 = I->src[2].neg;
    assert(neg2 < 2);
    return 0x6eb000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (h << 9) | (l << 10) | (neg2 << 11);
}

/*
 * Encodes bi_pack_fma_vn_asst1_f32: base word 0x27c000 with src0..src3 at
 * bits 0/3/6/9 and I->src[2].neg (asserted 0 or 1) at bit 12.
 */
static inline unsigned
bi_pack_fma_vn_asst1_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    unsigned neg2 = I->src[2].neg;
    assert(neg2 < 2);
    return 0x27c000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (neg2 << 12);
}

/*
 * Encodes bi_pack_add_acmpstore_i32: base word 0x648c0 with src0/src1 at
 * bits 0/3 and I->seg (asserted 0 or 1) at bit 9. src2 and src3 are not used.
 */
static inline unsigned
bi_pack_add_acmpstore_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    unsigned seg = I->seg;
    assert(seg < 2);
    return 0x648c0 | (src0 << 0) | (src1 << 3) | (seg << 9);
}

/*
 * Encodes bi_pack_add_acmpstore_i64: same layout as the i32 form (src0/src1 at
 * bits 0/3, I->seg at bit 9) but with base word 0x64900.
 */
static inline unsigned
bi_pack_add_acmpstore_i64(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    unsigned seg = I->seg;
    assert(seg < 2);
    return 0x64900 | (src0 << 0) | (src1 << 3) | (seg << 9);
}

/*
 * Encodes bi_pack_add_acmpxchg_i32: base word 0x644c0 with src0/src1 at
 * bits 0/3 and I->seg (asserted 0 or 1) at bit 9. src2 and src3 are not used.
 */
static inline unsigned
bi_pack_add_acmpxchg_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    unsigned seg = I->seg;
    assert(seg < 2);
    return 0x644c0 | (src0 << 0) | (src1 << 3) | (seg << 9);
}

/*
 * Encodes bi_pack_add_acmpxchg_i64: same layout as the i32 form (src0/src1 at
 * bits 0/3, I->seg at bit 9) but with base word 0x64500.
 */
static inline unsigned
bi_pack_add_acmpxchg_i64(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    unsigned seg = I->seg;
    assert(seg < 2);
    return 0x64500 | (src0 << 0) | (src1 << 3) | (seg << 9);
}

/*
 * Encodes bi_pack_add_atest: base word 0xc8f00 with src0/src1 at bits 0/3 and
 * a 2-bit "widen1" field at bit 6. widen1 is looked up from I->src[1].swizzle;
 * swizzle values mapping to ~0 are rejected by the assert.
 * src0 and src1 must each be a value whose bit is set in mask 0xf7
 * (0..7 except 3).
 */
static inline unsigned
bi_pack_add_atest(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    assert((1 << src0) & 0xf7);
    assert((1 << src1) & 0xf7);
    /* Read-only swizzle -> field translation; ~0 marks unsupported swizzles. */
    static const uint8_t widen1_table[] = { 2, 1, ~0, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
    assert(I->src[1].swizzle < 13);
    unsigned widen1 = widen1_table[I->src[1].swizzle];
    assert(widen1 < 4);
    return 0xc8f00 | (src0 << 0) | (src1 << 3) | (widen1 << 6);
}

/*
 * Encodes bi_pack_add_atom_cx: base word 0xd7400 with src0/src1/src2 at
 * bits 0/3/6. No instruction modifiers are read; I and src3 are not used.
 */
static inline unsigned
bi_pack_add_atom_cx(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{

    return 0xd7400 | (src0 << 0) | (src1 << 3) | (src2 << 6);
}

/*
 * Encodes bi_pack_add_axchg_i32: base word 0x640c0 with src0/src1 at
 * bits 0/3 and I->seg (asserted 0 or 1) at bit 9. src2 and src3 are not used.
 */
static inline unsigned
bi_pack_add_axchg_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    unsigned seg = I->seg;
    assert(seg < 2);
    return 0x640c0 | (src0 << 0) | (src1 << 3) | (seg << 9);
}

/*
 * Encodes bi_pack_add_axchg_i64: same layout as the i32 form (src0/src1 at
 * bits 0/3, I->seg at bit 9) but with base word 0x64100.
 */
static inline unsigned
bi_pack_add_axchg_i64(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    unsigned seg = I->seg;
    assert(seg < 2);
    return 0x64100 | (src0 << 0) | (src1 << 3) | (seg << 9);
}

/*
 * Encodes bi_pack_add_barrier: always returns the constant word 0xd7874.
 * None of the arguments are read.
 */
static inline unsigned
bi_pack_add_barrier(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{

    return 0xd7874;
}

/*
 * Encodes bi_pack_add_blend: base word 0xca800 with src0/src1/src2 at
 * bits 0/3/6. src1 and src2 must each be a value whose bit is set in mask
 * 0xf7 (0..7 except 3). I and src3 are not used.
 */
static inline unsigned
bi_pack_add_blend(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    assert((1 << src1) & 0xf7);
    assert((1 << src2) & 0xf7);
    return 0xca800 | (src0 << 0) | (src1 << 3) | (src2 << 6);
}

/*
 * Encodes bi_pack_add_branch_f16: base word 0x68000 with src0/src1/src2 at
 * bits 0/3/6, a derived field at bit 9 and another at bit 12.
 *
 * widen0/widen1 are looked up from the swizzles of sources 0 and 1, cmpf is
 * taken directly from I->cmpf (asserted < 8). Before deriving the fields the
 * operands may be swapped into the order the pattern tables expect; a swap
 * also exchanges cmpf 1 <-> 4 and 2 <-> 5. The derived fields are then chosen
 * from the (widen0, widen1, src0 vs. src1, cmpf) combination; combinations
 * with no matching pattern hit unreachable().
 */
static inline unsigned
bi_pack_add_branch_f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    assert((1 << src2) & 0xf7);
    static const uint8_t widen0_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
    assert(I->src[0].swizzle < 13);
    unsigned widen0 = widen0_table[I->src[0].swizzle];
    assert(widen0 < 4);
    static const uint8_t widen1_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
    assert(I->src[1].swizzle < 13);
    unsigned widen1 = widen1_table[I->src[1].swizzle];
    assert(widen1 < 4);
    unsigned cmpf = I->cmpf;
    assert(cmpf < 8);
    /* Canonicalise operand order; the comparison is mirrored to match. */
    if (((widen0 == 1) && (widen1 == 2)) || ((widen0 == widen1) && (src0 > src1) && ((cmpf == 0) || (cmpf == 1) || (cmpf == 4))) || ((widen0 == widen1) && (src0 < src1) && ((cmpf == 3) || (cmpf == 2) || (cmpf == 5)))) {
        { unsigned temp = src0; src0 = src1; src1 = temp; }
        { unsigned temp = widen0; widen0 = widen1; widen1 = temp; }
        if (cmpf == 4) cmpf = 1;
        else if (cmpf == 5) cmpf = 2;
        else if (cmpf == 1) cmpf = 4;
        else if (cmpf == 2) cmpf = 5;
    }

    unsigned derived_12 = 0;
    if ((widen0 == 1) && (widen1 == 1)) derived_12 = 1;
    else if ((widen0 == 2) && (widen1 == 2)) derived_12 = 2;
    else if ((widen0 == 2) && (widen1 == 1) && ((cmpf == 3) || (cmpf == 2) || (cmpf == 5))) derived_12 = 3;
    else if ((widen0 == 2) && (widen1 == 1) && ((cmpf == 0) || (cmpf == 1) || (cmpf == 4))) derived_12 = 4;
    else unreachable("No pattern match at pos 12");

    unsigned derived_9 = 0;
    if ((widen0 == widen1) && (src0 == src1) && ((cmpf == 1) || (cmpf == 4))) derived_9 = 4;
    else if (((widen0 == 2) && (widen1 == 1) && ((cmpf == 0) || (cmpf == 3))) || ((widen0 == widen1) && (src0 < src1) && (cmpf == 0)) || ((widen0 == widen1) && (src0 >= src1) && (cmpf == 3))) derived_9 = 5;
    else if (((widen0 == 2) && (widen1 == 1) && ((cmpf == 1) || (cmpf == 2))) || ((widen0 == widen1) && (src0 < src1) && (cmpf == 1)) || ((widen0 == widen1) && (src0 >= src1) && (cmpf == 2))) derived_9 = 6;
    else if (((widen0 == 2) && (widen1 == 1) && ((cmpf == 4) || (cmpf == 5))) || ((widen0 == widen1) && (src0 < src1) && (cmpf == 4)) || ((widen0 == widen1) && (src0 >= src1) && (cmpf == 5)) || ((widen0 == widen1) && (src0 == src1) && (cmpf == 0))) derived_9 = 7;
    else unreachable("No pattern match at pos 9");

    return 0x68000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_12 << 12) | (derived_9 << 9);
}

/*
 * Encodes bi_pack_add_branch_f32: base word 0x68000 with src0/src1/src2 at
 * bits 0/3/6, a derived field at bit 9 and another at bit 12.
 *
 * Same scheme as the f16 form, but the patterns require widen0 == 0 after
 * canonicalisation: operands are swapped when only widen1 is 0, or when both
 * are 0 and the src0/src1 order does not suit cmpf. A swap exchanges
 * cmpf 1 <-> 4 and 2 <-> 5. Unmatched combinations hit unreachable().
 */
static inline unsigned
bi_pack_add_branch_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    assert((1 << src2) & 0xf7);
    static const uint8_t widen0_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
    assert(I->src[0].swizzle < 13);
    unsigned widen0 = widen0_table[I->src[0].swizzle];
    assert(widen0 < 4);
    static const uint8_t widen1_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
    assert(I->src[1].swizzle < 13);
    unsigned widen1 = widen1_table[I->src[1].swizzle];
    assert(widen1 < 4);
    unsigned cmpf = I->cmpf;
    assert(cmpf < 8);
    /* Canonicalise operand order; the comparison is mirrored to match. */
    if (((widen0 != 0) && (widen1 == 0)) || ((widen0 == 0) && (widen1 == 0) && (src0 > src1) && ((cmpf == 0) || (cmpf == 1) || (cmpf == 4))) || ((widen0 == 0) && (widen1 == 0) && (src0 < src1) && ((cmpf == 3) || (cmpf == 2) || (cmpf == 5)))) {
        { unsigned temp = src0; src0 = src1; src1 = temp; }
        { unsigned temp = widen0; widen0 = widen1; widen1 = temp; }
        if (cmpf == 4) cmpf = 1;
        else if (cmpf == 5) cmpf = 2;
        else if (cmpf == 1) cmpf = 4;
        else if (cmpf == 2) cmpf = 5;
    }

    unsigned derived_12 = 0;
    if ((widen0 == 0) && (widen1 == 0)) derived_12 = 0;
    else if ((widen0 == 0) && (widen1 == 1)) derived_12 = 5;
    else if ((widen0 == 0) && (widen1 == 2)) derived_12 = 6;
    else unreachable("No pattern match at pos 12");

    unsigned derived_9 = 0;
    if ((widen0 == 0) && (widen1 != 0) && (cmpf == 3)) derived_9 = 1;
    else if ((widen0 == 0) && (widen1 != 0) && (cmpf == 2)) derived_9 = 2;
    else if ((widen0 == 0) && (widen1 != 0) && (cmpf == 5)) derived_9 = 3;
    else if ((widen0 == 0) && (widen1 == 0) && (src0 == src1) && ((cmpf == 1) || (cmpf == 4))) derived_9 = 4;
    else if (((widen0 == 0) && (widen1 != 0) && (cmpf == 0)) || ((widen0 == 0) && (widen1 == 0) && (src0 < src1) && (cmpf == 0)) || ((widen0 == 0) && (widen1 == 0) && (src0 >= src1) && (cmpf == 3))) derived_9 = 5;
    else if (((widen0 == 0) && (widen1 != 0) && (cmpf == 1)) || ((widen0 == 0) && (widen1 == 0) && (src0 < src1) && (cmpf == 1)) || ((widen0 == 0) && (widen1 == 0) && (src0 >= src1) && (cmpf == 2))) derived_9 = 6;
    else if (((widen0 == 0) && (widen1 != 0) && (cmpf == 4)) || ((widen0 == 0) && (widen1 == 0) && (src0 < src1) && (cmpf == 4)) || ((widen0 == 0) && (widen1 == 0) && (src0 >= src1) && (cmpf == 5)) || ((widen0 == 0) && (widen1 == 0) && (src0 == src1) && (cmpf == 0))) derived_9 = 7;
    else unreachable("No pattern match at pos 9");

    return 0x68000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_12 << 12) | (derived_9 << 9);
}

/*
 * Encodes bi_pack_add_branch_i16: base word 0x68000 with src0/src1/src2 at
 * bits 0/3/6, a derived field at bit 9 and another at bit 12.
 *
 * cmpf is translated through a table that accepts only I->cmpf values 0 and 3
 * (mapped to 0 and 1). Operands may be swapped into canonical order; cmpf is
 * left unchanged by the swap. Unmatched combinations hit unreachable().
 */
static inline unsigned
bi_pack_add_branch_i16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    assert((1 << src2) & 0xf7);
    static const uint8_t widen0_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
    assert(I->src[0].swizzle < 13);
    unsigned widen0 = widen0_table[I->src[0].swizzle];
    assert(widen0 < 4);
    static const uint8_t widen1_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
    assert(I->src[1].swizzle < 13);
    unsigned widen1 = widen1_table[I->src[1].swizzle];
    assert(widen1 < 4);
    static const uint8_t cmpf_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0 };
    assert(I->cmpf < 9);
    unsigned cmpf = cmpf_table[I->cmpf];
    assert(cmpf < 2);
    /* Canonicalise operand order. */
    if (((widen0 == 1) && (widen1 == 2)) || ((widen0 == widen1) && (src0 > src1) && (cmpf == 0)) || ((widen0 == widen1) && (src0 < src1) && (cmpf == 1))) {
        { unsigned temp = src0; src0 = src1; src1 = temp; }
        { unsigned temp = widen0; widen0 = widen1; widen1 = temp; }
    }

    unsigned derived_12 = 0;
    if ((widen0 == 1) && (widen1 == 1)) derived_12 = 1;
    else if ((widen0 == 2) && (widen1 == 2)) derived_12 = 2;
    else if ((widen0 == 2) && (widen1 == 1) && (cmpf == 1)) derived_12 = 3;
    else if ((widen0 == 2) && (widen1 == 1) && (cmpf == 0)) derived_12 = 4;
    else unreachable("No pattern match at pos 12");

    unsigned derived_9 = 0;
    if ((widen0 == widen1) && (src0 == src1) && (cmpf == 0)) derived_9 = 1;
    else if (((widen0 == 2) && (widen1 == 1)) || ((widen0 == widen1) && (src0 < src1) && (cmpf == 0)) || ((widen0 == widen1) && (src0 >= src1) && (cmpf == 1))) derived_9 = 4;
    else unreachable("No pattern match at pos 9");

    return 0x68000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_12 << 12) | (derived_9 << 9);
}

/*
 * Encodes bi_pack_add_branch_i32: base word 0x68000 with src0/src1/src2 at
 * bits 0/3/6, a derived field at bit 9 and another at bit 12.
 *
 * cmpf is translated through a table that accepts only I->cmpf values 0 and 3
 * (mapped to 0 and 1). Operands are swapped when their order does not suit
 * cmpf. Only widen0 == widen1 == 0 is encodable; anything else hits
 * unreachable().
 */
static inline unsigned
bi_pack_add_branch_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    assert((1 << src2) & 0xf7);
    static const uint8_t widen0_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
    assert(I->src[0].swizzle < 13);
    unsigned widen0 = widen0_table[I->src[0].swizzle];
    assert(widen0 < 4);
    static const uint8_t widen1_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
    assert(I->src[1].swizzle < 13);
    unsigned widen1 = widen1_table[I->src[1].swizzle];
    assert(widen1 < 4);
    static const uint8_t cmpf_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0 };
    assert(I->cmpf < 9);
    unsigned cmpf = cmpf_table[I->cmpf];
    assert(cmpf < 2);
    /* Canonicalise operand order. */
    if (((src0 > src1) && (cmpf == 0)) || ((src0 < src1) && (cmpf == 1))) {
        { unsigned temp = src0; src0 = src1; src1 = temp; }
        { unsigned temp = widen0; widen0 = widen1; widen1 = temp; }
    }

    unsigned derived_12 = 0;
    if ((widen0 == 0) && (widen1 == 0)) derived_12 = 0;
    else unreachable("No pattern match at pos 12");

    unsigned derived_9 = 0;
    if ((src0 == src1) && (cmpf == 0)) derived_9 = 1;
    else if (((src0 < src1) && (cmpf == 0)) || ((src0 >= src1) && (cmpf == 1))) derived_9 = 4;
    else unreachable("No pattern match at pos 9");

    return 0x68000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_12 << 12) | (derived_9 << 9);
}

/*
 * Encodes bi_pack_add_branch_s16: base word 0x68000 with src0/src1/src2 at
 * bits 0/3/6, a derived field at bit 9 and another at bit 12.
 *
 * cmpf is translated through a table that accepts I->cmpf values 1, 2, 4 and 5
 * (mapped to 0..3). Operands are swapped when (widen0, widen1) == (1, 2) or
 * when the widens match and src0 > src1; a swap exchanges cmpf 0 <-> 2 and
 * 1 <-> 3. Unmatched combinations hit unreachable().
 */
static inline unsigned
bi_pack_add_branch_s16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    assert((1 << src2) & 0xf7);
    static const uint8_t widen0_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
    assert(I->src[0].swizzle < 13);
    unsigned widen0 = widen0_table[I->src[0].swizzle];
    assert(widen0 < 4);
    static const uint8_t widen1_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
    assert(I->src[1].swizzle < 13);
    unsigned widen1 = widen1_table[I->src[1].swizzle];
    assert(widen1 < 4);
    static const uint8_t cmpf_table[] = { ~0, 0, 1, ~0, 2, 3, ~0, ~0, ~0 };
    assert(I->cmpf < 9);
    unsigned cmpf = cmpf_table[I->cmpf];
    assert(cmpf < 4);
    /* Canonicalise operand order; the comparison is mirrored to match. */
    if (((widen0 == 1) && (widen1 == 2)) || ((widen0 == widen1) && (src0 > src1))) {
        { unsigned temp = src0; src0 = src1; src1 = temp; }
        { unsigned temp = widen0; widen0 = widen1; widen1 = temp; }
        if (cmpf == 2) cmpf = 0;
        else if (cmpf == 3) cmpf = 1;
        else if (cmpf == 0) cmpf = 2;
        else if (cmpf == 1) cmpf = 3;
    }

    unsigned derived_12 = 0;
    if ((widen0 == 1) && (widen1 == 1)) derived_12 = 1;
    else if ((widen0 == 2) && (widen1 == 2)) derived_12 = 2;
    else if ((widen0 == 2) && (widen1 == 1)) derived_12 = 4;
    else unreachable("No pattern match at pos 12");

    unsigned derived_9 = 0;
    if (((widen0 == 2) && (widen1 == 1) && (cmpf == 2)) || ((widen0 == widen1) && (src0 < src1) && (cmpf == 2))) derived_9 = 0;
    else if (((widen0 == 2) && (widen1 == 1) && (cmpf == 3)) || ((widen0 == widen1) && (src0 < src1) && (cmpf == 3)) || ((widen0 == widen1) && (src0 == src1) && ((cmpf == 3) || (cmpf == 1)))) derived_9 = 1;
    else if (((widen0 == 2) && (widen1 == 1) && (cmpf == 1)) || ((widen0 == widen1) && (src0 < src1) && (cmpf == 1))) derived_9 = 2;
    else if (((widen0 == 2) && (widen1 == 1) && (cmpf == 0)) || ((widen0 == widen1) && (src0 < src1) && (cmpf == 0))) derived_9 = 3;
    else if ((widen0 == widen1) && (src0 == src1) && ((cmpf == 2) || (cmpf == 0))) derived_9 = 4;
    else unreachable("No pattern match at pos 9");

    return 0x68000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_12 << 12) | (derived_9 << 9);
}

/*
 * Encodes bi_pack_add_branch_s32: base word 0x68000 with src0/src1/src2 at
 * bits 0/3/6, a derived field at bit 9 and another at bit 12.
 *
 * cmpf is translated through a table that accepts I->cmpf values 1, 2, 4 and 5
 * (mapped to 0..3). Operands are swapped when src0 > src1; a swap exchanges
 * cmpf 0 <-> 2 and 1 <-> 3. Only widen0 == widen1 == 0 is encodable; anything
 * else hits unreachable().
 */
static inline unsigned
bi_pack_add_branch_s32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    assert((1 << src2) & 0xf7);
    static const uint8_t widen0_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
    assert(I->src[0].swizzle < 13);
    unsigned widen0 = widen0_table[I->src[0].swizzle];
    assert(widen0 < 4);
    static const uint8_t widen1_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
    assert(I->src[1].swizzle < 13);
    unsigned widen1 = widen1_table[I->src[1].swizzle];
    assert(widen1 < 4);
    static const uint8_t cmpf_table[] = { ~0, 0, 1, ~0, 2, 3, ~0, ~0, ~0 };
    assert(I->cmpf < 9);
    unsigned cmpf = cmpf_table[I->cmpf];
    assert(cmpf < 4);
    /* Canonicalise operand order; the comparison is mirrored to match. */
    if (src0 > src1) {
        { unsigned temp = src0; src0 = src1; src1 = temp; }
        { unsigned temp = widen0; widen0 = widen1; widen1 = temp; }
        if (cmpf == 2) cmpf = 0;
        else if (cmpf == 3) cmpf = 1;
        else if (cmpf == 0) cmpf = 2;
        else if (cmpf == 1) cmpf = 3;
    }

    unsigned derived_12 = 0;
    if ((widen0 == 0) && (widen1 == 0)) derived_12 = 0;
    else unreachable("No pattern match at pos 12");

    unsigned derived_9 = 0;
    if ((src0 < src1) && (cmpf == 2)) derived_9 = 0;
    else if (((src0 < src1) && (cmpf == 3)) || ((src0 == src1) && ((cmpf == 3) || (cmpf == 1)))) derived_9 = 1;
    else if ((src0 < src1) && (cmpf == 1)) derived_9 = 2;
    else if ((src0 < src1) && (cmpf == 0)) derived_9 = 3;
    else if ((src0 == src1) && ((cmpf == 2) || (cmpf == 0))) derived_9 = 4;
    else unreachable("No pattern match at pos 9");

    return 0x68000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_12 << 12) | (derived_9 << 9);
}

/*
 * Encodes bi_pack_add_branch_u16: base word 0x68000 with src0/src1/src2 at
 * bits 0/3/6, a derived field at bit 9 and another at bit 12.
 *
 * cmpf is translated through a table that accepts I->cmpf values 1, 2, 4 and 5
 * (mapped to 0..3). Operands are swapped when (widen0, widen1) == (1, 2) or
 * when the widens match and src0 < src1; a swap exchanges cmpf 0 <-> 2 and
 * 1 <-> 3. Unmatched combinations hit unreachable().
 */
static inline unsigned
bi_pack_add_branch_u16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    assert((1 << src2) & 0xf7);
    static const uint8_t widen0_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
    assert(I->src[0].swizzle < 13);
    unsigned widen0 = widen0_table[I->src[0].swizzle];
    assert(widen0 < 4);
    static const uint8_t widen1_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
    assert(I->src[1].swizzle < 13);
    unsigned widen1 = widen1_table[I->src[1].swizzle];
    assert(widen1 < 4);
    static const uint8_t cmpf_table[] = { ~0, 0, 1, ~0, 2, 3, ~0, ~0, ~0 };
    assert(I->cmpf < 9);
    unsigned cmpf = cmpf_table[I->cmpf];
    assert(cmpf < 4);
    /* Canonicalise operand order; the comparison is mirrored to match. */
    if (((widen0 == 1) && (widen1 == 2)) || ((widen0 == widen1) && (src0 < src1))) {
        { unsigned temp = src0; src0 = src1; src1 = temp; }
        { unsigned temp = widen0; widen0 = widen1; widen1 = temp; }
        if (cmpf == 2) cmpf = 0;
        else if (cmpf == 3) cmpf = 1;
        else if (cmpf == 0) cmpf = 2;
        else if (cmpf == 1) cmpf = 3;
    }

    unsigned derived_12 = 0;
    if ((widen0 == 1) && (widen1 == 1)) derived_12 = 1;
    else if ((widen0 == 2) && (widen1 == 2)) derived_12 = 2;
    else if ((widen0 == 2) && (widen1 == 1)) derived_12 = 3;
    else unreachable("No pattern match at pos 12");

    unsigned derived_9 = 0;
    if (((widen0 == 2) && (widen1 == 1) && (cmpf == 2)) || ((widen0 == widen1) && (src0 >= src1) && (cmpf == 2))) derived_9 = 0;
    else if (((widen0 == 2) && (widen1 == 1) && (cmpf == 3)) || ((widen0 == widen1) && (src0 >= src1) && (cmpf == 3))) derived_9 = 1;
    else if (((widen0 == 2) && (widen1 == 1) && (cmpf == 1)) || ((widen0 == widen1) && (src0 >= src1) && (cmpf == 1))) derived_9 = 2;
    else if (((widen0 == 2) && (widen1 == 1) && (cmpf == 0)) || ((widen0 == widen1) && (src0 >= src1) && (cmpf == 0))) derived_9 = 3;
    else unreachable("No pattern match at pos 9");

    return 0x68000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_12 << 12) | (derived_9 << 9);
}

/*
 * Encodes bi_pack_add_branch_u32: base word 0x68000 with src0/src1/src2 at
 * bits 0/3/6, a derived field at bit 9 and another at bit 12.
 *
 * cmpf is translated through a table that accepts I->cmpf values 1, 2, 4 and 5
 * (mapped to 0..3). Operands are swapped when src0 < src1; a swap exchanges
 * cmpf 0 <-> 2 and 1 <-> 3. Only widen0 == widen1 == 0 is encodable; anything
 * else hits unreachable().
 */
static inline unsigned
bi_pack_add_branch_u32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    assert((1 << src2) & 0xf7);
    static const uint8_t widen0_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
    assert(I->src[0].swizzle < 13);
    unsigned widen0 = widen0_table[I->src[0].swizzle];
    assert(widen0 < 4);
    static const uint8_t widen1_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
    assert(I->src[1].swizzle < 13);
    unsigned widen1 = widen1_table[I->src[1].swizzle];
    assert(widen1 < 4);
    static const uint8_t cmpf_table[] = { ~0, 0, 1, ~0, 2, 3, ~0, ~0, ~0 };
    assert(I->cmpf < 9);
    unsigned cmpf = cmpf_table[I->cmpf];
    assert(cmpf < 4);
    /* Canonicalise operand order; the comparison is mirrored to match. */
    if (src0 < src1) {
        { unsigned temp = src0; src0 = src1; src1 = temp; }
        { unsigned temp = widen0; widen0 = widen1; widen1 = temp; }
        if (cmpf == 2) cmpf = 0;
        else if (cmpf == 3) cmpf = 1;
        else if (cmpf == 0) cmpf = 2;
        else if (cmpf == 1) cmpf = 3;
    }

    unsigned derived_12 = 0;
    if ((widen0 == 0) && (widen1 == 0)) derived_12 = 0;
    else unreachable("No pattern match at pos 12");

    unsigned derived_9 = 0;
    if ((src0 >= src1) && (cmpf == 2)) derived_9 = 0;
    else if ((src0 >= src1) && (cmpf == 3)) derived_9 = 1;
    else if ((src0 >= src1) && (cmpf == 1)) derived_9 = 2;
    else if ((src0 >= src1) && (cmpf == 0)) derived_9 = 3;
    else unreachable("No pattern match at pos 9");

    return 0x68000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_12 << 12) | (derived_9 << 9);
}

/*
 * Encodes bi_pack_add_branchc_i16: base word 0x6f030 with src0 at bit 0,
 * src1 at bit 6 (note: not bit 3), I->combine at bit 10, and two fields
 * derived from the source-0 lane: bit 9 is set to lane0 and bit 3 to its
 * inverse. lane0 is looked up from I->src[0].swizzle (values 0 and 3 only).
 */
static inline unsigned
bi_pack_add_branchc_i16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    assert((1 << src1) & 0xf7);
    unsigned combine = I->combine;
    assert(combine < 2);
    static const uint8_t lane0_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
    assert(I->src[0].swizzle < 13);
    unsigned lane0 = lane0_table[I->src[0].swizzle];
    assert(lane0 < 2);
    unsigned derived_9 = 0;
    if (lane0 == 0) derived_9 = 0;
    else if (lane0 == 1) derived_9 = 1;
    else unreachable("No pattern match at pos 9");

    unsigned derived_3 = 0;
    if (lane0 == 1) derived_3 = 0;
    else if (lane0 == 0) derived_3 = 1;
    else unreachable("No pattern match at pos 3");

    return 0x6f030 | (src0 << 0) | (src1 << 6) | (combine << 10) | (derived_9 << 9) | (derived_3 << 3);
}

/*
 * Encodes bi_pack_add_branchc_i32: base word 0x6f238 with src0 at bit 0,
 * src1 at bit 6 and I->combine (asserted 0 or 1) at bit 10.
 */
static inline unsigned
bi_pack_add_branchc_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    assert((1 << src1) & 0xf7);
    unsigned combine = I->combine;
    assert(combine < 2);
    return 0x6f238 | (src0 << 0) | (src1 << 6) | (combine << 10);
}

/*
 * Encodes bi_pack_add_branchz_f16: base word 0x6f000 with src0 at bit 0,
 * src1 at bit 6 and three derived fields: bit 4 from widen0 (looked up from
 * I->src[0].swizzle; only 1 and 2 are encodable), bit 3 and bit 9 from
 * I->cmpf. cmpf values outside 0..5 hit unreachable().
 */
static inline unsigned
bi_pack_add_branchz_f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    assert((1 << src1) & 0xf7);
    static const uint8_t widen0_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
    assert(I->src[0].swizzle < 13);
    unsigned widen0 = widen0_table[I->src[0].swizzle];
    assert(widen0 < 4);
    unsigned cmpf = I->cmpf;
    assert(cmpf < 8);
    unsigned derived_4 = 0;
    if (widen0 == 2) derived_4 = 1;
    else if (widen0 == 1) derived_4 = 2;
    else unreachable("No pattern match at pos 4");

    unsigned derived_3 = 0;
    if ((cmpf == 3) || (cmpf == 2) || (cmpf == 5)) derived_3 = 0;
    else if ((cmpf == 0) || (cmpf == 1) || (cmpf == 4)) derived_3 = 1;
    else unreachable("No pattern match at pos 3");

    unsigned derived_9 = 0;
    if ((cmpf == 3) || (cmpf == 0)) derived_9 = 5;
    else if ((cmpf == 2) || (cmpf == 1)) derived_9 = 6;
    else if ((cmpf == 5) || (cmpf == 4)) derived_9 = 7;
    else unreachable("No pattern match at pos 9");

    return 0x6f000 | (src0 << 0) | (src1 << 6) | (derived_4 << 4) | (derived_3 << 3) | (derived_9 << 9);
}

/*
 * Encodes bi_pack_add_branchz_f32: base word 0x6f000 with src0 at bit 0,
 * src1 at bit 6 and two fields derived from I->cmpf at bits 3 and 9.
 * cmpf values outside 0..5 hit unreachable().
 */
static inline unsigned
bi_pack_add_branchz_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    assert((1 << src1) & 0xf7);
    unsigned cmpf = I->cmpf;
    assert(cmpf < 8);
    unsigned derived_3 = 0;
    if ((cmpf == 3) || (cmpf == 2) || (cmpf == 5)) derived_3 = 0;
    else if ((cmpf == 0) || (cmpf == 1) || (cmpf == 4)) derived_3 = 1;
    else unreachable("No pattern match at pos 3");

    unsigned derived_9 = 0;
    if ((cmpf == 3) || (cmpf == 0)) derived_9 = 5;
    else if ((cmpf == 2) || (cmpf == 1)) derived_9 = 6;
    else if ((cmpf == 5) || (cmpf == 4)) derived_9 = 7;
    else unreachable("No pattern match at pos 9");

    return 0x6f000 | (src0 << 0) | (src1 << 6) | (derived_3 << 3) | (derived_9 << 9);
}

/*
 * Encodes bi_pack_add_branchz_i16: base word 0x6f800 with src0 at bit 0,
 * src1 at bit 6, a widen0-derived field at bit 4 (only widen0 1 and 2 are
 * encodable) and the inverse of the translated cmpf at bit 3. The cmpf table
 * accepts only I->cmpf values 0 and 3.
 */
static inline unsigned
bi_pack_add_branchz_i16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    assert((1 << src1) & 0xf7);
    static const uint8_t widen0_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
    assert(I->src[0].swizzle < 13);
    unsigned widen0 = widen0_table[I->src[0].swizzle];
    assert(widen0 < 4);
    static const uint8_t cmpf_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0 };
    assert(I->cmpf < 9);
    unsigned cmpf = cmpf_table[I->cmpf];
    assert(cmpf < 2);
    unsigned derived_4 = 0;
    if (widen0 == 2) derived_4 = 1;
    else if (widen0 == 1) derived_4 = 2;
    else unreachable("No pattern match at pos 4");

    unsigned derived_3 = 0;
    if (cmpf == 1) derived_3 = 0;
    else if (cmpf == 0) derived_3 = 1;
    else unreachable("No pattern match at pos 3");

    return 0x6f800 | (src0 << 0) | (src1 << 6) | (derived_4 << 4) | (derived_3 << 3);
}

/*
 * Encodes bi_pack_add_branchz_i32: base word 0x6f800 with src0 at bit 0,
 * src1 at bit 6 and the inverse of the translated cmpf at bit 3. The cmpf
 * table accepts only I->cmpf values 0 and 3.
 */
static inline unsigned
bi_pack_add_branchz_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    assert((1 << src1) & 0xf7);
    static const uint8_t cmpf_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0 };
    assert(I->cmpf < 9);
    unsigned cmpf = cmpf_table[I->cmpf];
    assert(cmpf < 2);
    unsigned derived_3 = 0;
    if (cmpf == 1) derived_3 = 0;
    else if (cmpf == 0) derived_3 = 1;
    else unreachable("No pattern match at pos 3");

    return 0x6f800 | (src0 << 0) | (src1 << 6) | (derived_3 << 3);
}

/*
 * Encodes bi_pack_add_branchz_s16: base word 0x6f008 with src0 at bit 0,
 * src1 at bit 6, a widen0-derived field at bit 4 (only widen0 1 and 2 are
 * encodable) and a cmpf-derived field at bit 9. The cmpf table accepts
 * I->cmpf values 1, 2, 4 and 5.
 */
static inline unsigned
bi_pack_add_branchz_s16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    assert((1 << src1) & 0xf7);
    static const uint8_t widen0_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
    assert(I->src[0].swizzle < 13);
    unsigned widen0 = widen0_table[I->src[0].swizzle];
    assert(widen0 < 4);
    static const uint8_t cmpf_table[] = { ~0, 0, 1, ~0, 2, 3, ~0, ~0, ~0 };
    assert(I->cmpf < 9);
    unsigned cmpf = cmpf_table[I->cmpf];
    assert(cmpf < 4);
    unsigned derived_4 = 0;
    if (widen0 == 2) derived_4 = 1;
    else if (widen0 == 1) derived_4 = 2;
    else unreachable("No pattern match at pos 4");

    unsigned derived_9 = 0;
    if (cmpf == 2) derived_9 = 0;
    else if (cmpf == 3) derived_9 = 1;
    else if (cmpf == 1) derived_9 = 2;
    else if (cmpf == 0) derived_9 = 3;
    else unreachable("No pattern match at pos 9");

    return 0x6f008 | (src0 << 0) | (src1 << 6) | (derived_4 << 4) | (derived_9 << 9);
}

/*
 * Encodes bi_pack_add_branchz_s32: base word 0x6f008 with src0 at bit 0,
 * src1 at bit 6 and a cmpf-derived field at bit 9. The cmpf table accepts
 * I->cmpf values 1, 2, 4 and 5.
 */
static inline unsigned
bi_pack_add_branchz_s32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    assert((1 << src1) & 0xf7);
    static const uint8_t cmpf_table[] = { ~0, 0, 1, ~0, 2, 3, ~0, ~0, ~0 };
    assert(I->cmpf < 9);
    unsigned cmpf = cmpf_table[I->cmpf];
    assert(cmpf < 4);
    unsigned derived_9 = 0;
    if (cmpf == 2) derived_9 = 0;
    else if (cmpf == 3) derived_9 = 1;
    else if (cmpf == 1) derived_9 = 2;
    else if (cmpf == 0) derived_9 = 3;
    else unreachable("No pattern match at pos 9");

    return 0x6f008 | (src0 << 0) | (src1 << 6) | (derived_9 << 9);
}

/*
 * Encodes bi_pack_add_branchz_u16: identical field layout to the s16 form
 * (widen0-derived field at bit 4, cmpf-derived field at bit 9) but with base
 * word 0x6f000.
 */
static inline unsigned
bi_pack_add_branchz_u16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    assert((1 << src1) & 0xf7);
    static const uint8_t widen0_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
    assert(I->src[0].swizzle < 13);
    unsigned widen0 = widen0_table[I->src[0].swizzle];
    assert(widen0 < 4);
    static const uint8_t cmpf_table[] = { ~0, 0, 1, ~0, 2, 3, ~0, ~0, ~0 };
    assert(I->cmpf < 9);
    unsigned cmpf = cmpf_table[I->cmpf];
    assert(cmpf < 4);
    unsigned derived_4 = 0;
    if (widen0 == 2) derived_4 = 1;
    else if (widen0 == 1) derived_4 = 2;
    else unreachable("No pattern match at pos 4");

    unsigned derived_9 = 0;
    if (cmpf == 2) derived_9 = 0;
    else if (cmpf == 3) derived_9 = 1;
    else if (cmpf == 1) derived_9 = 2;
    else if (cmpf == 0) derived_9 = 3;
    else unreachable("No pattern match at pos 9");

    return 0x6f000 | (src0 << 0) | (src1 << 6) | (derived_4 << 4) | (derived_9 << 9);
}

/*
 * Encodes bi_pack_add_branchz_u32: identical field layout to the s32 form
 * (cmpf-derived field at bit 9) but with base word 0x6f000.
 */
static inline unsigned
bi_pack_add_branchz_u32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    assert((1 << src1) & 0xf7);
    static const uint8_t cmpf_table[] = { ~0, 0, 1, ~0, 2, 3, ~0, ~0, ~0 };
    assert(I->cmpf < 9);
    unsigned cmpf = cmpf_table[I->cmpf];
    assert(cmpf < 4);
    unsigned derived_9 = 0;
    if (cmpf == 2) derived_9 = 0;
    else if (cmpf == 3) derived_9 = 1;
    else if (cmpf == 1) derived_9 = 2;
    else if (cmpf == 0) derived_9 = 3;
    else unreachable("No pattern match at pos 9");

    return 0x6f000 | (src0 << 0) | (src1 << 6) | (derived_9 << 9);
}

/*
 * Encodes bi_pack_add_branch_diverg: base word 0x6f83c with src0 at bit 6.
 * src0 must be a value whose bit is set in mask 0xf7 (0..7 except 3).
 * I and src1..src3 are not used.
 */
static inline unsigned
bi_pack_add_branch_diverg(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    assert((1 << src0) & 0xf7);
    return 0x6f83c | (src0 << 6);
}

/*
 * Encodes bi_pack_add_branch_lowbits_f32: base word 0x6fa38 with src0 at
 * bit 0 and src1 at bit 6. src1 must be a value whose bit is set in mask 0xf7
 * (0..7 except 3). I, src2 and src3 are not used.
 */
static inline unsigned
bi_pack_add_branch_lowbits_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    assert((1 << src1) & 0xf7);
    return 0x6fa38 | (src0 << 0) | (src1 << 6);
}

/*
 * Encodes bi_pack_add_branch_no_diverg: base word 0x6fa34 with src0 at bit 6.
 * src0 must be a value whose bit is set in mask 0xf7 (0..7 except 3).
 * I and src1..src3 are not used.
 */
static inline unsigned
bi_pack_add_branch_no_diverg(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    assert((1 << src0) & 0xf7);
    return 0x6fa34 | (src0 << 6);
}

/*
 * Encodes bi_pack_add_clper_v6_i32: base word 0x3f0c0 with src0 at bit 0 and
 * src1 at bit 3. src0 must be a value whose bit is set in mask 0x7 (0, 1
 * or 2). I, src2 and src3 are not used.
 */
static inline unsigned
bi_pack_add_clper_v6_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    assert((1 << src0) & 0x7);
    return 0x3f0c0 | (src0 << 0) | (src1 << 3);
}

250596c5ddc4Srjsstatic inline unsigned 250696c5ddc4Srjsbi_pack_add_clper_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 250796c5ddc4Srjs{ 250896c5ddc4Srjs assert((1 << src0) & 0x7); 250996c5ddc4Srjs unsigned lane_op = I->lane_op; 251096c5ddc4Srjs assert(lane_op < 4); 251196c5ddc4Srjs unsigned subgroup = I->subgroup; 251296c5ddc4Srjs assert(subgroup < 4); 251396c5ddc4Srjs unsigned inactive_result = I->inactive_result; 251496c5ddc4Srjs assert(inactive_result < 16); 
251596c5ddc4Srjs return 0x7c000 | (src0 << 0) | (src1 << 3) | (lane_op << 6) | (subgroup << 8) | (inactive_result << 10); 251696c5ddc4Srjs} 251796c5ddc4Srjs 251896c5ddc4Srjsstatic inline unsigned 251996c5ddc4Srjsbi_pack_add_cubeface2(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 252096c5ddc4Srjs{ 252196c5ddc4Srjs 252296c5ddc4Srjs return 0x3de58 | (src0 << 0); 252396c5ddc4Srjs} 252496c5ddc4Srjs 252596c5ddc4Srjsstatic inline unsigned 252696c5ddc4Srjsbi_pack_add_cube_ssel(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 252796c5ddc4Srjs{ 252896c5ddc4Srjs unsigned neg0 = I->src[0].neg; 252996c5ddc4Srjs assert(neg0 < 2); 253096c5ddc4Srjs unsigned neg1 = I->src[1].neg; 253196c5ddc4Srjs assert(neg1 < 2); 253296c5ddc4Srjs unsigned derived_9 = 0; 253396c5ddc4Srjs if ((neg0 == 0) && (neg1 == 0)) derived_9 = 0; 253496c5ddc4Srjs else if ((neg0 == 1) && (neg1 == 1)) derived_9 = 1; 253596c5ddc4Srjs else unreachable("No pattern match at pos 9"); 253696c5ddc4Srjs 253796c5ddc4Srjs return 0x3e000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_9 << 9); 253896c5ddc4Srjs} 253996c5ddc4Srjs 254096c5ddc4Srjsstatic inline unsigned 254196c5ddc4Srjsbi_pack_add_cube_tsel(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 254296c5ddc4Srjs{ 254396c5ddc4Srjs unsigned neg0 = I->src[0].neg; 254496c5ddc4Srjs assert(neg0 < 2); 254596c5ddc4Srjs unsigned neg1 = I->src[1].neg; 254696c5ddc4Srjs assert(neg1 < 2); 254796c5ddc4Srjs unsigned derived_9 = 0; 254896c5ddc4Srjs if ((neg0 == 0) && (neg1 == 0)) derived_9 = 0; 254996c5ddc4Srjs else if ((neg0 == 1) && (neg1 == 1)) derived_9 = 1; 255096c5ddc4Srjs else unreachable("No pattern match at pos 9"); 255196c5ddc4Srjs 255296c5ddc4Srjs return 0x3e400 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_9 
<< 9); 255396c5ddc4Srjs} 255496c5ddc4Srjs 255596c5ddc4Srjsstatic inline unsigned 255696c5ddc4Srjsbi_pack_add_discard_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 255796c5ddc4Srjs{ 255896c5ddc4Srjs unsigned cmpf = I->cmpf; 255996c5ddc4Srjs assert(cmpf < 8); 256096c5ddc4Srjs static uint8_t widen0_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 256196c5ddc4Srjs assert(I->src[0].swizzle < 13); 256296c5ddc4Srjs unsigned widen0 = widen0_table[I->src[0].swizzle]; 256396c5ddc4Srjs assert(widen0 < 4); 256496c5ddc4Srjs static uint8_t widen1_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 256596c5ddc4Srjs assert(I->src[1].swizzle < 13); 256696c5ddc4Srjs unsigned widen1 = widen1_table[I->src[1].swizzle]; 256796c5ddc4Srjs assert(widen1 < 4); 256896c5ddc4Srjs if ((cmpf == 1) || (cmpf == 2)) { 256996c5ddc4Srjs { unsigned temp = src0; src0 = src1; src1 = temp; } 257096c5ddc4Srjs { unsigned temp = widen0; widen0 = widen1; widen1 = temp; } 257196c5ddc4Srjs if (cmpf == 1) cmpf = 4; 257296c5ddc4Srjs else if (cmpf == 2) cmpf = 5; 257396c5ddc4Srjs } 257496c5ddc4Srjs 257596c5ddc4Srjs unsigned derived_6 = 0; 257696c5ddc4Srjs if (cmpf == 0) derived_6 = 0; 257796c5ddc4Srjs else if (cmpf == 3) derived_6 = 1; 257896c5ddc4Srjs else if (cmpf == 4) derived_6 = 2; 257996c5ddc4Srjs else if (cmpf == 5) derived_6 = 3; 258096c5ddc4Srjs else unreachable("No pattern match at pos 6"); 258196c5ddc4Srjs 258296c5ddc4Srjs unsigned derived_8 = 0; 258396c5ddc4Srjs if ((widen0 == 1) && (widen1 == 1)) derived_8 = 0; 258496c5ddc4Srjs else if ((widen0 == 2) && (widen1 == 1)) derived_8 = 1; 258596c5ddc4Srjs else if ((widen0 == 1) && (widen1 == 2)) derived_8 = 2; 258696c5ddc4Srjs else if ((widen0 == 2) && (widen1 == 2)) derived_8 = 3; 258796c5ddc4Srjs else if ((widen0 == 0) && (widen1 == 0)) derived_8 = 4; 258896c5ddc4Srjs else unreachable("No pattern match at pos 8"); 258996c5ddc4Srjs 
259096c5ddc4Srjs return 0xc8800 | (src0 << 0) | (src1 << 3) | (derived_6 << 6) | (derived_8 << 8); 259196c5ddc4Srjs} 259296c5ddc4Srjs 259396c5ddc4Srjsstatic inline unsigned 259496c5ddc4Srjsbi_pack_add_f16_to_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 259596c5ddc4Srjs{ 259696c5ddc4Srjs static uint8_t lane0_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 259796c5ddc4Srjs assert(I->src[0].swizzle < 13); 259896c5ddc4Srjs unsigned lane0 = lane0_table[I->src[0].swizzle]; 259996c5ddc4Srjs assert(lane0 < 2); 260096c5ddc4Srjs return 0x3cd10 | (src0 << 0) | (lane0 << 3); 260196c5ddc4Srjs} 260296c5ddc4Srjs 260396c5ddc4Srjsstatic inline unsigned 260496c5ddc4Srjsbi_pack_add_f16_to_s32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 260596c5ddc4Srjs{ 260696c5ddc4Srjs unsigned round = I->round; 260796c5ddc4Srjs assert(round < 8); 260896c5ddc4Srjs static uint8_t lane0_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 260996c5ddc4Srjs assert(I->src[0].swizzle < 13); 261096c5ddc4Srjs unsigned lane0 = lane0_table[I->src[0].swizzle]; 261196c5ddc4Srjs assert(lane0 < 2); 261296c5ddc4Srjs if (round != 4) { 261396c5ddc4Srjs unsigned derived_4 = 0; 261496c5ddc4Srjs if (round == 0) derived_4 = 0; 261596c5ddc4Srjs else if (round == 1) derived_4 = 1; 261696c5ddc4Srjs else if (round == 2) derived_4 = 2; 261796c5ddc4Srjs else if (round == 3) derived_4 = 3; 261896c5ddc4Srjs else unreachable("No pattern match at pos 4"); 261996c5ddc4Srjs 262096c5ddc4Srjs return 0x3c500 | (src0 << 0) | (lane0 << 7) | (derived_4 << 4); 262196c5ddc4Srjs } else if (round == 4) { 262296c5ddc4Srjs return 0x3cc40 | (src0 << 0) | (lane0 << 5); 262396c5ddc4Srjs } else { 262496c5ddc4Srjs unreachable("No matching state found in add_f16_to_s32"); 262596c5ddc4Srjs } 262696c5ddc4Srjs} 262796c5ddc4Srjs 
262896c5ddc4Srjsstatic inline unsigned 262996c5ddc4Srjsbi_pack_add_f16_to_u32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 263096c5ddc4Srjs{ 263196c5ddc4Srjs unsigned round = I->round; 263296c5ddc4Srjs assert(round < 8); 263396c5ddc4Srjs static uint8_t lane0_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 263496c5ddc4Srjs assert(I->src[0].swizzle < 13); 263596c5ddc4Srjs unsigned lane0 = lane0_table[I->src[0].swizzle]; 263696c5ddc4Srjs assert(lane0 < 2); 263796c5ddc4Srjs if (round != 4) { 263896c5ddc4Srjs unsigned derived_4 = 0; 263996c5ddc4Srjs if (round == 0) derived_4 = 0; 264096c5ddc4Srjs else if (round == 1) derived_4 = 1; 264196c5ddc4Srjs else if (round == 2) derived_4 = 2; 264296c5ddc4Srjs else if (round == 3) derived_4 = 3; 264396c5ddc4Srjs else unreachable("No pattern match at pos 4"); 264496c5ddc4Srjs 264596c5ddc4Srjs return 0x3c508 | (src0 << 0) | (lane0 << 7) | (derived_4 << 4); 264696c5ddc4Srjs } else if (round == 4) { 264796c5ddc4Srjs return 0x3cc48 | (src0 << 0) | (lane0 << 5); 264896c5ddc4Srjs } else { 264996c5ddc4Srjs unreachable("No matching state found in add_f16_to_u32"); 265096c5ddc4Srjs } 265196c5ddc4Srjs} 265296c5ddc4Srjs 265396c5ddc4Srjsstatic inline unsigned 265496c5ddc4Srjsbi_pack_add_f32_to_s32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 265596c5ddc4Srjs{ 265696c5ddc4Srjs unsigned round = I->round; 265796c5ddc4Srjs assert(round < 8); 265896c5ddc4Srjs if (round != 4) { 265996c5ddc4Srjs unsigned derived_4 = 0; 266096c5ddc4Srjs if (round == 0) derived_4 = 0; 266196c5ddc4Srjs else if (round == 1) derived_4 = 1; 266296c5ddc4Srjs else if (round == 2) derived_4 = 2; 266396c5ddc4Srjs else if (round == 3) derived_4 = 3; 266496c5ddc4Srjs else unreachable("No pattern match at pos 4"); 266596c5ddc4Srjs 266696c5ddc4Srjs return 0x3c980 | (src0 << 0) | (derived_4 
<< 4); 266796c5ddc4Srjs } else if (round == 4) { 266896c5ddc4Srjs return 0x3cca0 | (src0 << 0); 266996c5ddc4Srjs } else { 267096c5ddc4Srjs unreachable("No matching state found in add_f32_to_s32"); 267196c5ddc4Srjs } 267296c5ddc4Srjs} 267396c5ddc4Srjs 267496c5ddc4Srjsstatic inline unsigned 267596c5ddc4Srjsbi_pack_add_f32_to_u32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 267696c5ddc4Srjs{ 267796c5ddc4Srjs unsigned round = I->round; 267896c5ddc4Srjs assert(round < 8); 267996c5ddc4Srjs if (round != 4) { 268096c5ddc4Srjs unsigned derived_4 = 0; 268196c5ddc4Srjs if (round == 0) derived_4 = 0; 268296c5ddc4Srjs else if (round == 1) derived_4 = 1; 268396c5ddc4Srjs else if (round == 2) derived_4 = 2; 268496c5ddc4Srjs else if (round == 3) derived_4 = 3; 268596c5ddc4Srjs else unreachable("No pattern match at pos 4"); 268696c5ddc4Srjs 268796c5ddc4Srjs return 0x3c988 | (src0 << 0) | (derived_4 << 4); 268896c5ddc4Srjs } else if (round == 4) { 268996c5ddc4Srjs return 0x3cca8 | (src0 << 0); 269096c5ddc4Srjs } else { 269196c5ddc4Srjs unreachable("No matching state found in add_f32_to_u32"); 269296c5ddc4Srjs } 269396c5ddc4Srjs} 269496c5ddc4Srjs 269596c5ddc4Srjsstatic inline unsigned 269696c5ddc4Srjsbi_pack_add_fadd_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 269796c5ddc4Srjs{ 269896c5ddc4Srjs static uint8_t round_table[] = { 0, 1, 2, 3, ~0, 5, 4, ~0, ~0 }; 269996c5ddc4Srjs assert(I->round < 9); 270096c5ddc4Srjs unsigned round = round_table[I->round]; 270196c5ddc4Srjs assert(round < 8); 270296c5ddc4Srjs unsigned abs1 = I->src[1].abs; 270396c5ddc4Srjs assert(abs1 < 2); 270496c5ddc4Srjs unsigned neg0 = I->src[0].neg; 270596c5ddc4Srjs assert(neg0 < 2); 270696c5ddc4Srjs unsigned neg1 = I->src[1].neg; 270796c5ddc4Srjs assert(neg1 < 2); 270896c5ddc4Srjs unsigned clamp = I->clamp; 270996c5ddc4Srjs assert(clamp < 
4); 271096c5ddc4Srjs unsigned abs0 = I->src[0].abs; 271196c5ddc4Srjs assert(abs0 < 2); 271296c5ddc4Srjs static uint8_t widen0_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 271396c5ddc4Srjs assert(I->src[0].swizzle < 13); 271496c5ddc4Srjs unsigned widen0 = widen0_table[I->src[0].swizzle]; 271596c5ddc4Srjs assert(widen0 < 4); 271696c5ddc4Srjs static uint8_t widen1_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 271796c5ddc4Srjs assert(I->src[1].swizzle < 13); 271896c5ddc4Srjs unsigned widen1 = widen1_table[I->src[1].swizzle]; 271996c5ddc4Srjs assert(widen1 < 4); 272096c5ddc4Srjs if (((widen0 == 1) && (widen1 == 0)) || ((widen0 == 2) && (widen1 == 0))) { 272196c5ddc4Srjs { unsigned temp = src0; src0 = src1; src1 = temp; } 272296c5ddc4Srjs { unsigned temp = neg0; neg0 = neg1; neg1 = temp; } 272396c5ddc4Srjs { unsigned temp = abs0; abs0 = abs1; abs1 = temp; } 272496c5ddc4Srjs { unsigned temp = widen0; widen0 = widen1; widen1 = temp; } 272596c5ddc4Srjs } 272696c5ddc4Srjs 272796c5ddc4Srjs if (round != 4) { 272896c5ddc4Srjs unsigned derived_13 = 0; 272996c5ddc4Srjs if (round == 0) derived_13 = 0; 273096c5ddc4Srjs else if (round == 1) derived_13 = 1; 273196c5ddc4Srjs else if (round == 2) derived_13 = 2; 273296c5ddc4Srjs else if (round == 3) derived_13 = 3; 273396c5ddc4Srjs else unreachable("No pattern match at pos 13"); 273496c5ddc4Srjs 273596c5ddc4Srjs unsigned derived_9 = 0; 273696c5ddc4Srjs if ((widen0 == 0) && (widen1 == 0)) derived_9 = 0; 273796c5ddc4Srjs else if ((widen0 == 0) && (widen1 == 1)) derived_9 = 1; 273896c5ddc4Srjs else if ((widen0 == 0) && (widen1 == 2)) derived_9 = 2; 273996c5ddc4Srjs else if ((widen0 == 1) && (widen1 == 1)) derived_9 = 3; 274096c5ddc4Srjs else unreachable("No pattern match at pos 9"); 274196c5ddc4Srjs 274296c5ddc4Srjs return 0x20000 | (src0 << 0) | (src1 << 3) | (abs1 << 6) | (neg0 << 7) | (neg1 << 8) | (clamp << 11) | (abs0 << 15) | (derived_13 << 13) | (derived_9 << 9); 274396c5ddc4Srjs } else if ((round 
== 4) && (widen0 == 0) && (widen1 == 0) && (abs0 == 0) && (abs1 == 0) && (neg0 == 0) && (neg1 == 0) && (clamp == 0)) { 274496c5ddc4Srjs return 0x75200 | (src0 << 0) | (src1 << 3); 274596c5ddc4Srjs } else { 274696c5ddc4Srjs unreachable("No matching state found in add_fadd_f32"); 274796c5ddc4Srjs } 274896c5ddc4Srjs} 274996c5ddc4Srjs 275096c5ddc4Srjsstatic inline unsigned 275196c5ddc4Srjsbi_pack_add_fadd_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 275296c5ddc4Srjs{ 275396c5ddc4Srjs unsigned abs1 = I->src[1].abs; 275496c5ddc4Srjs assert(abs1 < 2); 275596c5ddc4Srjs unsigned neg0 = I->src[0].neg; 275696c5ddc4Srjs assert(neg0 < 2); 275796c5ddc4Srjs unsigned neg1 = I->src[1].neg; 275896c5ddc4Srjs assert(neg1 < 2); 275996c5ddc4Srjs static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 276096c5ddc4Srjs assert(I->src[0].swizzle < 13); 276196c5ddc4Srjs unsigned swz0 = swz0_table[I->src[0].swizzle]; 276296c5ddc4Srjs assert(swz0 < 4); 276396c5ddc4Srjs static uint8_t swz1_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 276496c5ddc4Srjs assert(I->src[1].swizzle < 13); 276596c5ddc4Srjs unsigned swz1 = swz1_table[I->src[1].swizzle]; 276696c5ddc4Srjs assert(swz1 < 4); 276796c5ddc4Srjs unsigned round = I->round; 276896c5ddc4Srjs assert(round < 4); 276996c5ddc4Srjs unsigned abs0 = I->src[0].abs; 277096c5ddc4Srjs assert(abs0 < 2); 277196c5ddc4Srjs return 0xa0000 | (src0 << 0) | (src1 << 3) | (abs1 << 6) | (neg0 << 7) | (neg1 << 8) | (swz0 << 9) | (swz1 << 11) | (round << 13) | (abs0 << 15); 277296c5ddc4Srjs} 277396c5ddc4Srjs 277496c5ddc4Srjsstatic inline unsigned 277596c5ddc4Srjsbi_pack_add_fadd_rscale_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 277696c5ddc4Srjs{ 277796c5ddc4Srjs static uint8_t clamp_table[] = { 0, ~0, ~0, 1 }; 277896c5ddc4Srjs assert(I->clamp 
< 4); 277996c5ddc4Srjs unsigned clamp = clamp_table[I->clamp]; 278096c5ddc4Srjs assert(clamp < 2); 278196c5ddc4Srjs unsigned special = I->special; 278296c5ddc4Srjs assert(special < 2); 278396c5ddc4Srjs unsigned round = I->round; 278496c5ddc4Srjs assert(round < 8); 278596c5ddc4Srjs unsigned abs1 = I->src[1].abs; 278696c5ddc4Srjs assert(abs1 < 2); 278796c5ddc4Srjs unsigned neg0 = I->src[0].neg; 278896c5ddc4Srjs assert(neg0 < 2); 278996c5ddc4Srjs unsigned neg1 = I->src[1].neg; 279096c5ddc4Srjs assert(neg1 < 2); 279196c5ddc4Srjs unsigned abs0 = I->src[0].abs; 279296c5ddc4Srjs assert(abs0 < 2); 279396c5ddc4Srjs unsigned derived_9 = 0; 279496c5ddc4Srjs if ((clamp == 0) && (special == 0) && (round == 0)) derived_9 = 0; 279596c5ddc4Srjs else if ((clamp == 1) && (special == 0) && (round == 0)) derived_9 = 2; 279696c5ddc4Srjs else if ((clamp == 0) && (special == 1) && (round == 4)) derived_9 = 3; 279796c5ddc4Srjs else if ((clamp == 0) && (special == 1) && (round == 0)) derived_9 = 4; 279896c5ddc4Srjs else if ((clamp == 0) && (special == 1) && (round == 1)) derived_9 = 5; 279996c5ddc4Srjs else if ((clamp == 0) && (special == 1) && (round == 2)) derived_9 = 6; 280096c5ddc4Srjs else if ((clamp == 0) && (special == 1) && (round == 3)) derived_9 = 7; 280196c5ddc4Srjs else unreachable("No pattern match at pos 9"); 280296c5ddc4Srjs 280396c5ddc4Srjs return 0x88000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (abs1 << 12) | (neg0 << 13) | (neg1 << 14) | (abs0 << 16) | (derived_9 << 9); 280496c5ddc4Srjs} 280596c5ddc4Srjs 280696c5ddc4Srjsstatic inline unsigned 280796c5ddc4Srjsbi_pack_add_fcmp_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 280896c5ddc4Srjs{ 280996c5ddc4Srjs static uint8_t widen0_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 281096c5ddc4Srjs assert(I->src[0].swizzle < 13); 281196c5ddc4Srjs unsigned widen0 = widen0_table[I->src[0].swizzle]; 281296c5ddc4Srjs 
assert(widen0 < 4); 281396c5ddc4Srjs static uint8_t widen1_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 281496c5ddc4Srjs assert(I->src[1].swizzle < 13); 281596c5ddc4Srjs unsigned widen1 = widen1_table[I->src[1].swizzle]; 281696c5ddc4Srjs assert(widen1 < 4); 281796c5ddc4Srjs unsigned neg0 = I->src[0].neg; 281896c5ddc4Srjs assert(neg0 < 2); 281996c5ddc4Srjs unsigned neg1 = I->src[1].neg; 282096c5ddc4Srjs assert(neg1 < 2); 282196c5ddc4Srjs static uint8_t cmpf_table[] = { 0, 1, 2, 3, 4, 5, ~0, 6, 7 }; 282296c5ddc4Srjs assert(I->cmpf < 9); 282396c5ddc4Srjs unsigned cmpf = cmpf_table[I->cmpf]; 282496c5ddc4Srjs assert(cmpf < 8); 282596c5ddc4Srjs unsigned abs0 = I->src[0].abs; 282696c5ddc4Srjs assert(abs0 < 2); 282796c5ddc4Srjs unsigned abs1 = I->src[1].abs; 282896c5ddc4Srjs assert(abs1 < 2); 282996c5ddc4Srjs unsigned result_type = I->result_type; 283096c5ddc4Srjs assert(result_type < 4); 283196c5ddc4Srjs if (((neg0 == 0) && (neg1 == 1)) || ((widen0 == 1) && (widen1 == 0)) || ((widen0 == 2) && (widen1 == 0))) { 283296c5ddc4Srjs { unsigned temp = src0; src0 = src1; src1 = temp; } 283396c5ddc4Srjs { unsigned temp = widen0; widen0 = widen1; widen1 = temp; } 283496c5ddc4Srjs { unsigned temp = neg0; neg0 = neg1; neg1 = temp; } 283596c5ddc4Srjs { unsigned temp = abs0; abs0 = abs1; abs1 = temp; } 283696c5ddc4Srjs if (cmpf == 4) cmpf = 1; 283796c5ddc4Srjs else if (cmpf == 5) cmpf = 2; 283896c5ddc4Srjs else if (cmpf == 1) cmpf = 4; 283996c5ddc4Srjs else if (cmpf == 2) cmpf = 5; 284096c5ddc4Srjs } 284196c5ddc4Srjs 284296c5ddc4Srjs unsigned derived_9 = 0; 284396c5ddc4Srjs if ((widen0 == 0) && (widen1 == 0)) derived_9 = 0; 284496c5ddc4Srjs else if ((widen0 == 0) && (widen1 == 1)) derived_9 = 1; 284596c5ddc4Srjs else if ((widen0 == 0) && (widen1 == 2)) derived_9 = 2; 284696c5ddc4Srjs else if ((widen0 == 1) && (widen1 == 1)) derived_9 = 3; 284796c5ddc4Srjs else unreachable("No pattern match at pos 9"); 284896c5ddc4Srjs 284996c5ddc4Srjs unsigned derived_13 = 0; 
285096c5ddc4Srjs if ((neg0 == 0) && (neg1 == 0)) derived_13 = 0; 285196c5ddc4Srjs else if ((neg0 == 1) && (neg1 == 0)) derived_13 = 1; 285296c5ddc4Srjs else unreachable("No pattern match at pos 13"); 285396c5ddc4Srjs 285496c5ddc4Srjs return 0x30000 | (src0 << 0) | (src1 << 3) | (cmpf << 6) | (abs0 << 11) | (abs1 << 12) | (result_type << 14) | (derived_9 << 9) | (derived_13 << 13); 285596c5ddc4Srjs} 285696c5ddc4Srjs 285796c5ddc4Srjsstatic inline unsigned 285896c5ddc4Srjsbi_pack_add_fcmp_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 285996c5ddc4Srjs{ 286096c5ddc4Srjs unsigned neg0 = I->src[0].neg; 286196c5ddc4Srjs assert(neg0 < 2); 286296c5ddc4Srjs unsigned neg1 = I->src[1].neg; 286396c5ddc4Srjs assert(neg1 < 2); 286496c5ddc4Srjs static uint8_t cmpf_table[] = { 0, 1, 2, 3, 4, 5, ~0, 6, 7 }; 286596c5ddc4Srjs assert(I->cmpf < 9); 286696c5ddc4Srjs unsigned cmpf = cmpf_table[I->cmpf]; 286796c5ddc4Srjs assert(cmpf < 8); 286896c5ddc4Srjs static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 286996c5ddc4Srjs assert(I->src[0].swizzle < 13); 287096c5ddc4Srjs unsigned swz0 = swz0_table[I->src[0].swizzle]; 287196c5ddc4Srjs assert(swz0 < 4); 287296c5ddc4Srjs static uint8_t swz1_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 287396c5ddc4Srjs assert(I->src[1].swizzle < 13); 287496c5ddc4Srjs unsigned swz1 = swz1_table[I->src[1].swizzle]; 287596c5ddc4Srjs assert(swz1 < 4); 287696c5ddc4Srjs unsigned result_type = I->result_type; 287796c5ddc4Srjs assert(result_type < 4); 287896c5ddc4Srjs if ((neg0 == 0) && (neg1 == 1)) { 287996c5ddc4Srjs { unsigned temp = src0; src0 = src1; src1 = temp; } 288096c5ddc4Srjs { unsigned temp = neg0; neg0 = neg1; neg1 = temp; } 288196c5ddc4Srjs { unsigned temp = swz0; swz0 = swz1; swz1 = temp; } 288296c5ddc4Srjs if (cmpf == 4) cmpf = 1; 288396c5ddc4Srjs else if (cmpf == 5) cmpf = 2; 288496c5ddc4Srjs else if (cmpf == 1) 
cmpf = 4; 288596c5ddc4Srjs else if (cmpf == 2) cmpf = 5; 288696c5ddc4Srjs } 288796c5ddc4Srjs 288896c5ddc4Srjs unsigned derived_13 = 0; 288996c5ddc4Srjs if ((neg0 == 0) && (neg1 == 0)) derived_13 = 0; 289096c5ddc4Srjs else if ((neg0 == 1) && (neg1 == 0)) derived_13 = 1; 289196c5ddc4Srjs else unreachable("No pattern match at pos 13"); 289296c5ddc4Srjs 289396c5ddc4Srjs return 0xb0000 | (src0 << 0) | (src1 << 3) | (cmpf << 6) | (swz0 << 9) | (swz1 << 11) | (result_type << 14) | (derived_13 << 13); 289496c5ddc4Srjs} 289596c5ddc4Srjs 289696c5ddc4Srjsstatic inline unsigned 289796c5ddc4Srjsbi_pack_add_fcos_table_u6(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 289896c5ddc4Srjs{ 289996c5ddc4Srjs assert((1 << src0) & 0xf7); 290096c5ddc4Srjs unsigned offset = I->offset; 290196c5ddc4Srjs assert(offset < 2); 290296c5ddc4Srjs return 0x67a88 | (src0 << 0) | (offset << 4); 290396c5ddc4Srjs} 290496c5ddc4Srjs 290596c5ddc4Srjsstatic inline unsigned 290696c5ddc4Srjsbi_pack_add_fexp_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 290796c5ddc4Srjs{ 290896c5ddc4Srjs assert((1 << src0) & 0xf7); 290996c5ddc4Srjs assert((1 << src1) & 0xf7); 291096c5ddc4Srjs return 0x66ac0 | (src0 << 0) | (src1 << 3); 291196c5ddc4Srjs} 291296c5ddc4Srjs 291396c5ddc4Srjsstatic inline unsigned 291496c5ddc4Srjsbi_pack_add_fexp_table_u4(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 291596c5ddc4Srjs{ 291696c5ddc4Srjs assert((1 << src0) & 0xf7); 291796c5ddc4Srjs unsigned adj = I->adj; 291896c5ddc4Srjs assert(adj < 4); 291996c5ddc4Srjs return 0x67ac0 | (src0 << 0) | (adj << 3); 292096c5ddc4Srjs} 292196c5ddc4Srjs 292296c5ddc4Srjsstatic inline unsigned 292396c5ddc4Srjsbi_pack_add_flogd_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src 
src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 292496c5ddc4Srjs{ 292596c5ddc4Srjs assert((1 << src0) & 0xf7); 292696c5ddc4Srjs return 0x66340 | (src0 << 0); 292796c5ddc4Srjs} 292896c5ddc4Srjs 292996c5ddc4Srjsstatic inline unsigned 293096c5ddc4Srjsbi_pack_add_flog_table_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 293196c5ddc4Srjs{ 293296c5ddc4Srjs assert((1 << src0) & 0xf7); 293396c5ddc4Srjs static uint8_t widen0_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 293496c5ddc4Srjs assert(I->src[0].swizzle < 13); 293596c5ddc4Srjs unsigned widen0 = widen0_table[I->src[0].swizzle]; 293696c5ddc4Srjs assert(widen0 < 4); 293796c5ddc4Srjs unsigned mode = I->mode; 293896c5ddc4Srjs assert(mode < 4); 293996c5ddc4Srjs unsigned precision = I->precision; 294096c5ddc4Srjs assert(precision < 4); 294196c5ddc4Srjs unsigned neg0 = I->src[0].neg; 294296c5ddc4Srjs assert(neg0 < 2); 294396c5ddc4Srjs unsigned abs0 = I->src[0].abs; 294496c5ddc4Srjs assert(abs0 < 2); 294596c5ddc4Srjs unsigned divzero = I->divzero; 294696c5ddc4Srjs assert(divzero < 2); 294796c5ddc4Srjs if ((mode == 0) && (widen0 == 0) && (precision == 0)) { 294896c5ddc4Srjs return 0x67300 | (src0 << 0) | (neg0 << 3) | (abs0 << 4) | (divzero << 5); 294996c5ddc4Srjs } else if ((mode == 0) && (widen0 != 0) && (precision == 0)) { 295096c5ddc4Srjs unsigned derived_7 = 0; 295196c5ddc4Srjs if (widen0 == 1) derived_7 = 0; 295296c5ddc4Srjs else if (widen0 == 2) derived_7 = 1; 295396c5ddc4Srjs else unreachable("No pattern match at pos 7"); 295496c5ddc4Srjs 295596c5ddc4Srjs return 0x67340 | (src0 << 0) | (neg0 << 3) | (abs0 << 4) | (divzero << 5) | (derived_7 << 7); 295696c5ddc4Srjs } else if ((mode != 0) && (widen0 == 0) && (precision == 0) && (divzero == 0)) { 295796c5ddc4Srjs unsigned derived_5 = 0; 295896c5ddc4Srjs if (mode == 1) derived_5 = 0; 295996c5ddc4Srjs else if (mode == 2) derived_5 = 1; 
296096c5ddc4Srjs else unreachable("No pattern match at pos 5"); 296196c5ddc4Srjs 296296c5ddc4Srjs return 0x67b00 | (src0 << 0) | (neg0 << 3) | (abs0 << 4) | (derived_5 << 5); 296396c5ddc4Srjs } else if ((mode != 0) && (widen0 != 0) && (precision == 0) && (divzero == 0)) { 296496c5ddc4Srjs unsigned derived_5 = 0; 296596c5ddc4Srjs if (mode == 1) derived_5 = 0; 296696c5ddc4Srjs else if (mode == 2) derived_5 = 1; 296796c5ddc4Srjs else unreachable("No pattern match at pos 5"); 296896c5ddc4Srjs 296996c5ddc4Srjs unsigned derived_7 = 0; 297096c5ddc4Srjs if (widen0 == 1) derived_7 = 0; 297196c5ddc4Srjs else if (widen0 == 2) derived_7 = 1; 297296c5ddc4Srjs else unreachable("No pattern match at pos 7"); 297396c5ddc4Srjs 297496c5ddc4Srjs return 0x67b40 | (src0 << 0) | (neg0 << 3) | (abs0 << 4) | (derived_5 << 5) | (derived_7 << 7); 297596c5ddc4Srjs } else if ((mode != 0) && (widen0 == 0) && (precision != 0) && (divzero == 0) && (abs0 == 0) && (neg0 == 0)) { 297696c5ddc4Srjs unsigned derived_3 = 0; 297796c5ddc4Srjs if (mode == 2) derived_3 = 0; 297896c5ddc4Srjs else if (mode == 1) derived_3 = 1; 297996c5ddc4Srjs else unreachable("No pattern match at pos 3"); 298096c5ddc4Srjs 298196c5ddc4Srjs unsigned derived_4 = 0; 298296c5ddc4Srjs if (precision == 1) derived_4 = 0; 298396c5ddc4Srjs else if (precision == 2) derived_4 = 1; 298496c5ddc4Srjs else unreachable("No pattern match at pos 4"); 298596c5ddc4Srjs 298696c5ddc4Srjs return 0x67ae0 | (src0 << 0) | (derived_3 << 3) | (derived_4 << 4); 298796c5ddc4Srjs } else { 298896c5ddc4Srjs unreachable("No matching state found in add_flog_table_f32"); 298996c5ddc4Srjs } 299096c5ddc4Srjs} 299196c5ddc4Srjs 299296c5ddc4Srjsstatic inline unsigned 299396c5ddc4Srjsbi_pack_add_fmax_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 299496c5ddc4Srjs{ 299596c5ddc4Srjs unsigned abs1 = I->src[1].abs; 299696c5ddc4Srjs assert(abs1 < 2); 299796c5ddc4Srjs unsigned neg0 = 
I->src[0].neg; 299896c5ddc4Srjs assert(neg0 < 2); 299996c5ddc4Srjs unsigned neg1 = I->src[1].neg; 300096c5ddc4Srjs assert(neg1 < 2); 300196c5ddc4Srjs unsigned clamp = I->clamp; 300296c5ddc4Srjs assert(clamp < 4); 300396c5ddc4Srjs unsigned sem = I->sem; 300496c5ddc4Srjs assert(sem < 4); 300596c5ddc4Srjs unsigned abs0 = I->src[0].abs; 300696c5ddc4Srjs assert(abs0 < 2); 300796c5ddc4Srjs return 0x0 | (src0 << 0) | (src1 << 3) | (abs1 << 6) | (neg0 << 7) | (neg1 << 8) | (clamp << 11) | (sem << 13) | (abs0 << 15); 300896c5ddc4Srjs} 300996c5ddc4Srjs 301096c5ddc4Srjsstatic inline unsigned 301196c5ddc4Srjsbi_pack_add_fmax_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 301296c5ddc4Srjs{ 301396c5ddc4Srjs unsigned abs0 = I->src[0].abs; 301496c5ddc4Srjs assert(abs0 < 2); 301596c5ddc4Srjs unsigned abs1 = I->src[1].abs; 301696c5ddc4Srjs assert(abs1 < 2); 301796c5ddc4Srjs unsigned neg0 = I->src[0].neg; 301896c5ddc4Srjs assert(neg0 < 2); 301996c5ddc4Srjs unsigned neg1 = I->src[1].neg; 302096c5ddc4Srjs assert(neg1 < 2); 302196c5ddc4Srjs static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 302296c5ddc4Srjs assert(I->src[0].swizzle < 13); 302396c5ddc4Srjs unsigned swz0 = swz0_table[I->src[0].swizzle]; 302496c5ddc4Srjs assert(swz0 < 4); 302596c5ddc4Srjs static uint8_t swz1_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 302696c5ddc4Srjs assert(I->src[1].swizzle < 13); 302796c5ddc4Srjs unsigned swz1 = swz1_table[I->src[1].swizzle]; 302896c5ddc4Srjs assert(swz1 < 4); 302996c5ddc4Srjs unsigned sem = I->sem; 303096c5ddc4Srjs assert(sem < 4); 303196c5ddc4Srjs if (((abs0 == 0) && (src0 > src1)) || ((abs1 == 1) && (src0 <= src1))) { 303296c5ddc4Srjs { unsigned temp = src0; src0 = src1; src1 = temp; } 303396c5ddc4Srjs { unsigned temp = abs0; abs0 = abs1; abs1 = temp; } 303496c5ddc4Srjs { unsigned temp = neg0; neg0 = neg1; neg1 = temp; } 303596c5ddc4Srjs { 
unsigned temp = swz0; swz0 = swz1; swz1 = temp; } 303696c5ddc4Srjs if (sem == 2) sem = 3; 303796c5ddc4Srjs else if (sem == 3) sem = 2; 303896c5ddc4Srjs } 303996c5ddc4Srjs 304096c5ddc4Srjs unsigned derived_6 = 0; 304196c5ddc4Srjs if (((abs0 == 1) && (abs1 == 0) && (src0 > src1)) || ((abs0 == 0) && (abs1 == 0) && (src0 <= src1))) derived_6 = 0; 304296c5ddc4Srjs else if (((abs0 == 1) && (abs1 == 1) && (src0 > src1)) || ((abs0 == 1) && (abs1 == 0) && (src0 <= src1))) derived_6 = 1; 304396c5ddc4Srjs else unreachable("No pattern match at pos 6"); 304496c5ddc4Srjs 304596c5ddc4Srjs return 0x80000 | (src0 << 0) | (src1 << 3) | (neg0 << 7) | (neg1 << 8) | (swz0 << 9) | (swz1 << 11) | (sem << 13) | (derived_6 << 6); 304696c5ddc4Srjs} 304796c5ddc4Srjs 304896c5ddc4Srjsstatic inline unsigned 304996c5ddc4Srjsbi_pack_add_fmin_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 305096c5ddc4Srjs{ 305196c5ddc4Srjs unsigned abs1 = I->src[1].abs; 305296c5ddc4Srjs assert(abs1 < 2); 305396c5ddc4Srjs unsigned neg0 = I->src[0].neg; 305496c5ddc4Srjs assert(neg0 < 2); 305596c5ddc4Srjs unsigned neg1 = I->src[1].neg; 305696c5ddc4Srjs assert(neg1 < 2); 305796c5ddc4Srjs unsigned clamp = I->clamp; 305896c5ddc4Srjs assert(clamp < 4); 305996c5ddc4Srjs unsigned sem = I->sem; 306096c5ddc4Srjs assert(sem < 4); 306196c5ddc4Srjs unsigned abs0 = I->src[0].abs; 306296c5ddc4Srjs assert(abs0 < 2); 306396c5ddc4Srjs return 0x10000 | (src0 << 0) | (src1 << 3) | (abs1 << 6) | (neg0 << 7) | (neg1 << 8) | (clamp << 11) | (sem << 13) | (abs0 << 15); 306496c5ddc4Srjs} 306596c5ddc4Srjs 306696c5ddc4Srjsstatic inline unsigned 306796c5ddc4Srjsbi_pack_add_fmin_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 306896c5ddc4Srjs{ 306996c5ddc4Srjs unsigned abs0 = I->src[0].abs; 307096c5ddc4Srjs assert(abs0 < 2); 307196c5ddc4Srjs unsigned abs1 = 
I->src[1].abs; 307296c5ddc4Srjs assert(abs1 < 2); 307396c5ddc4Srjs unsigned neg0 = I->src[0].neg; 307496c5ddc4Srjs assert(neg0 < 2); 307596c5ddc4Srjs unsigned neg1 = I->src[1].neg; 307696c5ddc4Srjs assert(neg1 < 2); 307796c5ddc4Srjs static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 307896c5ddc4Srjs assert(I->src[0].swizzle < 13); 307996c5ddc4Srjs unsigned swz0 = swz0_table[I->src[0].swizzle]; 308096c5ddc4Srjs assert(swz0 < 4); 308196c5ddc4Srjs static uint8_t swz1_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 308296c5ddc4Srjs assert(I->src[1].swizzle < 13); 308396c5ddc4Srjs unsigned swz1 = swz1_table[I->src[1].swizzle]; 308496c5ddc4Srjs assert(swz1 < 4); 308596c5ddc4Srjs unsigned sem = I->sem; 308696c5ddc4Srjs assert(sem < 4); 308796c5ddc4Srjs if (((abs0 == 0) && (src0 > src1)) || ((abs1 == 1) && (src0 <= src1))) { 308896c5ddc4Srjs { unsigned temp = src0; src0 = src1; src1 = temp; } 308996c5ddc4Srjs { unsigned temp = abs0; abs0 = abs1; abs1 = temp; } 309096c5ddc4Srjs { unsigned temp = neg0; neg0 = neg1; neg1 = temp; } 309196c5ddc4Srjs { unsigned temp = swz0; swz0 = swz1; swz1 = temp; } 309296c5ddc4Srjs if (sem == 2) sem = 3; 309396c5ddc4Srjs else if (sem == 3) sem = 2; 309496c5ddc4Srjs } 309596c5ddc4Srjs 309696c5ddc4Srjs unsigned derived_6 = 0; 309796c5ddc4Srjs if (((abs0 == 1) && (abs1 == 0) && (src0 > src1)) || ((abs0 == 0) && (abs1 == 0) && (src0 <= src1))) derived_6 = 0; 309896c5ddc4Srjs else if (((abs0 == 1) && (abs1 == 1) && (src0 > src1)) || ((abs0 == 1) && (abs1 == 0) && (src0 <= src1))) derived_6 = 1; 309996c5ddc4Srjs else unreachable("No pattern match at pos 6"); 310096c5ddc4Srjs 310196c5ddc4Srjs return 0x90000 | (src0 << 0) | (src1 << 3) | (neg0 << 7) | (neg1 << 8) | (swz0 << 9) | (swz1 << 11) | (sem << 13) | (derived_6 << 6); 310296c5ddc4Srjs} 310396c5ddc4Srjs 310496c5ddc4Srjsstatic inline unsigned 310596c5ddc4Srjsbi_pack_add_fpclass_f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src 
src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 310696c5ddc4Srjs{ 310796c5ddc4Srjs assert((1 << src0) & 0xf7); 310896c5ddc4Srjs static uint8_t lane0_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 310996c5ddc4Srjs assert(I->src[0].swizzle < 13); 311096c5ddc4Srjs unsigned lane0 = lane0_table[I->src[0].swizzle]; 311196c5ddc4Srjs assert(lane0 < 2); 311296c5ddc4Srjs return 0x67c40 | (src0 << 0) | (lane0 << 3); 311396c5ddc4Srjs} 311496c5ddc4Srjs 311596c5ddc4Srjsstatic inline unsigned 311696c5ddc4Srjsbi_pack_add_fpclass_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 311796c5ddc4Srjs{ 311896c5ddc4Srjs assert((1 << src0) & 0xf7); 311996c5ddc4Srjs return 0x67c50 | (src0 << 0); 312096c5ddc4Srjs} 312196c5ddc4Srjs 312296c5ddc4Srjsstatic inline unsigned 312396c5ddc4Srjsbi_pack_add_fpow_sc_apply(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 312496c5ddc4Srjs{ 312596c5ddc4Srjs 312696c5ddc4Srjs return 0x75080 | (src0 << 0) | (src1 << 3); 312796c5ddc4Srjs} 312896c5ddc4Srjs 312996c5ddc4Srjsstatic inline unsigned 313096c5ddc4Srjsbi_pack_add_fpow_sc_det_f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 313196c5ddc4Srjs{ 313296c5ddc4Srjs assert((1 << src0) & 0xf7); 313396c5ddc4Srjs assert((1 << src1) & 0xf7); 313496c5ddc4Srjs unsigned func = I->func; 313596c5ddc4Srjs assert(func < 4); 313696c5ddc4Srjs static uint8_t lane1_table[] = { 0, 2, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 313796c5ddc4Srjs assert(I->src[1].swizzle < 13); 313896c5ddc4Srjs unsigned lane1 = lane1_table[I->src[1].swizzle]; 313996c5ddc4Srjs assert(lane1 < 4); 314096c5ddc4Srjs static uint8_t lane0_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 314196c5ddc4Srjs assert(I->src[0].swizzle < 13); 
314296c5ddc4Srjs unsigned lane0 = lane0_table[I->src[0].swizzle]; 314396c5ddc4Srjs assert(lane0 < 2); 314496c5ddc4Srjs if ((func == 0) || (func == 1)) { 314596c5ddc4Srjs unsigned derived_6 = 0; 314696c5ddc4Srjs if ((lane1 == 2) || (lane1 == 0)) derived_6 = 0; 314796c5ddc4Srjs else if (lane1 == 1) derived_6 = 1; 314896c5ddc4Srjs else unreachable("No pattern match at pos 6"); 314996c5ddc4Srjs 315096c5ddc4Srjs unsigned derived_8 = 0; 315196c5ddc4Srjs if (func == 0) derived_8 = 0; 315296c5ddc4Srjs else if (func == 1) derived_8 = 1; 315396c5ddc4Srjs else unreachable("No pattern match at pos 8"); 315496c5ddc4Srjs 315596c5ddc4Srjs return 0x67400 | (src0 << 0) | (src1 << 3) | (lane0 << 7) | (derived_6 << 6) | (derived_8 << 8); 315696c5ddc4Srjs } else if (((func == 2) || (func == 3)) && (lane1 == 2)) { 315796c5ddc4Srjs unsigned derived_8 = 0; 315896c5ddc4Srjs if (func == 2) derived_8 = 0; 315996c5ddc4Srjs else if (func == 3) derived_8 = 1; 316096c5ddc4Srjs else unreachable("No pattern match at pos 8"); 316196c5ddc4Srjs 316296c5ddc4Srjs return 0x67600 | (src0 << 0) | (src1 << 3) | (lane0 << 7) | (derived_8 << 8); 316396c5ddc4Srjs } else { 316496c5ddc4Srjs unreachable("No matching state found in add_fpow_sc_det_f16"); 316596c5ddc4Srjs } 316696c5ddc4Srjs} 316796c5ddc4Srjs 316896c5ddc4Srjsstatic inline unsigned 316996c5ddc4Srjsbi_pack_add_fpow_sc_det_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 317096c5ddc4Srjs{ 317196c5ddc4Srjs assert((1 << src0) & 0xf7); 317296c5ddc4Srjs assert((1 << src1) & 0xf7); 317396c5ddc4Srjs unsigned func = I->func; 317496c5ddc4Srjs assert(func < 4); 317596c5ddc4Srjs return 0x67640 | (src0 << 0) | (src1 << 3) | (func << 7); 317696c5ddc4Srjs} 317796c5ddc4Srjs 317896c5ddc4Srjsstatic inline unsigned 317996c5ddc4Srjsbi_pack_add_frcp_f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src 
src3) 318096c5ddc4Srjs{ 318196c5ddc4Srjs assert((1 << src0) & 0xf7); 318296c5ddc4Srjs unsigned neg0 = I->src[0].neg; 318396c5ddc4Srjs assert(neg0 < 2); 318496c5ddc4Srjs unsigned abs0 = I->src[0].abs; 318596c5ddc4Srjs assert(abs0 < 2); 318696c5ddc4Srjs unsigned divzero = I->divzero; 318796c5ddc4Srjs assert(divzero < 2); 318896c5ddc4Srjs static uint8_t lane0_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 318996c5ddc4Srjs assert(I->src[0].swizzle < 13); 319096c5ddc4Srjs unsigned lane0 = lane0_table[I->src[0].swizzle]; 319196c5ddc4Srjs assert(lane0 < 2); 319296c5ddc4Srjs return 0x67080 | (src0 << 0) | (neg0 << 3) | (abs0 << 4) | (divzero << 5) | (lane0 << 8); 319396c5ddc4Srjs} 319496c5ddc4Srjs 319596c5ddc4Srjsstatic inline unsigned 319696c5ddc4Srjsbi_pack_add_frcp_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 319796c5ddc4Srjs{ 319896c5ddc4Srjs assert((1 << src0) & 0xf7); 319996c5ddc4Srjs static uint8_t widen0_table[] = { ~0, 0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 320096c5ddc4Srjs assert(I->src[0].swizzle < 13); 320196c5ddc4Srjs unsigned widen0 = widen0_table[I->src[0].swizzle]; 320296c5ddc4Srjs assert(widen0 < 4); 320396c5ddc4Srjs unsigned neg0 = I->src[0].neg; 320496c5ddc4Srjs assert(neg0 < 2); 320596c5ddc4Srjs unsigned abs0 = I->src[0].abs; 320696c5ddc4Srjs assert(abs0 < 2); 320796c5ddc4Srjs unsigned derived_6 = 0; 320896c5ddc4Srjs if (widen0 == 0) derived_6 = 0; 320996c5ddc4Srjs else unreachable("No pattern match at pos 6"); 321096c5ddc4Srjs 321196c5ddc4Srjs return 0x66000 | (src0 << 0) | (neg0 << 3) | (abs0 << 4) | (derived_6 << 6); 321296c5ddc4Srjs} 321396c5ddc4Srjs 321496c5ddc4Srjsstatic inline unsigned 321596c5ddc4Srjsbi_pack_add_frcp_approx_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 321696c5ddc4Srjs{ 321796c5ddc4Srjs assert((1 << src0) & 0xf7); 
321896c5ddc4Srjs static uint8_t widen0_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 321996c5ddc4Srjs assert(I->src[0].swizzle < 13); 322096c5ddc4Srjs unsigned widen0 = widen0_table[I->src[0].swizzle]; 322196c5ddc4Srjs assert(widen0 < 4); 322296c5ddc4Srjs unsigned neg0 = I->src[0].neg; 322396c5ddc4Srjs assert(neg0 < 2); 322496c5ddc4Srjs unsigned abs0 = I->src[0].abs; 322596c5ddc4Srjs assert(abs0 < 2); 322696c5ddc4Srjs unsigned divzero = I->divzero; 322796c5ddc4Srjs assert(divzero < 2); 322896c5ddc4Srjs if (widen0 == 0) { 322996c5ddc4Srjs return 0x67000 | (src0 << 0) | (neg0 << 3) | (abs0 << 4) | (divzero << 5); 323096c5ddc4Srjs } else if (widen0 != 0) { 323196c5ddc4Srjs unsigned derived_7 = 0; 323296c5ddc4Srjs if (widen0 == 1) derived_7 = 0; 323396c5ddc4Srjs else if (widen0 == 2) derived_7 = 1; 323496c5ddc4Srjs else unreachable("No pattern match at pos 7"); 323596c5ddc4Srjs 323696c5ddc4Srjs return 0x67040 | (src0 << 0) | (neg0 << 3) | (abs0 << 4) | (divzero << 5) | (derived_7 << 7); 323796c5ddc4Srjs } else { 323896c5ddc4Srjs unreachable("No matching state found in add_frcp_approx_f32"); 323996c5ddc4Srjs } 324096c5ddc4Srjs} 324196c5ddc4Srjs 324296c5ddc4Srjsstatic inline unsigned 324396c5ddc4Srjsbi_pack_add_frexpe_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 324496c5ddc4Srjs{ 324596c5ddc4Srjs unsigned neg0 = I->src[0].neg; 324696c5ddc4Srjs assert(neg0 < 2); 324796c5ddc4Srjs unsigned sqrt = I->sqrt; 324896c5ddc4Srjs assert(sqrt < 2); 324996c5ddc4Srjs unsigned log = I->log; 325096c5ddc4Srjs assert(log < 2); 325196c5ddc4Srjs static uint8_t widen0_table[] = { 2, 1, ~0, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 325296c5ddc4Srjs assert(I->src[0].swizzle < 13); 325396c5ddc4Srjs unsigned widen0 = widen0_table[I->src[0].swizzle]; 325496c5ddc4Srjs assert(widen0 < 4); 325596c5ddc4Srjs if (log == 0) { 325696c5ddc4Srjs return 0x3dc20 | (src0 << 0) | (neg0 << 6) | (sqrt << 
8) | (widen0 << 3); 325796c5ddc4Srjs } else if ((log == 1) && (sqrt == 0) && (neg0 == 0)) { 325896c5ddc4Srjs return 0x3de20 | (src0 << 0) | (widen0 << 3); 325996c5ddc4Srjs } else { 326096c5ddc4Srjs unreachable("No matching state found in add_frexpe_f32"); 326196c5ddc4Srjs } 326296c5ddc4Srjs} 326396c5ddc4Srjs 326496c5ddc4Srjsstatic inline unsigned 326596c5ddc4Srjsbi_pack_add_frexpe_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 326696c5ddc4Srjs{ 326796c5ddc4Srjs unsigned neg0 = I->src[0].neg; 326896c5ddc4Srjs assert(neg0 < 2); 326996c5ddc4Srjs unsigned sqrt = I->sqrt; 327096c5ddc4Srjs assert(sqrt < 2); 327196c5ddc4Srjs unsigned log = I->log; 327296c5ddc4Srjs assert(log < 2); 327396c5ddc4Srjs static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 327496c5ddc4Srjs assert(I->src[0].swizzle < 13); 327596c5ddc4Srjs unsigned swz0 = swz0_table[I->src[0].swizzle]; 327696c5ddc4Srjs assert(swz0 < 4); 327796c5ddc4Srjs if (log == 0) { 327896c5ddc4Srjs return 0x3dc00 | (src0 << 0) | (neg0 << 6) | (sqrt << 8) | (swz0 << 3); 327996c5ddc4Srjs } else if ((log == 1) && (sqrt == 0) && (neg0 == 0)) { 328096c5ddc4Srjs return 0x3de00 | (src0 << 0) | (swz0 << 3); 328196c5ddc4Srjs } else { 328296c5ddc4Srjs unreachable("No matching state found in add_frexpe_v2f16"); 328396c5ddc4Srjs } 328496c5ddc4Srjs} 328596c5ddc4Srjs 328696c5ddc4Srjsstatic inline unsigned 328796c5ddc4Srjsbi_pack_add_frexpm_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 328896c5ddc4Srjs{ 328996c5ddc4Srjs unsigned abs0 = I->src[0].abs; 329096c5ddc4Srjs assert(abs0 < 2); 329196c5ddc4Srjs unsigned sqrt = I->sqrt; 329296c5ddc4Srjs assert(sqrt < 2); 329396c5ddc4Srjs unsigned log = I->log; 329496c5ddc4Srjs assert(log < 2); 329596c5ddc4Srjs static uint8_t widen0_table[] = { 2, 1, ~0, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 
329696c5ddc4Srjs assert(I->src[0].swizzle < 13); 329796c5ddc4Srjs unsigned widen0 = widen0_table[I->src[0].swizzle]; 329896c5ddc4Srjs assert(widen0 < 4); 329996c5ddc4Srjs unsigned neg0 = I->src[0].neg; 330096c5ddc4Srjs assert(neg0 < 2); 330196c5ddc4Srjs if ((log == 0) && (neg0 == 0)) { 330296c5ddc4Srjs return 0x3db20 | (src0 << 0) | (abs0 << 6) | (sqrt << 7) | (widen0 << 3); 330396c5ddc4Srjs } else if ((log == 1) && (sqrt == 0)) { 330496c5ddc4Srjs return 0x3da20 | (src0 << 0) | (abs0 << 6) | (widen0 << 3) | (neg0 << 7); 330596c5ddc4Srjs } else { 330696c5ddc4Srjs unreachable("No matching state found in add_frexpm_f32"); 330796c5ddc4Srjs } 330896c5ddc4Srjs} 330996c5ddc4Srjs 331096c5ddc4Srjsstatic inline unsigned 331196c5ddc4Srjsbi_pack_add_frexpm_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 331296c5ddc4Srjs{ 331396c5ddc4Srjs unsigned abs0 = I->src[0].abs; 331496c5ddc4Srjs assert(abs0 < 2); 331596c5ddc4Srjs unsigned sqrt = I->sqrt; 331696c5ddc4Srjs assert(sqrt < 2); 331796c5ddc4Srjs unsigned log = I->log; 331896c5ddc4Srjs assert(log < 2); 331996c5ddc4Srjs static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 332096c5ddc4Srjs assert(I->src[0].swizzle < 13); 332196c5ddc4Srjs unsigned swz0 = swz0_table[I->src[0].swizzle]; 332296c5ddc4Srjs assert(swz0 < 4); 332396c5ddc4Srjs unsigned neg0 = I->src[0].neg; 332496c5ddc4Srjs assert(neg0 < 2); 332596c5ddc4Srjs if ((log == 0) && (neg0 == 0)) { 332696c5ddc4Srjs return 0x3db00 | (src0 << 0) | (abs0 << 6) | (sqrt << 7) | (swz0 << 3); 332796c5ddc4Srjs } else if ((log == 1) && (sqrt == 0)) { 332896c5ddc4Srjs return 0x3da00 | (src0 << 0) | (abs0 << 6) | (swz0 << 3) | (neg0 << 7); 332996c5ddc4Srjs } else { 333096c5ddc4Srjs unreachable("No matching state found in add_frexpm_v2f16"); 333196c5ddc4Srjs } 333296c5ddc4Srjs} 333396c5ddc4Srjs 333496c5ddc4Srjsstatic inline unsigned 
333596c5ddc4Srjsbi_pack_add_fround_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 333696c5ddc4Srjs{ 333796c5ddc4Srjs unsigned abs0 = I->src[0].abs; 333896c5ddc4Srjs assert(abs0 < 2); 333996c5ddc4Srjs unsigned neg0 = I->src[0].neg; 334096c5ddc4Srjs assert(neg0 < 2); 334196c5ddc4Srjs static uint8_t widen0_table[] = { 2, 1, ~0, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 334296c5ddc4Srjs assert(I->src[0].swizzle < 13); 334396c5ddc4Srjs unsigned widen0 = widen0_table[I->src[0].swizzle]; 334496c5ddc4Srjs assert(widen0 < 4); 334596c5ddc4Srjs unsigned round = I->round; 334696c5ddc4Srjs assert(round < 4); 334796c5ddc4Srjs return 0x3e820 | (src0 << 0) | (abs0 << 7) | (neg0 << 8) | (widen0 << 3) | (round << 9); 334896c5ddc4Srjs} 334996c5ddc4Srjs 335096c5ddc4Srjsstatic inline unsigned 335196c5ddc4Srjsbi_pack_add_fround_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 335296c5ddc4Srjs{ 335396c5ddc4Srjs unsigned abs0 = I->src[0].abs; 335496c5ddc4Srjs assert(abs0 < 2); 335596c5ddc4Srjs unsigned neg0 = I->src[0].neg; 335696c5ddc4Srjs assert(neg0 < 2); 335796c5ddc4Srjs static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 335896c5ddc4Srjs assert(I->src[0].swizzle < 13); 335996c5ddc4Srjs unsigned swz0 = swz0_table[I->src[0].swizzle]; 336096c5ddc4Srjs assert(swz0 < 4); 336196c5ddc4Srjs unsigned round = I->round; 336296c5ddc4Srjs assert(round < 4); 336396c5ddc4Srjs return 0x3e800 | (src0 << 0) | (abs0 << 7) | (neg0 << 8) | (swz0 << 3) | (round << 9); 336496c5ddc4Srjs} 336596c5ddc4Srjs 336696c5ddc4Srjsstatic inline unsigned 336796c5ddc4Srjsbi_pack_add_frsq_f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 336896c5ddc4Srjs{ 336996c5ddc4Srjs assert((1 << src0) & 0xf7); 337096c5ddc4Srjs unsigned neg0 
= I->src[0].neg; 337196c5ddc4Srjs assert(neg0 < 2); 337296c5ddc4Srjs unsigned abs0 = I->src[0].abs; 337396c5ddc4Srjs assert(abs0 < 2); 337496c5ddc4Srjs unsigned divzero = I->divzero; 337596c5ddc4Srjs assert(divzero < 2); 337696c5ddc4Srjs static uint8_t lane0_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 337796c5ddc4Srjs assert(I->src[0].swizzle < 13); 337896c5ddc4Srjs unsigned lane0 = lane0_table[I->src[0].swizzle]; 337996c5ddc4Srjs assert(lane0 < 2); 338096c5ddc4Srjs return 0x67280 | (src0 << 0) | (neg0 << 3) | (abs0 << 4) | (divzero << 5) | (lane0 << 8); 338196c5ddc4Srjs} 338296c5ddc4Srjs 338396c5ddc4Srjsstatic inline unsigned 338496c5ddc4Srjsbi_pack_add_frsq_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 338596c5ddc4Srjs{ 338696c5ddc4Srjs assert((1 << src0) & 0xf7); 338796c5ddc4Srjs static uint8_t widen0_table[] = { ~0, 0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 338896c5ddc4Srjs assert(I->src[0].swizzle < 13); 338996c5ddc4Srjs unsigned widen0 = widen0_table[I->src[0].swizzle]; 339096c5ddc4Srjs assert(widen0 < 4); 339196c5ddc4Srjs unsigned neg0 = I->src[0].neg; 339296c5ddc4Srjs assert(neg0 < 2); 339396c5ddc4Srjs unsigned abs0 = I->src[0].abs; 339496c5ddc4Srjs assert(abs0 < 2); 339596c5ddc4Srjs unsigned derived_6 = 0; 339696c5ddc4Srjs if (widen0 == 0) derived_6 = 0; 339796c5ddc4Srjs else unreachable("No pattern match at pos 6"); 339896c5ddc4Srjs 339996c5ddc4Srjs return 0x66100 | (src0 << 0) | (neg0 << 3) | (abs0 << 4) | (derived_6 << 6); 340096c5ddc4Srjs} 340196c5ddc4Srjs 340296c5ddc4Srjsstatic inline unsigned 340396c5ddc4Srjsbi_pack_add_frsq_approx_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 340496c5ddc4Srjs{ 340596c5ddc4Srjs assert((1 << src0) & 0xf7); 340696c5ddc4Srjs static uint8_t widen0_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 
340796c5ddc4Srjs assert(I->src[0].swizzle < 13); 340896c5ddc4Srjs unsigned widen0 = widen0_table[I->src[0].swizzle]; 340996c5ddc4Srjs assert(widen0 < 4); 341096c5ddc4Srjs unsigned neg0 = I->src[0].neg; 341196c5ddc4Srjs assert(neg0 < 2); 341296c5ddc4Srjs unsigned abs0 = I->src[0].abs; 341396c5ddc4Srjs assert(abs0 < 2); 341496c5ddc4Srjs unsigned divzero = I->divzero; 341596c5ddc4Srjs assert(divzero < 2); 341696c5ddc4Srjs if (widen0 == 0) { 341796c5ddc4Srjs return 0x67100 | (src0 << 0) | (neg0 << 3) | (abs0 << 4) | (divzero << 5); 341896c5ddc4Srjs } else if (widen0 != 0) { 341996c5ddc4Srjs unsigned derived_7 = 0; 342096c5ddc4Srjs if (widen0 == 1) derived_7 = 0; 342196c5ddc4Srjs else if (widen0 == 2) derived_7 = 1; 342296c5ddc4Srjs else unreachable("No pattern match at pos 7"); 342396c5ddc4Srjs 342496c5ddc4Srjs return 0x67140 | (src0 << 0) | (neg0 << 3) | (abs0 << 4) | (divzero << 5) | (derived_7 << 7); 342596c5ddc4Srjs } else { 342696c5ddc4Srjs unreachable("No matching state found in add_frsq_approx_f32"); 342796c5ddc4Srjs } 342896c5ddc4Srjs} 342996c5ddc4Srjs 343096c5ddc4Srjsstatic inline unsigned 343196c5ddc4Srjsbi_pack_add_fsincos_offset_u6(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 343296c5ddc4Srjs{ 343396c5ddc4Srjs assert((1 << src0) & 0xf7); 343496c5ddc4Srjs unsigned scale = I->scale; 343596c5ddc4Srjs assert(scale < 2); 343696c5ddc4Srjs return 0x67aa0 | (src0 << 0) | (scale << 3); 343796c5ddc4Srjs} 343896c5ddc4Srjs 343996c5ddc4Srjsstatic inline unsigned 344096c5ddc4Srjsbi_pack_add_fsin_table_u6(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 344196c5ddc4Srjs{ 344296c5ddc4Srjs assert((1 << src0) & 0xf7); 344396c5ddc4Srjs unsigned offset = I->offset; 344496c5ddc4Srjs assert(offset < 2); 344596c5ddc4Srjs return 0x67a80 | (src0 << 0) | (offset << 4); 344696c5ddc4Srjs} 344796c5ddc4Srjs 
344896c5ddc4Srjsstatic inline unsigned 344996c5ddc4Srjsbi_pack_add_hadd_s32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 345096c5ddc4Srjs{ 345196c5ddc4Srjs static uint8_t round_table[] = { ~0, 1, 0, ~0, ~0, ~0, ~0, ~0, ~0 }; 345296c5ddc4Srjs assert(I->round < 9); 345396c5ddc4Srjs unsigned round = round_table[I->round]; 345496c5ddc4Srjs assert(round < 2); 345596c5ddc4Srjs return 0xbc640 | (src0 << 0) | (src1 << 3) | (round << 12); 345696c5ddc4Srjs} 345796c5ddc4Srjs 345896c5ddc4Srjsstatic inline unsigned 345996c5ddc4Srjsbi_pack_add_hadd_u32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 346096c5ddc4Srjs{ 346196c5ddc4Srjs static uint8_t round_table[] = { ~0, 1, 0, ~0, ~0, ~0, ~0, ~0, ~0 }; 346296c5ddc4Srjs assert(I->round < 9); 346396c5ddc4Srjs unsigned round = round_table[I->round]; 346496c5ddc4Srjs assert(round < 2); 346596c5ddc4Srjs return 0xbc6c0 | (src0 << 0) | (src1 << 3) | (round << 12); 346696c5ddc4Srjs} 346796c5ddc4Srjs 346896c5ddc4Srjsstatic inline unsigned 346996c5ddc4Srjsbi_pack_add_hadd_v2s16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 347096c5ddc4Srjs{ 347196c5ddc4Srjs static uint8_t round_table[] = { ~0, 1, 0, ~0, ~0, ~0, ~0, ~0, ~0 }; 347296c5ddc4Srjs assert(I->round < 9); 347396c5ddc4Srjs unsigned round = round_table[I->round]; 347496c5ddc4Srjs assert(round < 2); 347596c5ddc4Srjs static uint8_t swap1_table[] = { ~0, 0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 347696c5ddc4Srjs assert(I->src[1].swizzle < 13); 347796c5ddc4Srjs unsigned swap1 = swap1_table[I->src[1].swizzle]; 347896c5ddc4Srjs assert(swap1 < 2); 347996c5ddc4Srjs static uint8_t swap0_table[] = { ~0, 0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 348096c5ddc4Srjs assert(I->src[0].swizzle < 13); 348196c5ddc4Srjs unsigned swap0 = 
swap0_table[I->src[0].swizzle]; 348296c5ddc4Srjs assert(swap0 < 2); 348396c5ddc4Srjs return 0xbc840 | (src0 << 0) | (src1 << 3) | (round << 12) | (swap1 << 9) | (swap0 << 10); 348496c5ddc4Srjs} 348596c5ddc4Srjs 348696c5ddc4Srjsstatic inline unsigned 348796c5ddc4Srjsbi_pack_add_hadd_v2u16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 348896c5ddc4Srjs{ 348996c5ddc4Srjs static uint8_t round_table[] = { ~0, 1, 0, ~0, ~0, ~0, ~0, ~0, ~0 }; 349096c5ddc4Srjs assert(I->round < 9); 349196c5ddc4Srjs unsigned round = round_table[I->round]; 349296c5ddc4Srjs assert(round < 2); 349396c5ddc4Srjs static uint8_t swap1_table[] = { ~0, 0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 349496c5ddc4Srjs assert(I->src[1].swizzle < 13); 349596c5ddc4Srjs unsigned swap1 = swap1_table[I->src[1].swizzle]; 349696c5ddc4Srjs assert(swap1 < 2); 349796c5ddc4Srjs static uint8_t swap0_table[] = { ~0, 0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 349896c5ddc4Srjs assert(I->src[0].swizzle < 13); 349996c5ddc4Srjs unsigned swap0 = swap0_table[I->src[0].swizzle]; 350096c5ddc4Srjs assert(swap0 < 2); 350196c5ddc4Srjs return 0xbc8c0 | (src0 << 0) | (src1 << 3) | (round << 12) | (swap1 << 9) | (swap0 << 10); 350296c5ddc4Srjs} 350396c5ddc4Srjs 350496c5ddc4Srjsstatic inline unsigned 350596c5ddc4Srjsbi_pack_add_hadd_v4s8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 350696c5ddc4Srjs{ 350796c5ddc4Srjs static uint8_t round_table[] = { ~0, 1, 0, ~0, ~0, ~0, ~0, ~0, ~0 }; 350896c5ddc4Srjs assert(I->round < 9); 350996c5ddc4Srjs unsigned round = round_table[I->round]; 351096c5ddc4Srjs assert(round < 2); 351196c5ddc4Srjs return 0xbc440 | (src0 << 0) | (src1 << 3) | (round << 12); 351296c5ddc4Srjs} 351396c5ddc4Srjs 351496c5ddc4Srjsstatic inline unsigned 351596c5ddc4Srjsbi_pack_add_hadd_v4u8(bi_instr *I, enum bifrost_packed_src src0, enum 
bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 351696c5ddc4Srjs{ 351796c5ddc4Srjs static uint8_t round_table[] = { ~0, 1, 0, ~0, ~0, ~0, ~0, ~0, ~0 }; 351896c5ddc4Srjs assert(I->round < 9); 351996c5ddc4Srjs unsigned round = round_table[I->round]; 352096c5ddc4Srjs assert(round < 2); 352196c5ddc4Srjs return 0xbc4c0 | (src0 << 0) | (src1 << 3) | (round << 12); 352296c5ddc4Srjs} 352396c5ddc4Srjs 352496c5ddc4Srjsstatic inline unsigned 352596c5ddc4Srjsbi_pack_add_iabs_s32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 352696c5ddc4Srjs{ 352796c5ddc4Srjs 352896c5ddc4Srjs return 0x3dea0 | (src0 << 0); 352996c5ddc4Srjs} 353096c5ddc4Srjs 353196c5ddc4Srjsstatic inline unsigned 353296c5ddc4Srjsbi_pack_add_iabs_v2s16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 353396c5ddc4Srjs{ 353496c5ddc4Srjs static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 353596c5ddc4Srjs assert(I->src[0].swizzle < 13); 353696c5ddc4Srjs unsigned swz0 = swz0_table[I->src[0].swizzle]; 353796c5ddc4Srjs assert(swz0 < 4); 353896c5ddc4Srjs return 0x3de88 | (src0 << 0) | (swz0 << 4); 353996c5ddc4Srjs} 354096c5ddc4Srjs 354196c5ddc4Srjsstatic inline unsigned 354296c5ddc4Srjsbi_pack_add_iabs_v4s8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 354396c5ddc4Srjs{ 354496c5ddc4Srjs 354596c5ddc4Srjs return 0x3deb0 | (src0 << 0); 354696c5ddc4Srjs} 354796c5ddc4Srjs 354896c5ddc4Srjsstatic inline unsigned 354996c5ddc4Srjsbi_pack_add_iadd_s32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 355096c5ddc4Srjs{ 355196c5ddc4Srjs unsigned saturate = I->saturate; 355296c5ddc4Srjs assert(saturate < 2); 355396c5ddc4Srjs static 
uint8_t lanes1_table[] = { 1, 0, ~0, 2, 3, 4, 5, 6, ~0, ~0, ~0, ~0, ~0 }; 355496c5ddc4Srjs assert(I->src[1].swizzle < 13); 355596c5ddc4Srjs unsigned lanes1 = lanes1_table[I->src[1].swizzle]; 355696c5ddc4Srjs assert(lanes1 < 8); 355796c5ddc4Srjs if (lanes1 == 0) { 355896c5ddc4Srjs return 0xbc600 | (src0 << 0) | (src1 << 3) | (saturate << 8); 355996c5ddc4Srjs } else if ((lanes1 == 1) || (lanes1 == 2)) { 356096c5ddc4Srjs unsigned derived_9 = 0; 356196c5ddc4Srjs if (lanes1 == 1) derived_9 = 0; 356296c5ddc4Srjs else if (lanes1 == 2) derived_9 = 1; 356396c5ddc4Srjs else unreachable("No pattern match at pos 9"); 356496c5ddc4Srjs 356596c5ddc4Srjs return 0xbec00 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9); 356696c5ddc4Srjs } else if ((lanes1 == 3) || (lanes1 == 4) || (lanes1 == 5) || (lanes1 == 6)) { 356796c5ddc4Srjs unsigned derived_9 = 0; 356896c5ddc4Srjs if (lanes1 == 3) derived_9 = 0; 356996c5ddc4Srjs else if (lanes1 == 4) derived_9 = 1; 357096c5ddc4Srjs else if (lanes1 == 5) derived_9 = 2; 357196c5ddc4Srjs else if (lanes1 == 6) derived_9 = 3; 357296c5ddc4Srjs else unreachable("No pattern match at pos 9"); 357396c5ddc4Srjs 357496c5ddc4Srjs return 0xbe000 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9); 357596c5ddc4Srjs } else { 357696c5ddc4Srjs unreachable("No matching state found in add_iadd_s32"); 357796c5ddc4Srjs } 357896c5ddc4Srjs} 357996c5ddc4Srjs 358096c5ddc4Srjsstatic inline unsigned 358196c5ddc4Srjsbi_pack_add_iadd_u32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 358296c5ddc4Srjs{ 358396c5ddc4Srjs unsigned saturate = I->saturate; 358496c5ddc4Srjs assert(saturate < 2); 358596c5ddc4Srjs static uint8_t lanes1_table[] = { 1, 0, ~0, 2, 3, 4, 5, 6, ~0, ~0, ~0, ~0, ~0 }; 358696c5ddc4Srjs assert(I->src[1].swizzle < 13); 358796c5ddc4Srjs unsigned lanes1 = lanes1_table[I->src[1].swizzle]; 358896c5ddc4Srjs assert(lanes1 < 8); 358996c5ddc4Srjs if 
(lanes1 == 0) { 359096c5ddc4Srjs unsigned derived_7 = 0; 359196c5ddc4Srjs if ((saturate == 0) && (lanes1 == 0)) derived_7 = 0; 359296c5ddc4Srjs else if ((saturate == 1) || (lanes1 != 0)) derived_7 = 1; 359396c5ddc4Srjs else unreachable("No pattern match at pos 7"); 359496c5ddc4Srjs 359596c5ddc4Srjs return 0xbc600 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7); 359696c5ddc4Srjs } else if ((lanes1 == 1) || (lanes1 == 2)) { 359796c5ddc4Srjs unsigned derived_7 = 0; 359896c5ddc4Srjs if ((saturate == 0) && (lanes1 == 0)) derived_7 = 0; 359996c5ddc4Srjs else if ((saturate == 1) || (lanes1 != 0)) derived_7 = 1; 360096c5ddc4Srjs else unreachable("No pattern match at pos 7"); 360196c5ddc4Srjs 360296c5ddc4Srjs unsigned derived_9 = 0; 360396c5ddc4Srjs if (lanes1 == 1) derived_9 = 0; 360496c5ddc4Srjs else if (lanes1 == 2) derived_9 = 1; 360596c5ddc4Srjs else unreachable("No pattern match at pos 9"); 360696c5ddc4Srjs 360796c5ddc4Srjs return 0xbec00 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9); 360896c5ddc4Srjs } else if ((lanes1 == 3) || (lanes1 == 4) || (lanes1 == 5) || (lanes1 == 6)) { 360996c5ddc4Srjs unsigned derived_7 = 0; 361096c5ddc4Srjs if ((saturate == 0) && (lanes1 == 0)) derived_7 = 0; 361196c5ddc4Srjs else if ((saturate == 1) || (lanes1 != 0)) derived_7 = 1; 361296c5ddc4Srjs else unreachable("No pattern match at pos 7"); 361396c5ddc4Srjs 361496c5ddc4Srjs unsigned derived_9 = 0; 361596c5ddc4Srjs if (lanes1 == 3) derived_9 = 0; 361696c5ddc4Srjs else if (lanes1 == 4) derived_9 = 1; 361796c5ddc4Srjs else if (lanes1 == 5) derived_9 = 2; 361896c5ddc4Srjs else if (lanes1 == 6) derived_9 = 3; 361996c5ddc4Srjs else unreachable("No pattern match at pos 9"); 362096c5ddc4Srjs 362196c5ddc4Srjs return 0xbe000 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9); 362296c5ddc4Srjs } else { 362396c5ddc4Srjs unreachable("No matching state found in add_iadd_u32"); 362496c5ddc4Srjs } 
362596c5ddc4Srjs} 362696c5ddc4Srjs 362796c5ddc4Srjsstatic inline unsigned 362896c5ddc4Srjsbi_pack_add_iadd_v2s16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 362996c5ddc4Srjs{ 363096c5ddc4Srjs unsigned saturate = I->saturate; 363196c5ddc4Srjs assert(saturate < 2); 363296c5ddc4Srjs static uint8_t lanes0_table[] = { ~0, 0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 363396c5ddc4Srjs assert(I->src[0].swizzle < 13); 363496c5ddc4Srjs unsigned lanes0 = lanes0_table[I->src[0].swizzle]; 363596c5ddc4Srjs assert(lanes0 < 2); 363696c5ddc4Srjs static uint8_t lanes1_table[] = { 2, 0, 1, 3, ~0, ~0, ~0, ~0, 4, 5, ~0, ~0, ~0 }; 363796c5ddc4Srjs assert(I->src[1].swizzle < 13); 363896c5ddc4Srjs unsigned lanes1 = lanes1_table[I->src[1].swizzle]; 363996c5ddc4Srjs assert(lanes1 < 8); 364096c5ddc4Srjs if (((lanes0 == 0) || (lanes0 == 1)) && ((lanes1 == 0) || (lanes1 == 1))) { 364196c5ddc4Srjs unsigned derived_9 = 0; 364296c5ddc4Srjs if (lanes1 == 0) derived_9 = 0; 364396c5ddc4Srjs else if (lanes1 == 1) derived_9 = 1; 364496c5ddc4Srjs else unreachable("No pattern match at pos 9"); 364596c5ddc4Srjs 364696c5ddc4Srjs unsigned derived_10 = 0; 364796c5ddc4Srjs if (lanes0 == 0) derived_10 = 0; 364896c5ddc4Srjs else if (lanes0 == 1) derived_10 = 1; 364996c5ddc4Srjs else unreachable("No pattern match at pos 10"); 365096c5ddc4Srjs 365196c5ddc4Srjs return 0xbc800 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9) | (derived_10 << 10); 365296c5ddc4Srjs } else if ((lanes0 == 0) && ((lanes1 == 2) || (lanes1 == 3))) { 365396c5ddc4Srjs unsigned derived_9 = 0; 365496c5ddc4Srjs if (lanes1 == 2) derived_9 = 0; 365596c5ddc4Srjs else if (lanes1 == 3) derived_9 = 1; 365696c5ddc4Srjs else unreachable("No pattern match at pos 9"); 365796c5ddc4Srjs 365896c5ddc4Srjs return 0xbec40 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9); 365996c5ddc4Srjs } else if ((lanes0 == 0) && ((lanes1 == 4) || 
(lanes1 == 5))) { 366096c5ddc4Srjs unsigned derived_9 = 0; 366196c5ddc4Srjs if (lanes1 == 4) derived_9 = 0; 366296c5ddc4Srjs else if (lanes1 == 5) derived_9 = 1; 366396c5ddc4Srjs else unreachable("No pattern match at pos 9"); 366496c5ddc4Srjs 366596c5ddc4Srjs return 0xbe800 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9); 366696c5ddc4Srjs } else { 366796c5ddc4Srjs unreachable("No matching state found in add_iadd_v2s16"); 366896c5ddc4Srjs } 366996c5ddc4Srjs} 367096c5ddc4Srjs 367196c5ddc4Srjsstatic inline unsigned 367296c5ddc4Srjsbi_pack_add_iadd_v2u16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 367396c5ddc4Srjs{ 367496c5ddc4Srjs unsigned saturate = I->saturate; 367596c5ddc4Srjs assert(saturate < 2); 367696c5ddc4Srjs static uint8_t lanes0_table[] = { ~0, 0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 367796c5ddc4Srjs assert(I->src[0].swizzle < 13); 367896c5ddc4Srjs unsigned lanes0 = lanes0_table[I->src[0].swizzle]; 367996c5ddc4Srjs assert(lanes0 < 2); 368096c5ddc4Srjs static uint8_t lanes1_table[] = { 2, 0, 1, 3, ~0, ~0, ~0, ~0, 4, 5, ~0, ~0, ~0 }; 368196c5ddc4Srjs assert(I->src[1].swizzle < 13); 368296c5ddc4Srjs unsigned lanes1 = lanes1_table[I->src[1].swizzle]; 368396c5ddc4Srjs assert(lanes1 < 8); 368496c5ddc4Srjs if (((lanes0 == 0) || (lanes0 == 1)) && ((lanes1 == 0) || (lanes1 == 1))) { 368596c5ddc4Srjs unsigned derived_7 = 0; 368696c5ddc4Srjs if ((saturate == 0) && (lanes1 != 4) && (lanes1 != 5)) derived_7 = 0; 368796c5ddc4Srjs else if ((saturate == 1) || (lanes1 == 4) || (lanes1 == 5)) derived_7 = 1; 368896c5ddc4Srjs else unreachable("No pattern match at pos 7"); 368996c5ddc4Srjs 369096c5ddc4Srjs unsigned derived_9 = 0; 369196c5ddc4Srjs if (lanes1 == 0) derived_9 = 0; 369296c5ddc4Srjs else if (lanes1 == 1) derived_9 = 1; 369396c5ddc4Srjs else unreachable("No pattern match at pos 9"); 369496c5ddc4Srjs 369596c5ddc4Srjs unsigned derived_10 = 0; 
369696c5ddc4Srjs if (lanes0 == 0) derived_10 = 0; 369796c5ddc4Srjs else if (lanes0 == 1) derived_10 = 1; 369896c5ddc4Srjs else unreachable("No pattern match at pos 10"); 369996c5ddc4Srjs 370096c5ddc4Srjs return 0xbc800 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9) | (derived_10 << 10); 370196c5ddc4Srjs } else if ((lanes0 == 0) && ((lanes1 == 2) || (lanes1 == 3))) { 370296c5ddc4Srjs unsigned derived_7 = 0; 370396c5ddc4Srjs if ((saturate == 0) && (lanes1 != 4) && (lanes1 != 5)) derived_7 = 0; 370496c5ddc4Srjs else if ((saturate == 1) || (lanes1 == 4) || (lanes1 == 5)) derived_7 = 1; 370596c5ddc4Srjs else unreachable("No pattern match at pos 7"); 370696c5ddc4Srjs 370796c5ddc4Srjs unsigned derived_9 = 0; 370896c5ddc4Srjs if (lanes1 == 2) derived_9 = 0; 370996c5ddc4Srjs else if (lanes1 == 3) derived_9 = 1; 371096c5ddc4Srjs else unreachable("No pattern match at pos 9"); 371196c5ddc4Srjs 371296c5ddc4Srjs return 0xbec40 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9); 371396c5ddc4Srjs } else if ((lanes0 == 0) && ((lanes1 == 4) || (lanes1 == 5))) { 371496c5ddc4Srjs unsigned derived_7 = 0; 371596c5ddc4Srjs if ((saturate == 0) && (lanes1 != 4) && (lanes1 != 5)) derived_7 = 0; 371696c5ddc4Srjs else if ((saturate == 1) || (lanes1 == 4) || (lanes1 == 5)) derived_7 = 1; 371796c5ddc4Srjs else unreachable("No pattern match at pos 7"); 371896c5ddc4Srjs 371996c5ddc4Srjs unsigned derived_9 = 0; 372096c5ddc4Srjs if (lanes1 == 4) derived_9 = 0; 372196c5ddc4Srjs else if (lanes1 == 5) derived_9 = 1; 372296c5ddc4Srjs else unreachable("No pattern match at pos 9"); 372396c5ddc4Srjs 372496c5ddc4Srjs return 0xbe800 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9); 372596c5ddc4Srjs } else { 372696c5ddc4Srjs unreachable("No matching state found in add_iadd_v2u16"); 372796c5ddc4Srjs } 372896c5ddc4Srjs} 372996c5ddc4Srjs 373096c5ddc4Srjsstatic inline unsigned 
373196c5ddc4Srjsbi_pack_add_iadd_v4s8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 373296c5ddc4Srjs{ 373396c5ddc4Srjs unsigned saturate = I->saturate; 373496c5ddc4Srjs assert(saturate < 2); 373596c5ddc4Srjs static uint8_t lanes0_table[] = { ~0, 0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 373696c5ddc4Srjs assert(I->src[0].swizzle < 13); 373796c5ddc4Srjs unsigned lanes0 = lanes0_table[I->src[0].swizzle]; 373896c5ddc4Srjs assert(lanes0 < 8); 373996c5ddc4Srjs static uint8_t lanes1_table[] = { ~0, 0, ~0, ~0, 1, 2, 3, 4, ~0, ~0, ~0, ~0, ~0 }; 374096c5ddc4Srjs assert(I->src[1].swizzle < 13); 374196c5ddc4Srjs unsigned lanes1 = lanes1_table[I->src[1].swizzle]; 374296c5ddc4Srjs assert(lanes1 < 8); 374396c5ddc4Srjs if ((lanes0 == 0) && (lanes1 == 0)) { 374496c5ddc4Srjs return 0xbc400 | (src0 << 0) | (src1 << 3) | (saturate << 8); 374596c5ddc4Srjs } else if ((lanes0 == 0) && ((lanes1 == 1) || (lanes1 == 2) || (lanes1 == 3) || (lanes1 == 4))) { 374696c5ddc4Srjs unsigned derived_9 = 0; 374796c5ddc4Srjs if (lanes1 == 1) derived_9 = 0; 374896c5ddc4Srjs else if (lanes1 == 2) derived_9 = 1; 374996c5ddc4Srjs else if (lanes1 == 3) derived_9 = 2; 375096c5ddc4Srjs else if (lanes1 == 4) derived_9 = 3; 375196c5ddc4Srjs else unreachable("No pattern match at pos 9"); 375296c5ddc4Srjs 375396c5ddc4Srjs return 0xbe040 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9); 375496c5ddc4Srjs } else if ((lanes0 == 0) && ((lanes1 == 5) || (lanes1 == 6))) { 375596c5ddc4Srjs unsigned derived_9 = 0; 375696c5ddc4Srjs if (lanes1 == 5) derived_9 = 0; 375796c5ddc4Srjs else if (lanes1 == 6) derived_9 = 1; 375896c5ddc4Srjs else unreachable("No pattern match at pos 9"); 375996c5ddc4Srjs 376096c5ddc4Srjs return 0xbe840 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9); 376196c5ddc4Srjs } else { 376296c5ddc4Srjs unreachable("No matching state found in add_iadd_v4s8"); 376396c5ddc4Srjs } 
376496c5ddc4Srjs} 376596c5ddc4Srjs 376696c5ddc4Srjsstatic inline unsigned 376796c5ddc4Srjsbi_pack_add_iadd_v4u8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 376896c5ddc4Srjs{ 376996c5ddc4Srjs unsigned saturate = I->saturate; 377096c5ddc4Srjs assert(saturate < 2); 377196c5ddc4Srjs static uint8_t lanes0_table[] = { ~0, 0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 377296c5ddc4Srjs assert(I->src[0].swizzle < 13); 377396c5ddc4Srjs unsigned lanes0 = lanes0_table[I->src[0].swizzle]; 377496c5ddc4Srjs assert(lanes0 < 8); 377596c5ddc4Srjs static uint8_t lanes1_table[] = { ~0, 0, ~0, ~0, 1, 2, 3, 4, ~0, ~0, ~0, ~0, ~0 }; 377696c5ddc4Srjs assert(I->src[1].swizzle < 13); 377796c5ddc4Srjs unsigned lanes1 = lanes1_table[I->src[1].swizzle]; 377896c5ddc4Srjs assert(lanes1 < 8); 377996c5ddc4Srjs if ((lanes0 == 0) && (lanes1 == 0)) { 378096c5ddc4Srjs unsigned derived_7 = 0; 378196c5ddc4Srjs if (saturate == 0) derived_7 = 0; 378296c5ddc4Srjs else if (saturate == 1) derived_7 = 1; 378396c5ddc4Srjs else unreachable("No pattern match at pos 7"); 378496c5ddc4Srjs 378596c5ddc4Srjs return 0xbc400 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7); 378696c5ddc4Srjs } else if ((lanes0 == 0) && ((lanes1 == 1) || (lanes1 == 2) || (lanes1 == 3) || (lanes1 == 4))) { 378796c5ddc4Srjs unsigned derived_7 = 0; 378896c5ddc4Srjs if (saturate == 0) derived_7 = 0; 378996c5ddc4Srjs else if (saturate == 1) derived_7 = 1; 379096c5ddc4Srjs else unreachable("No pattern match at pos 7"); 379196c5ddc4Srjs 379296c5ddc4Srjs unsigned derived_9 = 0; 379396c5ddc4Srjs if (lanes1 == 1) derived_9 = 0; 379496c5ddc4Srjs else if (lanes1 == 2) derived_9 = 1; 379596c5ddc4Srjs else if (lanes1 == 3) derived_9 = 2; 379696c5ddc4Srjs else if (lanes1 == 4) derived_9 = 3; 379796c5ddc4Srjs else unreachable("No pattern match at pos 9"); 379896c5ddc4Srjs 379996c5ddc4Srjs return 0xbe040 | (src0 << 0) | (src1 << 3) | (saturate << 
8) | (derived_7 << 7) | (derived_9 << 9); 380096c5ddc4Srjs } else if ((lanes0 == 0) && ((lanes1 == 5) || (lanes1 == 6))) { 380196c5ddc4Srjs unsigned derived_7 = 0; 380296c5ddc4Srjs if (saturate == 0) derived_7 = 0; 380396c5ddc4Srjs else if (saturate == 1) derived_7 = 1; 380496c5ddc4Srjs else unreachable("No pattern match at pos 7"); 380596c5ddc4Srjs 380696c5ddc4Srjs unsigned derived_9 = 0; 380796c5ddc4Srjs if (lanes1 == 5) derived_9 = 0; 380896c5ddc4Srjs else if (lanes1 == 6) derived_9 = 1; 380996c5ddc4Srjs else unreachable("No pattern match at pos 9"); 381096c5ddc4Srjs 381196c5ddc4Srjs return 0xbe840 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9); 381296c5ddc4Srjs } else { 381396c5ddc4Srjs unreachable("No matching state found in add_iadd_v4u8"); 381496c5ddc4Srjs } 381596c5ddc4Srjs} 381696c5ddc4Srjs 381796c5ddc4Srjsstatic inline unsigned 381896c5ddc4Srjsbi_pack_add_icmp_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 381996c5ddc4Srjs{ 382096c5ddc4Srjs static uint8_t result_type_table[] = { 0, ~0, 1, ~0 }; 382196c5ddc4Srjs assert(I->result_type < 4); 382296c5ddc4Srjs unsigned result_type = result_type_table[I->result_type]; 382396c5ddc4Srjs assert(result_type < 2); 382496c5ddc4Srjs static uint8_t cmpf_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0 }; 382596c5ddc4Srjs assert(I->cmpf < 9); 382696c5ddc4Srjs unsigned cmpf = cmpf_table[I->cmpf]; 382796c5ddc4Srjs assert(cmpf < 2); 382896c5ddc4Srjs return 0x7b300 | (src0 << 0) | (src1 << 3) | (result_type << 10) | (cmpf << 6); 382996c5ddc4Srjs} 383096c5ddc4Srjs 383196c5ddc4Srjsstatic inline unsigned 383296c5ddc4Srjsbi_pack_add_icmp_s32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 383396c5ddc4Srjs{ 383496c5ddc4Srjs static uint8_t result_type_table[] = { 0, ~0, 1, ~0 }; 383596c5ddc4Srjs assert(I->result_type < 4); 
383696c5ddc4Srjs unsigned result_type = result_type_table[I->result_type]; 383796c5ddc4Srjs assert(result_type < 2); 383896c5ddc4Srjs static uint8_t cmpf_table[] = { ~0, 0, 1, ~0, 2, 3, ~0, ~0, ~0 }; 383996c5ddc4Srjs assert(I->cmpf < 9); 384096c5ddc4Srjs unsigned cmpf = cmpf_table[I->cmpf]; 384196c5ddc4Srjs assert(cmpf < 4); 384296c5ddc4Srjs if ((cmpf == 2) || (cmpf == 3)) { 384396c5ddc4Srjs { unsigned temp = src0; src0 = src1; src1 = temp; } 384496c5ddc4Srjs if (cmpf == 2) cmpf = 0; 384596c5ddc4Srjs else if (cmpf == 3) cmpf = 1; 384696c5ddc4Srjs } 384796c5ddc4Srjs 384896c5ddc4Srjs unsigned derived_6 = 0; 384996c5ddc4Srjs if (cmpf == 0) derived_6 = 0; 385096c5ddc4Srjs else if (cmpf == 1) derived_6 = 1; 385196c5ddc4Srjs else unreachable("No pattern match at pos 6"); 385296c5ddc4Srjs 385396c5ddc4Srjs return 0x7b200 | (src0 << 0) | (src1 << 3) | (result_type << 10) | (derived_6 << 6); 385496c5ddc4Srjs} 385596c5ddc4Srjs 385696c5ddc4Srjsstatic inline unsigned 385796c5ddc4Srjsbi_pack_add_icmp_u32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 385896c5ddc4Srjs{ 385996c5ddc4Srjs static uint8_t result_type_table[] = { 0, ~0, 1, ~0 }; 386096c5ddc4Srjs assert(I->result_type < 4); 386196c5ddc4Srjs unsigned result_type = result_type_table[I->result_type]; 386296c5ddc4Srjs assert(result_type < 2); 386396c5ddc4Srjs static uint8_t cmpf_table[] = { ~0, 0, 1, ~0, 2, 3, ~0, ~0, ~0 }; 386496c5ddc4Srjs assert(I->cmpf < 9); 386596c5ddc4Srjs unsigned cmpf = cmpf_table[I->cmpf]; 386696c5ddc4Srjs assert(cmpf < 4); 386796c5ddc4Srjs if ((cmpf == 2) || (cmpf == 3)) { 386896c5ddc4Srjs { unsigned temp = src0; src0 = src1; src1 = temp; } 386996c5ddc4Srjs if (cmpf == 2) cmpf = 0; 387096c5ddc4Srjs else if (cmpf == 3) cmpf = 1; 387196c5ddc4Srjs } 387296c5ddc4Srjs 387396c5ddc4Srjs unsigned derived_6 = 0; 387496c5ddc4Srjs if (cmpf == 0) derived_6 = 0; 387596c5ddc4Srjs else if (cmpf == 1) derived_6 = 1; 
387696c5ddc4Srjs else unreachable("No pattern match at pos 6"); 387796c5ddc4Srjs 387896c5ddc4Srjs return 0x7b280 | (src0 << 0) | (src1 << 3) | (result_type << 10) | (derived_6 << 6); 387996c5ddc4Srjs} 388096c5ddc4Srjs 388196c5ddc4Srjsstatic inline unsigned 388296c5ddc4Srjsbi_pack_add_icmp_v2i16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 388396c5ddc4Srjs{ 388496c5ddc4Srjs static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 388596c5ddc4Srjs assert(I->src[0].swizzle < 13); 388696c5ddc4Srjs unsigned swz0 = swz0_table[I->src[0].swizzle]; 388796c5ddc4Srjs assert(swz0 < 4); 388896c5ddc4Srjs static uint8_t swz1_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 388996c5ddc4Srjs assert(I->src[1].swizzle < 13); 389096c5ddc4Srjs unsigned swz1 = swz1_table[I->src[1].swizzle]; 389196c5ddc4Srjs assert(swz1 < 4); 389296c5ddc4Srjs static uint8_t result_type_table[] = { 0, ~0, 1, ~0 }; 389396c5ddc4Srjs assert(I->result_type < 4); 389496c5ddc4Srjs unsigned result_type = result_type_table[I->result_type]; 389596c5ddc4Srjs assert(result_type < 2); 389696c5ddc4Srjs static uint8_t cmpf_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0 }; 389796c5ddc4Srjs assert(I->cmpf < 9); 389896c5ddc4Srjs unsigned cmpf = cmpf_table[I->cmpf]; 389996c5ddc4Srjs assert(cmpf < 2); 390096c5ddc4Srjs return 0x7a000 | (src0 << 0) | (src1 << 3) | (swz0 << 6) | (swz1 << 8) | (result_type << 10) | (cmpf << 11); 390196c5ddc4Srjs} 390296c5ddc4Srjs 390396c5ddc4Srjsstatic inline unsigned 390496c5ddc4Srjsbi_pack_add_icmp_v2s16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 390596c5ddc4Srjs{ 390696c5ddc4Srjs static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 390796c5ddc4Srjs assert(I->src[0].swizzle < 13); 390896c5ddc4Srjs unsigned swz0 = swz0_table[I->src[0].swizzle]; 
390996c5ddc4Srjs assert(swz0 < 4); 391096c5ddc4Srjs static uint8_t swz1_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 391196c5ddc4Srjs assert(I->src[1].swizzle < 13); 391296c5ddc4Srjs unsigned swz1 = swz1_table[I->src[1].swizzle]; 391396c5ddc4Srjs assert(swz1 < 4); 391496c5ddc4Srjs static uint8_t result_type_table[] = { 0, ~0, 1, ~0 }; 391596c5ddc4Srjs assert(I->result_type < 4); 391696c5ddc4Srjs unsigned result_type = result_type_table[I->result_type]; 391796c5ddc4Srjs assert(result_type < 2); 391896c5ddc4Srjs static uint8_t cmpf_table[] = { ~0, 0, 1, ~0, 2, 3, ~0, ~0, ~0 }; 391996c5ddc4Srjs assert(I->cmpf < 9); 392096c5ddc4Srjs unsigned cmpf = cmpf_table[I->cmpf]; 392196c5ddc4Srjs assert(cmpf < 4); 392296c5ddc4Srjs if ((cmpf == 2) || (cmpf == 3)) { 392396c5ddc4Srjs { unsigned temp = src0; src0 = src1; src1 = temp; } 392496c5ddc4Srjs { unsigned temp = swz0; swz0 = swz1; swz1 = temp; } 392596c5ddc4Srjs if (cmpf == 2) cmpf = 0; 392696c5ddc4Srjs else if (cmpf == 3) cmpf = 1; 392796c5ddc4Srjs } 392896c5ddc4Srjs 392996c5ddc4Srjs unsigned derived_12 = 0; 393096c5ddc4Srjs if (cmpf == 0) derived_12 = 0; 393196c5ddc4Srjs else if (cmpf == 1) derived_12 = 1; 393296c5ddc4Srjs else unreachable("No pattern match at pos 12"); 393396c5ddc4Srjs 393496c5ddc4Srjs return 0x78000 | (src0 << 0) | (src1 << 3) | (swz0 << 6) | (swz1 << 8) | (result_type << 10) | (derived_12 << 12); 393596c5ddc4Srjs} 393696c5ddc4Srjs 393796c5ddc4Srjsstatic inline unsigned 393896c5ddc4Srjsbi_pack_add_icmp_v2u16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 393996c5ddc4Srjs{ 394096c5ddc4Srjs static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 394196c5ddc4Srjs assert(I->src[0].swizzle < 13); 394296c5ddc4Srjs unsigned swz0 = swz0_table[I->src[0].swizzle]; 394396c5ddc4Srjs assert(swz0 < 4); 394496c5ddc4Srjs static uint8_t swz1_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, 
~0, ~0 }; 394596c5ddc4Srjs assert(I->src[1].swizzle < 13); 394696c5ddc4Srjs unsigned swz1 = swz1_table[I->src[1].swizzle]; 394796c5ddc4Srjs assert(swz1 < 4); 394896c5ddc4Srjs static uint8_t result_type_table[] = { 0, ~0, 1, ~0 }; 394996c5ddc4Srjs assert(I->result_type < 4); 395096c5ddc4Srjs unsigned result_type = result_type_table[I->result_type]; 395196c5ddc4Srjs assert(result_type < 2); 395296c5ddc4Srjs static uint8_t cmpf_table[] = { ~0, 0, 1, ~0, 2, 3, ~0, ~0, ~0 }; 395396c5ddc4Srjs assert(I->cmpf < 9); 395496c5ddc4Srjs unsigned cmpf = cmpf_table[I->cmpf]; 395596c5ddc4Srjs assert(cmpf < 4); 395696c5ddc4Srjs if ((cmpf == 2) || (cmpf == 3)) { 395796c5ddc4Srjs { unsigned temp = src0; src0 = src1; src1 = temp; } 395896c5ddc4Srjs { unsigned temp = swz0; swz0 = swz1; swz1 = temp; } 395996c5ddc4Srjs if (cmpf == 2) cmpf = 0; 396096c5ddc4Srjs else if (cmpf == 3) cmpf = 1; 396196c5ddc4Srjs } 396296c5ddc4Srjs 396396c5ddc4Srjs unsigned derived_12 = 0; 396496c5ddc4Srjs if (cmpf == 0) derived_12 = 0; 396596c5ddc4Srjs else if (cmpf == 1) derived_12 = 1; 396696c5ddc4Srjs else unreachable("No pattern match at pos 12"); 396796c5ddc4Srjs 396896c5ddc4Srjs return 0x78800 | (src0 << 0) | (src1 << 3) | (swz0 << 6) | (swz1 << 8) | (result_type << 10) | (derived_12 << 12); 396996c5ddc4Srjs} 397096c5ddc4Srjs 397196c5ddc4Srjsstatic inline unsigned 397296c5ddc4Srjsbi_pack_add_icmp_v4i8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 397396c5ddc4Srjs{ 397496c5ddc4Srjs static uint8_t result_type_table[] = { 0, ~0, 1, ~0 }; 397596c5ddc4Srjs assert(I->result_type < 4); 397696c5ddc4Srjs unsigned result_type = result_type_table[I->result_type]; 397796c5ddc4Srjs assert(result_type < 2); 397896c5ddc4Srjs static uint8_t cmpf_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0 }; 397996c5ddc4Srjs assert(I->cmpf < 9); 398096c5ddc4Srjs unsigned cmpf = cmpf_table[I->cmpf]; 398196c5ddc4Srjs assert(cmpf < 2); 398296c5ddc4Srjs 
return 0x7b100 | (src0 << 0) | (src1 << 3) | (result_type << 10) | (cmpf << 6); 398396c5ddc4Srjs} 398496c5ddc4Srjs 398596c5ddc4Srjsstatic inline unsigned 398696c5ddc4Srjsbi_pack_add_icmp_v4s8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 398796c5ddc4Srjs{ 398896c5ddc4Srjs static uint8_t result_type_table[] = { 0, ~0, 1, ~0 }; 398996c5ddc4Srjs assert(I->result_type < 4); 399096c5ddc4Srjs unsigned result_type = result_type_table[I->result_type]; 399196c5ddc4Srjs assert(result_type < 2); 399296c5ddc4Srjs static uint8_t cmpf_table[] = { ~0, 0, 1, ~0, 2, 3, ~0, ~0, ~0 }; 399396c5ddc4Srjs assert(I->cmpf < 9); 399496c5ddc4Srjs unsigned cmpf = cmpf_table[I->cmpf]; 399596c5ddc4Srjs assert(cmpf < 4); 399696c5ddc4Srjs if ((cmpf == 2) || (cmpf == 3)) { 399796c5ddc4Srjs { unsigned temp = src0; src0 = src1; src1 = temp; } 399896c5ddc4Srjs if (cmpf == 2) cmpf = 0; 399996c5ddc4Srjs else if (cmpf == 3) cmpf = 1; 400096c5ddc4Srjs } 400196c5ddc4Srjs 400296c5ddc4Srjs unsigned derived_6 = 0; 400396c5ddc4Srjs if (cmpf == 0) derived_6 = 0; 400496c5ddc4Srjs else if (cmpf == 1) derived_6 = 1; 400596c5ddc4Srjs else unreachable("No pattern match at pos 6"); 400696c5ddc4Srjs 400796c5ddc4Srjs return 0x7b000 | (src0 << 0) | (src1 << 3) | (result_type << 10) | (derived_6 << 6); 400896c5ddc4Srjs} 400996c5ddc4Srjs 401096c5ddc4Srjsstatic inline unsigned 401196c5ddc4Srjsbi_pack_add_icmp_v4u8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 401296c5ddc4Srjs{ 401396c5ddc4Srjs static uint8_t result_type_table[] = { 0, ~0, 1, ~0 }; 401496c5ddc4Srjs assert(I->result_type < 4); 401596c5ddc4Srjs unsigned result_type = result_type_table[I->result_type]; 401696c5ddc4Srjs assert(result_type < 2); 401796c5ddc4Srjs static uint8_t cmpf_table[] = { ~0, 0, 1, ~0, 2, 3, ~0, ~0, ~0 }; 401896c5ddc4Srjs assert(I->cmpf < 9); 401996c5ddc4Srjs unsigned 
cmpf = cmpf_table[I->cmpf]; 402096c5ddc4Srjs assert(cmpf < 4); 402196c5ddc4Srjs if ((cmpf == 2) || (cmpf == 3)) { 402296c5ddc4Srjs { unsigned temp = src0; src0 = src1; src1 = temp; } 402396c5ddc4Srjs if (cmpf == 2) cmpf = 0; 402496c5ddc4Srjs else if (cmpf == 3) cmpf = 1; 402596c5ddc4Srjs } 402696c5ddc4Srjs 402796c5ddc4Srjs unsigned derived_6 = 0; 402896c5ddc4Srjs if (cmpf == 0) derived_6 = 0; 402996c5ddc4Srjs else if (cmpf == 1) derived_6 = 1; 403096c5ddc4Srjs else unreachable("No pattern match at pos 6"); 403196c5ddc4Srjs 403296c5ddc4Srjs return 0x7b080 | (src0 << 0) | (src1 << 3) | (result_type << 10) | (derived_6 << 6); 403396c5ddc4Srjs} 403496c5ddc4Srjs 403596c5ddc4Srjsstatic inline unsigned 403696c5ddc4Srjsbi_pack_add_icmpf_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 403796c5ddc4Srjs{ 403896c5ddc4Srjs 403996c5ddc4Srjs return 0x7be00 | (src0 << 0) | (src1 << 3) | (src2 << 6); 404096c5ddc4Srjs} 404196c5ddc4Srjs 404296c5ddc4Srjsstatic inline unsigned 404396c5ddc4Srjsbi_pack_add_icmpi_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 404496c5ddc4Srjs{ 404596c5ddc4Srjs static uint8_t result_type_table[] = { 0, ~0, 1, ~0 }; 404696c5ddc4Srjs assert(I->result_type < 4); 404796c5ddc4Srjs unsigned result_type = result_type_table[I->result_type]; 404896c5ddc4Srjs assert(result_type < 2); 404996c5ddc4Srjs static uint8_t cmpf_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0 }; 405096c5ddc4Srjs assert(I->cmpf < 9); 405196c5ddc4Srjs unsigned cmpf = cmpf_table[I->cmpf]; 405296c5ddc4Srjs assert(cmpf < 2); 405396c5ddc4Srjs return 0x7b900 | (src0 << 0) | (src1 << 3) | (result_type << 10) | (cmpf << 6); 405496c5ddc4Srjs} 405596c5ddc4Srjs 405696c5ddc4Srjsstatic inline unsigned 405796c5ddc4Srjsbi_pack_add_icmpi_s32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum 
bifrost_packed_src src2, enum bifrost_packed_src src3) 405896c5ddc4Srjs{ 405996c5ddc4Srjs static uint8_t result_type_table[] = { 0, ~0, 1, ~0 }; 406096c5ddc4Srjs assert(I->result_type < 4); 406196c5ddc4Srjs unsigned result_type = result_type_table[I->result_type]; 406296c5ddc4Srjs assert(result_type < 2); 406396c5ddc4Srjs static uint8_t cmpf_table[] = { ~0, 0, 1, ~0, ~0, ~0, ~0, ~0, ~0 }; 406496c5ddc4Srjs assert(I->cmpf < 9); 406596c5ddc4Srjs unsigned cmpf = cmpf_table[I->cmpf]; 406696c5ddc4Srjs assert(cmpf < 2); 406796c5ddc4Srjs return 0x7b800 | (src0 << 0) | (src1 << 3) | (result_type << 10) | (cmpf << 6); 406896c5ddc4Srjs} 406996c5ddc4Srjs 407096c5ddc4Srjsstatic inline unsigned 407196c5ddc4Srjsbi_pack_add_icmpi_u32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 407296c5ddc4Srjs{ 407396c5ddc4Srjs static uint8_t result_type_table[] = { 0, ~0, 1, ~0 }; 407496c5ddc4Srjs assert(I->result_type < 4); 407596c5ddc4Srjs unsigned result_type = result_type_table[I->result_type]; 407696c5ddc4Srjs assert(result_type < 2); 407796c5ddc4Srjs static uint8_t cmpf_table[] = { ~0, 0, 1, ~0, ~0, ~0, ~0, ~0, ~0 }; 407896c5ddc4Srjs assert(I->cmpf < 9); 407996c5ddc4Srjs unsigned cmpf = cmpf_table[I->cmpf]; 408096c5ddc4Srjs assert(cmpf < 2); 408196c5ddc4Srjs return 0x7b880 | (src0 << 0) | (src1 << 3) | (result_type << 10) | (cmpf << 6); 408296c5ddc4Srjs} 408396c5ddc4Srjs 408496c5ddc4Srjsstatic inline unsigned 408596c5ddc4Srjsbi_pack_add_icmpm_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 408696c5ddc4Srjs{ 408796c5ddc4Srjs 408896c5ddc4Srjs return 0x7ba00 | (src0 << 0) | (src1 << 3) | (src2 << 6); 408996c5ddc4Srjs} 409096c5ddc4Srjs 409196c5ddc4Srjsstatic inline unsigned 409296c5ddc4Srjsbi_pack_add_ilogb_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum 
bifrost_packed_src src3) 409396c5ddc4Srjs{ 409496c5ddc4Srjs static uint8_t widen0_table[] = { 2, 1, ~0, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 409596c5ddc4Srjs assert(I->src[0].swizzle < 13); 409696c5ddc4Srjs unsigned widen0 = widen0_table[I->src[0].swizzle]; 409796c5ddc4Srjs assert(widen0 < 4); 409896c5ddc4Srjs return 0x3d9e0 | (src0 << 0) | (widen0 << 3); 409996c5ddc4Srjs} 410096c5ddc4Srjs 410196c5ddc4Srjsstatic inline unsigned 410296c5ddc4Srjsbi_pack_add_ilogb_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 410396c5ddc4Srjs{ 410496c5ddc4Srjs static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 410596c5ddc4Srjs assert(I->src[0].swizzle < 13); 410696c5ddc4Srjs unsigned swz0 = swz0_table[I->src[0].swizzle]; 410796c5ddc4Srjs assert(swz0 < 4); 410896c5ddc4Srjs return 0x3d9c0 | (src0 << 0) | (swz0 << 3); 410996c5ddc4Srjs} 411096c5ddc4Srjs 411196c5ddc4Srjsstatic inline unsigned 411296c5ddc4Srjsbi_pack_add_imov_fma(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 411396c5ddc4Srjs{ 411496c5ddc4Srjs unsigned threads = I->threads; 411596c5ddc4Srjs assert(threads < 2); 411696c5ddc4Srjs return 0xd7820 | (threads << 3); 411796c5ddc4Srjs} 411896c5ddc4Srjs 411996c5ddc4Srjsstatic inline unsigned 412096c5ddc4Srjsbi_pack_add_isub_s32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 412196c5ddc4Srjs{ 412296c5ddc4Srjs unsigned saturate = I->saturate; 412396c5ddc4Srjs assert(saturate < 2); 412496c5ddc4Srjs static uint8_t lanes1_table[] = { 1, 0, ~0, 2, 3, 4, 5, 6, ~0, ~0, ~0, ~0, ~0 }; 412596c5ddc4Srjs assert(I->src[1].swizzle < 13); 412696c5ddc4Srjs unsigned lanes1 = lanes1_table[I->src[1].swizzle]; 412796c5ddc4Srjs assert(lanes1 < 8); 412896c5ddc4Srjs if (lanes1 == 0) { 412996c5ddc4Srjs return 
0xbd600 | (src0 << 0) | (src1 << 3) | (saturate << 8); 413096c5ddc4Srjs } else if ((lanes1 == 1) || (lanes1 == 2)) { 413196c5ddc4Srjs unsigned derived_9 = 0; 413296c5ddc4Srjs if (lanes1 == 1) derived_9 = 0; 413396c5ddc4Srjs else if (lanes1 == 2) derived_9 = 1; 413496c5ddc4Srjs else unreachable("No pattern match at pos 9"); 413596c5ddc4Srjs 413696c5ddc4Srjs return 0xbfc00 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9); 413796c5ddc4Srjs } else if ((lanes1 == 3) || (lanes1 == 4) || (lanes1 == 5) || (lanes1 == 6)) { 413896c5ddc4Srjs unsigned derived_9 = 0; 413996c5ddc4Srjs if (lanes1 == 3) derived_9 = 0; 414096c5ddc4Srjs else if (lanes1 == 4) derived_9 = 1; 414196c5ddc4Srjs else if (lanes1 == 5) derived_9 = 2; 414296c5ddc4Srjs else if (lanes1 == 6) derived_9 = 3; 414396c5ddc4Srjs else unreachable("No pattern match at pos 9"); 414496c5ddc4Srjs 414596c5ddc4Srjs return 0xbf000 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9); 414696c5ddc4Srjs } else { 414796c5ddc4Srjs unreachable("No matching state found in add_isub_s32"); 414896c5ddc4Srjs } 414996c5ddc4Srjs} 415096c5ddc4Srjs 415196c5ddc4Srjsstatic inline unsigned 415296c5ddc4Srjsbi_pack_add_isub_u32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 415396c5ddc4Srjs{ 415496c5ddc4Srjs unsigned saturate = I->saturate; 415596c5ddc4Srjs assert(saturate < 2); 415696c5ddc4Srjs static uint8_t lanes1_table[] = { 1, 0, ~0, 2, 3, 4, 5, 6, ~0, ~0, ~0, ~0, ~0 }; 415796c5ddc4Srjs assert(I->src[1].swizzle < 13); 415896c5ddc4Srjs unsigned lanes1 = lanes1_table[I->src[1].swizzle]; 415996c5ddc4Srjs assert(lanes1 < 8); 416096c5ddc4Srjs if (lanes1 == 0) { 416196c5ddc4Srjs unsigned derived_7 = 0; 416296c5ddc4Srjs if ((saturate == 0) && (lanes1 == 0)) derived_7 = 0; 416396c5ddc4Srjs else if ((saturate == 1) || (lanes1 != 0)) derived_7 = 1; 416496c5ddc4Srjs else unreachable("No pattern match at pos 7"); 416596c5ddc4Srjs 
416696c5ddc4Srjs return 0xbd600 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7); 416796c5ddc4Srjs } else if ((lanes1 == 1) || (lanes1 == 2)) { 416896c5ddc4Srjs unsigned derived_7 = 0; 416996c5ddc4Srjs if ((saturate == 0) && (lanes1 == 0)) derived_7 = 0; 417096c5ddc4Srjs else if ((saturate == 1) || (lanes1 != 0)) derived_7 = 1; 417196c5ddc4Srjs else unreachable("No pattern match at pos 7"); 417296c5ddc4Srjs 417396c5ddc4Srjs unsigned derived_9 = 0; 417496c5ddc4Srjs if (lanes1 == 1) derived_9 = 0; 417596c5ddc4Srjs else if (lanes1 == 2) derived_9 = 1; 417696c5ddc4Srjs else unreachable("No pattern match at pos 9"); 417796c5ddc4Srjs 417896c5ddc4Srjs return 0xbfc00 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9); 417996c5ddc4Srjs } else if ((lanes1 == 3) || (lanes1 == 4) || (lanes1 == 5) || (lanes1 == 6)) { 418096c5ddc4Srjs unsigned derived_7 = 0; 418196c5ddc4Srjs if ((saturate == 0) && (lanes1 == 0)) derived_7 = 0; 418296c5ddc4Srjs else if ((saturate == 1) || (lanes1 != 0)) derived_7 = 1; 418396c5ddc4Srjs else unreachable("No pattern match at pos 7"); 418496c5ddc4Srjs 418596c5ddc4Srjs unsigned derived_9 = 0; 418696c5ddc4Srjs if (lanes1 == 3) derived_9 = 0; 418796c5ddc4Srjs else if (lanes1 == 4) derived_9 = 1; 418896c5ddc4Srjs else if (lanes1 == 5) derived_9 = 2; 418996c5ddc4Srjs else if (lanes1 == 6) derived_9 = 3; 419096c5ddc4Srjs else unreachable("No pattern match at pos 9"); 419196c5ddc4Srjs 419296c5ddc4Srjs return 0xbf000 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9); 419396c5ddc4Srjs } else { 419496c5ddc4Srjs unreachable("No matching state found in add_isub_u32"); 419596c5ddc4Srjs } 419696c5ddc4Srjs} 419796c5ddc4Srjs 419896c5ddc4Srjsstatic inline unsigned 419996c5ddc4Srjsbi_pack_add_isub_v2s16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 420096c5ddc4Srjs{ 420196c5ddc4Srjs unsigned saturate 
= I->saturate; 420296c5ddc4Srjs assert(saturate < 2); 420396c5ddc4Srjs static uint8_t lanes0_table[] = { ~0, 0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 420496c5ddc4Srjs assert(I->src[0].swizzle < 13); 420596c5ddc4Srjs unsigned lanes0 = lanes0_table[I->src[0].swizzle]; 420696c5ddc4Srjs assert(lanes0 < 2); 420796c5ddc4Srjs static uint8_t lanes1_table[] = { 2, 0, 1, 3, ~0, ~0, ~0, ~0, 4, 5, ~0, ~0, ~0 }; 420896c5ddc4Srjs assert(I->src[1].swizzle < 13); 420996c5ddc4Srjs unsigned lanes1 = lanes1_table[I->src[1].swizzle]; 421096c5ddc4Srjs assert(lanes1 < 8); 421196c5ddc4Srjs if (((lanes0 == 0) || (lanes0 == 1)) && ((lanes1 == 0) || (lanes1 == 1))) { 421296c5ddc4Srjs unsigned derived_9 = 0; 421396c5ddc4Srjs if (lanes1 == 0) derived_9 = 0; 421496c5ddc4Srjs else if (lanes1 == 1) derived_9 = 1; 421596c5ddc4Srjs else unreachable("No pattern match at pos 9"); 421696c5ddc4Srjs 421796c5ddc4Srjs unsigned derived_10 = 0; 421896c5ddc4Srjs if (lanes0 == 0) derived_10 = 0; 421996c5ddc4Srjs else if (lanes0 == 1) derived_10 = 1; 422096c5ddc4Srjs else unreachable("No pattern match at pos 10"); 422196c5ddc4Srjs 422296c5ddc4Srjs return 0xbd800 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9) | (derived_10 << 10); 422396c5ddc4Srjs } else if ((lanes0 == 0) && ((lanes1 == 2) || (lanes1 == 3))) { 422496c5ddc4Srjs unsigned derived_9 = 0; 422596c5ddc4Srjs if (lanes1 == 2) derived_9 = 0; 422696c5ddc4Srjs else if (lanes1 == 3) derived_9 = 1; 422796c5ddc4Srjs else unreachable("No pattern match at pos 9"); 422896c5ddc4Srjs 422996c5ddc4Srjs return 0xbfc40 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9); 423096c5ddc4Srjs } else if ((lanes0 == 0) && ((lanes1 == 4) || (lanes1 == 5))) { 423196c5ddc4Srjs unsigned derived_9 = 0; 423296c5ddc4Srjs if (lanes1 == 4) derived_9 = 0; 423396c5ddc4Srjs else if (lanes1 == 5) derived_9 = 1; 423496c5ddc4Srjs else unreachable("No pattern match at pos 9"); 423596c5ddc4Srjs 423696c5ddc4Srjs return 0xbf800 | (src0 << 0) | (src1 << 3) 
| (saturate << 8) | (derived_9 << 9); 423796c5ddc4Srjs } else { 423896c5ddc4Srjs unreachable("No matching state found in add_isub_v2s16"); 423996c5ddc4Srjs } 424096c5ddc4Srjs} 424196c5ddc4Srjs 424296c5ddc4Srjsstatic inline unsigned 424396c5ddc4Srjsbi_pack_add_isub_v2u16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 424496c5ddc4Srjs{ 424596c5ddc4Srjs unsigned saturate = I->saturate; 424696c5ddc4Srjs assert(saturate < 2); 424796c5ddc4Srjs static uint8_t lanes0_table[] = { ~0, 0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 424896c5ddc4Srjs assert(I->src[0].swizzle < 13); 424996c5ddc4Srjs unsigned lanes0 = lanes0_table[I->src[0].swizzle]; 425096c5ddc4Srjs assert(lanes0 < 2); 425196c5ddc4Srjs static uint8_t lanes1_table[] = { 2, 0, 1, 3, ~0, ~0, ~0, ~0, 4, 5, ~0, ~0, ~0 }; 425296c5ddc4Srjs assert(I->src[1].swizzle < 13); 425396c5ddc4Srjs unsigned lanes1 = lanes1_table[I->src[1].swizzle]; 425496c5ddc4Srjs assert(lanes1 < 8); 425596c5ddc4Srjs if (((lanes0 == 0) || (lanes0 == 1)) && ((lanes1 == 0) || (lanes1 == 1))) { 425696c5ddc4Srjs unsigned derived_7 = 0; 425796c5ddc4Srjs if ((saturate == 0) && (lanes1 != 4) && (lanes1 != 5)) derived_7 = 0; 425896c5ddc4Srjs else if ((saturate == 1) || (lanes1 == 4) || (lanes1 == 5)) derived_7 = 1; 425996c5ddc4Srjs else unreachable("No pattern match at pos 7"); 426096c5ddc4Srjs 426196c5ddc4Srjs unsigned derived_9 = 0; 426296c5ddc4Srjs if (lanes1 == 0) derived_9 = 0; 426396c5ddc4Srjs else if (lanes1 == 1) derived_9 = 1; 426496c5ddc4Srjs else unreachable("No pattern match at pos 9"); 426596c5ddc4Srjs 426696c5ddc4Srjs unsigned derived_10 = 0; 426796c5ddc4Srjs if (lanes0 == 0) derived_10 = 0; 426896c5ddc4Srjs else if (lanes0 == 1) derived_10 = 1; 426996c5ddc4Srjs else unreachable("No pattern match at pos 10"); 427096c5ddc4Srjs 427196c5ddc4Srjs return 0xbd800 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9) | (derived_10 
<< 10); 427296c5ddc4Srjs } else if ((lanes0 == 0) && ((lanes1 == 2) || (lanes1 == 3))) { 427396c5ddc4Srjs unsigned derived_7 = 0; 427496c5ddc4Srjs if ((saturate == 0) && (lanes1 != 4) && (lanes1 != 5)) derived_7 = 0; 427596c5ddc4Srjs else if ((saturate == 1) || (lanes1 == 4) || (lanes1 == 5)) derived_7 = 1; 427696c5ddc4Srjs else unreachable("No pattern match at pos 7"); 427796c5ddc4Srjs 427896c5ddc4Srjs unsigned derived_9 = 0; 427996c5ddc4Srjs if (lanes1 == 2) derived_9 = 0; 428096c5ddc4Srjs else if (lanes1 == 3) derived_9 = 1; 428196c5ddc4Srjs else unreachable("No pattern match at pos 9"); 428296c5ddc4Srjs 428396c5ddc4Srjs return 0xbfc40 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9); 428496c5ddc4Srjs } else if ((lanes0 == 0) && ((lanes1 == 4) || (lanes1 == 5))) { 428596c5ddc4Srjs unsigned derived_7 = 0; 428696c5ddc4Srjs if ((saturate == 0) && (lanes1 != 4) && (lanes1 != 5)) derived_7 = 0; 428796c5ddc4Srjs else if ((saturate == 1) || (lanes1 == 4) || (lanes1 == 5)) derived_7 = 1; 428896c5ddc4Srjs else unreachable("No pattern match at pos 7"); 428996c5ddc4Srjs 429096c5ddc4Srjs unsigned derived_9 = 0; 429196c5ddc4Srjs if (lanes1 == 4) derived_9 = 0; 429296c5ddc4Srjs else if (lanes1 == 5) derived_9 = 1; 429396c5ddc4Srjs else unreachable("No pattern match at pos 9"); 429496c5ddc4Srjs 429596c5ddc4Srjs return 0xbf800 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9); 429696c5ddc4Srjs } else { 429796c5ddc4Srjs unreachable("No matching state found in add_isub_v2u16"); 429896c5ddc4Srjs } 429996c5ddc4Srjs} 430096c5ddc4Srjs 430196c5ddc4Srjsstatic inline unsigned 430296c5ddc4Srjsbi_pack_add_isub_v4s8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 430396c5ddc4Srjs{ 430496c5ddc4Srjs unsigned saturate = I->saturate; 430596c5ddc4Srjs assert(saturate < 2); 430696c5ddc4Srjs static uint8_t lanes0_table[] = { ~0, 0, ~0, ~0, ~0, 
~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 430796c5ddc4Srjs assert(I->src[0].swizzle < 13); 430896c5ddc4Srjs unsigned lanes0 = lanes0_table[I->src[0].swizzle]; 430996c5ddc4Srjs assert(lanes0 < 8); 431096c5ddc4Srjs static uint8_t lanes1_table[] = { ~0, 0, ~0, ~0, 1, 2, 3, 4, ~0, ~0, ~0, ~0, ~0 }; 431196c5ddc4Srjs assert(I->src[1].swizzle < 13); 431296c5ddc4Srjs unsigned lanes1 = lanes1_table[I->src[1].swizzle]; 431396c5ddc4Srjs assert(lanes1 < 8); 431496c5ddc4Srjs if ((lanes0 == 0) && (lanes1 == 0)) { 431596c5ddc4Srjs return 0xbd400 | (src0 << 0) | (src1 << 3) | (saturate << 8); 431696c5ddc4Srjs } else if ((lanes0 == 0) && ((lanes1 == 1) || (lanes1 == 2) || (lanes1 == 3) || (lanes1 == 4))) { 431796c5ddc4Srjs unsigned derived_9 = 0; 431896c5ddc4Srjs if (lanes1 == 1) derived_9 = 0; 431996c5ddc4Srjs else if (lanes1 == 2) derived_9 = 1; 432096c5ddc4Srjs else if (lanes1 == 3) derived_9 = 2; 432196c5ddc4Srjs else if (lanes1 == 4) derived_9 = 3; 432296c5ddc4Srjs else unreachable("No pattern match at pos 9"); 432396c5ddc4Srjs 432496c5ddc4Srjs return 0xbf040 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9); 432596c5ddc4Srjs } else if ((lanes0 == 0) && ((lanes1 == 5) || (lanes1 == 6))) { 432696c5ddc4Srjs unsigned derived_9 = 0; 432796c5ddc4Srjs if (lanes1 == 5) derived_9 = 0; 432896c5ddc4Srjs else if (lanes1 == 6) derived_9 = 1; 432996c5ddc4Srjs else unreachable("No pattern match at pos 9"); 433096c5ddc4Srjs 433196c5ddc4Srjs return 0xbf840 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9); 433296c5ddc4Srjs } else { 433396c5ddc4Srjs unreachable("No matching state found in add_isub_v4s8"); 433496c5ddc4Srjs } 433596c5ddc4Srjs} 433696c5ddc4Srjs 433796c5ddc4Srjsstatic inline unsigned 433896c5ddc4Srjsbi_pack_add_isub_v4u8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 433996c5ddc4Srjs{ 434096c5ddc4Srjs unsigned saturate = I->saturate; 434196c5ddc4Srjs assert(saturate < 2); 
434296c5ddc4Srjs static uint8_t lanes0_table[] = { ~0, 0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 434396c5ddc4Srjs assert(I->src[0].swizzle < 13); 434496c5ddc4Srjs unsigned lanes0 = lanes0_table[I->src[0].swizzle]; 434596c5ddc4Srjs assert(lanes0 < 8); 434696c5ddc4Srjs static uint8_t lanes1_table[] = { ~0, 0, ~0, ~0, 1, 2, 3, 4, ~0, ~0, ~0, ~0, ~0 }; 434796c5ddc4Srjs assert(I->src[1].swizzle < 13); 434896c5ddc4Srjs unsigned lanes1 = lanes1_table[I->src[1].swizzle]; 434996c5ddc4Srjs assert(lanes1 < 8); 435096c5ddc4Srjs if ((lanes0 == 0) && (lanes1 == 0)) { 435196c5ddc4Srjs unsigned derived_7 = 0; 435296c5ddc4Srjs if (saturate == 0) derived_7 = 0; 435396c5ddc4Srjs else if (saturate == 1) derived_7 = 1; 435496c5ddc4Srjs else unreachable("No pattern match at pos 7"); 435596c5ddc4Srjs 435696c5ddc4Srjs return 0xbd400 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7); 435796c5ddc4Srjs } else if ((lanes0 == 0) && ((lanes1 == 1) || (lanes1 == 2) || (lanes1 == 3) || (lanes1 == 4))) { 435896c5ddc4Srjs unsigned derived_7 = 0; 435996c5ddc4Srjs if (saturate == 0) derived_7 = 0; 436096c5ddc4Srjs else if (saturate == 1) derived_7 = 1; 436196c5ddc4Srjs else unreachable("No pattern match at pos 7"); 436296c5ddc4Srjs 436396c5ddc4Srjs unsigned derived_9 = 0; 436496c5ddc4Srjs if (lanes1 == 1) derived_9 = 0; 436596c5ddc4Srjs else if (lanes1 == 2) derived_9 = 1; 436696c5ddc4Srjs else if (lanes1 == 3) derived_9 = 2; 436796c5ddc4Srjs else if (lanes1 == 4) derived_9 = 3; 436896c5ddc4Srjs else unreachable("No pattern match at pos 9"); 436996c5ddc4Srjs 437096c5ddc4Srjs return 0xbf040 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9); 437196c5ddc4Srjs } else if ((lanes0 == 0) && ((lanes1 == 5) || (lanes1 == 6))) { 437296c5ddc4Srjs unsigned derived_7 = 0; 437396c5ddc4Srjs if (saturate == 0) derived_7 = 0; 437496c5ddc4Srjs else if (saturate == 1) derived_7 = 1; 437596c5ddc4Srjs else unreachable("No pattern match at pos 7"); 437696c5ddc4Srjs 
437796c5ddc4Srjs unsigned derived_9 = 0; 437896c5ddc4Srjs if (lanes1 == 5) derived_9 = 0; 437996c5ddc4Srjs else if (lanes1 == 6) derived_9 = 1; 438096c5ddc4Srjs else unreachable("No pattern match at pos 9"); 438196c5ddc4Srjs 438296c5ddc4Srjs return 0xbf840 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9); 438396c5ddc4Srjs } else { 438496c5ddc4Srjs unreachable("No matching state found in add_isub_v4u8"); 438596c5ddc4Srjs } 438696c5ddc4Srjs} 438796c5ddc4Srjs 438896c5ddc4Srjsstatic inline unsigned 438996c5ddc4Srjsbi_pack_add_jump(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 439096c5ddc4Srjs{ 439196c5ddc4Srjs assert((1 << src0) & 0xf7); 439296c5ddc4Srjs return 0x6fe34 | (src0 << 6); 439396c5ddc4Srjs} 439496c5ddc4Srjs 439596c5ddc4Srjsstatic inline unsigned 439696c5ddc4Srjsbi_pack_add_ldexp_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 439796c5ddc4Srjs{ 439896c5ddc4Srjs static uint8_t round_table[] = { 0, 1, 2, 3, 4, 5, ~0, 6, 7 }; 439996c5ddc4Srjs assert(I->round < 9); 440096c5ddc4Srjs unsigned round = round_table[I->round]; 440196c5ddc4Srjs assert(round < 8); 440296c5ddc4Srjs return 0x74c00 | (src0 << 0) | (src1 << 3) | (round << 6); 440396c5ddc4Srjs} 440496c5ddc4Srjs 440596c5ddc4Srjsstatic inline unsigned 440696c5ddc4Srjsbi_pack_add_ldexp_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 440796c5ddc4Srjs{ 440896c5ddc4Srjs static uint8_t round_table[] = { 0, 1, 2, 3, 4, 5, ~0, 6, 7 }; 440996c5ddc4Srjs assert(I->round < 9); 441096c5ddc4Srjs unsigned round = round_table[I->round]; 441196c5ddc4Srjs assert(round < 8); 441296c5ddc4Srjs return 0x74e00 | (src0 << 0) | (src1 << 3) | (round << 6); 441396c5ddc4Srjs} 441496c5ddc4Srjs 441596c5ddc4Srjsstatic inline unsigned 
441696c5ddc4Srjsbi_pack_add_ld_attr(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 441796c5ddc4Srjs{ 441896c5ddc4Srjs unsigned register_format = I->register_format; 441996c5ddc4Srjs assert(register_format < 16); 442096c5ddc4Srjs unsigned vecsize = I->vecsize; 442196c5ddc4Srjs assert(vecsize < 4); 442296c5ddc4Srjs if (register_format != 8) { 442396c5ddc4Srjs unsigned derived_13 = 0; 442496c5ddc4Srjs if (register_format == 0) derived_13 = 0; 442596c5ddc4Srjs else if (register_format == 1) derived_13 = 1; 442696c5ddc4Srjs else if (register_format == 2) derived_13 = 2; 442796c5ddc4Srjs else if (register_format == 3) derived_13 = 3; 442896c5ddc4Srjs else if (register_format == 4) derived_13 = 4; 442996c5ddc4Srjs else if (register_format == 5) derived_13 = 5; 443096c5ddc4Srjs else if (register_format == 6) derived_13 = 6; 443196c5ddc4Srjs else if (register_format == 7) derived_13 = 7; 443296c5ddc4Srjs else unreachable("No pattern match at pos 13"); 443396c5ddc4Srjs 443496c5ddc4Srjs return 0x40400 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (vecsize << 11) | (derived_13 << 13); 443596c5ddc4Srjs } else if (register_format == 8) { 443696c5ddc4Srjs return 0xc4400 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (vecsize << 11); 443796c5ddc4Srjs } else { 443896c5ddc4Srjs unreachable("No matching state found in add_ld_attr"); 443996c5ddc4Srjs } 444096c5ddc4Srjs} 444196c5ddc4Srjs 444296c5ddc4Srjsstatic inline unsigned 444396c5ddc4Srjsbi_pack_add_ld_attr_imm(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 444496c5ddc4Srjs{ 444596c5ddc4Srjs unsigned register_format = I->register_format; 444696c5ddc4Srjs assert(register_format < 16); 444796c5ddc4Srjs unsigned vecsize = I->vecsize; 444896c5ddc4Srjs assert(vecsize < 4); 444996c5ddc4Srjs unsigned attribute_index = I->attribute_index; 445096c5ddc4Srjs 
assert(attribute_index < 0x10); 445196c5ddc4Srjs if (register_format != 8) { 445296c5ddc4Srjs unsigned derived_13 = 0; 445396c5ddc4Srjs if (register_format == 0) derived_13 = 0; 445496c5ddc4Srjs else if (register_format == 1) derived_13 = 1; 445596c5ddc4Srjs else if (register_format == 2) derived_13 = 2; 445696c5ddc4Srjs else if (register_format == 3) derived_13 = 3; 445796c5ddc4Srjs else if (register_format == 4) derived_13 = 4; 445896c5ddc4Srjs else if (register_format == 5) derived_13 = 5; 445996c5ddc4Srjs else if (register_format == 6) derived_13 = 6; 446096c5ddc4Srjs else if (register_format == 7) derived_13 = 7; 446196c5ddc4Srjs else unreachable("No pattern match at pos 13"); 446296c5ddc4Srjs 446396c5ddc4Srjs return 0x40000 | (src0 << 0) | (src1 << 3) | (vecsize << 11) | (attribute_index << 6) | (derived_13 << 13); 446496c5ddc4Srjs } else if (register_format == 8) { 446596c5ddc4Srjs return 0xc4000 | (src0 << 0) | (src1 << 3) | (vecsize << 11) | (attribute_index << 6); 446696c5ddc4Srjs } else { 446796c5ddc4Srjs unreachable("No matching state found in add_ld_attr_imm"); 446896c5ddc4Srjs } 446996c5ddc4Srjs} 447096c5ddc4Srjs 447196c5ddc4Srjsstatic inline unsigned 447296c5ddc4Srjsbi_pack_add_ld_attr_tex(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 447396c5ddc4Srjs{ 447496c5ddc4Srjs unsigned register_format = I->register_format; 447596c5ddc4Srjs assert(register_format < 16); 447696c5ddc4Srjs unsigned vecsize = I->vecsize; 447796c5ddc4Srjs assert(vecsize < 4); 447896c5ddc4Srjs if (register_format != 8) { 447996c5ddc4Srjs unsigned derived_13 = 0; 448096c5ddc4Srjs if (register_format == 0) derived_13 = 0; 448196c5ddc4Srjs else if (register_format == 1) derived_13 = 1; 448296c5ddc4Srjs else if (register_format == 2) derived_13 = 2; 448396c5ddc4Srjs else if (register_format == 3) derived_13 = 3; 448496c5ddc4Srjs else if (register_format == 4) derived_13 = 4; 448596c5ddc4Srjs else if 
(register_format == 5) derived_13 = 5; 448696c5ddc4Srjs else if (register_format == 6) derived_13 = 6; 448796c5ddc4Srjs else if (register_format == 7) derived_13 = 7; 448896c5ddc4Srjs else unreachable("No pattern match at pos 13"); 448996c5ddc4Srjs 449096c5ddc4Srjs return 0x40600 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (vecsize << 11) | (derived_13 << 13); 449196c5ddc4Srjs } else if (register_format == 8) { 449296c5ddc4Srjs return 0xc4600 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (vecsize << 11); 449396c5ddc4Srjs } else { 449496c5ddc4Srjs unreachable("No matching state found in add_ld_attr_tex"); 449596c5ddc4Srjs } 449696c5ddc4Srjs} 449796c5ddc4Srjs 449896c5ddc4Srjsstatic inline unsigned 449996c5ddc4Srjsbi_pack_add_ld_cvt(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 450096c5ddc4Srjs{ 450196c5ddc4Srjs assert((1 << src2) & 0xf7); 450296c5ddc4Srjs unsigned vecsize = I->vecsize; 450396c5ddc4Srjs assert(vecsize < 4); 450496c5ddc4Srjs return 0xc9000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (vecsize << 9); 450596c5ddc4Srjs} 450696c5ddc4Srjs 450796c5ddc4Srjsstatic inline unsigned 450896c5ddc4Srjsbi_pack_add_ld_gclk_u64(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 450996c5ddc4Srjs{ 451096c5ddc4Srjs static uint8_t source_table[] = { 0, 6, 7 }; 451196c5ddc4Srjs assert(I->source < 3); 451296c5ddc4Srjs unsigned source = source_table[I->source]; 451396c5ddc4Srjs assert(source < 8); 451496c5ddc4Srjs return 0xd7800 | (source << 0); 451596c5ddc4Srjs} 451696c5ddc4Srjs 451796c5ddc4Srjsstatic inline unsigned 451896c5ddc4Srjsbi_pack_add_ld_tile(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 451996c5ddc4Srjs{ 452096c5ddc4Srjs assert((1 << src2) & 0xf7); 452196c5ddc4Srjs unsigned vecsize = I->vecsize; 452296c5ddc4Srjs 
assert(vecsize < 4); 452396c5ddc4Srjs return 0xcb000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (vecsize << 9); 452496c5ddc4Srjs} 452596c5ddc4Srjs 452696c5ddc4Srjsstatic inline unsigned 452796c5ddc4Srjsbi_pack_add_ld_var(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 452896c5ddc4Srjs{ 452996c5ddc4Srjs unsigned vecsize = I->vecsize; 453096c5ddc4Srjs assert(vecsize < 4); 453196c5ddc4Srjs unsigned update = I->update; 453296c5ddc4Srjs assert(update < 4); 453396c5ddc4Srjs static uint8_t register_format_table[] = { 1, 0, ~0, ~0, ~0, ~0, ~0, ~0, 2, 3 }; 453496c5ddc4Srjs assert(I->register_format < 10); 453596c5ddc4Srjs unsigned register_format = register_format_table[I->register_format]; 453696c5ddc4Srjs assert(register_format < 4); 453796c5ddc4Srjs unsigned sample = I->sample; 453896c5ddc4Srjs assert(sample < 8); 453996c5ddc4Srjs if (register_format != 2) { 454096c5ddc4Srjs unsigned derived_19 = 0; 454196c5ddc4Srjs if (register_format == 0) derived_19 = 0; 454296c5ddc4Srjs else if (register_format == 1) derived_19 = 1; 454396c5ddc4Srjs else unreachable("No pattern match at pos 19"); 454496c5ddc4Srjs 454596c5ddc4Srjs unsigned derived_10 = 0; 454696c5ddc4Srjs if ((sample == 0) && (update == 0)) derived_10 = 0; 454796c5ddc4Srjs else if ((sample == 1) && (update == 0)) derived_10 = 1; 454896c5ddc4Srjs else if ((sample == 2) && (update == 0)) derived_10 = 2; 454996c5ddc4Srjs else if ((sample == 3) && (update == 0)) derived_10 = 3; 455096c5ddc4Srjs else if ((sample == 4) && (update == 1)) derived_10 = 4; 455196c5ddc4Srjs else if ((sample == 0) && (update == 2)) derived_10 = 8; 455296c5ddc4Srjs else if ((sample == 1) && (update == 2)) derived_10 = 9; 455396c5ddc4Srjs else if ((sample == 0) && (update == 3)) derived_10 = 10; 455496c5ddc4Srjs else if ((sample == 1) && (update == 3)) derived_10 = 11; 455596c5ddc4Srjs else if ((sample == 2) && (update == 3)) derived_10 = 12; 455696c5ddc4Srjs else 
if ((sample == 3) && (update == 3)) derived_10 = 13; 455796c5ddc4Srjs else unreachable("No pattern match at pos 10"); 455896c5ddc4Srjs 455996c5ddc4Srjs return 0x500c0 | (src0 << 0) | (src1 << 3) | (vecsize << 8) | (derived_19 << 19) | (derived_10 << 10); 456096c5ddc4Srjs } else if (register_format == 2) { 456196c5ddc4Srjs unsigned derived_10 = 0; 456296c5ddc4Srjs if ((sample == 0) && (update == 0)) derived_10 = 0; 456396c5ddc4Srjs else if ((sample == 1) && (update == 0)) derived_10 = 1; 456496c5ddc4Srjs else if ((sample == 2) && (update == 0)) derived_10 = 2; 456596c5ddc4Srjs else if ((sample == 3) && (update == 0)) derived_10 = 3; 456696c5ddc4Srjs else if ((sample == 4) && (update == 1)) derived_10 = 4; 456796c5ddc4Srjs else if ((sample == 0) && (update == 2)) derived_10 = 8; 456896c5ddc4Srjs else if ((sample == 1) && (update == 2)) derived_10 = 9; 456996c5ddc4Srjs else if ((sample == 0) && (update == 3)) derived_10 = 10; 457096c5ddc4Srjs else if ((sample == 1) && (update == 3)) derived_10 = 11; 457196c5ddc4Srjs else if ((sample == 2) && (update == 3)) derived_10 = 12; 457296c5ddc4Srjs else if ((sample == 3) && (update == 3)) derived_10 = 13; 457396c5ddc4Srjs else unreachable("No pattern match at pos 10"); 457496c5ddc4Srjs 457596c5ddc4Srjs return 0xcc0c0 | (src0 << 0) | (src1 << 3) | (vecsize << 8) | (derived_10 << 10); 457696c5ddc4Srjs } else { 457796c5ddc4Srjs unreachable("No matching state found in add_ld_var"); 457896c5ddc4Srjs } 457996c5ddc4Srjs} 458096c5ddc4Srjs 458196c5ddc4Srjsstatic inline unsigned 458296c5ddc4Srjsbi_pack_add_ld_var_flat(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 458396c5ddc4Srjs{ 458496c5ddc4Srjs unsigned vecsize = I->vecsize; 458596c5ddc4Srjs assert(vecsize < 4); 458696c5ddc4Srjs static uint8_t register_format_table[] = { 1, 0, 3, 2, ~0, ~0, ~0, ~0, 4, 5 }; 458796c5ddc4Srjs assert(I->register_format < 10); 458896c5ddc4Srjs unsigned register_format = 
register_format_table[I->register_format]; 458996c5ddc4Srjs assert(register_format < 8); 459096c5ddc4Srjs static uint8_t function_table[] = { 0, 3, 6, 7 }; 459196c5ddc4Srjs assert(I->function < 4); 459296c5ddc4Srjs unsigned function = function_table[I->function]; 459396c5ddc4Srjs assert(function < 8); 459496c5ddc4Srjs if (register_format != 4) { 459596c5ddc4Srjs unsigned derived_10 = 0; 459696c5ddc4Srjs if ((register_format == 0) || (register_format == 1)) derived_10 = 0; 459796c5ddc4Srjs else if ((register_format == 2) || (register_format == 3)) derived_10 = 1; 459896c5ddc4Srjs else unreachable("No pattern match at pos 10"); 459996c5ddc4Srjs 460096c5ddc4Srjs unsigned derived_19 = 0; 460196c5ddc4Srjs if ((register_format == 0) || (register_format == 2)) derived_19 = 0; 460296c5ddc4Srjs else if ((register_format == 1) || (register_format == 3)) derived_19 = 1; 460396c5ddc4Srjs else unreachable("No pattern match at pos 19"); 460496c5ddc4Srjs 460596c5ddc4Srjs return 0x538c0 | (src0 << 3) | (vecsize << 8) | (function << 0) | (derived_10 << 10) | (derived_19 << 19); 460696c5ddc4Srjs } else if (register_format == 4) { 460796c5ddc4Srjs return 0xcf8c0 | (src0 << 3) | (vecsize << 8) | (function << 0); 460896c5ddc4Srjs } else { 460996c5ddc4Srjs unreachable("No matching state found in add_ld_var_flat"); 461096c5ddc4Srjs } 461196c5ddc4Srjs} 461296c5ddc4Srjs 461396c5ddc4Srjsstatic inline unsigned 461496c5ddc4Srjsbi_pack_add_ld_var_flat_imm(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 461596c5ddc4Srjs{ 461696c5ddc4Srjs unsigned vecsize = I->vecsize; 461796c5ddc4Srjs assert(vecsize < 4); 461896c5ddc4Srjs static uint8_t register_format_table[] = { 1, 0, 3, 2, ~0, ~0, ~0, ~0, 4, 5 }; 461996c5ddc4Srjs assert(I->register_format < 10); 462096c5ddc4Srjs unsigned register_format = register_format_table[I->register_format]; 462196c5ddc4Srjs assert(register_format < 8); 462296c5ddc4Srjs static uint8_t 
function_table[] = { 0, 3, 6, 7 }; 462396c5ddc4Srjs assert(I->function < 4); 462496c5ddc4Srjs unsigned function = function_table[I->function]; 462596c5ddc4Srjs assert(function < 8); 462696c5ddc4Srjs unsigned index = I->index; 462796c5ddc4Srjs assert(index < 0x20); 462896c5ddc4Srjs if (register_format != 4) { 462996c5ddc4Srjs unsigned derived_10 = 0; 463096c5ddc4Srjs if ((register_format == 0) || (register_format == 1)) derived_10 = 0; 463196c5ddc4Srjs else if ((register_format == 2) || (register_format == 3)) derived_10 = 1; 463296c5ddc4Srjs else unreachable("No pattern match at pos 10"); 463396c5ddc4Srjs 463496c5ddc4Srjs unsigned derived_19 = 0; 463596c5ddc4Srjs if ((register_format == 0) || (register_format == 2)) derived_19 = 0; 463696c5ddc4Srjs else if ((register_format == 1) || (register_format == 3)) derived_19 = 1; 463796c5ddc4Srjs else unreachable("No pattern match at pos 19"); 463896c5ddc4Srjs 463996c5ddc4Srjs return 0x53800 | (vecsize << 8) | (function << 0) | (index << 3) | (derived_10 << 10) | (derived_19 << 19); 464096c5ddc4Srjs } else if (register_format == 4) { 464196c5ddc4Srjs return 0xcf800 | (vecsize << 8) | (function << 0) | (index << 3); 464296c5ddc4Srjs } else { 464396c5ddc4Srjs unreachable("No matching state found in add_ld_var_flat_imm"); 464496c5ddc4Srjs } 464596c5ddc4Srjs} 464696c5ddc4Srjs 464796c5ddc4Srjsstatic inline unsigned 464896c5ddc4Srjsbi_pack_add_ld_var_imm(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 464996c5ddc4Srjs{ 465096c5ddc4Srjs unsigned vecsize = I->vecsize; 465196c5ddc4Srjs assert(vecsize < 4); 465296c5ddc4Srjs unsigned update = I->update; 465396c5ddc4Srjs assert(update < 4); 465496c5ddc4Srjs static uint8_t register_format_table[] = { 1, 0, ~0, ~0, ~0, ~0, ~0, ~0, 2, 3 }; 465596c5ddc4Srjs assert(I->register_format < 10); 465696c5ddc4Srjs unsigned register_format = register_format_table[I->register_format]; 465796c5ddc4Srjs 
assert(register_format < 4); 465896c5ddc4Srjs unsigned sample = I->sample; 465996c5ddc4Srjs assert(sample < 8); 466096c5ddc4Srjs unsigned index = I->index; 466196c5ddc4Srjs assert(index < 0x20); 466296c5ddc4Srjs if (register_format != 2) { 466396c5ddc4Srjs unsigned derived_19 = 0; 466496c5ddc4Srjs if (register_format == 0) derived_19 = 0; 466596c5ddc4Srjs else if (register_format == 1) derived_19 = 1; 466696c5ddc4Srjs else unreachable("No pattern match at pos 19"); 466796c5ddc4Srjs 466896c5ddc4Srjs unsigned derived_10 = 0; 466996c5ddc4Srjs if ((sample == 0) && (update == 0)) derived_10 = 0; 467096c5ddc4Srjs else if ((sample == 1) && (update == 0)) derived_10 = 1; 467196c5ddc4Srjs else if ((sample == 2) && (update == 0)) derived_10 = 2; 467296c5ddc4Srjs else if ((sample == 3) && (update == 0)) derived_10 = 3; 467396c5ddc4Srjs else if ((sample == 4) && (update == 1)) derived_10 = 4; 467496c5ddc4Srjs else if ((sample == 0) && (update == 2)) derived_10 = 8; 467596c5ddc4Srjs else if ((sample == 1) && (update == 2)) derived_10 = 9; 467696c5ddc4Srjs else if ((sample == 0) && (update == 3)) derived_10 = 10; 467796c5ddc4Srjs else if ((sample == 1) && (update == 3)) derived_10 = 11; 467896c5ddc4Srjs else if ((sample == 2) && (update == 3)) derived_10 = 12; 467996c5ddc4Srjs else if ((sample == 3) && (update == 3)) derived_10 = 13; 468096c5ddc4Srjs else unreachable("No pattern match at pos 10"); 468196c5ddc4Srjs 468296c5ddc4Srjs return 0x50000 | (src0 << 0) | (vecsize << 8) | (index << 3) | (derived_19 << 19) | (derived_10 << 10); 468396c5ddc4Srjs } else if (register_format == 2) { 468496c5ddc4Srjs unsigned derived_10 = 0; 468596c5ddc4Srjs if ((sample == 0) && (update == 0)) derived_10 = 0; 468696c5ddc4Srjs else if ((sample == 1) && (update == 0)) derived_10 = 1; 468796c5ddc4Srjs else if ((sample == 2) && (update == 0)) derived_10 = 2; 468896c5ddc4Srjs else if ((sample == 3) && (update == 0)) derived_10 = 3; 468996c5ddc4Srjs else if ((sample == 4) && (update == 1)) derived_10 
= 4; 469096c5ddc4Srjs else if ((sample == 0) && (update == 2)) derived_10 = 8; 469196c5ddc4Srjs else if ((sample == 1) && (update == 2)) derived_10 = 9; 469296c5ddc4Srjs else if ((sample == 0) && (update == 3)) derived_10 = 10; 469396c5ddc4Srjs else if ((sample == 1) && (update == 3)) derived_10 = 11; 469496c5ddc4Srjs else if ((sample == 2) && (update == 3)) derived_10 = 12; 469596c5ddc4Srjs else if ((sample == 3) && (update == 3)) derived_10 = 13; 469696c5ddc4Srjs else unreachable("No pattern match at pos 10"); 469796c5ddc4Srjs 469896c5ddc4Srjs return 0xcc000 | (src0 << 0) | (vecsize << 8) | (index << 3) | (derived_10 << 10); 469996c5ddc4Srjs } else { 470096c5ddc4Srjs unreachable("No matching state found in add_ld_var_imm"); 470196c5ddc4Srjs } 470296c5ddc4Srjs} 470396c5ddc4Srjs 470496c5ddc4Srjsstatic inline unsigned 470596c5ddc4Srjsbi_pack_add_ld_var_special(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 470696c5ddc4Srjs{ 470796c5ddc4Srjs unsigned varying_name = I->varying_name; 470896c5ddc4Srjs assert(varying_name < 32); 470996c5ddc4Srjs unsigned vecsize = I->vecsize; 471096c5ddc4Srjs assert(vecsize < 4); 471196c5ddc4Srjs unsigned update = I->update; 471296c5ddc4Srjs assert(update < 4); 471396c5ddc4Srjs static uint8_t register_format_table[] = { 1, 0, ~0, ~0, ~0, ~0, ~0, ~0, 2, 3 }; 471496c5ddc4Srjs assert(I->register_format < 10); 471596c5ddc4Srjs unsigned register_format = register_format_table[I->register_format]; 471696c5ddc4Srjs assert(register_format < 4); 471796c5ddc4Srjs unsigned sample = I->sample; 471896c5ddc4Srjs assert(sample < 8); 471996c5ddc4Srjs if (register_format != 2) { 472096c5ddc4Srjs unsigned derived_3 = 0; 472196c5ddc4Srjs if ((varying_name == 0) && (vecsize == 1) && (update == 3)) derived_3 = 0; 472296c5ddc4Srjs else if ((varying_name == 2) && (vecsize == 0) && (update == 3)) derived_3 = 2; 472396c5ddc4Srjs else if ((varying_name == 3) && (vecsize == 0) && 
(update == 3) && (sample != 3) && (register_format != 2)) derived_3 = 3; 472496c5ddc4Srjs else unreachable("No pattern match at pos 3"); 472596c5ddc4Srjs 472696c5ddc4Srjs unsigned derived_19 = 0; 472796c5ddc4Srjs if (register_format == 0) derived_19 = 0; 472896c5ddc4Srjs else if (register_format == 1) derived_19 = 1; 472996c5ddc4Srjs else unreachable("No pattern match at pos 19"); 473096c5ddc4Srjs 473196c5ddc4Srjs unsigned derived_10 = 0; 473296c5ddc4Srjs if ((sample == 0) && (update == 0)) derived_10 = 0; 473396c5ddc4Srjs else if ((sample == 1) && (update == 0)) derived_10 = 1; 473496c5ddc4Srjs else if ((sample == 2) && (update == 0)) derived_10 = 2; 473596c5ddc4Srjs else if ((sample == 3) && (update == 0)) derived_10 = 3; 473696c5ddc4Srjs else if ((sample == 4) && (update == 1)) derived_10 = 4; 473796c5ddc4Srjs else if ((sample == 0) && (update == 2)) derived_10 = 8; 473896c5ddc4Srjs else if ((sample == 1) && (update == 2)) derived_10 = 9; 473996c5ddc4Srjs else if ((sample == 0) && (update == 3)) derived_10 = 10; 474096c5ddc4Srjs else if ((sample == 1) && (update == 3)) derived_10 = 11; 474196c5ddc4Srjs else if ((sample == 2) && (update == 3)) derived_10 = 12; 474296c5ddc4Srjs else if ((sample == 3) && (update == 3)) derived_10 = 13; 474396c5ddc4Srjs else unreachable("No pattern match at pos 10"); 474496c5ddc4Srjs 474596c5ddc4Srjs return 0x500a0 | (src0 << 0) | (derived_3 << 3) | (derived_19 << 19) | (derived_10 << 10); 474696c5ddc4Srjs } else if (register_format == 2) { 474796c5ddc4Srjs unsigned derived_3 = 0; 474896c5ddc4Srjs if ((varying_name == 0) && (vecsize == 1) && (update == 3)) derived_3 = 0; 474996c5ddc4Srjs else if ((varying_name == 2) && (vecsize == 0) && (update == 3)) derived_3 = 2; 475096c5ddc4Srjs else if ((varying_name == 3) && (vecsize == 0) && (update == 3) && (sample != 3) && (register_format != 2)) derived_3 = 3; 475196c5ddc4Srjs else unreachable("No pattern match at pos 3"); 475296c5ddc4Srjs 475396c5ddc4Srjs unsigned derived_10 = 0; 
475496c5ddc4Srjs if ((sample == 0) && (update == 0)) derived_10 = 0; 475596c5ddc4Srjs else if ((sample == 1) && (update == 0)) derived_10 = 1; 475696c5ddc4Srjs else if ((sample == 2) && (update == 0)) derived_10 = 2; 475796c5ddc4Srjs else if ((sample == 3) && (update == 0)) derived_10 = 3; 475896c5ddc4Srjs else if ((sample == 4) && (update == 1)) derived_10 = 4; 475996c5ddc4Srjs else if ((sample == 0) && (update == 2)) derived_10 = 8; 476096c5ddc4Srjs else if ((sample == 1) && (update == 2)) derived_10 = 9; 476196c5ddc4Srjs else if ((sample == 0) && (update == 3)) derived_10 = 10; 476296c5ddc4Srjs else if ((sample == 1) && (update == 3)) derived_10 = 11; 476396c5ddc4Srjs else if ((sample == 2) && (update == 3)) derived_10 = 12; 476496c5ddc4Srjs else if ((sample == 3) && (update == 3)) derived_10 = 13; 476596c5ddc4Srjs else unreachable("No pattern match at pos 10"); 476696c5ddc4Srjs 476796c5ddc4Srjs return 0xcc0a0 | (src0 << 0) | (derived_3 << 3) | (derived_10 << 10); 476896c5ddc4Srjs } else { 476996c5ddc4Srjs unreachable("No matching state found in add_ld_var_special"); 477096c5ddc4Srjs } 477196c5ddc4Srjs} 477296c5ddc4Srjs 477396c5ddc4Srjsstatic inline unsigned 477496c5ddc4Srjsbi_pack_add_lea_attr(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 477596c5ddc4Srjs{ 477696c5ddc4Srjs unsigned register_format = I->register_format; 477796c5ddc4Srjs assert(register_format < 16); 477896c5ddc4Srjs if (register_format != 8) { 477996c5ddc4Srjs unsigned derived_11 = 0; 478096c5ddc4Srjs if (register_format == 0) derived_11 = 0; 478196c5ddc4Srjs else if (register_format == 1) derived_11 = 1; 478296c5ddc4Srjs else if (register_format == 2) derived_11 = 2; 478396c5ddc4Srjs else if (register_format == 3) derived_11 = 3; 478496c5ddc4Srjs else if (register_format == 4) derived_11 = 4; 478596c5ddc4Srjs else if (register_format == 5) derived_11 = 5; 478696c5ddc4Srjs else if (register_format == 6) 
derived_11 = 6; 478796c5ddc4Srjs else if (register_format == 7) derived_11 = 7; 478896c5ddc4Srjs else unreachable("No pattern match at pos 11"); 478996c5ddc4Srjs 479096c5ddc4Srjs return 0xc0400 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_11 << 11); 479196c5ddc4Srjs } else if (register_format == 8) { 479296c5ddc4Srjs return 0xc8400 | (src0 << 0) | (src1 << 3) | (src2 << 6); 479396c5ddc4Srjs } else { 479496c5ddc4Srjs unreachable("No matching state found in add_lea_attr"); 479596c5ddc4Srjs } 479696c5ddc4Srjs} 479796c5ddc4Srjs 479896c5ddc4Srjsstatic inline unsigned 479996c5ddc4Srjsbi_pack_add_lea_attr_imm(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 480096c5ddc4Srjs{ 480196c5ddc4Srjs unsigned register_format = I->register_format; 480296c5ddc4Srjs assert(register_format < 16); 480396c5ddc4Srjs unsigned attribute_index = I->attribute_index; 480496c5ddc4Srjs assert(attribute_index < 0x10); 480596c5ddc4Srjs if (register_format != 8) { 480696c5ddc4Srjs unsigned derived_11 = 0; 480796c5ddc4Srjs if (register_format == 0) derived_11 = 0; 480896c5ddc4Srjs else if (register_format == 1) derived_11 = 1; 480996c5ddc4Srjs else if (register_format == 2) derived_11 = 2; 481096c5ddc4Srjs else if (register_format == 3) derived_11 = 3; 481196c5ddc4Srjs else if (register_format == 4) derived_11 = 4; 481296c5ddc4Srjs else if (register_format == 5) derived_11 = 5; 481396c5ddc4Srjs else if (register_format == 6) derived_11 = 6; 481496c5ddc4Srjs else if (register_format == 7) derived_11 = 7; 481596c5ddc4Srjs else unreachable("No pattern match at pos 11"); 481696c5ddc4Srjs 481796c5ddc4Srjs return 0xc0000 | (src0 << 0) | (src1 << 3) | (attribute_index << 6) | (derived_11 << 11); 481896c5ddc4Srjs } else if (register_format == 8) { 481996c5ddc4Srjs return 0xc8000 | (src0 << 0) | (src1 << 3) | (attribute_index << 6); 482096c5ddc4Srjs } else { 482196c5ddc4Srjs unreachable("No matching state found in 
add_lea_attr_imm"); 482296c5ddc4Srjs } 482396c5ddc4Srjs} 482496c5ddc4Srjs 482596c5ddc4Srjsstatic inline unsigned 482696c5ddc4Srjsbi_pack_add_lea_attr_tex(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 482796c5ddc4Srjs{ 482896c5ddc4Srjs unsigned register_format = I->register_format; 482996c5ddc4Srjs assert(register_format < 16); 483096c5ddc4Srjs if (register_format != 8) { 483196c5ddc4Srjs unsigned derived_11 = 0; 483296c5ddc4Srjs if (register_format == 0) derived_11 = 0; 483396c5ddc4Srjs else if (register_format == 1) derived_11 = 1; 483496c5ddc4Srjs else if (register_format == 2) derived_11 = 2; 483596c5ddc4Srjs else if (register_format == 3) derived_11 = 3; 483696c5ddc4Srjs else if (register_format == 4) derived_11 = 4; 483796c5ddc4Srjs else if (register_format == 5) derived_11 = 5; 483896c5ddc4Srjs else if (register_format == 6) derived_11 = 6; 483996c5ddc4Srjs else if (register_format == 7) derived_11 = 7; 484096c5ddc4Srjs else unreachable("No pattern match at pos 11"); 484196c5ddc4Srjs 484296c5ddc4Srjs return 0xc0600 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_11 << 11); 484396c5ddc4Srjs } else if (register_format == 8) { 484496c5ddc4Srjs return 0xc8600 | (src0 << 0) | (src1 << 3) | (src2 << 6); 484596c5ddc4Srjs } else { 484696c5ddc4Srjs unreachable("No matching state found in add_lea_attr_tex"); 484796c5ddc4Srjs } 484896c5ddc4Srjs} 484996c5ddc4Srjs 485096c5ddc4Srjsstatic inline unsigned 485196c5ddc4Srjsbi_pack_add_lea_tex(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 485296c5ddc4Srjs{ 485396c5ddc4Srjs unsigned format = I->format; 485496c5ddc4Srjs assert(format < 2); 485596c5ddc4Srjs return 0xd6600 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (format << 11); 485696c5ddc4Srjs} 485796c5ddc4Srjs 485896c5ddc4Srjsstatic inline unsigned 485996c5ddc4Srjsbi_pack_add_lea_tex_imm(bi_instr *I, 
enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 486096c5ddc4Srjs{ 486196c5ddc4Srjs unsigned format = I->format; 486296c5ddc4Srjs assert(format < 2); 486396c5ddc4Srjs unsigned texture_index = I->texture_index; 486496c5ddc4Srjs assert(texture_index < 0x20); 486596c5ddc4Srjs return 0xd6000 | (src0 << 0) | (src1 << 3) | (format << 11) | (texture_index << 6); 486696c5ddc4Srjs} 486796c5ddc4Srjs 486896c5ddc4Srjsstatic inline unsigned 486996c5ddc4Srjsbi_pack_add_load_i128(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 487096c5ddc4Srjs{ 487196c5ddc4Srjs static uint8_t seg_table[] = { 1, 2, 0, 3, 4, 7 }; 487296c5ddc4Srjs assert(I->seg < 6); 487396c5ddc4Srjs unsigned seg = seg_table[I->seg]; 487496c5ddc4Srjs assert(seg < 8); 487596c5ddc4Srjs return 0x61000 | (src0 << 0) | (src1 << 3) | (seg << 6); 487696c5ddc4Srjs} 487796c5ddc4Srjs 487896c5ddc4Srjsstatic inline unsigned 487996c5ddc4Srjsbi_pack_add_load_i16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 488096c5ddc4Srjs{ 488196c5ddc4Srjs static uint8_t seg_table[] = { 1, 2, 0, 3, 4, 7 }; 488296c5ddc4Srjs assert(I->seg < 6); 488396c5ddc4Srjs unsigned seg = seg_table[I->seg]; 488496c5ddc4Srjs assert(seg < 8); 488596c5ddc4Srjs static uint8_t lane_dest_table[] = { 0, 2, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 488696c5ddc4Srjs assert(I->dest->swizzle < 13); 488796c5ddc4Srjs unsigned lane_dest = lane_dest_table[I->dest->swizzle]; 488896c5ddc4Srjs assert(lane_dest < 4); 488996c5ddc4Srjs unsigned extend = I->extend; 489096c5ddc4Srjs assert(extend < 4); 489196c5ddc4Srjs if ((extend == 0) && ((lane_dest == 0) || (lane_dest == 1))) { 489296c5ddc4Srjs unsigned derived_9 = 0; 489396c5ddc4Srjs if (lane_dest == 0) derived_9 = 0; 489496c5ddc4Srjs else if (lane_dest == 1) derived_9 = 1; 
489596c5ddc4Srjs else unreachable("No pattern match at pos 9"); 489696c5ddc4Srjs 489796c5ddc4Srjs return 0x60800 | (src0 << 0) | (src1 << 3) | (seg << 6) | (derived_9 << 9); 489896c5ddc4Srjs } else if ((extend != 0) && (lane_dest == 2)) { 489996c5ddc4Srjs unsigned derived_9 = 0; 490096c5ddc4Srjs if (extend == 1) derived_9 = 0; 490196c5ddc4Srjs else if (extend == 2) derived_9 = 1; 490296c5ddc4Srjs else unreachable("No pattern match at pos 9"); 490396c5ddc4Srjs 490496c5ddc4Srjs return 0x63000 | (src0 << 0) | (src1 << 3) | (seg << 6) | (derived_9 << 9); 490596c5ddc4Srjs } else if ((extend != 0) && (lane_dest == 3)) { 490696c5ddc4Srjs unsigned derived_9 = 0; 490796c5ddc4Srjs if (extend == 1) derived_9 = 0; 490896c5ddc4Srjs else if (extend == 2) derived_9 = 1; 490996c5ddc4Srjs else unreachable("No pattern match at pos 9"); 491096c5ddc4Srjs 491196c5ddc4Srjs return 0x61800 | (src0 << 0) | (src1 << 3) | (seg << 6) | (derived_9 << 9); 491296c5ddc4Srjs } else { 491396c5ddc4Srjs unreachable("No matching state found in add_load_i16"); 491496c5ddc4Srjs } 491596c5ddc4Srjs} 491696c5ddc4Srjs 491796c5ddc4Srjsstatic inline unsigned 491896c5ddc4Srjsbi_pack_add_load_i24(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 491996c5ddc4Srjs{ 492096c5ddc4Srjs static uint8_t seg_table[] = { 1, 2, 0, 3, 4, 7 }; 492196c5ddc4Srjs assert(I->seg < 6); 492296c5ddc4Srjs unsigned seg = seg_table[I->seg]; 492396c5ddc4Srjs assert(seg < 8); 492496c5ddc4Srjs return 0x65000 | (src0 << 0) | (src1 << 3) | (seg << 6); 492596c5ddc4Srjs} 492696c5ddc4Srjs 492796c5ddc4Srjsstatic inline unsigned 492896c5ddc4Srjsbi_pack_add_load_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 492996c5ddc4Srjs{ 493096c5ddc4Srjs static uint8_t seg_table[] = { 1, 2, 0, 3, 4, 7 }; 493196c5ddc4Srjs assert(I->seg < 6); 493296c5ddc4Srjs unsigned seg = 
seg_table[I->seg]; 493396c5ddc4Srjs assert(seg < 8); 493496c5ddc4Srjs static uint8_t lane_dest_table[] = { ~0, 0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 493596c5ddc4Srjs assert(I->dest->swizzle < 13); 493696c5ddc4Srjs unsigned lane_dest = lane_dest_table[I->dest->swizzle]; 493796c5ddc4Srjs assert(lane_dest < 2); 493896c5ddc4Srjs unsigned extend = I->extend; 493996c5ddc4Srjs assert(extend < 4); 494096c5ddc4Srjs if ((extend == 0) && (lane_dest == 0)) { 494196c5ddc4Srjs return 0x60c00 | (src0 << 0) | (src1 << 3) | (seg << 6); 494296c5ddc4Srjs } else if ((extend != 0) && (lane_dest == 1)) { 494396c5ddc4Srjs unsigned derived_9 = 0; 494496c5ddc4Srjs if (extend == 1) derived_9 = 0; 494596c5ddc4Srjs else if (extend == 2) derived_9 = 1; 494696c5ddc4Srjs else unreachable("No pattern match at pos 9"); 494796c5ddc4Srjs 494896c5ddc4Srjs return 0x61c00 | (src0 << 0) | (src1 << 3) | (seg << 6) | (derived_9 << 9); 494996c5ddc4Srjs } else { 495096c5ddc4Srjs unreachable("No matching state found in add_load_i32"); 495196c5ddc4Srjs } 495296c5ddc4Srjs} 495396c5ddc4Srjs 495496c5ddc4Srjsstatic inline unsigned 495596c5ddc4Srjsbi_pack_add_load_i48(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 495696c5ddc4Srjs{ 495796c5ddc4Srjs static uint8_t seg_table[] = { 1, 2, 0, 3, 4, 7 }; 495896c5ddc4Srjs assert(I->seg < 6); 495996c5ddc4Srjs unsigned seg = seg_table[I->seg]; 496096c5ddc4Srjs assert(seg < 8); 496196c5ddc4Srjs return 0x65200 | (src0 << 0) | (src1 << 3) | (seg << 6); 496296c5ddc4Srjs} 496396c5ddc4Srjs 496496c5ddc4Srjsstatic inline unsigned 496596c5ddc4Srjsbi_pack_add_load_i64(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 496696c5ddc4Srjs{ 496796c5ddc4Srjs static uint8_t seg_table[] = { 1, 2, 0, 3, 4, 7 }; 496896c5ddc4Srjs assert(I->seg < 6); 496996c5ddc4Srjs unsigned seg = seg_table[I->seg]; 497096c5ddc4Srjs 
assert(seg < 8); 497196c5ddc4Srjs return 0x60e00 | (src0 << 0) | (src1 << 3) | (seg << 6); 497296c5ddc4Srjs} 497396c5ddc4Srjs 497496c5ddc4Srjsstatic inline unsigned 497596c5ddc4Srjsbi_pack_add_load_i8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 497696c5ddc4Srjs{ 497796c5ddc4Srjs static uint8_t seg_table[] = { 1, 2, 0, 3, 4, 7 }; 497896c5ddc4Srjs assert(I->seg < 6); 497996c5ddc4Srjs unsigned seg = seg_table[I->seg]; 498096c5ddc4Srjs assert(seg < 8); 498196c5ddc4Srjs static uint8_t lane_dest_table[] = { 4, 6, ~0, 5, 0, 1, 2, 3, ~0, ~0, ~0, ~0, ~0 }; 498296c5ddc4Srjs assert(I->dest->swizzle < 13); 498396c5ddc4Srjs unsigned lane_dest = lane_dest_table[I->dest->swizzle]; 498496c5ddc4Srjs assert(lane_dest < 8); 498596c5ddc4Srjs unsigned extend = I->extend; 498696c5ddc4Srjs assert(extend < 4); 498796c5ddc4Srjs if ((extend == 0) && ((lane_dest == 0) || (lane_dest == 1) || (lane_dest == 2) || (lane_dest == 3))) { 498896c5ddc4Srjs unsigned derived_9 = 0; 498996c5ddc4Srjs if (lane_dest == 0) derived_9 = 0; 499096c5ddc4Srjs else if (lane_dest == 1) derived_9 = 1; 499196c5ddc4Srjs else if (lane_dest == 2) derived_9 = 2; 499296c5ddc4Srjs else if (lane_dest == 3) derived_9 = 3; 499396c5ddc4Srjs else unreachable("No pattern match at pos 9"); 499496c5ddc4Srjs 499596c5ddc4Srjs return 0x60000 | (src0 << 0) | (src1 << 3) | (seg << 6) | (derived_9 << 9); 499696c5ddc4Srjs } else if ((extend != 0) && ((lane_dest == 4) || (lane_dest == 5))) { 499796c5ddc4Srjs unsigned derived_9 = 0; 499896c5ddc4Srjs if (extend == 1) derived_9 = 0; 499996c5ddc4Srjs else if (extend == 2) derived_9 = 1; 500096c5ddc4Srjs else unreachable("No pattern match at pos 9"); 500196c5ddc4Srjs 500296c5ddc4Srjs unsigned derived_10 = 0; 500396c5ddc4Srjs if (lane_dest == 4) derived_10 = 0; 500496c5ddc4Srjs else if (lane_dest == 5) derived_10 = 1; 500596c5ddc4Srjs else unreachable("No pattern match at pos 10"); 500696c5ddc4Srjs 
500796c5ddc4Srjs return 0x63800 | (src0 << 0) | (src1 << 3) | (seg << 6) | (derived_9 << 9) | (derived_10 << 10); 500896c5ddc4Srjs } else if ((extend != 0) && (lane_dest == 6)) { 500996c5ddc4Srjs unsigned derived_9 = 0; 501096c5ddc4Srjs if (extend == 1) derived_9 = 0; 501196c5ddc4Srjs else if (extend == 2) derived_9 = 1; 501296c5ddc4Srjs else unreachable("No pattern match at pos 9"); 501396c5ddc4Srjs 501496c5ddc4Srjs return 0x63400 | (src0 << 0) | (src1 << 3) | (seg << 6) | (derived_9 << 9); 501596c5ddc4Srjs } else if ((extend != 0) && (lane_dest == 7)) { 501696c5ddc4Srjs unsigned derived_9 = 0; 501796c5ddc4Srjs if (extend == 1) derived_9 = 0; 501896c5ddc4Srjs else if (extend == 2) derived_9 = 1; 501996c5ddc4Srjs else unreachable("No pattern match at pos 9"); 502096c5ddc4Srjs 502196c5ddc4Srjs return 0x61400 | (src0 << 0) | (src1 << 3) | (seg << 6) | (derived_9 << 9); 502296c5ddc4Srjs } else { 502396c5ddc4Srjs unreachable("No matching state found in add_load_i8"); 502496c5ddc4Srjs } 502596c5ddc4Srjs} 502696c5ddc4Srjs 502796c5ddc4Srjsstatic inline unsigned 502896c5ddc4Srjsbi_pack_add_load_i96(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 502996c5ddc4Srjs{ 503096c5ddc4Srjs static uint8_t seg_table[] = { 1, 2, 0, 3, 4, 7 }; 503196c5ddc4Srjs assert(I->seg < 6); 503296c5ddc4Srjs unsigned seg = seg_table[I->seg]; 503396c5ddc4Srjs assert(seg < 8); 503496c5ddc4Srjs return 0x65400 | (src0 << 0) | (src1 << 3) | (seg << 6); 503596c5ddc4Srjs} 503696c5ddc4Srjs 503796c5ddc4Srjsstatic inline unsigned 503896c5ddc4Srjsbi_pack_add_logb_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 503996c5ddc4Srjs{ 504096c5ddc4Srjs static uint8_t widen0_table[] = { 2, 1, ~0, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 504196c5ddc4Srjs assert(I->src[0].swizzle < 13); 504296c5ddc4Srjs unsigned widen0 = 
widen0_table[I->src[0].swizzle]; 504396c5ddc4Srjs assert(widen0 < 4); 504496c5ddc4Srjs return 0x3d9a0 | (src0 << 0) | (widen0 << 3); 504596c5ddc4Srjs} 504696c5ddc4Srjs 504796c5ddc4Srjsstatic inline unsigned 504896c5ddc4Srjsbi_pack_add_logb_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 504996c5ddc4Srjs{ 505096c5ddc4Srjs static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 505196c5ddc4Srjs assert(I->src[0].swizzle < 13); 505296c5ddc4Srjs unsigned swz0 = swz0_table[I->src[0].swizzle]; 505396c5ddc4Srjs assert(swz0 < 4); 505496c5ddc4Srjs return 0x3d980 | (src0 << 0) | (swz0 << 3); 505596c5ddc4Srjs} 505696c5ddc4Srjs 505796c5ddc4Srjsstatic inline unsigned 505896c5ddc4Srjsbi_pack_add_mkvec_v2i16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 505996c5ddc4Srjs{ 506096c5ddc4Srjs static uint8_t lane0_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 506196c5ddc4Srjs assert(I->src[0].swizzle < 13); 506296c5ddc4Srjs unsigned lane0 = lane0_table[I->src[0].swizzle]; 506396c5ddc4Srjs assert(lane0 < 2); 506496c5ddc4Srjs static uint8_t lane1_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 506596c5ddc4Srjs assert(I->src[1].swizzle < 13); 506696c5ddc4Srjs unsigned lane1 = lane1_table[I->src[1].swizzle]; 506796c5ddc4Srjs assert(lane1 < 2); 506896c5ddc4Srjs return 0x75300 | (src0 << 0) | (src1 << 3) | (lane0 << 6) | (lane1 << 7); 506996c5ddc4Srjs} 507096c5ddc4Srjs 507196c5ddc4Srjsstatic inline unsigned 507296c5ddc4Srjsbi_pack_add_mov_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 507396c5ddc4Srjs{ 507496c5ddc4Srjs 507596c5ddc4Srjs return 0x3d968 | (src0 << 0); 507696c5ddc4Srjs} 507796c5ddc4Srjs 507896c5ddc4Srjsstatic inline unsigned 
507996c5ddc4Srjsbi_pack_add_mux_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 508096c5ddc4Srjs{ 508196c5ddc4Srjs unsigned mux = I->mux; 508296c5ddc4Srjs assert(mux < 4); 508396c5ddc4Srjs return 0x74000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (mux << 9); 508496c5ddc4Srjs} 508596c5ddc4Srjs 508696c5ddc4Srjsstatic inline unsigned 508796c5ddc4Srjsbi_pack_add_mux_v2i16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 508896c5ddc4Srjs{ 508996c5ddc4Srjs unsigned mux = I->mux; 509096c5ddc4Srjs assert(mux < 4); 509196c5ddc4Srjs static uint8_t swap2_table[] = { ~0, 0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 509296c5ddc4Srjs assert(I->src[2].swizzle < 13); 509396c5ddc4Srjs unsigned swap2 = swap2_table[I->src[2].swizzle]; 509496c5ddc4Srjs assert(swap2 < 2); 509596c5ddc4Srjs static uint8_t swap1_table[] = { ~0, 0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 509696c5ddc4Srjs assert(I->src[1].swizzle < 13); 509796c5ddc4Srjs unsigned swap1 = swap1_table[I->src[1].swizzle]; 509896c5ddc4Srjs assert(swap1 < 2); 509996c5ddc4Srjs static uint8_t swap0_table[] = { ~0, 0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 510096c5ddc4Srjs assert(I->src[0].swizzle < 13); 510196c5ddc4Srjs unsigned swap0 = swap0_table[I->src[0].swizzle]; 510296c5ddc4Srjs assert(swap0 < 2); 510396c5ddc4Srjs return 0x70000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (mux << 9) | (swap2 << 11) | (swap1 << 12) | (swap0 << 13); 510496c5ddc4Srjs} 510596c5ddc4Srjs 510696c5ddc4Srjsstatic inline unsigned 510796c5ddc4Srjsbi_pack_add_mux_v4i8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 510896c5ddc4Srjs{ 510996c5ddc4Srjs unsigned mux = I->mux; 511096c5ddc4Srjs assert(mux < 2); 511196c5ddc4Srjs return 0x74800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (mux 
<< 9); 511296c5ddc4Srjs} 511396c5ddc4Srjs 511496c5ddc4Srjsstatic inline unsigned 511596c5ddc4Srjsbi_pack_add_nop(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 511696c5ddc4Srjs{ 511796c5ddc4Srjs 511896c5ddc4Srjs return 0x3d964; 511996c5ddc4Srjs} 512096c5ddc4Srjs 512196c5ddc4Srjsstatic inline unsigned 512296c5ddc4Srjsbi_pack_add_quiet_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 512396c5ddc4Srjs{ 512496c5ddc4Srjs 512596c5ddc4Srjs return 0x3d970 | (src0 << 0); 512696c5ddc4Srjs} 512796c5ddc4Srjs 512896c5ddc4Srjsstatic inline unsigned 512996c5ddc4Srjsbi_pack_add_quiet_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 513096c5ddc4Srjs{ 513196c5ddc4Srjs static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 513296c5ddc4Srjs assert(I->src[0].swizzle < 13); 513396c5ddc4Srjs unsigned swz0 = swz0_table[I->src[0].swizzle]; 513496c5ddc4Srjs assert(swz0 < 4); 513596c5ddc4Srjs return 0x3d900 | (src0 << 0) | (swz0 << 4); 513696c5ddc4Srjs} 513796c5ddc4Srjs 513896c5ddc4Srjsstatic inline unsigned 513996c5ddc4Srjsbi_pack_add_s16_to_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 514096c5ddc4Srjs{ 514196c5ddc4Srjs static uint8_t lane0_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 514296c5ddc4Srjs assert(I->src[0].swizzle < 13); 514396c5ddc4Srjs unsigned lane0 = lane0_table[I->src[0].swizzle]; 514496c5ddc4Srjs assert(lane0 < 2); 514596c5ddc4Srjs return 0x3cce0 | (src0 << 0) | (lane0 << 4); 514696c5ddc4Srjs} 514796c5ddc4Srjs 514896c5ddc4Srjsstatic inline unsigned 514996c5ddc4Srjsbi_pack_add_s16_to_s32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum 
bifrost_packed_src src2, enum bifrost_packed_src src3) 515096c5ddc4Srjs{ 515196c5ddc4Srjs static uint8_t lane0_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 515296c5ddc4Srjs assert(I->src[0].swizzle < 13); 515396c5ddc4Srjs unsigned lane0 = lane0_table[I->src[0].swizzle]; 515496c5ddc4Srjs assert(lane0 < 2); 515596c5ddc4Srjs return 0x3ccc0 | (src0 << 0) | (lane0 << 4); 515696c5ddc4Srjs} 515796c5ddc4Srjs 515896c5ddc4Srjsstatic inline unsigned 515996c5ddc4Srjsbi_pack_add_s32_to_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 516096c5ddc4Srjs{ 516196c5ddc4Srjs unsigned round = I->round; 516296c5ddc4Srjs assert(round < 8); 516396c5ddc4Srjs if (round != 4) { 516496c5ddc4Srjs unsigned derived_4 = 0; 516596c5ddc4Srjs if (round == 0) derived_4 = 0; 516696c5ddc4Srjs else if (round == 1) derived_4 = 1; 516796c5ddc4Srjs else if (round == 2) derived_4 = 2; 516896c5ddc4Srjs else if (round == 3) derived_4 = 3; 516996c5ddc4Srjs else unreachable("No pattern match at pos 4"); 517096c5ddc4Srjs 517196c5ddc4Srjs return 0x3cbc0 | (src0 << 0) | (derived_4 << 4); 517296c5ddc4Srjs } else if (round == 4) { 517396c5ddc4Srjs return 0x3cd00 | (src0 << 0); 517496c5ddc4Srjs } else { 517596c5ddc4Srjs unreachable("No matching state found in add_s32_to_f32"); 517696c5ddc4Srjs } 517796c5ddc4Srjs} 517896c5ddc4Srjs 517996c5ddc4Srjsstatic inline unsigned 518096c5ddc4Srjsbi_pack_add_s8_to_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 518196c5ddc4Srjs{ 518296c5ddc4Srjs static uint8_t lane0_table[] = { ~0, ~0, ~0, ~0, 0, 1, 2, 3, ~0, ~0, ~0, ~0, ~0 }; 518396c5ddc4Srjs assert(I->src[0].swizzle < 13); 518496c5ddc4Srjs unsigned lane0 = lane0_table[I->src[0].swizzle]; 518596c5ddc4Srjs assert(lane0 < 4); 518696c5ddc4Srjs return 0x3cb80 | (src0 << 0) | (lane0 << 4); 518796c5ddc4Srjs} 518896c5ddc4Srjs 
518996c5ddc4Srjsstatic inline unsigned 519096c5ddc4Srjsbi_pack_add_s8_to_s32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 519196c5ddc4Srjs{ 519296c5ddc4Srjs static uint8_t lane0_table[] = { ~0, ~0, ~0, ~0, 0, 1, 2, 3, ~0, ~0, ~0, ~0, ~0 }; 519396c5ddc4Srjs assert(I->src[0].swizzle < 13); 519496c5ddc4Srjs unsigned lane0 = lane0_table[I->src[0].swizzle]; 519596c5ddc4Srjs assert(lane0 < 4); 519696c5ddc4Srjs return 0x3cb40 | (src0 << 0) | (lane0 << 4); 519796c5ddc4Srjs} 519896c5ddc4Srjs 519996c5ddc4Srjsstatic inline unsigned 520096c5ddc4Srjsbi_pack_add_seg_add(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 520196c5ddc4Srjs{ 520296c5ddc4Srjs static uint8_t seg_table[] = { ~0, 2, 0, ~0, ~0, 7 }; 520396c5ddc4Srjs assert(I->seg < 6); 520496c5ddc4Srjs unsigned seg = seg_table[I->seg]; 520596c5ddc4Srjs assert(seg < 8); 520696c5ddc4Srjs unsigned preserve_null = I->preserve_null; 520796c5ddc4Srjs assert(preserve_null < 2); 520896c5ddc4Srjs return 0x3d500 | (src0 << 0) | (seg << 3) | (preserve_null << 7); 520996c5ddc4Srjs} 521096c5ddc4Srjs 521196c5ddc4Srjsstatic inline unsigned 521296c5ddc4Srjsbi_pack_add_shaddxh_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 521396c5ddc4Srjs{ 521496c5ddc4Srjs 521596c5ddc4Srjs return 0x3f8c0 | (src0 << 0) | (src1 << 3); 521696c5ddc4Srjs} 521796c5ddc4Srjs 521896c5ddc4Srjsstatic inline unsigned 521996c5ddc4Srjsbi_pack_add_shift_double_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 522096c5ddc4Srjs{ 522196c5ddc4Srjs 522296c5ddc4Srjs return 0xefe00 | (src0 << 0) | (src1 << 3) | (src2 << 6); 522396c5ddc4Srjs} 522496c5ddc4Srjs 522596c5ddc4Srjsstatic inline unsigned 
522696c5ddc4Srjsbi_pack_add_store_i128(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 522796c5ddc4Srjs{ 522896c5ddc4Srjs static uint8_t seg_table[] = { 1, 2, 0, 3, ~0, 7 }; 522996c5ddc4Srjs assert(I->seg < 6); 523096c5ddc4Srjs unsigned seg = seg_table[I->seg]; 523196c5ddc4Srjs assert(seg < 8); 523296c5ddc4Srjs return 0x61200 | (src0 << 0) | (src1 << 3) | (seg << 6); 523396c5ddc4Srjs} 523496c5ddc4Srjs 523596c5ddc4Srjsstatic inline unsigned 523696c5ddc4Srjsbi_pack_add_store_i16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 523796c5ddc4Srjs{ 523896c5ddc4Srjs static uint8_t seg_table[] = { 1, 2, 0, 3, ~0, 7 }; 523996c5ddc4Srjs assert(I->seg < 6); 524096c5ddc4Srjs unsigned seg = seg_table[I->seg]; 524196c5ddc4Srjs assert(seg < 8); 524296c5ddc4Srjs return 0x62800 | (src0 << 0) | (src1 << 3) | (seg << 6); 524396c5ddc4Srjs} 524496c5ddc4Srjs 524596c5ddc4Srjsstatic inline unsigned 524696c5ddc4Srjsbi_pack_add_store_i24(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 524796c5ddc4Srjs{ 524896c5ddc4Srjs static uint8_t seg_table[] = { 1, 2, 0, 3, ~0, 7 }; 524996c5ddc4Srjs assert(I->seg < 6); 525096c5ddc4Srjs unsigned seg = seg_table[I->seg]; 525196c5ddc4Srjs assert(seg < 8); 525296c5ddc4Srjs return 0x65800 | (src0 << 0) | (src1 << 3) | (seg << 6); 525396c5ddc4Srjs} 525496c5ddc4Srjs 525596c5ddc4Srjsstatic inline unsigned 525696c5ddc4Srjsbi_pack_add_store_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 525796c5ddc4Srjs{ 525896c5ddc4Srjs static uint8_t seg_table[] = { 1, 2, 0, 3, ~0, 7 }; 525996c5ddc4Srjs assert(I->seg < 6); 526096c5ddc4Srjs unsigned seg = seg_table[I->seg]; 526196c5ddc4Srjs assert(seg < 8); 526296c5ddc4Srjs return 0x62c00 | 
(src0 << 0) | (src1 << 3) | (seg << 6); 526396c5ddc4Srjs} 526496c5ddc4Srjs 526596c5ddc4Srjsstatic inline unsigned 526696c5ddc4Srjsbi_pack_add_store_i48(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 526796c5ddc4Srjs{ 526896c5ddc4Srjs static uint8_t seg_table[] = { 1, 2, 0, 3, ~0, 7 }; 526996c5ddc4Srjs assert(I->seg < 6); 527096c5ddc4Srjs unsigned seg = seg_table[I->seg]; 527196c5ddc4Srjs assert(seg < 8); 527296c5ddc4Srjs return 0x65a00 | (src0 << 0) | (src1 << 3) | (seg << 6); 527396c5ddc4Srjs} 527496c5ddc4Srjs 527596c5ddc4Srjsstatic inline unsigned 527696c5ddc4Srjsbi_pack_add_store_i64(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 527796c5ddc4Srjs{ 527896c5ddc4Srjs static uint8_t seg_table[] = { 1, 2, 0, 3, ~0, 7 }; 527996c5ddc4Srjs assert(I->seg < 6); 528096c5ddc4Srjs unsigned seg = seg_table[I->seg]; 528196c5ddc4Srjs assert(seg < 8); 528296c5ddc4Srjs return 0x62e00 | (src0 << 0) | (src1 << 3) | (seg << 6); 528396c5ddc4Srjs} 528496c5ddc4Srjs 528596c5ddc4Srjsstatic inline unsigned 528696c5ddc4Srjsbi_pack_add_store_i8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 528796c5ddc4Srjs{ 528896c5ddc4Srjs static uint8_t seg_table[] = { 1, 2, 0, 3, ~0, 7 }; 528996c5ddc4Srjs assert(I->seg < 6); 529096c5ddc4Srjs unsigned seg = seg_table[I->seg]; 529196c5ddc4Srjs assert(seg < 8); 529296c5ddc4Srjs return 0x62000 | (src0 << 0) | (src1 << 3) | (seg << 6); 529396c5ddc4Srjs} 529496c5ddc4Srjs 529596c5ddc4Srjsstatic inline unsigned 529696c5ddc4Srjsbi_pack_add_store_i96(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 529796c5ddc4Srjs{ 529896c5ddc4Srjs static uint8_t seg_table[] = { 1, 2, 0, 3, ~0, 7 }; 529996c5ddc4Srjs assert(I->seg < 6); 
530096c5ddc4Srjs unsigned seg = seg_table[I->seg]; 530196c5ddc4Srjs assert(seg < 8); 530296c5ddc4Srjs return 0x65c00 | (src0 << 0) | (src1 << 3) | (seg << 6); 530396c5ddc4Srjs} 530496c5ddc4Srjs 530596c5ddc4Srjsstatic inline unsigned 530696c5ddc4Srjsbi_pack_add_st_cvt(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 530796c5ddc4Srjs{ 530896c5ddc4Srjs assert((1 << src2) & 0xf7); 530996c5ddc4Srjs unsigned vecsize = I->vecsize; 531096c5ddc4Srjs assert(vecsize < 4); 531196c5ddc4Srjs return 0xc9800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (vecsize << 9); 531296c5ddc4Srjs} 531396c5ddc4Srjs 531496c5ddc4Srjsstatic inline unsigned 531596c5ddc4Srjsbi_pack_add_st_tile(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 531696c5ddc4Srjs{ 531796c5ddc4Srjs assert((1 << src2) & 0xf7); 531896c5ddc4Srjs unsigned vecsize = I->vecsize; 531996c5ddc4Srjs assert(vecsize < 4); 532096c5ddc4Srjs return 0xcb800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (vecsize << 9); 532196c5ddc4Srjs} 532296c5ddc4Srjs 532396c5ddc4Srjsstatic inline unsigned 532496c5ddc4Srjsbi_pack_add_swz_v2i16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 532596c5ddc4Srjs{ 532696c5ddc4Srjs static uint8_t swz0_table[] = { 0, ~0, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 532796c5ddc4Srjs assert(I->src[0].swizzle < 13); 532896c5ddc4Srjs unsigned swz0 = swz0_table[I->src[0].swizzle]; 532996c5ddc4Srjs assert(swz0 < 4); 533096c5ddc4Srjs return 0x3d948 | (src0 << 0) | (swz0 << 4); 533196c5ddc4Srjs} 533296c5ddc4Srjs 533396c5ddc4Srjsstatic inline unsigned 533496c5ddc4Srjsbi_pack_add_swz_v4i8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 533596c5ddc4Srjs{ 533696c5ddc4Srjs static uint8_t 
swz0_table[] = { ~0, ~0, ~0, ~0, 0, 1, 2, 3, 4, 5, 6, 7, ~0 }; 533796c5ddc4Srjs assert(I->src[0].swizzle < 13); 533896c5ddc4Srjs unsigned swz0 = swz0_table[I->src[0].swizzle]; 533996c5ddc4Srjs assert(swz0 < 8); 534096c5ddc4Srjs return 0x3df40 | (src0 << 0) | (swz0 << 3); 534196c5ddc4Srjs} 534296c5ddc4Srjs 534396c5ddc4Srjsstatic inline unsigned 534496c5ddc4Srjsbi_pack_add_texc(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 534596c5ddc4Srjs{ 534696c5ddc4Srjs assert((1 << src2) & 0xf7); 534796c5ddc4Srjs unsigned skip = I->skip; 534896c5ddc4Srjs assert(skip < 2); 534996c5ddc4Srjs return 0xd7000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (skip << 9); 535096c5ddc4Srjs} 535196c5ddc4Srjs 535296c5ddc4Srjsstatic inline unsigned 535396c5ddc4Srjsbi_pack_add_texs_2d_f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 535496c5ddc4Srjs{ 535596c5ddc4Srjs unsigned skip = I->skip; 535696c5ddc4Srjs assert(skip < 2); 535796c5ddc4Srjs unsigned lod_mode = I->lod_mode; 535896c5ddc4Srjs assert(lod_mode < 2); 535996c5ddc4Srjs unsigned texture_index = I->texture_index; 536096c5ddc4Srjs assert(texture_index < 0x8); 536196c5ddc4Srjs unsigned sampler_index = I->sampler_index; 536296c5ddc4Srjs assert(sampler_index < 0x8); 536396c5ddc4Srjs return 0xd8000 | (src0 << 0) | (src1 << 3) | (skip << 9) | (lod_mode << 13) | (texture_index << 6) | (sampler_index << 10); 536496c5ddc4Srjs} 536596c5ddc4Srjs 536696c5ddc4Srjsstatic inline unsigned 536796c5ddc4Srjsbi_pack_add_texs_2d_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 536896c5ddc4Srjs{ 536996c5ddc4Srjs unsigned skip = I->skip; 537096c5ddc4Srjs assert(skip < 2); 537196c5ddc4Srjs unsigned lod_mode = I->lod_mode; 537296c5ddc4Srjs assert(lod_mode < 2); 537396c5ddc4Srjs unsigned texture_index = 
I->texture_index; 537496c5ddc4Srjs assert(texture_index < 0x8); 537596c5ddc4Srjs unsigned sampler_index = I->sampler_index; 537696c5ddc4Srjs assert(sampler_index < 0x8); 537796c5ddc4Srjs return 0x58000 | (src0 << 0) | (src1 << 3) | (skip << 9) | (lod_mode << 13) | (texture_index << 6) | (sampler_index << 10); 537896c5ddc4Srjs} 537996c5ddc4Srjs 538096c5ddc4Srjsstatic inline unsigned 538196c5ddc4Srjsbi_pack_add_texs_cube_f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 538296c5ddc4Srjs{ 538396c5ddc4Srjs unsigned skip = I->skip; 538496c5ddc4Srjs assert(skip < 2); 538596c5ddc4Srjs unsigned sampler_index = I->sampler_index; 538696c5ddc4Srjs assert(sampler_index < 0x4); 538796c5ddc4Srjs unsigned texture_index = I->texture_index; 538896c5ddc4Srjs assert(texture_index < 0x4); 538996c5ddc4Srjs return 0xdc000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (skip << 9) | (sampler_index << 10) | (texture_index << 12); 539096c5ddc4Srjs} 539196c5ddc4Srjs 539296c5ddc4Srjsstatic inline unsigned 539396c5ddc4Srjsbi_pack_add_texs_cube_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 539496c5ddc4Srjs{ 539596c5ddc4Srjs unsigned skip = I->skip; 539696c5ddc4Srjs assert(skip < 2); 539796c5ddc4Srjs unsigned sampler_index = I->sampler_index; 539896c5ddc4Srjs assert(sampler_index < 0x4); 539996c5ddc4Srjs unsigned texture_index = I->texture_index; 540096c5ddc4Srjs assert(texture_index < 0x4); 540196c5ddc4Srjs return 0x5c000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (skip << 9) | (sampler_index << 10) | (texture_index << 12); 540296c5ddc4Srjs} 540396c5ddc4Srjs 540496c5ddc4Srjsstatic inline unsigned 540596c5ddc4Srjsbi_pack_add_u16_to_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 540696c5ddc4Srjs{ 540796c5ddc4Srjs static uint8_t 
lane0_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 540896c5ddc4Srjs assert(I->src[0].swizzle < 13); 540996c5ddc4Srjs unsigned lane0 = lane0_table[I->src[0].swizzle]; 541096c5ddc4Srjs assert(lane0 < 2); 541196c5ddc4Srjs return 0x3cce8 | (src0 << 0) | (lane0 << 4); 541296c5ddc4Srjs} 541396c5ddc4Srjs 541496c5ddc4Srjsstatic inline unsigned 541596c5ddc4Srjsbi_pack_add_u16_to_u32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 541696c5ddc4Srjs{ 541796c5ddc4Srjs static uint8_t lane0_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 541896c5ddc4Srjs assert(I->src[0].swizzle < 13); 541996c5ddc4Srjs unsigned lane0 = lane0_table[I->src[0].swizzle]; 542096c5ddc4Srjs assert(lane0 < 2); 542196c5ddc4Srjs return 0x3ccc8 | (src0 << 0) | (lane0 << 4); 542296c5ddc4Srjs} 542396c5ddc4Srjs 542496c5ddc4Srjsstatic inline unsigned 542596c5ddc4Srjsbi_pack_add_u32_to_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 542696c5ddc4Srjs{ 542796c5ddc4Srjs unsigned round = I->round; 542896c5ddc4Srjs assert(round < 8); 542996c5ddc4Srjs if (round != 4) { 543096c5ddc4Srjs unsigned derived_4 = 0; 543196c5ddc4Srjs if (round == 0) derived_4 = 0; 543296c5ddc4Srjs else if (round == 1) derived_4 = 1; 543396c5ddc4Srjs else if (round == 2) derived_4 = 2; 543496c5ddc4Srjs else if (round == 3) derived_4 = 3; 543596c5ddc4Srjs else unreachable("No pattern match at pos 4"); 543696c5ddc4Srjs 543796c5ddc4Srjs return 0x3cbc8 | (src0 << 0) | (derived_4 << 4); 543896c5ddc4Srjs } else if (round == 4) { 543996c5ddc4Srjs return 0x3cd08 | (src0 << 0); 544096c5ddc4Srjs } else { 544196c5ddc4Srjs unreachable("No matching state found in add_u32_to_f32"); 544296c5ddc4Srjs } 544396c5ddc4Srjs} 544496c5ddc4Srjs 544596c5ddc4Srjsstatic inline unsigned 544696c5ddc4Srjsbi_pack_add_u8_to_f32(bi_instr *I, enum bifrost_packed_src src0, 
enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 544796c5ddc4Srjs{ 544896c5ddc4Srjs static uint8_t lane0_table[] = { ~0, ~0, ~0, ~0, 0, 1, 2, 3, ~0, ~0, ~0, ~0, ~0 }; 544996c5ddc4Srjs assert(I->src[0].swizzle < 13); 545096c5ddc4Srjs unsigned lane0 = lane0_table[I->src[0].swizzle]; 545196c5ddc4Srjs assert(lane0 < 4); 545296c5ddc4Srjs return 0x3cb88 | (src0 << 0) | (lane0 << 4); 545396c5ddc4Srjs} 545496c5ddc4Srjs 545596c5ddc4Srjsstatic inline unsigned 545696c5ddc4Srjsbi_pack_add_u8_to_u32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 545796c5ddc4Srjs{ 545896c5ddc4Srjs static uint8_t lane0_table[] = { ~0, ~0, ~0, ~0, 0, 1, 2, 3, ~0, ~0, ~0, ~0, ~0 }; 545996c5ddc4Srjs assert(I->src[0].swizzle < 13); 546096c5ddc4Srjs unsigned lane0 = lane0_table[I->src[0].swizzle]; 546196c5ddc4Srjs assert(lane0 < 4); 546296c5ddc4Srjs return 0x3cb48 | (src0 << 0) | (lane0 << 4); 546396c5ddc4Srjs} 546496c5ddc4Srjs 546596c5ddc4Srjsstatic inline unsigned 546696c5ddc4Srjsbi_pack_add_v2f16_to_v2s16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 546796c5ddc4Srjs{ 546896c5ddc4Srjs unsigned round = I->round; 546996c5ddc4Srjs assert(round < 8); 547096c5ddc4Srjs static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 547196c5ddc4Srjs assert(I->src[0].swizzle < 13); 547296c5ddc4Srjs unsigned swz0 = swz0_table[I->src[0].swizzle]; 547396c5ddc4Srjs assert(swz0 < 4); 547496c5ddc4Srjs if (round != 4) { 547596c5ddc4Srjs unsigned derived_4 = 0; 547696c5ddc4Srjs if (round == 0) derived_4 = 0; 547796c5ddc4Srjs else if (round == 1) derived_4 = 1; 547896c5ddc4Srjs else if (round == 2) derived_4 = 2; 547996c5ddc4Srjs else if (round == 3) derived_4 = 3; 548096c5ddc4Srjs else unreachable("No pattern match at pos 4"); 548196c5ddc4Srjs 548296c5ddc4Srjs return 0x3c200 | (src0 << 
0) | (swz0 << 6) | (derived_4 << 4); 548396c5ddc4Srjs } else if (round == 4) { 548496c5ddc4Srjs return 0x3ca80 | (src0 << 0) | (swz0 << 4); 548596c5ddc4Srjs } else { 548696c5ddc4Srjs unreachable("No matching state found in add_v2f16_to_v2s16"); 548796c5ddc4Srjs } 548896c5ddc4Srjs} 548996c5ddc4Srjs 549096c5ddc4Srjsstatic inline unsigned 549196c5ddc4Srjsbi_pack_add_v2f16_to_v2u16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 549296c5ddc4Srjs{ 549396c5ddc4Srjs unsigned round = I->round; 549496c5ddc4Srjs assert(round < 8); 549596c5ddc4Srjs static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 549696c5ddc4Srjs assert(I->src[0].swizzle < 13); 549796c5ddc4Srjs unsigned swz0 = swz0_table[I->src[0].swizzle]; 549896c5ddc4Srjs assert(swz0 < 4); 549996c5ddc4Srjs if (round != 4) { 550096c5ddc4Srjs unsigned derived_4 = 0; 550196c5ddc4Srjs if (round == 0) derived_4 = 0; 550296c5ddc4Srjs else if (round == 1) derived_4 = 1; 550396c5ddc4Srjs else if (round == 2) derived_4 = 2; 550496c5ddc4Srjs else if (round == 3) derived_4 = 3; 550596c5ddc4Srjs else unreachable("No pattern match at pos 4"); 550696c5ddc4Srjs 550796c5ddc4Srjs return 0x3c208 | (src0 << 0) | (swz0 << 6) | (derived_4 << 4); 550896c5ddc4Srjs } else if (round == 4) { 550996c5ddc4Srjs return 0x3ca88 | (src0 << 0) | (swz0 << 4); 551096c5ddc4Srjs } else { 551196c5ddc4Srjs unreachable("No matching state found in add_v2f16_to_v2u16"); 551296c5ddc4Srjs } 551396c5ddc4Srjs} 551496c5ddc4Srjs 551596c5ddc4Srjsstatic inline unsigned 551696c5ddc4Srjsbi_pack_add_v2f32_to_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 551796c5ddc4Srjs{ 551896c5ddc4Srjs unsigned abs0 = I->src[0].abs; 551996c5ddc4Srjs assert(abs0 < 2); 552096c5ddc4Srjs unsigned abs1 = I->src[1].abs; 552196c5ddc4Srjs assert(abs1 < 2); 552296c5ddc4Srjs unsigned neg0 = 
I->src[0].neg; 552396c5ddc4Srjs assert(neg0 < 2); 552496c5ddc4Srjs unsigned neg1 = I->src[1].neg; 552596c5ddc4Srjs assert(neg1 < 2); 552696c5ddc4Srjs unsigned clamp = I->clamp; 552796c5ddc4Srjs assert(clamp < 4); 552896c5ddc4Srjs unsigned round = I->round; 552996c5ddc4Srjs assert(round < 8); 553096c5ddc4Srjs unsigned derived_6 = 0; 553196c5ddc4Srjs if ((abs0 == 0) && (abs1 == 0)) derived_6 = 0; 553296c5ddc4Srjs else if ((abs0 == 1) && (abs1 == 1)) derived_6 = 1; 553396c5ddc4Srjs else unreachable("No pattern match at pos 6"); 553496c5ddc4Srjs 553596c5ddc4Srjs unsigned derived_7 = 0; 553696c5ddc4Srjs if ((neg0 == 0) && (neg1 == 0)) derived_7 = 0; 553796c5ddc4Srjs else if ((neg0 == 1) && (neg1 == 1)) derived_7 = 1; 553896c5ddc4Srjs else unreachable("No pattern match at pos 7"); 553996c5ddc4Srjs 554096c5ddc4Srjs return 0x76000 | (src0 << 0) | (src1 << 3) | (clamp << 8) | (round << 10) | (derived_6 << 6) | (derived_7 << 7); 554196c5ddc4Srjs} 554296c5ddc4Srjs 554396c5ddc4Srjsstatic inline unsigned 554496c5ddc4Srjsbi_pack_add_v2s16_to_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 554596c5ddc4Srjs{ 554696c5ddc4Srjs unsigned round = I->round; 554796c5ddc4Srjs assert(round < 8); 554896c5ddc4Srjs static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 554996c5ddc4Srjs assert(I->src[0].swizzle < 13); 555096c5ddc4Srjs unsigned swz0 = swz0_table[I->src[0].swizzle]; 555196c5ddc4Srjs assert(swz0 < 4); 555296c5ddc4Srjs if (round != 4) { 555396c5ddc4Srjs unsigned derived_4 = 0; 555496c5ddc4Srjs if (round == 0) derived_4 = 0; 555596c5ddc4Srjs else if (round == 1) derived_4 = 1; 555696c5ddc4Srjs else if (round == 2) derived_4 = 2; 555796c5ddc4Srjs else if (round == 3) derived_4 = 3; 555896c5ddc4Srjs else unreachable("No pattern match at pos 4"); 555996c5ddc4Srjs 556096c5ddc4Srjs return 0x3c600 | (src0 << 0) | (swz0 << 6) | (derived_4 << 4); 556196c5ddc4Srjs } else if 
(round == 4) { 556296c5ddc4Srjs return 0x3cb00 | (src0 << 0) | (swz0 << 4); 556396c5ddc4Srjs } else { 556496c5ddc4Srjs unreachable("No matching state found in add_v2s16_to_v2f16"); 556596c5ddc4Srjs } 556696c5ddc4Srjs} 556796c5ddc4Srjs 556896c5ddc4Srjsstatic inline unsigned 556996c5ddc4Srjsbi_pack_add_v2s8_to_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 557096c5ddc4Srjs{ 557196c5ddc4Srjs static uint8_t swz0_table[] = { ~0, ~0, ~0, ~0, 0, 5, 10, 15, 4, 14, ~0, ~0, 8 }; 557296c5ddc4Srjs assert(I->src[0].swizzle < 13); 557396c5ddc4Srjs unsigned swz0 = swz0_table[I->src[0].swizzle]; 557496c5ddc4Srjs assert(swz0 < 16); 557596c5ddc4Srjs return 0x3c800 | (src0 << 0) | (swz0 << 4); 557696c5ddc4Srjs} 557796c5ddc4Srjs 557896c5ddc4Srjsstatic inline unsigned 557996c5ddc4Srjsbi_pack_add_v2s8_to_v2s16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 558096c5ddc4Srjs{ 558196c5ddc4Srjs static uint8_t swz0_table[] = { ~0, ~0, ~0, ~0, 0, 5, 10, 15, 4, 14, ~0, ~0, 8 }; 558296c5ddc4Srjs assert(I->src[0].swizzle < 13); 558396c5ddc4Srjs unsigned swz0 = swz0_table[I->src[0].swizzle]; 558496c5ddc4Srjs assert(swz0 < 16); 558596c5ddc4Srjs return 0x3c700 | (src0 << 0) | (swz0 << 4); 558696c5ddc4Srjs} 558796c5ddc4Srjs 558896c5ddc4Srjsstatic inline unsigned 558996c5ddc4Srjsbi_pack_add_v2u16_to_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 559096c5ddc4Srjs{ 559196c5ddc4Srjs unsigned round = I->round; 559296c5ddc4Srjs assert(round < 8); 559396c5ddc4Srjs static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 559496c5ddc4Srjs assert(I->src[0].swizzle < 13); 559596c5ddc4Srjs unsigned swz0 = swz0_table[I->src[0].swizzle]; 559696c5ddc4Srjs assert(swz0 < 4); 559796c5ddc4Srjs if (round != 4) { 559896c5ddc4Srjs 
unsigned derived_4 = 0; 559996c5ddc4Srjs if (round == 0) derived_4 = 0; 560096c5ddc4Srjs else if (round == 1) derived_4 = 1; 560196c5ddc4Srjs else if (round == 2) derived_4 = 2; 560296c5ddc4Srjs else if (round == 3) derived_4 = 3; 560396c5ddc4Srjs else unreachable("No pattern match at pos 4"); 560496c5ddc4Srjs 560596c5ddc4Srjs return 0x3c608 | (src0 << 0) | (swz0 << 6) | (derived_4 << 4); 560696c5ddc4Srjs } else if (round == 4) { 560796c5ddc4Srjs return 0x3cb08 | (src0 << 0) | (swz0 << 4); 560896c5ddc4Srjs } else { 560996c5ddc4Srjs unreachable("No matching state found in add_v2u16_to_v2f16"); 561096c5ddc4Srjs } 561196c5ddc4Srjs} 561296c5ddc4Srjs 561396c5ddc4Srjsstatic inline unsigned 561496c5ddc4Srjsbi_pack_add_v2u8_to_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 561596c5ddc4Srjs{ 561696c5ddc4Srjs static uint8_t swz0_table[] = { ~0, ~0, ~0, ~0, 0, 5, 10, 15, 4, 14, ~0, ~0, 8 }; 561796c5ddc4Srjs assert(I->src[0].swizzle < 13); 561896c5ddc4Srjs unsigned swz0 = swz0_table[I->src[0].swizzle]; 561996c5ddc4Srjs assert(swz0 < 16); 562096c5ddc4Srjs return 0x3c808 | (src0 << 0) | (swz0 << 4); 562196c5ddc4Srjs} 562296c5ddc4Srjs 562396c5ddc4Srjsstatic inline unsigned 562496c5ddc4Srjsbi_pack_add_v2u8_to_v2u16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 562596c5ddc4Srjs{ 562696c5ddc4Srjs static uint8_t swz0_table[] = { ~0, ~0, ~0, ~0, 0, 5, 10, 15, 4, 14, ~0, ~0, 8 }; 562796c5ddc4Srjs assert(I->src[0].swizzle < 13); 562896c5ddc4Srjs unsigned swz0 = swz0_table[I->src[0].swizzle]; 562996c5ddc4Srjs assert(swz0 < 16); 563096c5ddc4Srjs return 0x3c708 | (src0 << 0) | (swz0 << 4); 563196c5ddc4Srjs} 563296c5ddc4Srjs 563396c5ddc4Srjsstatic inline unsigned 563496c5ddc4Srjsbi_pack_add_var_tex_f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum 
bifrost_packed_src src3) 563596c5ddc4Srjs{ 563696c5ddc4Srjs unsigned update = I->update; 563796c5ddc4Srjs assert(update < 2); 563896c5ddc4Srjs unsigned skip = I->skip; 563996c5ddc4Srjs assert(skip < 2); 564096c5ddc4Srjs unsigned lod_mode = I->lod_mode; 564196c5ddc4Srjs assert(lod_mode < 2); 564296c5ddc4Srjs static uint8_t sample_table[] = { 0, ~0, ~0, ~0, 1, ~0 }; 564396c5ddc4Srjs assert(I->sample < 6); 564496c5ddc4Srjs unsigned sample = sample_table[I->sample]; 564596c5ddc4Srjs assert(sample < 2); 564696c5ddc4Srjs unsigned varying_index = I->varying_index; 564796c5ddc4Srjs assert(varying_index < 0x8); 564896c5ddc4Srjs unsigned texture_index = I->texture_index; 564996c5ddc4Srjs assert(texture_index < 0x4); 565096c5ddc4Srjs unsigned derived_5 = 0; 565196c5ddc4Srjs if ((sample == 0) && (update == 0)) derived_5 = 0; 565296c5ddc4Srjs else if ((sample == 1) && (update == 1)) derived_5 = 1; 565396c5ddc4Srjs else unreachable("No pattern match at pos 5"); 565496c5ddc4Srjs 565596c5ddc4Srjs return 0xca100 | (skip << 7) | (lod_mode << 9) | (varying_index << 0) | (texture_index << 3) | (derived_5 << 5); 565696c5ddc4Srjs} 565796c5ddc4Srjs 565896c5ddc4Srjsstatic inline unsigned 565996c5ddc4Srjsbi_pack_add_var_tex_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 566096c5ddc4Srjs{ 566196c5ddc4Srjs unsigned update = I->update; 566296c5ddc4Srjs assert(update < 2); 566396c5ddc4Srjs unsigned skip = I->skip; 566496c5ddc4Srjs assert(skip < 2); 566596c5ddc4Srjs unsigned lod_mode = I->lod_mode; 566696c5ddc4Srjs assert(lod_mode < 2); 566796c5ddc4Srjs static uint8_t sample_table[] = { 0, ~0, ~0, ~0, 1, ~0 }; 566896c5ddc4Srjs assert(I->sample < 6); 566996c5ddc4Srjs unsigned sample = sample_table[I->sample]; 567096c5ddc4Srjs assert(sample < 2); 567196c5ddc4Srjs unsigned varying_index = I->varying_index; 567296c5ddc4Srjs assert(varying_index < 0x8); 567396c5ddc4Srjs unsigned texture_index = 
I->texture_index; 567496c5ddc4Srjs assert(texture_index < 0x4); 567596c5ddc4Srjs unsigned derived_5 = 0; 567696c5ddc4Srjs if ((sample == 0) && (update == 0)) derived_5 = 0; 567796c5ddc4Srjs else if ((sample == 1) && (update == 1)) derived_5 = 1; 567896c5ddc4Srjs else unreachable("No pattern match at pos 5"); 567996c5ddc4Srjs 568096c5ddc4Srjs return 0xca000 | (skip << 7) | (lod_mode << 9) | (varying_index << 0) | (texture_index << 3) | (derived_5 << 5); 568196c5ddc4Srjs} 568296c5ddc4Srjs 568396c5ddc4Srjsstatic inline unsigned 568496c5ddc4Srjsbi_pack_add_vn_asst2_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 568596c5ddc4Srjs{ 568696c5ddc4Srjs unsigned scale = I->scale; 568796c5ddc4Srjs assert(scale < 2); 568896c5ddc4Srjs unsigned neg0 = I->src[0].neg; 568996c5ddc4Srjs assert(neg0 < 2); 569096c5ddc4Srjs if (scale == 0) { 569196c5ddc4Srjs return 0x3df80 | (src0 << 0) | (neg0 << 3); 569296c5ddc4Srjs } else if (scale == 1) { 569396c5ddc4Srjs return 0x3de80 | (src0 << 0) | (neg0 << 4); 569496c5ddc4Srjs } else { 569596c5ddc4Srjs unreachable("No matching state found in add_vn_asst2_f32"); 569696c5ddc4Srjs } 569796c5ddc4Srjs} 569896c5ddc4Srjs 569996c5ddc4Srjsstatic inline unsigned 570096c5ddc4Srjsbi_pack_add_vn_asst2_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 570196c5ddc4Srjs{ 570296c5ddc4Srjs unsigned neg0 = I->src[0].neg; 570396c5ddc4Srjs assert(neg0 < 2); 570496c5ddc4Srjs return 0x3dfa0 | (src0 << 0) | (neg0 << 3); 570596c5ddc4Srjs} 570696c5ddc4Srjs 570796c5ddc4Srjsstatic inline unsigned 570896c5ddc4Srjsbi_pack_add_wmask(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 570996c5ddc4Srjs{ 571096c5ddc4Srjs unsigned subgroup = I->subgroup; 571196c5ddc4Srjs assert(subgroup < 4); 571296c5ddc4Srjs unsigned fill = 
I->fill; 571396c5ddc4Srjs assert(fill < 0x2); 571496c5ddc4Srjs return 0x3d700 | (src0 << 0) | (subgroup << 4) | (fill << 3); 571596c5ddc4Srjs} 571696c5ddc4Srjs 571796c5ddc4Srjsstatic inline unsigned 571896c5ddc4Srjsbi_pack_add_zs_emit(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 571996c5ddc4Srjs{ 572096c5ddc4Srjs unsigned stencil = I->stencil; 572196c5ddc4Srjs assert(stencil < 2); 572296c5ddc4Srjs unsigned z = I->z; 572396c5ddc4Srjs assert(z < 2); 572496c5ddc4Srjs unsigned derived_9 = 0; 572596c5ddc4Srjs if ((stencil == 1) && (z == 0)) derived_9 = 1; 572696c5ddc4Srjs else if ((stencil == 0) && (z == 1)) derived_9 = 2; 572796c5ddc4Srjs else if ((stencil == 1) && (z == 1)) derived_9 = 3; 572896c5ddc4Srjs else unreachable("No pattern match at pos 9"); 572996c5ddc4Srjs 573096c5ddc4Srjs return 0xd7800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_9 << 9); 573196c5ddc4Srjs} 573296c5ddc4Srjs 573396c5ddc4Srjsunsigned 573496c5ddc4Srjsbi_pack_fma(bi_instr *I, 573596c5ddc4Srjs enum bifrost_packed_src src0, 573696c5ddc4Srjs enum bifrost_packed_src src1, 573796c5ddc4Srjs enum bifrost_packed_src src2, 573896c5ddc4Srjs enum bifrost_packed_src src3) 573996c5ddc4Srjs{ 574096c5ddc4Srjs if (!I) 574196c5ddc4Srjs return bi_pack_fma_nop(I, src0, src1, src2, src3); 574296c5ddc4Srjs 574396c5ddc4Srjs assert((1 << src0) & 0xfb); 574496c5ddc4Srjs assert((1 << src1) & 0xfb); 574596c5ddc4Srjs 574696c5ddc4Srjs switch (I->op) { 574796c5ddc4Srjs case BI_OPCODE_ARSHIFT_I32: 574896c5ddc4Srjs return bi_pack_fma_arshift_i32(I, src0, src1, src2, src3); 574996c5ddc4Srjs case BI_OPCODE_ARSHIFT_V2I16: 575096c5ddc4Srjs return bi_pack_fma_arshift_v2i16(I, src0, src1, src2, src3); 575196c5ddc4Srjs case BI_OPCODE_ARSHIFT_V4I8: 575296c5ddc4Srjs return bi_pack_fma_arshift_v4i8(I, src0, src1, src2, src3); 575396c5ddc4Srjs case BI_OPCODE_ARSHIFT_DOUBLE_I32: 575496c5ddc4Srjs return bi_pack_fma_arshift_double_i32(I, 
src0, src1, src2, src3); 575596c5ddc4Srjs case BI_OPCODE_ATOM_C_I32: 575696c5ddc4Srjs return bi_pack_fma_atom_c_i32(I, src0, src1, src2, src3); 575796c5ddc4Srjs case BI_OPCODE_ATOM_C_I64: 575896c5ddc4Srjs return bi_pack_fma_atom_c_i64(I, src0, src1, src2, src3); 575996c5ddc4Srjs case BI_OPCODE_ATOM_C1_I32: 576096c5ddc4Srjs return bi_pack_fma_atom_c1_i32(I, src0, src1, src2, src3); 576196c5ddc4Srjs case BI_OPCODE_ATOM_C1_I64: 576296c5ddc4Srjs return bi_pack_fma_atom_c1_i64(I, src0, src1, src2, src3); 576396c5ddc4Srjs case BI_OPCODE_ATOM_C1_RETURN_I32: 576496c5ddc4Srjs return bi_pack_fma_atom_c1_return_i32(I, src0, src1, src2, src3); 576596c5ddc4Srjs case BI_OPCODE_ATOM_C1_RETURN_I64: 576696c5ddc4Srjs return bi_pack_fma_atom_c1_return_i64(I, src0, src1, src2, src3); 576796c5ddc4Srjs case BI_OPCODE_ATOM_C_RETURN_I32: 576896c5ddc4Srjs return bi_pack_fma_atom_c_return_i32(I, src0, src1, src2, src3); 576996c5ddc4Srjs case BI_OPCODE_ATOM_C_RETURN_I64: 577096c5ddc4Srjs return bi_pack_fma_atom_c_return_i64(I, src0, src1, src2, src3); 577196c5ddc4Srjs case BI_OPCODE_ATOM_POST_I32: 577296c5ddc4Srjs return bi_pack_fma_atom_post_i32(I, src0, src1, src2, src3); 577396c5ddc4Srjs case BI_OPCODE_ATOM_POST_I64: 577496c5ddc4Srjs return bi_pack_fma_atom_post_i64(I, src0, src1, src2, src3); 577596c5ddc4Srjs case BI_OPCODE_ATOM_PRE_I64: 577696c5ddc4Srjs return bi_pack_fma_atom_pre_i64(I, src0, src1, src2, src3); 577796c5ddc4Srjs case BI_OPCODE_BITREV_I32: 577896c5ddc4Srjs return bi_pack_fma_bitrev_i32(I, src0, src1, src2, src3); 577996c5ddc4Srjs case BI_OPCODE_CLZ_U32: 578096c5ddc4Srjs return bi_pack_fma_clz_u32(I, src0, src1, src2, src3); 578196c5ddc4Srjs case BI_OPCODE_CLZ_V2U16: 578296c5ddc4Srjs return bi_pack_fma_clz_v2u16(I, src0, src1, src2, src3); 578396c5ddc4Srjs case BI_OPCODE_CLZ_V4U8: 578496c5ddc4Srjs return bi_pack_fma_clz_v4u8(I, src0, src1, src2, src3); 578596c5ddc4Srjs case BI_OPCODE_CSEL_F32: 578696c5ddc4Srjs return bi_pack_fma_csel_f32(I, src0, src1, src2, src3); 
578796c5ddc4Srjs case BI_OPCODE_CSEL_I32: 578896c5ddc4Srjs return bi_pack_fma_csel_i32(I, src0, src1, src2, src3); 578996c5ddc4Srjs case BI_OPCODE_CSEL_S32: 579096c5ddc4Srjs return bi_pack_fma_csel_s32(I, src0, src1, src2, src3); 579196c5ddc4Srjs case BI_OPCODE_CSEL_U32: 579296c5ddc4Srjs return bi_pack_fma_csel_u32(I, src0, src1, src2, src3); 579396c5ddc4Srjs case BI_OPCODE_CSEL_V2F16: 579496c5ddc4Srjs return bi_pack_fma_csel_v2f16(I, src0, src1, src2, src3); 579596c5ddc4Srjs case BI_OPCODE_CSEL_V2I16: 579696c5ddc4Srjs return bi_pack_fma_csel_v2i16(I, src0, src1, src2, src3); 579796c5ddc4Srjs case BI_OPCODE_CSEL_V2S16: 579896c5ddc4Srjs return bi_pack_fma_csel_v2s16(I, src0, src1, src2, src3); 579996c5ddc4Srjs case BI_OPCODE_CSEL_V2U16: 580096c5ddc4Srjs return bi_pack_fma_csel_v2u16(I, src0, src1, src2, src3); 580196c5ddc4Srjs case BI_OPCODE_CUBEFACE1: 580296c5ddc4Srjs return bi_pack_fma_cubeface1(I, src0, src1, src2, src3); 580396c5ddc4Srjs case BI_OPCODE_DTSEL_IMM: 580496c5ddc4Srjs return bi_pack_fma_dtsel_imm(I, src0, src1, src2, src3); 580596c5ddc4Srjs case BI_OPCODE_F16_TO_F32: 580696c5ddc4Srjs return bi_pack_fma_f16_to_f32(I, src0, src1, src2, src3); 580796c5ddc4Srjs case BI_OPCODE_FADD_F32: 580896c5ddc4Srjs return bi_pack_fma_fadd_f32(I, src0, src1, src2, src3); 580996c5ddc4Srjs case BI_OPCODE_FADD_V2F16: 581096c5ddc4Srjs return bi_pack_fma_fadd_v2f16(I, src0, src1, src2, src3); 581196c5ddc4Srjs case BI_OPCODE_FADD_LSCALE_F32: 581296c5ddc4Srjs return bi_pack_fma_fadd_lscale_f32(I, src0, src1, src2, src3); 581396c5ddc4Srjs case BI_OPCODE_FCMP_F32: 581496c5ddc4Srjs return bi_pack_fma_fcmp_f32(I, src0, src1, src2, src3); 581596c5ddc4Srjs case BI_OPCODE_FCMP_V2F16: 581696c5ddc4Srjs return bi_pack_fma_fcmp_v2f16(I, src0, src1, src2, src3); 581796c5ddc4Srjs case BI_OPCODE_FLSHIFT_DOUBLE_I32: 581896c5ddc4Srjs return bi_pack_fma_flshift_double_i32(I, src0, src1, src2, src3); 581996c5ddc4Srjs case BI_OPCODE_FMA_F32: 582096c5ddc4Srjs return bi_pack_fma_fma_f32(I, src0, 
src1, src2, src3); 582196c5ddc4Srjs case BI_OPCODE_FMA_V2F16: 582296c5ddc4Srjs return bi_pack_fma_fma_v2f16(I, src0, src1, src2, src3); 582396c5ddc4Srjs case BI_OPCODE_FMA_RSCALE_F32: 582496c5ddc4Srjs return bi_pack_fma_fma_rscale_f32(I, src0, src1, src2, src3); 582596c5ddc4Srjs case BI_OPCODE_FMA_RSCALE_V2F16: 582696c5ddc4Srjs return bi_pack_fma_fma_rscale_v2f16(I, src0, src1, src2, src3); 582796c5ddc4Srjs case BI_OPCODE_FMUL_CSLICE: 582896c5ddc4Srjs return bi_pack_fma_fmul_cslice(I, src0, src1, src2, src3); 582996c5ddc4Srjs case BI_OPCODE_FMUL_SLICE_F32: 583096c5ddc4Srjs return bi_pack_fma_fmul_slice_f32(I, src0, src1, src2, src3); 583196c5ddc4Srjs case BI_OPCODE_FREXPE_F32: 583296c5ddc4Srjs return bi_pack_fma_frexpe_f32(I, src0, src1, src2, src3); 583396c5ddc4Srjs case BI_OPCODE_FREXPE_V2F16: 583496c5ddc4Srjs return bi_pack_fma_frexpe_v2f16(I, src0, src1, src2, src3); 583596c5ddc4Srjs case BI_OPCODE_FREXPM_F32: 583696c5ddc4Srjs return bi_pack_fma_frexpm_f32(I, src0, src1, src2, src3); 583796c5ddc4Srjs case BI_OPCODE_FREXPM_V2F16: 583896c5ddc4Srjs return bi_pack_fma_frexpm_v2f16(I, src0, src1, src2, src3); 583996c5ddc4Srjs case BI_OPCODE_FROUND_F32: 584096c5ddc4Srjs return bi_pack_fma_fround_f32(I, src0, src1, src2, src3); 584196c5ddc4Srjs case BI_OPCODE_FROUND_V2F16: 584296c5ddc4Srjs return bi_pack_fma_fround_v2f16(I, src0, src1, src2, src3); 584396c5ddc4Srjs case BI_OPCODE_FRSHIFT_DOUBLE_I32: 584496c5ddc4Srjs return bi_pack_fma_frshift_double_i32(I, src0, src1, src2, src3); 584596c5ddc4Srjs case BI_OPCODE_IADDC_I32: 584696c5ddc4Srjs return bi_pack_fma_iaddc_i32(I, src0, src1, src2, src3); 584796c5ddc4Srjs case BI_OPCODE_IDP_V4I8: 584896c5ddc4Srjs return bi_pack_fma_idp_v4i8(I, src0, src1, src2, src3); 584996c5ddc4Srjs case BI_OPCODE_IMUL_I32: 585096c5ddc4Srjs return bi_pack_fma_imul_i32(I, src0, src1, src2, src3); 585196c5ddc4Srjs case BI_OPCODE_IMUL_V2I16: 585296c5ddc4Srjs return bi_pack_fma_imul_v2i16(I, src0, src1, src2, src3); 585396c5ddc4Srjs case 
BI_OPCODE_IMUL_V4I8: 585496c5ddc4Srjs return bi_pack_fma_imul_v4i8(I, src0, src1, src2, src3); 585596c5ddc4Srjs case BI_OPCODE_IMULD: 585696c5ddc4Srjs return bi_pack_fma_imuld(I, src0, src1, src2, src3); 585796c5ddc4Srjs case BI_OPCODE_ISUBB_I32: 585896c5ddc4Srjs return bi_pack_fma_isubb_i32(I, src0, src1, src2, src3); 585996c5ddc4Srjs case BI_OPCODE_JUMP_EX: 586096c5ddc4Srjs return bi_pack_fma_jump_ex(I, src0, src1, src2, src3); 586196c5ddc4Srjs case BI_OPCODE_LROT_DOUBLE_I32: 586296c5ddc4Srjs return bi_pack_fma_lrot_double_i32(I, src0, src1, src2, src3); 586396c5ddc4Srjs case BI_OPCODE_LSHIFT_AND_I32: 586496c5ddc4Srjs return bi_pack_fma_lshift_and_i32(I, src0, src1, src2, src3); 586596c5ddc4Srjs case BI_OPCODE_LSHIFT_AND_V2I16: 586696c5ddc4Srjs return bi_pack_fma_lshift_and_v2i16(I, src0, src1, src2, src3); 586796c5ddc4Srjs case BI_OPCODE_LSHIFT_AND_V4I8: 586896c5ddc4Srjs return bi_pack_fma_lshift_and_v4i8(I, src0, src1, src2, src3); 586996c5ddc4Srjs case BI_OPCODE_LSHIFT_DOUBLE_I32: 587096c5ddc4Srjs return bi_pack_fma_lshift_double_i32(I, src0, src1, src2, src3); 587196c5ddc4Srjs case BI_OPCODE_LSHIFT_OR_I32: 587296c5ddc4Srjs return bi_pack_fma_lshift_or_i32(I, src0, src1, src2, src3); 587396c5ddc4Srjs case BI_OPCODE_LSHIFT_OR_V2I16: 587496c5ddc4Srjs return bi_pack_fma_lshift_or_v2i16(I, src0, src1, src2, src3); 587596c5ddc4Srjs case BI_OPCODE_LSHIFT_OR_V4I8: 587696c5ddc4Srjs return bi_pack_fma_lshift_or_v4i8(I, src0, src1, src2, src3); 587796c5ddc4Srjs case BI_OPCODE_LSHIFT_XOR_I32: 587896c5ddc4Srjs return bi_pack_fma_lshift_xor_i32(I, src0, src1, src2, src3); 587996c5ddc4Srjs case BI_OPCODE_LSHIFT_XOR_V2I16: 588096c5ddc4Srjs return bi_pack_fma_lshift_xor_v2i16(I, src0, src1, src2, src3); 588196c5ddc4Srjs case BI_OPCODE_LSHIFT_XOR_V4I8: 588296c5ddc4Srjs return bi_pack_fma_lshift_xor_v4i8(I, src0, src1, src2, src3); 588396c5ddc4Srjs case BI_OPCODE_MKVEC_V2I16: 588496c5ddc4Srjs return bi_pack_fma_mkvec_v2i16(I, src0, src1, src2, src3); 588596c5ddc4Srjs case 
BI_OPCODE_MKVEC_V4I8: 588696c5ddc4Srjs return bi_pack_fma_mkvec_v4i8(I, src0, src1, src2, src3); 588796c5ddc4Srjs case BI_OPCODE_MOV_I32: 588896c5ddc4Srjs return bi_pack_fma_mov_i32(I, src0, src1, src2, src3); 588996c5ddc4Srjs case BI_OPCODE_NOP: 589096c5ddc4Srjs return bi_pack_fma_nop(I, src0, src1, src2, src3); 589196c5ddc4Srjs case BI_OPCODE_POPCOUNT_I32: 589296c5ddc4Srjs return bi_pack_fma_popcount_i32(I, src0, src1, src2, src3); 589396c5ddc4Srjs case BI_OPCODE_QUIET_F32: 589496c5ddc4Srjs return bi_pack_fma_quiet_f32(I, src0, src1, src2, src3); 589596c5ddc4Srjs case BI_OPCODE_QUIET_V2F16: 589696c5ddc4Srjs return bi_pack_fma_quiet_v2f16(I, src0, src1, src2, src3); 589796c5ddc4Srjs case BI_OPCODE_RROT_DOUBLE_I32: 589896c5ddc4Srjs return bi_pack_fma_rrot_double_i32(I, src0, src1, src2, src3); 589996c5ddc4Srjs case BI_OPCODE_RSHIFT_AND_I32: 590096c5ddc4Srjs return bi_pack_fma_rshift_and_i32(I, src0, src1, src2, src3); 590196c5ddc4Srjs case BI_OPCODE_RSHIFT_AND_V2I16: 590296c5ddc4Srjs return bi_pack_fma_rshift_and_v2i16(I, src0, src1, src2, src3); 590396c5ddc4Srjs case BI_OPCODE_RSHIFT_AND_V4I8: 590496c5ddc4Srjs return bi_pack_fma_rshift_and_v4i8(I, src0, src1, src2, src3); 590596c5ddc4Srjs case BI_OPCODE_RSHIFT_DOUBLE_I32: 590696c5ddc4Srjs return bi_pack_fma_rshift_double_i32(I, src0, src1, src2, src3); 590796c5ddc4Srjs case BI_OPCODE_RSHIFT_OR_I32: 590896c5ddc4Srjs return bi_pack_fma_rshift_or_i32(I, src0, src1, src2, src3); 590996c5ddc4Srjs case BI_OPCODE_RSHIFT_OR_V2I16: 591096c5ddc4Srjs return bi_pack_fma_rshift_or_v2i16(I, src0, src1, src2, src3); 591196c5ddc4Srjs case BI_OPCODE_RSHIFT_OR_V4I8: 591296c5ddc4Srjs return bi_pack_fma_rshift_or_v4i8(I, src0, src1, src2, src3); 591396c5ddc4Srjs case BI_OPCODE_RSHIFT_XOR_I32: 591496c5ddc4Srjs return bi_pack_fma_rshift_xor_i32(I, src0, src1, src2, src3); 591596c5ddc4Srjs case BI_OPCODE_RSHIFT_XOR_V2I16: 591696c5ddc4Srjs return bi_pack_fma_rshift_xor_v2i16(I, src0, src1, src2, src3); 591796c5ddc4Srjs case 
BI_OPCODE_RSHIFT_XOR_V4I8: 591896c5ddc4Srjs return bi_pack_fma_rshift_xor_v4i8(I, src0, src1, src2, src3); 591996c5ddc4Srjs case BI_OPCODE_S16_TO_S32: 592096c5ddc4Srjs return bi_pack_fma_s16_to_s32(I, src0, src1, src2, src3); 592196c5ddc4Srjs case BI_OPCODE_S8_TO_S32: 592296c5ddc4Srjs return bi_pack_fma_s8_to_s32(I, src0, src1, src2, src3); 592396c5ddc4Srjs case BI_OPCODE_SEG_ADD: 592496c5ddc4Srjs return bi_pack_fma_seg_add(I, src0, src1, src2, src3); 592596c5ddc4Srjs case BI_OPCODE_SHADDXL_I64: 592696c5ddc4Srjs return bi_pack_fma_shaddxl_i64(I, src0, src1, src2, src3); 592796c5ddc4Srjs case BI_OPCODE_SHADDXL_S32: 592896c5ddc4Srjs return bi_pack_fma_shaddxl_s32(I, src0, src1, src2, src3); 592996c5ddc4Srjs case BI_OPCODE_SHADDXL_U32: 593096c5ddc4Srjs return bi_pack_fma_shaddxl_u32(I, src0, src1, src2, src3); 593196c5ddc4Srjs case BI_OPCODE_U16_TO_U32: 593296c5ddc4Srjs return bi_pack_fma_u16_to_u32(I, src0, src1, src2, src3); 593396c5ddc4Srjs case BI_OPCODE_U8_TO_U32: 593496c5ddc4Srjs return bi_pack_fma_u8_to_u32(I, src0, src1, src2, src3); 593596c5ddc4Srjs case BI_OPCODE_V2F32_TO_V2F16: 593696c5ddc4Srjs return bi_pack_fma_v2f32_to_v2f16(I, src0, src1, src2, src3); 593796c5ddc4Srjs case BI_OPCODE_VN_ASST1_F16: 593896c5ddc4Srjs return bi_pack_fma_vn_asst1_f16(I, src0, src1, src2, src3); 593996c5ddc4Srjs case BI_OPCODE_VN_ASST1_F32: 594096c5ddc4Srjs return bi_pack_fma_vn_asst1_f32(I, src0, src1, src2, src3); 594196c5ddc4Srjs default: 594296c5ddc4Srjs#ifndef NDEBUG 594396c5ddc4Srjs bi_print_instr(I, stderr); 594496c5ddc4Srjs#endif 594596c5ddc4Srjs unreachable("Cannot pack instruction as *"); 594696c5ddc4Srjs } 594796c5ddc4Srjs} 594896c5ddc4Srjs 594996c5ddc4Srjsunsigned 595096c5ddc4Srjsbi_pack_add(bi_instr *I, 595196c5ddc4Srjs enum bifrost_packed_src src0, 595296c5ddc4Srjs enum bifrost_packed_src src1, 595396c5ddc4Srjs enum bifrost_packed_src src2, 595496c5ddc4Srjs enum bifrost_packed_src src3) 595596c5ddc4Srjs{ 595696c5ddc4Srjs if (!I) 595796c5ddc4Srjs return 
bi_pack_add_nop(I, src0, src1, src2, src3); 595896c5ddc4Srjs 595996c5ddc4Srjs switch (I->op) { 596096c5ddc4Srjs case BI_OPCODE_ACMPSTORE_I32: 596196c5ddc4Srjs return bi_pack_add_acmpstore_i32(I, src0, src1, src2, src3); 596296c5ddc4Srjs case BI_OPCODE_ACMPSTORE_I64: 596396c5ddc4Srjs return bi_pack_add_acmpstore_i64(I, src0, src1, src2, src3); 596496c5ddc4Srjs case BI_OPCODE_ACMPXCHG_I32: 596596c5ddc4Srjs return bi_pack_add_acmpxchg_i32(I, src0, src1, src2, src3); 596696c5ddc4Srjs case BI_OPCODE_ACMPXCHG_I64: 596796c5ddc4Srjs return bi_pack_add_acmpxchg_i64(I, src0, src1, src2, src3); 596896c5ddc4Srjs case BI_OPCODE_ATEST: 596996c5ddc4Srjs return bi_pack_add_atest(I, src0, src1, src2, src3); 597096c5ddc4Srjs case BI_OPCODE_ATOM_CX: 597196c5ddc4Srjs return bi_pack_add_atom_cx(I, src0, src1, src2, src3); 597296c5ddc4Srjs case BI_OPCODE_AXCHG_I32: 597396c5ddc4Srjs return bi_pack_add_axchg_i32(I, src0, src1, src2, src3); 597496c5ddc4Srjs case BI_OPCODE_AXCHG_I64: 597596c5ddc4Srjs return bi_pack_add_axchg_i64(I, src0, src1, src2, src3); 597696c5ddc4Srjs case BI_OPCODE_BARRIER: 597796c5ddc4Srjs return bi_pack_add_barrier(I, src0, src1, src2, src3); 597896c5ddc4Srjs case BI_OPCODE_BLEND: 597996c5ddc4Srjs return bi_pack_add_blend(I, src0, src1, src2, src3); 598096c5ddc4Srjs case BI_OPCODE_BRANCH_F16: 598196c5ddc4Srjs return bi_pack_add_branch_f16(I, src0, src1, src2, src3); 598296c5ddc4Srjs case BI_OPCODE_BRANCH_F32: 598396c5ddc4Srjs return bi_pack_add_branch_f32(I, src0, src1, src2, src3); 598496c5ddc4Srjs case BI_OPCODE_BRANCH_I16: 598596c5ddc4Srjs return bi_pack_add_branch_i16(I, src0, src1, src2, src3); 598696c5ddc4Srjs case BI_OPCODE_BRANCH_I32: 598796c5ddc4Srjs return bi_pack_add_branch_i32(I, src0, src1, src2, src3); 598896c5ddc4Srjs case BI_OPCODE_BRANCH_S16: 598996c5ddc4Srjs return bi_pack_add_branch_s16(I, src0, src1, src2, src3); 599096c5ddc4Srjs case BI_OPCODE_BRANCH_S32: 599196c5ddc4Srjs return bi_pack_add_branch_s32(I, src0, src1, src2, src3); 599296c5ddc4Srjs 
case BI_OPCODE_BRANCH_U16: 599396c5ddc4Srjs return bi_pack_add_branch_u16(I, src0, src1, src2, src3); 599496c5ddc4Srjs case BI_OPCODE_BRANCH_U32: 599596c5ddc4Srjs return bi_pack_add_branch_u32(I, src0, src1, src2, src3); 599696c5ddc4Srjs case BI_OPCODE_BRANCHC_I16: 599796c5ddc4Srjs return bi_pack_add_branchc_i16(I, src0, src1, src2, src3); 599896c5ddc4Srjs case BI_OPCODE_BRANCHC_I32: 599996c5ddc4Srjs return bi_pack_add_branchc_i32(I, src0, src1, src2, src3); 600096c5ddc4Srjs case BI_OPCODE_BRANCHZ_F16: 600196c5ddc4Srjs return bi_pack_add_branchz_f16(I, src0, src1, src2, src3); 600296c5ddc4Srjs case BI_OPCODE_BRANCHZ_F32: 600396c5ddc4Srjs return bi_pack_add_branchz_f32(I, src0, src1, src2, src3); 600496c5ddc4Srjs case BI_OPCODE_BRANCHZ_I16: 600596c5ddc4Srjs return bi_pack_add_branchz_i16(I, src0, src1, src2, src3); 600696c5ddc4Srjs case BI_OPCODE_BRANCHZ_I32: 600796c5ddc4Srjs return bi_pack_add_branchz_i32(I, src0, src1, src2, src3); 600896c5ddc4Srjs case BI_OPCODE_BRANCHZ_S16: 600996c5ddc4Srjs return bi_pack_add_branchz_s16(I, src0, src1, src2, src3); 601096c5ddc4Srjs case BI_OPCODE_BRANCHZ_S32: 601196c5ddc4Srjs return bi_pack_add_branchz_s32(I, src0, src1, src2, src3); 601296c5ddc4Srjs case BI_OPCODE_BRANCHZ_U16: 601396c5ddc4Srjs return bi_pack_add_branchz_u16(I, src0, src1, src2, src3); 601496c5ddc4Srjs case BI_OPCODE_BRANCHZ_U32: 601596c5ddc4Srjs return bi_pack_add_branchz_u32(I, src0, src1, src2, src3); 601696c5ddc4Srjs case BI_OPCODE_BRANCH_DIVERG: 601796c5ddc4Srjs return bi_pack_add_branch_diverg(I, src0, src1, src2, src3); 601896c5ddc4Srjs case BI_OPCODE_BRANCH_LOWBITS_F32: 601996c5ddc4Srjs return bi_pack_add_branch_lowbits_f32(I, src0, src1, src2, src3); 602096c5ddc4Srjs case BI_OPCODE_BRANCH_NO_DIVERG: 602196c5ddc4Srjs return bi_pack_add_branch_no_diverg(I, src0, src1, src2, src3); 602296c5ddc4Srjs case BI_OPCODE_CLPER_I32: 602396c5ddc4Srjs return bi_pack_add_clper_i32(I, src0, src1, src2, src3); 602496c5ddc4Srjs case BI_OPCODE_CLPER_V6_I32: 
602596c5ddc4Srjs return bi_pack_add_clper_v6_i32(I, src0, src1, src2, src3); 602696c5ddc4Srjs case BI_OPCODE_CUBEFACE2: 602796c5ddc4Srjs return bi_pack_add_cubeface2(I, src0, src1, src2, src3); 602896c5ddc4Srjs case BI_OPCODE_CUBE_SSEL: 602996c5ddc4Srjs return bi_pack_add_cube_ssel(I, src0, src1, src2, src3); 603096c5ddc4Srjs case BI_OPCODE_CUBE_TSEL: 603196c5ddc4Srjs return bi_pack_add_cube_tsel(I, src0, src1, src2, src3); 603296c5ddc4Srjs case BI_OPCODE_DISCARD_F32: 603396c5ddc4Srjs return bi_pack_add_discard_f32(I, src0, src1, src2, src3); 603496c5ddc4Srjs case BI_OPCODE_F16_TO_F32: 603596c5ddc4Srjs return bi_pack_add_f16_to_f32(I, src0, src1, src2, src3); 603696c5ddc4Srjs case BI_OPCODE_F16_TO_S32: 603796c5ddc4Srjs return bi_pack_add_f16_to_s32(I, src0, src1, src2, src3); 603896c5ddc4Srjs case BI_OPCODE_F16_TO_U32: 603996c5ddc4Srjs return bi_pack_add_f16_to_u32(I, src0, src1, src2, src3); 604096c5ddc4Srjs case BI_OPCODE_F32_TO_S32: 604196c5ddc4Srjs return bi_pack_add_f32_to_s32(I, src0, src1, src2, src3); 604296c5ddc4Srjs case BI_OPCODE_F32_TO_U32: 604396c5ddc4Srjs return bi_pack_add_f32_to_u32(I, src0, src1, src2, src3); 604496c5ddc4Srjs case BI_OPCODE_FADD_F32: 604596c5ddc4Srjs return bi_pack_add_fadd_f32(I, src0, src1, src2, src3); 604696c5ddc4Srjs case BI_OPCODE_FADD_V2F16: 604796c5ddc4Srjs return bi_pack_add_fadd_v2f16(I, src0, src1, src2, src3); 604896c5ddc4Srjs case BI_OPCODE_FADD_RSCALE_F32: 604996c5ddc4Srjs return bi_pack_add_fadd_rscale_f32(I, src0, src1, src2, src3); 605096c5ddc4Srjs case BI_OPCODE_FCMP_F32: 605196c5ddc4Srjs return bi_pack_add_fcmp_f32(I, src0, src1, src2, src3); 605296c5ddc4Srjs case BI_OPCODE_FCMP_V2F16: 605396c5ddc4Srjs return bi_pack_add_fcmp_v2f16(I, src0, src1, src2, src3); 605496c5ddc4Srjs case BI_OPCODE_FCOS_TABLE_U6: 605596c5ddc4Srjs return bi_pack_add_fcos_table_u6(I, src0, src1, src2, src3); 605696c5ddc4Srjs case BI_OPCODE_FEXP_F32: 605796c5ddc4Srjs return bi_pack_add_fexp_f32(I, src0, src1, src2, src3); 605896c5ddc4Srjs 
case BI_OPCODE_FEXP_TABLE_U4: 605996c5ddc4Srjs return bi_pack_add_fexp_table_u4(I, src0, src1, src2, src3); 606096c5ddc4Srjs case BI_OPCODE_FLOGD_F32: 606196c5ddc4Srjs return bi_pack_add_flogd_f32(I, src0, src1, src2, src3); 606296c5ddc4Srjs case BI_OPCODE_FLOG_TABLE_F32: 606396c5ddc4Srjs return bi_pack_add_flog_table_f32(I, src0, src1, src2, src3); 606496c5ddc4Srjs case BI_OPCODE_FMAX_F32: 606596c5ddc4Srjs return bi_pack_add_fmax_f32(I, src0, src1, src2, src3); 606696c5ddc4Srjs case BI_OPCODE_FMAX_V2F16: 606796c5ddc4Srjs return bi_pack_add_fmax_v2f16(I, src0, src1, src2, src3); 606896c5ddc4Srjs case BI_OPCODE_FMIN_F32: 606996c5ddc4Srjs return bi_pack_add_fmin_f32(I, src0, src1, src2, src3); 607096c5ddc4Srjs case BI_OPCODE_FMIN_V2F16: 607196c5ddc4Srjs return bi_pack_add_fmin_v2f16(I, src0, src1, src2, src3); 607296c5ddc4Srjs case BI_OPCODE_FPCLASS_F16: 607396c5ddc4Srjs return bi_pack_add_fpclass_f16(I, src0, src1, src2, src3); 607496c5ddc4Srjs case BI_OPCODE_FPCLASS_F32: 607596c5ddc4Srjs return bi_pack_add_fpclass_f32(I, src0, src1, src2, src3); 607696c5ddc4Srjs case BI_OPCODE_FPOW_SC_APPLY: 607796c5ddc4Srjs return bi_pack_add_fpow_sc_apply(I, src0, src1, src2, src3); 607896c5ddc4Srjs case BI_OPCODE_FPOW_SC_DET_F16: 607996c5ddc4Srjs return bi_pack_add_fpow_sc_det_f16(I, src0, src1, src2, src3); 608096c5ddc4Srjs case BI_OPCODE_FPOW_SC_DET_F32: 608196c5ddc4Srjs return bi_pack_add_fpow_sc_det_f32(I, src0, src1, src2, src3); 608296c5ddc4Srjs case BI_OPCODE_FRCP_F16: 608396c5ddc4Srjs return bi_pack_add_frcp_f16(I, src0, src1, src2, src3); 608496c5ddc4Srjs case BI_OPCODE_FRCP_F32: 608596c5ddc4Srjs return bi_pack_add_frcp_f32(I, src0, src1, src2, src3); 608696c5ddc4Srjs case BI_OPCODE_FRCP_APPROX_F32: 608796c5ddc4Srjs return bi_pack_add_frcp_approx_f32(I, src0, src1, src2, src3); 608896c5ddc4Srjs case BI_OPCODE_FREXPE_F32: 608996c5ddc4Srjs return bi_pack_add_frexpe_f32(I, src0, src1, src2, src3); 609096c5ddc4Srjs case BI_OPCODE_FREXPE_V2F16: 609196c5ddc4Srjs return 
bi_pack_add_frexpe_v2f16(I, src0, src1, src2, src3); 609296c5ddc4Srjs case BI_OPCODE_FREXPM_F32: 609396c5ddc4Srjs return bi_pack_add_frexpm_f32(I, src0, src1, src2, src3); 609496c5ddc4Srjs case BI_OPCODE_FREXPM_V2F16: 609596c5ddc4Srjs return bi_pack_add_frexpm_v2f16(I, src0, src1, src2, src3); 609696c5ddc4Srjs case BI_OPCODE_FROUND_F32: 609796c5ddc4Srjs return bi_pack_add_fround_f32(I, src0, src1, src2, src3); 609896c5ddc4Srjs case BI_OPCODE_FROUND_V2F16: 609996c5ddc4Srjs return bi_pack_add_fround_v2f16(I, src0, src1, src2, src3); 610096c5ddc4Srjs case BI_OPCODE_FRSQ_F16: 610196c5ddc4Srjs return bi_pack_add_frsq_f16(I, src0, src1, src2, src3); 610296c5ddc4Srjs case BI_OPCODE_FRSQ_F32: 610396c5ddc4Srjs return bi_pack_add_frsq_f32(I, src0, src1, src2, src3); 610496c5ddc4Srjs case BI_OPCODE_FRSQ_APPROX_F32: 610596c5ddc4Srjs return bi_pack_add_frsq_approx_f32(I, src0, src1, src2, src3); 610696c5ddc4Srjs case BI_OPCODE_FSINCOS_OFFSET_U6: 610796c5ddc4Srjs return bi_pack_add_fsincos_offset_u6(I, src0, src1, src2, src3); 610896c5ddc4Srjs case BI_OPCODE_FSIN_TABLE_U6: 610996c5ddc4Srjs return bi_pack_add_fsin_table_u6(I, src0, src1, src2, src3); 611096c5ddc4Srjs case BI_OPCODE_HADD_S32: 611196c5ddc4Srjs return bi_pack_add_hadd_s32(I, src0, src1, src2, src3); 611296c5ddc4Srjs case BI_OPCODE_HADD_U32: 611396c5ddc4Srjs return bi_pack_add_hadd_u32(I, src0, src1, src2, src3); 611496c5ddc4Srjs case BI_OPCODE_HADD_V2S16: 611596c5ddc4Srjs return bi_pack_add_hadd_v2s16(I, src0, src1, src2, src3); 611696c5ddc4Srjs case BI_OPCODE_HADD_V2U16: 611796c5ddc4Srjs return bi_pack_add_hadd_v2u16(I, src0, src1, src2, src3); 611896c5ddc4Srjs case BI_OPCODE_HADD_V4S8: 611996c5ddc4Srjs return bi_pack_add_hadd_v4s8(I, src0, src1, src2, src3); 612096c5ddc4Srjs case BI_OPCODE_HADD_V4U8: 612196c5ddc4Srjs return bi_pack_add_hadd_v4u8(I, src0, src1, src2, src3); 612296c5ddc4Srjs case BI_OPCODE_IABS_S32: 612396c5ddc4Srjs return bi_pack_add_iabs_s32(I, src0, src1, src2, src3); 612496c5ddc4Srjs case 
BI_OPCODE_IABS_V2S16: 612596c5ddc4Srjs return bi_pack_add_iabs_v2s16(I, src0, src1, src2, src3); 612696c5ddc4Srjs case BI_OPCODE_IABS_V4S8: 612796c5ddc4Srjs return bi_pack_add_iabs_v4s8(I, src0, src1, src2, src3); 612896c5ddc4Srjs case BI_OPCODE_IADD_S32: 612996c5ddc4Srjs return bi_pack_add_iadd_s32(I, src0, src1, src2, src3); 613096c5ddc4Srjs case BI_OPCODE_IADD_U32: 613196c5ddc4Srjs return bi_pack_add_iadd_u32(I, src0, src1, src2, src3); 613296c5ddc4Srjs case BI_OPCODE_IADD_V2S16: 613396c5ddc4Srjs return bi_pack_add_iadd_v2s16(I, src0, src1, src2, src3); 613496c5ddc4Srjs case BI_OPCODE_IADD_V2U16: 613596c5ddc4Srjs return bi_pack_add_iadd_v2u16(I, src0, src1, src2, src3); 613696c5ddc4Srjs case BI_OPCODE_IADD_V4S8: 613796c5ddc4Srjs return bi_pack_add_iadd_v4s8(I, src0, src1, src2, src3); 613896c5ddc4Srjs case BI_OPCODE_IADD_V4U8: 613996c5ddc4Srjs return bi_pack_add_iadd_v4u8(I, src0, src1, src2, src3); 614096c5ddc4Srjs case BI_OPCODE_ICMP_I32: 614196c5ddc4Srjs return bi_pack_add_icmp_i32(I, src0, src1, src2, src3); 614296c5ddc4Srjs case BI_OPCODE_ICMP_S32: 614396c5ddc4Srjs return bi_pack_add_icmp_s32(I, src0, src1, src2, src3); 614496c5ddc4Srjs case BI_OPCODE_ICMP_U32: 614596c5ddc4Srjs return bi_pack_add_icmp_u32(I, src0, src1, src2, src3); 614696c5ddc4Srjs case BI_OPCODE_ICMP_V2I16: 614796c5ddc4Srjs return bi_pack_add_icmp_v2i16(I, src0, src1, src2, src3); 614896c5ddc4Srjs case BI_OPCODE_ICMP_V2S16: 614996c5ddc4Srjs return bi_pack_add_icmp_v2s16(I, src0, src1, src2, src3); 615096c5ddc4Srjs case BI_OPCODE_ICMP_V2U16: 615196c5ddc4Srjs return bi_pack_add_icmp_v2u16(I, src0, src1, src2, src3); 615296c5ddc4Srjs case BI_OPCODE_ICMP_V4I8: 615396c5ddc4Srjs return bi_pack_add_icmp_v4i8(I, src0, src1, src2, src3); 615496c5ddc4Srjs case BI_OPCODE_ICMP_V4S8: 615596c5ddc4Srjs return bi_pack_add_icmp_v4s8(I, src0, src1, src2, src3); 615696c5ddc4Srjs case BI_OPCODE_ICMP_V4U8: 615796c5ddc4Srjs return bi_pack_add_icmp_v4u8(I, src0, src1, src2, src3); 615896c5ddc4Srjs case 
BI_OPCODE_ICMPF_I32: 615996c5ddc4Srjs return bi_pack_add_icmpf_i32(I, src0, src1, src2, src3); 616096c5ddc4Srjs case BI_OPCODE_ICMPI_I32: 616196c5ddc4Srjs return bi_pack_add_icmpi_i32(I, src0, src1, src2, src3); 616296c5ddc4Srjs case BI_OPCODE_ICMPI_S32: 616396c5ddc4Srjs return bi_pack_add_icmpi_s32(I, src0, src1, src2, src3); 616496c5ddc4Srjs case BI_OPCODE_ICMPI_U32: 616596c5ddc4Srjs return bi_pack_add_icmpi_u32(I, src0, src1, src2, src3); 616696c5ddc4Srjs case BI_OPCODE_ICMPM_I32: 616796c5ddc4Srjs return bi_pack_add_icmpm_i32(I, src0, src1, src2, src3); 616896c5ddc4Srjs case BI_OPCODE_ILOGB_F32: 616996c5ddc4Srjs return bi_pack_add_ilogb_f32(I, src0, src1, src2, src3); 617096c5ddc4Srjs case BI_OPCODE_ILOGB_V2F16: 617196c5ddc4Srjs return bi_pack_add_ilogb_v2f16(I, src0, src1, src2, src3); 617296c5ddc4Srjs case BI_OPCODE_IMOV_FMA: 617396c5ddc4Srjs return bi_pack_add_imov_fma(I, src0, src1, src2, src3); 617496c5ddc4Srjs case BI_OPCODE_ISUB_S32: 617596c5ddc4Srjs return bi_pack_add_isub_s32(I, src0, src1, src2, src3); 617696c5ddc4Srjs case BI_OPCODE_ISUB_U32: 617796c5ddc4Srjs return bi_pack_add_isub_u32(I, src0, src1, src2, src3); 617896c5ddc4Srjs case BI_OPCODE_ISUB_V2S16: 617996c5ddc4Srjs return bi_pack_add_isub_v2s16(I, src0, src1, src2, src3); 618096c5ddc4Srjs case BI_OPCODE_ISUB_V2U16: 618196c5ddc4Srjs return bi_pack_add_isub_v2u16(I, src0, src1, src2, src3); 618296c5ddc4Srjs case BI_OPCODE_ISUB_V4S8: 618396c5ddc4Srjs return bi_pack_add_isub_v4s8(I, src0, src1, src2, src3); 618496c5ddc4Srjs case BI_OPCODE_ISUB_V4U8: 618596c5ddc4Srjs return bi_pack_add_isub_v4u8(I, src0, src1, src2, src3); 618696c5ddc4Srjs case BI_OPCODE_JUMP: 618796c5ddc4Srjs return bi_pack_add_jump(I, src0, src1, src2, src3); 618896c5ddc4Srjs case BI_OPCODE_LDEXP_F32: 618996c5ddc4Srjs return bi_pack_add_ldexp_f32(I, src0, src1, src2, src3); 619096c5ddc4Srjs case BI_OPCODE_LDEXP_V2F16: 619196c5ddc4Srjs return bi_pack_add_ldexp_v2f16(I, src0, src1, src2, src3); 619296c5ddc4Srjs case 
BI_OPCODE_LD_ATTR: 619396c5ddc4Srjs return bi_pack_add_ld_attr(I, src0, src1, src2, src3); 619496c5ddc4Srjs case BI_OPCODE_LD_ATTR_IMM: 619596c5ddc4Srjs return bi_pack_add_ld_attr_imm(I, src0, src1, src2, src3); 619696c5ddc4Srjs case BI_OPCODE_LD_ATTR_TEX: 619796c5ddc4Srjs return bi_pack_add_ld_attr_tex(I, src0, src1, src2, src3); 619896c5ddc4Srjs case BI_OPCODE_LD_CVT: 619996c5ddc4Srjs return bi_pack_add_ld_cvt(I, src0, src1, src2, src3); 620096c5ddc4Srjs case BI_OPCODE_LD_GCLK_U64: 620196c5ddc4Srjs return bi_pack_add_ld_gclk_u64(I, src0, src1, src2, src3); 620296c5ddc4Srjs case BI_OPCODE_LD_TILE: 620396c5ddc4Srjs return bi_pack_add_ld_tile(I, src0, src1, src2, src3); 620496c5ddc4Srjs case BI_OPCODE_LD_VAR: 620596c5ddc4Srjs return bi_pack_add_ld_var(I, src0, src1, src2, src3); 620696c5ddc4Srjs case BI_OPCODE_LD_VAR_FLAT: 620796c5ddc4Srjs return bi_pack_add_ld_var_flat(I, src0, src1, src2, src3); 620896c5ddc4Srjs case BI_OPCODE_LD_VAR_FLAT_IMM: 620996c5ddc4Srjs return bi_pack_add_ld_var_flat_imm(I, src0, src1, src2, src3); 621096c5ddc4Srjs case BI_OPCODE_LD_VAR_IMM: 621196c5ddc4Srjs return bi_pack_add_ld_var_imm(I, src0, src1, src2, src3); 621296c5ddc4Srjs case BI_OPCODE_LD_VAR_SPECIAL: 621396c5ddc4Srjs return bi_pack_add_ld_var_special(I, src0, src1, src2, src3); 621496c5ddc4Srjs case BI_OPCODE_LEA_ATTR: 621596c5ddc4Srjs return bi_pack_add_lea_attr(I, src0, src1, src2, src3); 621696c5ddc4Srjs case BI_OPCODE_LEA_ATTR_IMM: 621796c5ddc4Srjs return bi_pack_add_lea_attr_imm(I, src0, src1, src2, src3); 621896c5ddc4Srjs case BI_OPCODE_LEA_ATTR_TEX: 621996c5ddc4Srjs return bi_pack_add_lea_attr_tex(I, src0, src1, src2, src3); 622096c5ddc4Srjs case BI_OPCODE_LEA_TEX: 622196c5ddc4Srjs return bi_pack_add_lea_tex(I, src0, src1, src2, src3); 622296c5ddc4Srjs case BI_OPCODE_LEA_TEX_IMM: 622396c5ddc4Srjs return bi_pack_add_lea_tex_imm(I, src0, src1, src2, src3); 622496c5ddc4Srjs case BI_OPCODE_LOAD_I128: 622596c5ddc4Srjs return bi_pack_add_load_i128(I, src0, src1, src2, src3); 
622696c5ddc4Srjs case BI_OPCODE_LOAD_I16: 622796c5ddc4Srjs return bi_pack_add_load_i16(I, src0, src1, src2, src3); 622896c5ddc4Srjs case BI_OPCODE_LOAD_I24: 622996c5ddc4Srjs return bi_pack_add_load_i24(I, src0, src1, src2, src3); 623096c5ddc4Srjs case BI_OPCODE_LOAD_I32: 623196c5ddc4Srjs return bi_pack_add_load_i32(I, src0, src1, src2, src3); 623296c5ddc4Srjs case BI_OPCODE_LOAD_I48: 623396c5ddc4Srjs return bi_pack_add_load_i48(I, src0, src1, src2, src3); 623496c5ddc4Srjs case BI_OPCODE_LOAD_I64: 623596c5ddc4Srjs return bi_pack_add_load_i64(I, src0, src1, src2, src3); 623696c5ddc4Srjs case BI_OPCODE_LOAD_I8: 623796c5ddc4Srjs return bi_pack_add_load_i8(I, src0, src1, src2, src3); 623896c5ddc4Srjs case BI_OPCODE_LOAD_I96: 623996c5ddc4Srjs return bi_pack_add_load_i96(I, src0, src1, src2, src3); 624096c5ddc4Srjs case BI_OPCODE_LOGB_F32: 624196c5ddc4Srjs return bi_pack_add_logb_f32(I, src0, src1, src2, src3); 624296c5ddc4Srjs case BI_OPCODE_LOGB_V2F16: 624396c5ddc4Srjs return bi_pack_add_logb_v2f16(I, src0, src1, src2, src3); 624496c5ddc4Srjs case BI_OPCODE_MKVEC_V2I16: 624596c5ddc4Srjs return bi_pack_add_mkvec_v2i16(I, src0, src1, src2, src3); 624696c5ddc4Srjs case BI_OPCODE_MOV_I32: 624796c5ddc4Srjs return bi_pack_add_mov_i32(I, src0, src1, src2, src3); 624896c5ddc4Srjs case BI_OPCODE_MUX_I32: 624996c5ddc4Srjs return bi_pack_add_mux_i32(I, src0, src1, src2, src3); 625096c5ddc4Srjs case BI_OPCODE_MUX_V2I16: 625196c5ddc4Srjs return bi_pack_add_mux_v2i16(I, src0, src1, src2, src3); 625296c5ddc4Srjs case BI_OPCODE_MUX_V4I8: 625396c5ddc4Srjs return bi_pack_add_mux_v4i8(I, src0, src1, src2, src3); 625496c5ddc4Srjs case BI_OPCODE_NOP: 625596c5ddc4Srjs return bi_pack_add_nop(I, src0, src1, src2, src3); 625696c5ddc4Srjs case BI_OPCODE_QUIET_F32: 625796c5ddc4Srjs return bi_pack_add_quiet_f32(I, src0, src1, src2, src3); 625896c5ddc4Srjs case BI_OPCODE_QUIET_V2F16: 625996c5ddc4Srjs return bi_pack_add_quiet_v2f16(I, src0, src1, src2, src3); 626096c5ddc4Srjs case 
BI_OPCODE_S16_TO_F32: 626196c5ddc4Srjs return bi_pack_add_s16_to_f32(I, src0, src1, src2, src3); 626296c5ddc4Srjs case BI_OPCODE_S16_TO_S32: 626396c5ddc4Srjs return bi_pack_add_s16_to_s32(I, src0, src1, src2, src3); 626496c5ddc4Srjs case BI_OPCODE_S32_TO_F32: 626596c5ddc4Srjs return bi_pack_add_s32_to_f32(I, src0, src1, src2, src3); 626696c5ddc4Srjs case BI_OPCODE_S8_TO_F32: 626796c5ddc4Srjs return bi_pack_add_s8_to_f32(I, src0, src1, src2, src3); 626896c5ddc4Srjs case BI_OPCODE_S8_TO_S32: 626996c5ddc4Srjs return bi_pack_add_s8_to_s32(I, src0, src1, src2, src3); 627096c5ddc4Srjs case BI_OPCODE_SEG_ADD: 627196c5ddc4Srjs return bi_pack_add_seg_add(I, src0, src1, src2, src3); 627296c5ddc4Srjs case BI_OPCODE_SHADDXH_I32: 627396c5ddc4Srjs return bi_pack_add_shaddxh_i32(I, src0, src1, src2, src3); 627496c5ddc4Srjs case BI_OPCODE_SHIFT_DOUBLE_I32: 627596c5ddc4Srjs return bi_pack_add_shift_double_i32(I, src0, src1, src2, src3); 627696c5ddc4Srjs case BI_OPCODE_STORE_I128: 627796c5ddc4Srjs return bi_pack_add_store_i128(I, src0, src1, src2, src3); 627896c5ddc4Srjs case BI_OPCODE_STORE_I16: 627996c5ddc4Srjs return bi_pack_add_store_i16(I, src0, src1, src2, src3); 628096c5ddc4Srjs case BI_OPCODE_STORE_I24: 628196c5ddc4Srjs return bi_pack_add_store_i24(I, src0, src1, src2, src3); 628296c5ddc4Srjs case BI_OPCODE_STORE_I32: 628396c5ddc4Srjs return bi_pack_add_store_i32(I, src0, src1, src2, src3); 628496c5ddc4Srjs case BI_OPCODE_STORE_I48: 628596c5ddc4Srjs return bi_pack_add_store_i48(I, src0, src1, src2, src3); 628696c5ddc4Srjs case BI_OPCODE_STORE_I64: 628796c5ddc4Srjs return bi_pack_add_store_i64(I, src0, src1, src2, src3); 628896c5ddc4Srjs case BI_OPCODE_STORE_I8: 628996c5ddc4Srjs return bi_pack_add_store_i8(I, src0, src1, src2, src3); 629096c5ddc4Srjs case BI_OPCODE_STORE_I96: 629196c5ddc4Srjs return bi_pack_add_store_i96(I, src0, src1, src2, src3); 629296c5ddc4Srjs case BI_OPCODE_ST_CVT: 629396c5ddc4Srjs return bi_pack_add_st_cvt(I, src0, src1, src2, src3); 629496c5ddc4Srjs 
case BI_OPCODE_ST_TILE: 629596c5ddc4Srjs return bi_pack_add_st_tile(I, src0, src1, src2, src3); 629696c5ddc4Srjs case BI_OPCODE_SWZ_V2I16: 629796c5ddc4Srjs return bi_pack_add_swz_v2i16(I, src0, src1, src2, src3); 629896c5ddc4Srjs case BI_OPCODE_SWZ_V4I8: 629996c5ddc4Srjs return bi_pack_add_swz_v4i8(I, src0, src1, src2, src3); 630096c5ddc4Srjs case BI_OPCODE_TEXC: 630196c5ddc4Srjs return bi_pack_add_texc(I, src0, src1, src2, src3); 630296c5ddc4Srjs case BI_OPCODE_TEXS_2D_F16: 630396c5ddc4Srjs return bi_pack_add_texs_2d_f16(I, src0, src1, src2, src3); 630496c5ddc4Srjs case BI_OPCODE_TEXS_2D_F32: 630596c5ddc4Srjs return bi_pack_add_texs_2d_f32(I, src0, src1, src2, src3); 630696c5ddc4Srjs case BI_OPCODE_TEXS_CUBE_F16: 630796c5ddc4Srjs return bi_pack_add_texs_cube_f16(I, src0, src1, src2, src3); 630896c5ddc4Srjs case BI_OPCODE_TEXS_CUBE_F32: 630996c5ddc4Srjs return bi_pack_add_texs_cube_f32(I, src0, src1, src2, src3); 631096c5ddc4Srjs case BI_OPCODE_U16_TO_F32: 631196c5ddc4Srjs return bi_pack_add_u16_to_f32(I, src0, src1, src2, src3); 631296c5ddc4Srjs case BI_OPCODE_U16_TO_U32: 631396c5ddc4Srjs return bi_pack_add_u16_to_u32(I, src0, src1, src2, src3); 631496c5ddc4Srjs case BI_OPCODE_U32_TO_F32: 631596c5ddc4Srjs return bi_pack_add_u32_to_f32(I, src0, src1, src2, src3); 631696c5ddc4Srjs case BI_OPCODE_U8_TO_F32: 631796c5ddc4Srjs return bi_pack_add_u8_to_f32(I, src0, src1, src2, src3); 631896c5ddc4Srjs case BI_OPCODE_U8_TO_U32: 631996c5ddc4Srjs return bi_pack_add_u8_to_u32(I, src0, src1, src2, src3); 632096c5ddc4Srjs case BI_OPCODE_V2F16_TO_V2S16: 632196c5ddc4Srjs return bi_pack_add_v2f16_to_v2s16(I, src0, src1, src2, src3); 632296c5ddc4Srjs case BI_OPCODE_V2F16_TO_V2U16: 632396c5ddc4Srjs return bi_pack_add_v2f16_to_v2u16(I, src0, src1, src2, src3); 632496c5ddc4Srjs case BI_OPCODE_V2F32_TO_V2F16: 632596c5ddc4Srjs return bi_pack_add_v2f32_to_v2f16(I, src0, src1, src2, src3); 632696c5ddc4Srjs case BI_OPCODE_V2S16_TO_V2F16: 632796c5ddc4Srjs return 
bi_pack_add_v2s16_to_v2f16(I, src0, src1, src2, src3); 632896c5ddc4Srjs case BI_OPCODE_V2S8_TO_V2F16: 632996c5ddc4Srjs return bi_pack_add_v2s8_to_v2f16(I, src0, src1, src2, src3); 633096c5ddc4Srjs case BI_OPCODE_V2S8_TO_V2S16: 633196c5ddc4Srjs return bi_pack_add_v2s8_to_v2s16(I, src0, src1, src2, src3); 633296c5ddc4Srjs case BI_OPCODE_V2U16_TO_V2F16: 633396c5ddc4Srjs return bi_pack_add_v2u16_to_v2f16(I, src0, src1, src2, src3); 633496c5ddc4Srjs case BI_OPCODE_V2U8_TO_V2F16: 633596c5ddc4Srjs return bi_pack_add_v2u8_to_v2f16(I, src0, src1, src2, src3); 633696c5ddc4Srjs case BI_OPCODE_V2U8_TO_V2U16: 633796c5ddc4Srjs return bi_pack_add_v2u8_to_v2u16(I, src0, src1, src2, src3); 633896c5ddc4Srjs case BI_OPCODE_VAR_TEX_F16: 633996c5ddc4Srjs return bi_pack_add_var_tex_f16(I, src0, src1, src2, src3); 634096c5ddc4Srjs case BI_OPCODE_VAR_TEX_F32: 634196c5ddc4Srjs return bi_pack_add_var_tex_f32(I, src0, src1, src2, src3); 634296c5ddc4Srjs case BI_OPCODE_VN_ASST2_F32: 634396c5ddc4Srjs return bi_pack_add_vn_asst2_f32(I, src0, src1, src2, src3); 634496c5ddc4Srjs case BI_OPCODE_VN_ASST2_V2F16: 634596c5ddc4Srjs return bi_pack_add_vn_asst2_v2f16(I, src0, src1, src2, src3); 634696c5ddc4Srjs case BI_OPCODE_WMASK: 634796c5ddc4Srjs return bi_pack_add_wmask(I, src0, src1, src2, src3); 634896c5ddc4Srjs case BI_OPCODE_ZS_EMIT: 634996c5ddc4Srjs return bi_pack_add_zs_emit(I, src0, src1, src2, src3); 635096c5ddc4Srjs default: 635196c5ddc4Srjs#ifndef NDEBUG 635296c5ddc4Srjs bi_print_instr(I, stderr); 635396c5ddc4Srjs#endif 635496c5ddc4Srjs unreachable("Cannot pack instruction as +"); 635596c5ddc4Srjs } 635696c5ddc4Srjs} 635796c5ddc4Srjs 6358