1/* 2 * Copyright (C) 2020 Collabora, Ltd. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 
22 */ 23 24/* Autogenerated file, do not edit */ 25 26#include "compiler.h" 27static inline unsigned 28bi_pack_fma_arshift_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 29{ 30 assert((1 << src1) & 0x8); 31 static uint8_t lane2_table[] = { ~0, ~0, ~0, ~0, 0, 1, 2, 3, ~0, ~0, ~0, ~0, ~0 }; 32 assert(I->src[2].swizzle < 13); 33 unsigned lane2 = lane2_table[I->src[2].swizzle]; 34 assert(lane2 < 4); 35 return 0x335018 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (lane2 << 9); 36} 37 38static inline unsigned 39bi_pack_fma_arshift_v2i16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 40{ 41 assert((1 << src1) & 0x8); 42 static uint8_t lanes2_table[] = { ~0, ~0, ~0, ~0, 0, 1, 2, 3, 4, 5, ~0, ~0, 6 }; 43 assert(I->src[2].swizzle < 13); 44 unsigned lanes2 = lanes2_table[I->src[2].swizzle]; 45 assert(lanes2 < 8); 46 if ((lanes2 == 0) || (lanes2 == 1) || (lanes2 == 2) || (lanes2 == 3)) { 47 unsigned derived_9 = 0; 48 if (lanes2 == 0) derived_9 = 0; 49 else if (lanes2 == 1) derived_9 = 1; 50 else if (lanes2 == 2) derived_9 = 2; 51 else if (lanes2 == 3) derived_9 = 3; 52 else unreachable("No pattern match at pos 9"); 53 54 return 0x334818 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_9 << 9); 55 } else if ((lanes2 == 4) || (lanes2 == 5) || (lanes2 == 6)) { 56 unsigned derived_9 = 0; 57 if (lanes2 == 4) derived_9 = 1; 58 else if (lanes2 == 5) derived_9 = 2; 59 else if (lanes2 == 6) derived_9 = 3; 60 else unreachable("No pattern match at pos 9"); 61 62 return 0x335818 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_9 << 9); 63 } else { 64 unreachable("No matching state found in fma_arshift_v2i16"); 65 } 66} 67 68static inline unsigned 69bi_pack_fma_arshift_v4i8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 70{ 
71 assert((1 << src1) & 0x8); 72 static uint8_t lanes2_table[] = { ~0, 0, ~0, ~0, 1, 2, 3, 4, ~0, ~0, ~0, ~0, ~0 }; 73 assert(I->src[2].swizzle < 13); 74 unsigned lanes2 = lanes2_table[I->src[2].swizzle]; 75 assert(lanes2 < 8); 76 if (lanes2 != 0) { 77 unsigned derived_9 = 0; 78 if (lanes2 == 1) derived_9 = 0; 79 else if (lanes2 == 2) derived_9 = 1; 80 else if (lanes2 == 3) derived_9 = 2; 81 else if (lanes2 == 4) derived_9 = 3; 82 else unreachable("No pattern match at pos 9"); 83 84 return 0x334018 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_9 << 9); 85 } else if (lanes2 == 0) { 86 return 0x335818 | (src0 << 0) | (src1 << 3) | (src2 << 6); 87 } else { 88 unreachable("No matching state found in fma_arshift_v4i8"); 89 } 90} 91 92static inline unsigned 93bi_pack_fma_arshift_double_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 94{ 95 unsigned bytes2 = I->bytes2; 96 assert(bytes2 < 2); 97 static uint8_t lane2_table[] = { ~0, ~0, ~0, ~0, 0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0 }; 98 assert(I->src[2].swizzle < 13); 99 unsigned lane2 = lane2_table[I->src[2].swizzle]; 100 assert(lane2 < 2); 101 unsigned result_word = I->result_word; 102 assert(result_word < 2); 103 return 0x33e000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (bytes2 << 9) | (lane2 << 10) | (result_word << 11); 104} 105 106static inline unsigned 107bi_pack_fma_atom_c_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 108{ 109 assert((1 << src0) & 0xf3); 110 assert((1 << src1) & 0xf3); 111 assert((1 << src2) & 0xf7); 112 static uint8_t atom_opc_table[] = { 0, 2, 8, 9, 10, 11, 12, 13, 14, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 113 assert(I->atom_opc < 16); 114 unsigned atom_opc = atom_opc_table[I->atom_opc]; 115 assert(atom_opc < 16); 116 return 0x2f4000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (atom_opc << 9); 117} 118 119static inline 
unsigned 120bi_pack_fma_atom_c_i64(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 121{ 122 assert((1 << src0) & 0xf3); 123 assert((1 << src1) & 0xf3); 124 assert((1 << src2) & 0xf7); 125 static uint8_t atom_opc_table[] = { 3, 2, 8, 9, 10, 11, 12, 13, 14, 0, 1, ~0, ~0, ~0, ~0, ~0 }; 126 assert(I->atom_opc < 16); 127 unsigned atom_opc = atom_opc_table[I->atom_opc]; 128 assert(atom_opc < 16); 129 return 0x2f0000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (atom_opc << 9); 130} 131 132static inline unsigned 133bi_pack_fma_atom_c1_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 134{ 135 assert((1 << src0) & 0xf3); 136 assert((1 << src1) & 0xf3); 137 static uint8_t atom_opc_table[] = { 5, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, 0, 1, 2, 3, 4 }; 138 assert(I->atom_opc < 16); 139 unsigned atom_opc = atom_opc_table[I->atom_opc]; 140 assert(atom_opc < 8); 141 return 0x2f5e00 | (src0 << 0) | (src1 << 3) | (atom_opc << 6); 142} 143 144static inline unsigned 145bi_pack_fma_atom_c1_i64(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 146{ 147 assert((1 << src0) & 0xf3); 148 assert((1 << src1) & 0xf3); 149 static uint8_t atom_opc_table[] = { 5, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, 0, 1, 2, 3, 4 }; 150 assert(I->atom_opc < 16); 151 unsigned atom_opc = atom_opc_table[I->atom_opc]; 152 assert(atom_opc < 8); 153 return 0x2f1e00 | (src0 << 0) | (src1 << 3) | (atom_opc << 6); 154} 155 156static inline unsigned 157bi_pack_fma_atom_c1_return_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 158{ 159 assert((1 << src0) & 0xf3); 160 assert((1 << src1) & 0xf3); 161 static uint8_t atom_opc_table[] = { 5, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, 0, 1, 
2, 3, 4 }; 162 assert(I->atom_opc < 16); 163 unsigned atom_opc = atom_opc_table[I->atom_opc]; 164 assert(atom_opc < 8); 165 return 0x2f7e00 | (src0 << 0) | (src1 << 3) | (atom_opc << 6); 166} 167 168static inline unsigned 169bi_pack_fma_atom_c1_return_i64(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 170{ 171 assert((1 << src0) & 0xf3); 172 assert((1 << src1) & 0xf3); 173 static uint8_t atom_opc_table[] = { 5, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, 0, 1, 2, 3, 4 }; 174 assert(I->atom_opc < 16); 175 unsigned atom_opc = atom_opc_table[I->atom_opc]; 176 assert(atom_opc < 8); 177 return 0x2f3e00 | (src0 << 0) | (src1 << 3) | (atom_opc << 6); 178} 179 180static inline unsigned 181bi_pack_fma_atom_c_return_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 182{ 183 assert((1 << src0) & 0xf3); 184 assert((1 << src1) & 0xf3); 185 assert((1 << src2) & 0xf7); 186 static uint8_t atom_opc_table[] = { 0, 2, 8, 9, 10, 11, 12, 13, 14, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 187 assert(I->atom_opc < 16); 188 unsigned atom_opc = atom_opc_table[I->atom_opc]; 189 assert(atom_opc < 16); 190 return 0x2f6000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (atom_opc << 9); 191} 192 193static inline unsigned 194bi_pack_fma_atom_c_return_i64(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 195{ 196 assert((1 << src0) & 0xf3); 197 assert((1 << src1) & 0xf3); 198 assert((1 << src2) & 0xf7); 199 static uint8_t atom_opc_table[] = { 3, 2, 8, 9, 10, 11, 12, 13, 14, 0, 1, ~0, ~0, ~0, ~0, ~0 }; 200 assert(I->atom_opc < 16); 201 unsigned atom_opc = atom_opc_table[I->atom_opc]; 202 assert(atom_opc < 16); 203 return 0x2f2000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (atom_opc << 9); 204} 205 206static inline unsigned 207bi_pack_fma_atom_post_i32(bi_instr *I, 
enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 208{ 209 static uint8_t atom_opc_table[] = { 0, 2, 8, 9, 10, 11, 12, 13, 14, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 210 assert(I->atom_opc < 16); 211 unsigned atom_opc = atom_opc_table[I->atom_opc]; 212 assert(atom_opc < 16); 213 return 0x6ee400 | (src0 << 0) | (src1 << 3) | (atom_opc << 6); 214} 215 216static inline unsigned 217bi_pack_fma_atom_post_i64(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 218{ 219 static uint8_t atom_opc_table[] = { 3, 2, 8, 9, 10, 11, 12, 13, 14, 0, 1, ~0, ~0, ~0, ~0, ~0 }; 220 assert(I->atom_opc < 16); 221 unsigned atom_opc = atom_opc_table[I->atom_opc]; 222 assert(atom_opc < 16); 223 return 0x6ee000 | (src0 << 0) | (src1 << 3) | (atom_opc << 6); 224} 225 226static inline unsigned 227bi_pack_fma_atom_pre_i64(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 228{ 229 static uint8_t atom_opc_table[] = { 3, 2, 8, 9, 10, 11, 12, 13, 14, 0, 1, ~0, ~0, ~0, ~0, ~0 }; 230 assert(I->atom_opc < 16); 231 unsigned atom_opc = atom_opc_table[I->atom_opc]; 232 assert(atom_opc < 16); 233 return 0x6ec000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (atom_opc << 9); 234} 235 236static inline unsigned 237bi_pack_fma_bitrev_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 238{ 239 240 return 0x701fc0 | (src0 << 0); 241} 242 243static inline unsigned 244bi_pack_fma_clz_u32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 245{ 246 unsigned mask = I->mask; 247 assert(mask < 2); 248 return 0x701fd0 | (src0 << 0) | (mask << 3); 249} 250 251static inline unsigned 252bi_pack_fma_clz_v2u16(bi_instr *I, enum 
bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 253{ 254 unsigned mask = I->mask; 255 assert(mask < 2); 256 static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 257 assert(I->src[0].swizzle < 13); 258 unsigned swz0 = swz0_table[I->src[0].swizzle]; 259 assert(swz0 < 4); 260 return 0x701ec0 | (src0 << 0) | (mask << 3) | (swz0 << 4); 261} 262 263static inline unsigned 264bi_pack_fma_clz_v4u8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 265{ 266 unsigned mask = I->mask; 267 assert(mask < 2); 268 return 0x701f90 | (src0 << 0) | (mask << 3); 269} 270 271static inline unsigned 272bi_pack_fma_csel_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 273{ 274 unsigned cmpf = I->cmpf; 275 assert(cmpf < 8); 276 if ((cmpf == 4) || (cmpf == 5)) { 277 { unsigned temp = src0; src0 = src1; src1 = temp; } 278 if (cmpf == 4) cmpf = 1; 279 else if (cmpf == 5) cmpf = 2; 280 } 281 282 if (cmpf == 3) { 283 { unsigned temp = src2; src2 = src3; src3 = temp; } 284 if (cmpf == 3) cmpf = 0; 285 } 286 287 unsigned derived_12 = 0; 288 if (cmpf == 0) derived_12 = 0; 289 else if (cmpf == 1) derived_12 = 1; 290 else if (cmpf == 2) derived_12 = 2; 291 else unreachable("No pattern match at pos 12"); 292 293 return 0x2e0000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (derived_12 << 12); 294} 295 296static inline unsigned 297bi_pack_fma_csel_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 298{ 299 static uint8_t cmpf_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0 }; 300 assert(I->cmpf < 9); 301 unsigned cmpf = cmpf_table[I->cmpf]; 302 assert(cmpf < 2); 303 if (cmpf == 1) { 304 { unsigned temp = src2; src2 = src3; src3 = temp; } 305 if 
(cmpf == 1) cmpf = 0; 306 } 307 308 unsigned derived_12 = 0; 309 if (cmpf == 0) derived_12 = 3; 310 else unreachable("No pattern match at pos 12"); 311 312 return 0x2e0000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (derived_12 << 12); 313} 314 315static inline unsigned 316bi_pack_fma_csel_s32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 317{ 318 static uint8_t cmpf_table[] = { ~0, 0, 1, ~0, 2, 3, ~0, ~0, ~0 }; 319 assert(I->cmpf < 9); 320 unsigned cmpf = cmpf_table[I->cmpf]; 321 assert(cmpf < 4); 322 if ((cmpf == 2) || (cmpf == 3)) { 323 { unsigned temp = src0; src0 = src1; src1 = temp; } 324 if (cmpf == 2) cmpf = 0; 325 else if (cmpf == 3) cmpf = 1; 326 } 327 328 unsigned derived_12 = 0; 329 if (cmpf == 0) derived_12 = 0; 330 else if (cmpf == 1) derived_12 = 1; 331 else unreachable("No pattern match at pos 12"); 332 333 return 0x2e4000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (derived_12 << 12); 334} 335 336static inline unsigned 337bi_pack_fma_csel_u32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 338{ 339 static uint8_t cmpf_table[] = { ~0, 0, 1, ~0, 2, 3, ~0, ~0, ~0 }; 340 assert(I->cmpf < 9); 341 unsigned cmpf = cmpf_table[I->cmpf]; 342 assert(cmpf < 4); 343 if ((cmpf == 2) || (cmpf == 3)) { 344 { unsigned temp = src0; src0 = src1; src1 = temp; } 345 if (cmpf == 2) cmpf = 0; 346 else if (cmpf == 3) cmpf = 1; 347 } 348 349 unsigned derived_12 = 0; 350 if (cmpf == 0) derived_12 = 0; 351 else if (cmpf == 1) derived_12 = 1; 352 else unreachable("No pattern match at pos 12"); 353 354 return 0x2e6000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (derived_12 << 12); 355} 356 357static inline unsigned 358bi_pack_fma_csel_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src 
src3) 359{ 360 unsigned cmpf = I->cmpf; 361 assert(cmpf < 8); 362 if ((cmpf == 4) || (cmpf == 5)) { 363 { unsigned temp = src0; src0 = src1; src1 = temp; } 364 if (cmpf == 4) cmpf = 1; 365 else if (cmpf == 5) cmpf = 2; 366 } 367 368 if (cmpf == 3) { 369 { unsigned temp = src2; src2 = src3; src3 = temp; } 370 if (cmpf == 3) cmpf = 0; 371 } 372 373 unsigned derived_12 = 0; 374 if (cmpf == 0) derived_12 = 0; 375 else if (cmpf == 1) derived_12 = 1; 376 else if (cmpf == 2) derived_12 = 2; 377 else unreachable("No pattern match at pos 12"); 378 379 return 0x6e0000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (derived_12 << 12); 380} 381 382static inline unsigned 383bi_pack_fma_csel_v2i16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 384{ 385 static uint8_t cmpf_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0 }; 386 assert(I->cmpf < 9); 387 unsigned cmpf = cmpf_table[I->cmpf]; 388 assert(cmpf < 2); 389 if (cmpf == 1) { 390 { unsigned temp = src2; src2 = src3; src3 = temp; } 391 if (cmpf == 1) cmpf = 0; 392 } 393 394 unsigned derived_12 = 0; 395 if (cmpf == 0) derived_12 = 3; 396 else unreachable("No pattern match at pos 12"); 397 398 return 0x6e0000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (derived_12 << 12); 399} 400 401static inline unsigned 402bi_pack_fma_csel_v2s16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 403{ 404 static uint8_t cmpf_table[] = { ~0, 0, 1, ~0, 2, 3, ~0, ~0, ~0 }; 405 assert(I->cmpf < 9); 406 unsigned cmpf = cmpf_table[I->cmpf]; 407 assert(cmpf < 4); 408 if ((cmpf == 2) || (cmpf == 3)) { 409 { unsigned temp = src0; src0 = src1; src1 = temp; } 410 if (cmpf == 2) cmpf = 0; 411 else if (cmpf == 3) cmpf = 1; 412 } 413 414 unsigned derived_12 = 0; 415 if (cmpf == 0) derived_12 = 0; 416 else if (cmpf == 1) derived_12 = 1; 417 else unreachable("No pattern 
match at pos 12"); 418 419 return 0x6e4000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (derived_12 << 12); 420} 421 422static inline unsigned 423bi_pack_fma_csel_v2u16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 424{ 425 static uint8_t cmpf_table[] = { ~0, 0, 1, ~0, 2, 3, ~0, ~0, ~0 }; 426 assert(I->cmpf < 9); 427 unsigned cmpf = cmpf_table[I->cmpf]; 428 assert(cmpf < 4); 429 if ((cmpf == 2) || (cmpf == 3)) { 430 { unsigned temp = src0; src0 = src1; src1 = temp; } 431 if (cmpf == 2) cmpf = 0; 432 else if (cmpf == 3) cmpf = 1; 433 } 434 435 unsigned derived_12 = 0; 436 if (cmpf == 0) derived_12 = 0; 437 else if (cmpf == 1) derived_12 = 1; 438 else unreachable("No pattern match at pos 12"); 439 440 return 0x6e6000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (derived_12 << 12); 441} 442 443static inline unsigned 444bi_pack_fma_cubeface1(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 445{ 446 unsigned neg0 = I->src[0].neg; 447 assert(neg0 < 2); 448 unsigned neg1 = I->src[1].neg; 449 assert(neg1 < 2); 450 unsigned neg2 = I->src[2].neg; 451 assert(neg2 < 2); 452 unsigned derived_9 = 0; 453 if ((neg0 == 0) && (neg1 == 0) && (neg2 == 0)) derived_9 = 0; 454 else if ((neg0 == 1) && (neg1 == 1) && (neg2 == 1)) derived_9 = 1; 455 else unreachable("No pattern match at pos 9"); 456 457 return 0x706800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_9 << 9); 458} 459 460static inline unsigned 461bi_pack_fma_dtsel_imm(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 462{ 463 static uint8_t table_table[] = { 2, 1, 0, 3 }; 464 assert(I->table < 4); 465 unsigned table = table_table[I->table]; 466 assert(table < 4); 467 return 0x70f3e0 | (src0 << 0) | (table << 3); 468} 469 470static inline 
unsigned 471bi_pack_fma_f16_to_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 472{ 473 static uint8_t lane0_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 474 assert(I->src[0].swizzle < 13); 475 unsigned lane0 = lane0_table[I->src[0].swizzle]; 476 assert(lane0 < 2); 477 return 0x700d10 | (src0 << 0) | (lane0 << 3); 478} 479 480static inline unsigned 481bi_pack_fma_fadd_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 482{ 483 unsigned abs1 = I->src[1].abs; 484 assert(abs1 < 2); 485 unsigned neg0 = I->src[0].neg; 486 assert(neg0 < 2); 487 unsigned neg1 = I->src[1].neg; 488 assert(neg1 < 2); 489 unsigned abs0 = I->src[0].abs; 490 assert(abs0 < 2); 491 unsigned round = I->round; 492 assert(round < 4); 493 unsigned clamp = I->clamp; 494 assert(clamp < 4); 495 static uint8_t widen0_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 496 assert(I->src[0].swizzle < 13); 497 unsigned widen0 = widen0_table[I->src[0].swizzle]; 498 assert(widen0 < 4); 499 static uint8_t widen1_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 500 assert(I->src[1].swizzle < 13); 501 unsigned widen1 = widen1_table[I->src[1].swizzle]; 502 assert(widen1 < 4); 503 if ((widen0 == 2) && (widen1 == 1)) { 504 { unsigned temp = src0; src0 = src1; src1 = temp; } 505 { unsigned temp = neg0; neg0 = neg1; neg1 = temp; } 506 { unsigned temp = abs0; abs0 = abs1; abs1 = temp; } 507 { unsigned temp = widen0; widen0 = widen1; widen1 = temp; } 508 } 509 510 unsigned derived_9 = 0; 511 if ((widen0 == 0) && (widen1 == 0)) derived_9 = 0; 512 else if ((widen0 == 0) && (widen1 == 1)) derived_9 = 1; 513 else if ((widen0 == 0) && (widen1 == 2)) derived_9 = 2; 514 else if ((widen0 == 1) && (widen1 == 1)) derived_9 = 3; 515 else if ((widen0 == 1) && (widen1 == 2)) derived_9 = 4; 516 else if ((widen0 == 2) 
&& (widen1 == 2)) derived_9 = 5; 517 else if ((widen0 == 1) && (widen1 == 0)) derived_9 = 6; 518 else if ((widen0 == 2) && (widen1 == 0)) derived_9 = 7; 519 else unreachable("No pattern match at pos 9"); 520 521 return 0x2c0000 | (src0 << 0) | (src1 << 3) | (abs1 << 6) | (neg0 << 7) | (neg1 << 8) | (abs0 << 12) | (round << 13) | (clamp << 15) | (derived_9 << 9); 522} 523 524static inline unsigned 525bi_pack_fma_fadd_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 526{ 527 unsigned abs0 = I->src[0].abs; 528 assert(abs0 < 2); 529 unsigned abs1 = I->src[1].abs; 530 assert(abs1 < 2); 531 unsigned neg0 = I->src[0].neg; 532 assert(neg0 < 2); 533 unsigned neg1 = I->src[1].neg; 534 assert(neg1 < 2); 535 static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 536 assert(I->src[0].swizzle < 13); 537 unsigned swz0 = swz0_table[I->src[0].swizzle]; 538 assert(swz0 < 4); 539 static uint8_t swz1_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 540 assert(I->src[1].swizzle < 13); 541 unsigned swz1 = swz1_table[I->src[1].swizzle]; 542 assert(swz1 < 4); 543 unsigned round = I->round; 544 assert(round < 4); 545 unsigned clamp = I->clamp; 546 assert(clamp < 4); 547 if (((abs0 == 0) && (src0 > src1)) || ((abs1 == 1) && (src0 <= src1))) { 548 { unsigned temp = src0; src0 = src1; src1 = temp; } 549 { unsigned temp = abs0; abs0 = abs1; abs1 = temp; } 550 { unsigned temp = neg0; neg0 = neg1; neg1 = temp; } 551 { unsigned temp = swz0; swz0 = swz1; swz1 = temp; } 552 } 553 554 unsigned derived_6 = 0; 555 if (((abs0 == 1) && (abs1 == 0) && (src0 > src1)) || ((abs0 == 0) && (abs1 == 0) && (src0 <= src1))) derived_6 = 0; 556 else if (((abs0 == 1) && (abs1 == 1) && (src0 > src1)) || ((abs0 == 1) && (abs1 == 0) && (src0 <= src1))) derived_6 = 1; 557 else unreachable("No pattern match at pos 6"); 558 559 return 0x6c0000 | (src0 << 0) | (src1 << 3) | (neg0 << 7) | (neg1 
<< 8) | (swz0 << 9) | (swz1 << 11) | (round << 13) | (clamp << 15) | (derived_6 << 6); 560} 561 562static inline unsigned 563bi_pack_fma_fadd_lscale_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 564{ 565 unsigned abs0 = I->src[0].abs; 566 assert(abs0 < 2); 567 unsigned neg0 = I->src[0].neg; 568 assert(neg0 < 2); 569 unsigned abs1 = I->src[1].abs; 570 assert(abs1 < 2); 571 unsigned neg1 = I->src[1].neg; 572 assert(neg1 < 2); 573 return 0x70f400 | (src0 << 0) | (src1 << 3) | (abs0 << 6) | (neg0 << 7) | (abs1 << 8) | (neg1 << 9); 574} 575 576static inline unsigned 577bi_pack_fma_fcmp_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 578{ 579 static uint8_t widen0_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 580 assert(I->src[0].swizzle < 13); 581 unsigned widen0 = widen0_table[I->src[0].swizzle]; 582 assert(widen0 < 4); 583 static uint8_t widen1_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 584 assert(I->src[1].swizzle < 13); 585 unsigned widen1 = widen1_table[I->src[1].swizzle]; 586 assert(widen1 < 4); 587 unsigned abs1 = I->src[1].abs; 588 assert(abs1 < 2); 589 unsigned neg0 = I->src[0].neg; 590 assert(neg0 < 2); 591 unsigned neg1 = I->src[1].neg; 592 assert(neg1 < 2); 593 unsigned abs0 = I->src[0].abs; 594 assert(abs0 < 2); 595 static uint8_t cmpf_table[] = { 0, 1, 2, 3, 4, 5, ~0, 6, 7 }; 596 assert(I->cmpf < 9); 597 unsigned cmpf = cmpf_table[I->cmpf]; 598 assert(cmpf < 8); 599 unsigned result_type = I->result_type; 600 assert(result_type < 4); 601 if ((widen0 == 2) && (widen1 == 1)) { 602 { unsigned temp = src0; src0 = src1; src1 = temp; } 603 { unsigned temp = widen0; widen0 = widen1; widen1 = temp; } 604 { unsigned temp = neg0; neg0 = neg1; neg1 = temp; } 605 { unsigned temp = abs0; abs0 = abs1; abs1 = temp; } 606 if (cmpf == 4) cmpf = 1; 607 else if 
(cmpf == 5) cmpf = 2; 608 else if (cmpf == 1) cmpf = 4; 609 else if (cmpf == 2) cmpf = 5; 610 } 611 612 unsigned derived_9 = 0; 613 if ((widen0 == 0) && (widen1 == 0)) derived_9 = 0; 614 else if ((widen0 == 0) && (widen1 == 1)) derived_9 = 1; 615 else if ((widen0 == 0) && (widen1 == 2)) derived_9 = 2; 616 else if ((widen0 == 1) && (widen1 == 1)) derived_9 = 3; 617 else if ((widen0 == 1) && (widen1 == 2)) derived_9 = 4; 618 else if ((widen0 == 2) && (widen1 == 2)) derived_9 = 5; 619 else if ((widen0 == 1) && (widen1 == 0)) derived_9 = 6; 620 else if ((widen0 == 2) && (widen1 == 0)) derived_9 = 7; 621 else unreachable("No pattern match at pos 9"); 622 623 return 0x240000 | (src0 << 0) | (src1 << 3) | (abs1 << 6) | (neg0 << 7) | (neg1 << 8) | (abs0 << 12) | (cmpf << 13) | (result_type << 16) | (derived_9 << 9); 624} 625 626static inline unsigned 627bi_pack_fma_fcmp_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 628{ 629 unsigned abs0 = I->src[0].abs; 630 assert(abs0 < 2); 631 unsigned abs1 = I->src[1].abs; 632 assert(abs1 < 2); 633 static uint8_t cmpf_table[] = { 0, 1, 2, 3, 4, 5, ~0, 6, 7 }; 634 assert(I->cmpf < 9); 635 unsigned cmpf = cmpf_table[I->cmpf]; 636 assert(cmpf < 8); 637 unsigned neg0 = I->src[0].neg; 638 assert(neg0 < 2); 639 unsigned neg1 = I->src[1].neg; 640 assert(neg1 < 2); 641 static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 642 assert(I->src[0].swizzle < 13); 643 unsigned swz0 = swz0_table[I->src[0].swizzle]; 644 assert(swz0 < 4); 645 static uint8_t swz1_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 646 assert(I->src[1].swizzle < 13); 647 unsigned swz1 = swz1_table[I->src[1].swizzle]; 648 assert(swz1 < 4); 649 unsigned result_type = I->result_type; 650 assert(result_type < 4); 651 if (((abs0 == 0) && (src0 > src1)) || ((abs1 == 1) && (src0 <= src1))) { 652 { unsigned temp = src0; src0 = src1; src1 = temp; } 653 
{ unsigned temp = abs0; abs0 = abs1; abs1 = temp; } 654 { unsigned temp = neg0; neg0 = neg1; neg1 = temp; } 655 { unsigned temp = swz0; swz0 = swz1; swz1 = temp; } 656 if (cmpf == 4) cmpf = 1; 657 else if (cmpf == 5) cmpf = 2; 658 else if (cmpf == 1) cmpf = 4; 659 else if (cmpf == 2) cmpf = 5; 660 } 661 662 unsigned derived_6 = 0; 663 if (((abs0 == 1) && (abs1 == 0) && (src0 > src1)) || ((abs0 == 0) && (abs1 == 0) && (src0 <= src1))) derived_6 = 0; 664 else if (((abs0 == 1) && (abs1 == 1) && (src0 > src1)) || ((abs0 == 1) && (abs1 == 0) && (src0 <= src1))) derived_6 = 1; 665 else unreachable("No pattern match at pos 6"); 666 667 unsigned derived_13 = 0; 668 if (cmpf == 0) derived_13 = 0; 669 else if (cmpf == 1) derived_13 = 1; 670 else if (cmpf == 2) derived_13 = 2; 671 else if (cmpf == 3) derived_13 = 3; 672 else if (cmpf == 4) derived_13 = 4; 673 else if (cmpf == 5) derived_13 = 5; 674 else if (cmpf == 6) derived_13 = 6; 675 else if ((cmpf == 7) && (abs0 == 0) && (abs1 == 0)) derived_13 = 7; 676 else unreachable("No pattern match at pos 13"); 677 678 return 0x640000 | (src0 << 0) | (src1 << 3) | (neg0 << 7) | (neg1 << 8) | (swz0 << 9) | (swz1 << 11) | (result_type << 16) | (derived_6 << 6) | (derived_13 << 13); 679} 680 681static inline unsigned 682bi_pack_fma_flshift_double_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 683{ 684 unsigned bytes2 = I->bytes2; 685 assert(bytes2 < 2); 686 static uint8_t lane2_table[] = { ~0, ~0, ~0, ~0, 0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0 }; 687 assert(I->src[2].swizzle < 13); 688 unsigned lane2 = lane2_table[I->src[2].swizzle]; 689 assert(lane2 < 2); 690 return 0x33f800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (bytes2 << 9) | (lane2 << 10); 691} 692 693static inline unsigned 694bi_pack_fma_fma_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 695{ 696 static 
uint8_t widen0_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 697 assert(I->src[0].swizzle < 13); 698 unsigned widen0 = widen0_table[I->src[0].swizzle]; 699 assert(widen0 < 4); 700 static uint8_t widen1_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 701 assert(I->src[1].swizzle < 13); 702 unsigned widen1 = widen1_table[I->src[1].swizzle]; 703 assert(widen1 < 4); 704 unsigned neg0 = I->src[0].neg; 705 assert(neg0 < 2); 706 unsigned neg1 = I->src[1].neg; 707 assert(neg1 < 2); 708 unsigned abs0 = I->src[0].abs; 709 assert(abs0 < 2); 710 unsigned round = I->round; 711 assert(round < 4); 712 unsigned clamp = I->clamp; 713 assert(clamp < 4); 714 unsigned abs1 = I->src[1].abs; 715 assert(abs1 < 2); 716 unsigned neg2 = I->src[2].neg; 717 assert(neg2 < 2); 718 unsigned abs2 = I->src[2].abs; 719 assert(abs2 < 2); 720 if ((widen0 == 2) && (widen1 == 1)) { 721 { unsigned temp = src0; src0 = src1; src1 = temp; } 722 { unsigned temp = widen0; widen0 = widen1; widen1 = temp; } 723 { unsigned temp = neg0; neg0 = neg1; neg1 = temp; } 724 { unsigned temp = abs0; abs0 = abs1; abs1 = temp; } 725 } 726 727 unsigned derived_9 = 0; 728 if ((widen0 == 0) && (widen1 == 0)) derived_9 = 0; 729 else if ((widen0 == 0) && (widen1 == 1)) derived_9 = 1; 730 else if ((widen0 == 0) && (widen1 == 2)) derived_9 = 2; 731 else if ((widen0 == 1) && (widen1 == 1)) derived_9 = 3; 732 else if ((widen0 == 1) && (widen1 == 2)) derived_9 = 4; 733 else if ((widen0 == 2) && (widen1 == 2)) derived_9 = 5; 734 else if ((widen0 == 1) && (widen1 == 0)) derived_9 = 6; 735 else if ((widen0 == 2) && (widen1 == 0)) derived_9 = 7; 736 else unreachable("No pattern match at pos 9"); 737 738 unsigned derived_17 = 0; 739 if (((neg0 == 0) && (neg1 == 0)) || ((neg0 == 1) && (neg1 == 1))) derived_17 = 0; 740 else if (((neg0 == 0) && (neg1 == 1)) || ((neg0 == 1) && (neg1 == 0))) derived_17 = 1; 741 else unreachable("No pattern match at pos 17"); 742 743 return 0x0 | (src0 << 0) | (src1 << 3) | 
(src2 << 6) | (abs0 << 12) | (round << 13) | (clamp << 15) | (abs1 << 19) | (neg2 << 18) | (abs2 << 20) | (derived_9 << 9) | (derived_17 << 17); 744} 745 746static inline unsigned 747bi_pack_fma_fma_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 748{ 749 unsigned neg0 = I->src[0].neg; 750 assert(neg0 < 2); 751 unsigned neg1 = I->src[1].neg; 752 assert(neg1 < 2); 753 static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 754 assert(I->src[0].swizzle < 13); 755 unsigned swz0 = swz0_table[I->src[0].swizzle]; 756 assert(swz0 < 4); 757 static uint8_t swz1_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 758 assert(I->src[1].swizzle < 13); 759 unsigned swz1 = swz1_table[I->src[1].swizzle]; 760 assert(swz1 < 4); 761 unsigned round = I->round; 762 assert(round < 4); 763 unsigned clamp = I->clamp; 764 assert(clamp < 4); 765 unsigned neg2 = I->src[2].neg; 766 assert(neg2 < 2); 767 static uint8_t swz2_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 768 assert(I->src[2].swizzle < 13); 769 unsigned swz2 = swz2_table[I->src[2].swizzle]; 770 assert(swz2 < 4); 771 unsigned derived_17 = 0; 772 if (((neg0 == 0) && (neg1 == 0)) || ((neg0 == 1) && (neg1 == 1))) derived_17 = 0; 773 else if (((neg0 == 0) && (neg1 == 1)) || ((neg0 == 1) && (neg1 == 0))) derived_17 = 1; 774 else unreachable("No pattern match at pos 17"); 775 776 return 0x400000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (swz0 << 9) | (swz1 << 11) | (round << 13) | (clamp << 15) | (neg2 << 18) | (swz2 << 19) | (derived_17 << 17); 777} 778 779static inline unsigned 780bi_pack_fma_fma_rscale_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 781{ 782 static uint8_t round_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0 }; 783 assert(I->round < 9); 784 unsigned round = round_table[I->round]; 785 
assert(round < 2); 786 unsigned clamp = I->clamp; 787 assert(clamp < 4); 788 unsigned neg0 = I->src[0].neg; 789 assert(neg0 < 2); 790 unsigned neg1 = I->src[1].neg; 791 assert(neg1 < 2); 792 unsigned abs0 = I->src[0].abs; 793 assert(abs0 < 2); 794 unsigned neg2 = I->src[2].neg; 795 assert(neg2 < 2); 796 unsigned special = I->special; 797 assert(special < 4); 798 unsigned derived_16 = 0; 799 if (((neg0 == 0) && (neg1 == 0)) || ((neg0 == 1) && (neg1 == 1))) derived_16 = 0; 800 else if (((neg0 == 0) && (neg1 == 1)) || ((neg0 == 1) && (neg1 == 0))) derived_16 = 1; 801 else unreachable("No pattern match at pos 16"); 802 803 unsigned derived_12 = 0; 804 if ((clamp == 0) && (special == 0) && (round == 0)) derived_12 = 0; 805 else if ((clamp == 1) && (special == 0) && (round == 0)) derived_12 = 1; 806 else if ((clamp == 2) && (special == 0) && (round == 0)) derived_12 = 2; 807 else if ((clamp == 3) && (special == 0) && (round == 0)) derived_12 = 3; 808 else if ((clamp == 0) && (special == 1) && (round == 0)) derived_12 = 4; 809 else if ((clamp == 0) && (special == 1) && (round == 1)) derived_12 = 5; 810 else if ((clamp == 0) && (special == 3) && (round == 0)) derived_12 = 6; 811 else if ((clamp == 0) && (special == 2) && (round == 0)) derived_12 = 7; 812 else unreachable("No pattern match at pos 12"); 813 814 return 0x280000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (abs0 << 15) | (neg2 << 17) | (derived_16 << 16) | (derived_12 << 12); 815} 816 817static inline unsigned 818bi_pack_fma_fma_rscale_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 819{ 820 static uint8_t round_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0 }; 821 assert(I->round < 9); 822 unsigned round = round_table[I->round]; 823 assert(round < 2); 824 unsigned clamp = I->clamp; 825 assert(clamp < 4); 826 unsigned neg0 = I->src[0].neg; 827 assert(neg0 < 2); 828 unsigned neg1 = I->src[1].neg; 829 
assert(neg1 < 2); 830 unsigned abs0 = I->src[0].abs; 831 assert(abs0 < 2); 832 unsigned neg2 = I->src[2].neg; 833 assert(neg2 < 2); 834 unsigned special = I->special; 835 assert(special < 4); 836 unsigned derived_16 = 0; 837 if (((neg0 == 0) && (neg1 == 0)) || ((neg0 == 1) && (neg1 == 1))) derived_16 = 0; 838 else if (((neg0 == 0) && (neg1 == 1)) || ((neg0 == 1) && (neg1 == 0))) derived_16 = 1; 839 else unreachable("No pattern match at pos 16"); 840 841 unsigned derived_12 = 0; 842 if ((clamp == 0) && (special == 0) && (round == 0)) derived_12 = 0; 843 else if ((clamp == 1) && (special == 0) && (round == 0)) derived_12 = 1; 844 else if ((clamp == 2) && (special == 0) && (round == 0)) derived_12 = 2; 845 else if ((clamp == 3) && (special == 0) && (round == 0)) derived_12 = 3; 846 else if ((clamp == 0) && (special == 1) && (round == 0)) derived_12 = 4; 847 else if ((clamp == 0) && (special == 1) && (round == 1)) derived_12 = 5; 848 else if ((clamp == 0) && (special == 2) && (round == 0)) derived_12 = 7; 849 else unreachable("No pattern match at pos 12"); 850 851 return 0x680000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (abs0 << 15) | (neg2 << 17) | (derived_16 << 16) | (derived_12 << 12); 852} 853 854static inline unsigned 855bi_pack_fma_fmul_cslice(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 856{ 857 static uint8_t lane0_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 858 assert(I->src[0].swizzle < 13); 859 unsigned lane0 = lane0_table[I->src[0].swizzle]; 860 assert(lane0 < 2); 861 unsigned abs0 = I->src[0].abs; 862 assert(abs0 < 2); 863 unsigned neg0 = I->src[0].neg; 864 assert(neg0 < 2); 865 return 0x70d000 | (src0 << 0) | (src1 << 3) | (lane0 << 6) | (abs0 << 7) | (neg0 << 8); 866} 867 868static inline unsigned 869bi_pack_fma_fmul_slice_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, 
enum bifrost_packed_src src3) 870{ 871 872 return 0x70cb40 | (src0 << 0) | (src1 << 3); 873} 874 875static inline unsigned 876bi_pack_fma_frexpe_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 877{ 878 unsigned neg0 = I->src[0].neg; 879 assert(neg0 < 2); 880 unsigned sqrt = I->sqrt; 881 assert(sqrt < 2); 882 unsigned log = I->log; 883 assert(log < 2); 884 static uint8_t widen0_table[] = { 2, 1, ~0, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 885 assert(I->src[0].swizzle < 13); 886 unsigned widen0 = widen0_table[I->src[0].swizzle]; 887 assert(widen0 < 4); 888 if (log == 0) { 889 return 0x701c20 | (src0 << 0) | (neg0 << 6) | (sqrt << 8) | (widen0 << 3); 890 } else if ((log == 1) && (sqrt == 0) && (neg0 == 0)) { 891 return 0x701e20 | (src0 << 0) | (widen0 << 3); 892 } else { 893 unreachable("No matching state found in fma_frexpe_f32"); 894 } 895} 896 897static inline unsigned 898bi_pack_fma_frexpe_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 899{ 900 unsigned neg0 = I->src[0].neg; 901 assert(neg0 < 2); 902 unsigned sqrt = I->sqrt; 903 assert(sqrt < 2); 904 unsigned log = I->log; 905 assert(log < 2); 906 static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 907 assert(I->src[0].swizzle < 13); 908 unsigned swz0 = swz0_table[I->src[0].swizzle]; 909 assert(swz0 < 4); 910 if (log == 0) { 911 return 0x701c00 | (src0 << 0) | (neg0 << 6) | (sqrt << 8) | (swz0 << 3); 912 } else if ((log == 1) && (sqrt == 0) && (neg0 == 0)) { 913 return 0x701e00 | (src0 << 0) | (swz0 << 3); 914 } else { 915 unreachable("No matching state found in fma_frexpe_v2f16"); 916 } 917} 918 919static inline unsigned 920bi_pack_fma_frexpm_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 921{ 922 unsigned abs0 = 
I->src[0].abs; 923 assert(abs0 < 2); 924 unsigned sqrt = I->sqrt; 925 assert(sqrt < 2); 926 unsigned log = I->log; 927 assert(log < 2); 928 static uint8_t widen0_table[] = { 2, 1, ~0, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 929 assert(I->src[0].swizzle < 13); 930 unsigned widen0 = widen0_table[I->src[0].swizzle]; 931 assert(widen0 < 4); 932 unsigned neg0 = I->src[0].neg; 933 assert(neg0 < 2); 934 if ((log == 0) && (neg0 == 0)) { 935 return 0x701b20 | (src0 << 0) | (abs0 << 6) | (sqrt << 7) | (widen0 << 3); 936 } else if ((log == 1) && (sqrt == 0)) { 937 return 0x701a20 | (src0 << 0) | (abs0 << 6) | (widen0 << 3) | (neg0 << 7); 938 } else { 939 unreachable("No matching state found in fma_frexpm_f32"); 940 } 941} 942 943static inline unsigned 944bi_pack_fma_frexpm_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 945{ 946 unsigned abs0 = I->src[0].abs; 947 assert(abs0 < 2); 948 unsigned sqrt = I->sqrt; 949 assert(sqrt < 2); 950 unsigned log = I->log; 951 assert(log < 2); 952 static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 953 assert(I->src[0].swizzle < 13); 954 unsigned swz0 = swz0_table[I->src[0].swizzle]; 955 assert(swz0 < 4); 956 unsigned neg0 = I->src[0].neg; 957 assert(neg0 < 2); 958 if ((log == 0) && (neg0 == 0)) { 959 return 0x701b00 | (src0 << 0) | (abs0 << 6) | (sqrt << 7) | (swz0 << 3); 960 } else if ((log == 1) && (sqrt == 0)) { 961 return 0x701a00 | (src0 << 0) | (abs0 << 6) | (swz0 << 3) | (neg0 << 7); 962 } else { 963 unreachable("No matching state found in fma_frexpm_v2f16"); 964 } 965} 966 967static inline unsigned 968bi_pack_fma_fround_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 969{ 970 unsigned abs0 = I->src[0].abs; 971 assert(abs0 < 2); 972 unsigned neg0 = I->src[0].neg; 973 assert(neg0 < 2); 974 static uint8_t widen0_table[] = { 2, 
1, ~0, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 975 assert(I->src[0].swizzle < 13); 976 unsigned widen0 = widen0_table[I->src[0].swizzle]; 977 assert(widen0 < 4); 978 unsigned round = I->round; 979 assert(round < 8); 980 if (round != 4) { 981 unsigned derived_9 = 0; 982 if (round == 0) derived_9 = 0; 983 else if (round == 1) derived_9 = 1; 984 else if (round == 2) derived_9 = 2; 985 else if (round == 3) derived_9 = 3; 986 else unreachable("No pattern match at pos 9"); 987 988 return 0x70c020 | (src0 << 0) | (abs0 << 7) | (neg0 << 8) | (widen0 << 3) | (derived_9 << 9); 989 } else if (round == 4) { 990 return 0x707620 | (src0 << 0) | (abs0 << 7) | (neg0 << 8) | (widen0 << 3); 991 } else { 992 unreachable("No matching state found in fma_fround_f32"); 993 } 994} 995 996static inline unsigned 997bi_pack_fma_fround_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 998{ 999 unsigned abs0 = I->src[0].abs; 1000 assert(abs0 < 2); 1001 unsigned neg0 = I->src[0].neg; 1002 assert(neg0 < 2); 1003 static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 1004 assert(I->src[0].swizzle < 13); 1005 unsigned swz0 = swz0_table[I->src[0].swizzle]; 1006 assert(swz0 < 4); 1007 unsigned round = I->round; 1008 assert(round < 8); 1009 if (round != 4) { 1010 unsigned derived_9 = 0; 1011 if (round == 0) derived_9 = 0; 1012 else if (round == 1) derived_9 = 1; 1013 else if (round == 2) derived_9 = 2; 1014 else if (round == 3) derived_9 = 3; 1015 else unreachable("No pattern match at pos 9"); 1016 1017 return 0x70c000 | (src0 << 0) | (abs0 << 7) | (neg0 << 8) | (swz0 << 3) | (derived_9 << 9); 1018 } else if (round == 4) { 1019 return 0x707600 | (src0 << 0) | (abs0 << 7) | (neg0 << 8) | (swz0 << 3); 1020 } else { 1021 unreachable("No matching state found in fma_fround_v2f16"); 1022 } 1023} 1024 1025static inline unsigned 1026bi_pack_fma_frshift_double_i32(bi_instr *I, enum 
bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 1027{ 1028 unsigned bytes2 = I->bytes2; 1029 assert(bytes2 < 2); 1030 static uint8_t lane2_table[] = { ~0, ~0, ~0, ~0, 0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0 }; 1031 assert(I->src[2].swizzle < 13); 1032 unsigned lane2 = lane2_table[I->src[2].swizzle]; 1033 assert(lane2 < 2); 1034 return 0x33f000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (bytes2 << 9) | (lane2 << 10); 1035} 1036 1037static inline unsigned 1038bi_pack_fma_iaddc_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 1039{ 1040 1041 return 0x27fc00 | (src0 << 0) | (src1 << 3) | (src2 << 6); 1042} 1043 1044static inline unsigned 1045bi_pack_fma_idp_v4i8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 1046{ 1047 unsigned sign0 = I->src[0].abs; 1048 assert(sign0 < 2); 1049 unsigned sign1 = I->src[1].abs; 1050 assert(sign1 < 2); 1051 return 0x73e8c0 | (src0 << 0) | (src1 << 3) | (sign0 << 9) | (sign1 << 10); 1052} 1053 1054static inline unsigned 1055bi_pack_fma_imul_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 1056{ 1057 static uint8_t widen1_table[] = { 1, 0, ~0, 2, 3, 4, 5, 6, ~0, ~0, ~0, ~0, ~0 }; 1058 assert(I->src[1].swizzle < 13); 1059 unsigned widen1 = widen1_table[I->src[1].swizzle]; 1060 assert(widen1 < 8); 1061 unsigned extend = I->extend; 1062 assert(extend < 4); 1063 if ((extend == 0) && (widen1 == 0)) { 1064 return 0x73c0c0 | (src0 << 0) | (src1 << 3); 1065 } else if ((extend != 0) && ((widen1 == 1) || (widen1 == 2))) { 1066 unsigned derived_9 = 0; 1067 if (widen1 == 1) derived_9 = 0; 1068 else if (widen1 == 2) derived_9 = 1; 1069 else unreachable("No pattern match at pos 9"); 1070 1071 unsigned derived_10 = 0; 1072 if (extend == 
2) derived_10 = 0; 1073 else if (extend == 1) derived_10 = 1; 1074 else unreachable("No pattern match at pos 10"); 1075 1076 return 0x73c8c0 | (src0 << 0) | (src1 << 3) | (derived_9 << 9) | (derived_10 << 10); 1077 } else if ((extend != 0) && ((widen1 == 3) || (widen1 == 4) || (widen1 == 5) || (widen1 == 6))) { 1078 unsigned derived_9 = 0; 1079 if (widen1 == 3) derived_9 = 0; 1080 else if (widen1 == 4) derived_9 = 1; 1081 else if (widen1 == 5) derived_9 = 2; 1082 else if (widen1 == 6) derived_9 = 3; 1083 else unreachable("No pattern match at pos 9"); 1084 1085 unsigned derived_11 = 0; 1086 if (extend == 2) derived_11 = 0; 1087 else if (extend == 1) derived_11 = 1; 1088 else unreachable("No pattern match at pos 11"); 1089 1090 return 0x73b0c0 | (src0 << 0) | (src1 << 3) | (derived_9 << 9) | (derived_11 << 11); 1091 } else { 1092 unreachable("No matching state found in fma_imul_i32"); 1093 } 1094} 1095 1096static inline unsigned 1097bi_pack_fma_imul_v2i16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 1098{ 1099 static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 1100 assert(I->src[0].swizzle < 13); 1101 unsigned swz0 = swz0_table[I->src[0].swizzle]; 1102 assert(swz0 < 4); 1103 static uint8_t swz1_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 1104 assert(I->src[1].swizzle < 13); 1105 unsigned swz1 = swz1_table[I->src[1].swizzle]; 1106 assert(swz1 < 4); 1107 return 0x7240c0 | (src0 << 0) | (src1 << 3) | (swz0 << 9) | (swz1 << 11); 1108} 1109 1110static inline unsigned 1111bi_pack_fma_imul_v4i8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 1112{ 1113 static uint8_t replicate0_table[] = { ~0, 0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 1114 assert(I->src[0].swizzle < 13); 1115 unsigned replicate0 = replicate0_table[I->src[0].swizzle]; 1116 
assert(replicate0 < 8); 1117 static uint8_t replicate1_table[] = { ~0, 0, ~0, ~0, 1, 2, 3, 4, ~0, ~0, ~0, ~0, ~0 }; 1118 assert(I->src[1].swizzle < 13); 1119 unsigned replicate1 = replicate1_table[I->src[1].swizzle]; 1120 assert(replicate1 < 8); 1121 if ((replicate0 == 0) && (replicate1 == 0)) { 1122 return 0x73e0c0 | (src0 << 0) | (src1 << 3); 1123 } else if ((replicate0 == 0) && (replicate1 != 0)) { 1124 unsigned derived_9 = 0; 1125 if (replicate1 == 1) derived_9 = 0; 1126 else if (replicate1 == 2) derived_9 = 1; 1127 else if (replicate1 == 3) derived_9 = 2; 1128 else if (replicate1 == 4) derived_9 = 3; 1129 else unreachable("No pattern match at pos 9"); 1130 1131 return 0x7380c0 | (src0 << 0) | (src1 << 3) | (derived_9 << 9); 1132 } else { 1133 unreachable("No matching state found in fma_imul_v4i8"); 1134 } 1135} 1136 1137static inline unsigned 1138bi_pack_fma_imuld(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 1139{ 1140 assert((1 << src0) & 0x33); 1141 assert((1 << src1) & 0x33); 1142 unsigned threads = I->threads; 1143 assert(threads < 2); 1144 return 0x70f100 | (src0 << 0) | (src1 << 3) | (threads << 6); 1145} 1146 1147static inline unsigned 1148bi_pack_fma_isubb_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 1149{ 1150 1151 return 0x27fe00 | (src0 << 0) | (src1 << 3) | (src2 << 6); 1152} 1153 1154static inline unsigned 1155bi_pack_fma_jump_ex(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 1156{ 1157 unsigned test_mode = I->test_mode; 1158 assert(test_mode < 2); 1159 unsigned stack_mode = I->stack_mode; 1160 assert(stack_mode < 4); 1161 return 0x2eb000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (test_mode << 9) | (stack_mode << 10); 1162} 1163 1164static inline unsigned 
1165bi_pack_fma_lrot_double_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 1166{ 1167 unsigned bytes2 = I->bytes2; 1168 assert(bytes2 < 2); 1169 static uint8_t lane2_table[] = { ~0, ~0, ~0, ~0, 0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0 }; 1170 assert(I->src[2].swizzle < 13); 1171 unsigned lane2 = lane2_table[I->src[2].swizzle]; 1172 assert(lane2 < 2); 1173 unsigned result_word = I->result_word; 1174 assert(result_word < 2); 1175 return 0x33b000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (bytes2 << 9) | (lane2 << 10) | (result_word << 11); 1176} 1177 1178static inline unsigned 1179bi_pack_fma_lshift_and_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 1180{ 1181 static uint8_t lane2_table[] = { ~0, ~0, ~0, ~0, 0, 1, 2, 3, ~0, ~0, ~0, ~0, ~0 }; 1182 assert(I->src[2].swizzle < 13); 1183 unsigned lane2 = lane2_table[I->src[2].swizzle]; 1184 assert(lane2 < 4); 1185 unsigned not1 = I->src[1].neg; 1186 assert(not1 < 2); 1187 static uint8_t not_result_table[] = { 1, 0 }; 1188 unsigned not_result = not_result_table[I->not_result]; 1189 assert(not_result < 2); 1190 return 0x311000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (lane2 << 9) | (not1 << 14) | (not_result << 15); 1191} 1192 1193static inline unsigned 1194bi_pack_fma_lshift_and_v2i16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 1195{ 1196 static uint8_t lanes2_table[] = { ~0, ~0, ~0, ~0, 0, 1, 2, 3, 4, 5, ~0, ~0, 6 }; 1197 assert(I->src[2].swizzle < 13); 1198 unsigned lanes2 = lanes2_table[I->src[2].swizzle]; 1199 assert(lanes2 < 8); 1200 unsigned not1 = I->src[1].neg; 1201 assert(not1 < 2); 1202 static uint8_t not_result_table[] = { 1, 0 }; 1203 unsigned not_result = not_result_table[I->not_result]; 1204 assert(not_result < 2); 1205 if ((lanes2 == 0) || (lanes2 
== 1) || (lanes2 == 2) || (lanes2 == 3)) { 1206 unsigned derived_9 = 0; 1207 if (lanes2 == 0) derived_9 = 0; 1208 else if (lanes2 == 1) derived_9 = 1; 1209 else if (lanes2 == 2) derived_9 = 2; 1210 else if (lanes2 == 3) derived_9 = 3; 1211 else unreachable("No pattern match at pos 9"); 1212 1213 return 0x310800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9); 1214 } else if ((lanes2 == 4) || (lanes2 == 5) || (lanes2 == 6)) { 1215 unsigned derived_9 = 0; 1216 if (lanes2 == 4) derived_9 = 1; 1217 else if (lanes2 == 5) derived_9 = 2; 1218 else if (lanes2 == 6) derived_9 = 3; 1219 else unreachable("No pattern match at pos 9"); 1220 1221 return 0x311800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9); 1222 } else { 1223 unreachable("No matching state found in fma_lshift_and_v2i16"); 1224 } 1225} 1226 1227static inline unsigned 1228bi_pack_fma_lshift_and_v4i8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 1229{ 1230 static uint8_t lanes2_table[] = { ~0, 0, ~0, ~0, 1, 2, 3, 4, ~0, ~0, ~0, ~0, ~0 }; 1231 assert(I->src[2].swizzle < 13); 1232 unsigned lanes2 = lanes2_table[I->src[2].swizzle]; 1233 assert(lanes2 < 8); 1234 unsigned not1 = I->src[1].neg; 1235 assert(not1 < 2); 1236 static uint8_t not_result_table[] = { 1, 0 }; 1237 unsigned not_result = not_result_table[I->not_result]; 1238 assert(not_result < 2); 1239 if (lanes2 != 0) { 1240 unsigned derived_9 = 0; 1241 if (lanes2 == 1) derived_9 = 0; 1242 else if (lanes2 == 2) derived_9 = 1; 1243 else if (lanes2 == 3) derived_9 = 2; 1244 else if (lanes2 == 4) derived_9 = 3; 1245 else unreachable("No pattern match at pos 9"); 1246 1247 return 0x310000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9); 1248 } else if (lanes2 == 0) { 1249 return 0x311800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | 
(not1 << 14) | (not_result << 15); 1250 } else { 1251 unreachable("No matching state found in fma_lshift_and_v4i8"); 1252 } 1253} 1254 1255static inline unsigned 1256bi_pack_fma_lshift_double_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 1257{ 1258 unsigned bytes2 = I->bytes2; 1259 assert(bytes2 < 2); 1260 static uint8_t lane2_table[] = { ~0, ~0, ~0, ~0, 0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0 }; 1261 assert(I->src[2].swizzle < 13); 1262 unsigned lane2 = lane2_table[I->src[2].swizzle]; 1263 assert(lane2 < 2); 1264 unsigned result_word = I->result_word; 1265 assert(result_word < 2); 1266 return 0x33c000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (bytes2 << 9) | (lane2 << 10) | (result_word << 11); 1267} 1268 1269static inline unsigned 1270bi_pack_fma_lshift_or_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 1271{ 1272 static uint8_t lane2_table[] = { ~0, ~0, ~0, ~0, 0, 1, 2, 3, ~0, ~0, ~0, ~0, ~0 }; 1273 assert(I->src[2].swizzle < 13); 1274 unsigned lane2 = lane2_table[I->src[2].swizzle]; 1275 assert(lane2 < 4); 1276 static uint8_t not1_table[] = { 1, 0 }; 1277 unsigned not1 = not1_table[I->src[1].neg]; 1278 assert(not1 < 2); 1279 unsigned not_result = I->not_result; 1280 assert(not_result < 2); 1281 return 0x313000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (lane2 << 9) | (not1 << 14) | (not_result << 15); 1282} 1283 1284static inline unsigned 1285bi_pack_fma_lshift_or_v2i16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 1286{ 1287 static uint8_t lanes2_table[] = { ~0, ~0, ~0, ~0, 0, 1, 2, 3, 4, 5, ~0, ~0, 6 }; 1288 assert(I->src[2].swizzle < 13); 1289 unsigned lanes2 = lanes2_table[I->src[2].swizzle]; 1290 assert(lanes2 < 8); 1291 static uint8_t not1_table[] = { 1, 0 }; 1292 unsigned not1 = 
not1_table[I->src[1].neg]; 1293 assert(not1 < 2); 1294 unsigned not_result = I->not_result; 1295 assert(not_result < 2); 1296 if ((lanes2 == 0) || (lanes2 == 1) || (lanes2 == 2) || (lanes2 == 3)) { 1297 unsigned derived_9 = 0; 1298 if (lanes2 == 0) derived_9 = 0; 1299 else if (lanes2 == 1) derived_9 = 1; 1300 else if (lanes2 == 2) derived_9 = 2; 1301 else if (lanes2 == 3) derived_9 = 3; 1302 else unreachable("No pattern match at pos 9"); 1303 1304 return 0x312800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9); 1305 } else if ((lanes2 == 4) || (lanes2 == 5) || (lanes2 == 6)) { 1306 unsigned derived_9 = 0; 1307 if (lanes2 == 4) derived_9 = 1; 1308 else if (lanes2 == 5) derived_9 = 2; 1309 else if (lanes2 == 6) derived_9 = 3; 1310 else unreachable("No pattern match at pos 9"); 1311 1312 return 0x313800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9); 1313 } else { 1314 unreachable("No matching state found in fma_lshift_or_v2i16"); 1315 } 1316} 1317 1318static inline unsigned 1319bi_pack_fma_lshift_or_v4i8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 1320{ 1321 static uint8_t lanes2_table[] = { ~0, 0, ~0, ~0, 1, 2, 3, 4, ~0, ~0, ~0, ~0, ~0 }; 1322 assert(I->src[2].swizzle < 13); 1323 unsigned lanes2 = lanes2_table[I->src[2].swizzle]; 1324 assert(lanes2 < 8); 1325 static uint8_t not1_table[] = { 1, 0 }; 1326 unsigned not1 = not1_table[I->src[1].neg]; 1327 assert(not1 < 2); 1328 unsigned not_result = I->not_result; 1329 assert(not_result < 2); 1330 if (lanes2 != 0) { 1331 unsigned derived_9 = 0; 1332 if (lanes2 == 1) derived_9 = 0; 1333 else if (lanes2 == 2) derived_9 = 1; 1334 else if (lanes2 == 3) derived_9 = 2; 1335 else if (lanes2 == 4) derived_9 = 3; 1336 else unreachable("No pattern match at pos 9"); 1337 1338 return 0x312000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) 
| (not_result << 15) | (derived_9 << 9); 1339 } else if (lanes2 == 0) { 1340 return 0x313800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15); 1341 } else { 1342 unreachable("No matching state found in fma_lshift_or_v4i8"); 1343 } 1344} 1345 1346static inline unsigned 1347bi_pack_fma_lshift_xor_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 1348{ 1349 static uint8_t lane2_table[] = { ~0, ~0, ~0, ~0, 0, 1, 2, 3, ~0, ~0, ~0, ~0, ~0 }; 1350 assert(I->src[2].swizzle < 13); 1351 unsigned lane2 = lane2_table[I->src[2].swizzle]; 1352 assert(lane2 < 4); 1353 unsigned not_result = I->not_result; 1354 assert(not_result < 2); 1355 return 0x325000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (lane2 << 9) | (not_result << 13); 1356} 1357 1358static inline unsigned 1359bi_pack_fma_lshift_xor_v2i16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 1360{ 1361 static uint8_t lanes2_table[] = { ~0, ~0, ~0, ~0, 0, 1, 2, 3, 4, 5, ~0, ~0, 6 }; 1362 assert(I->src[2].swizzle < 13); 1363 unsigned lanes2 = lanes2_table[I->src[2].swizzle]; 1364 assert(lanes2 < 8); 1365 unsigned not_result = I->not_result; 1366 assert(not_result < 2); 1367 if ((lanes2 == 0) || (lanes2 == 1) || (lanes2 == 2) || (lanes2 == 3)) { 1368 unsigned derived_9 = 0; 1369 if (lanes2 == 0) derived_9 = 0; 1370 else if (lanes2 == 1) derived_9 = 1; 1371 else if (lanes2 == 2) derived_9 = 2; 1372 else if (lanes2 == 3) derived_9 = 3; 1373 else unreachable("No pattern match at pos 9"); 1374 1375 return 0x324800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not_result << 13) | (derived_9 << 9); 1376 } else if ((lanes2 == 4) || (lanes2 == 5) || (lanes2 == 6)) { 1377 unsigned derived_9 = 0; 1378 if (lanes2 == 4) derived_9 = 1; 1379 else if (lanes2 == 5) derived_9 = 2; 1380 else if (lanes2 == 6) derived_9 = 3; 1381 else unreachable("No 
pattern match at pos 9"); 1382 1383 return 0x325800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not_result << 13) | (derived_9 << 9); 1384 } else { 1385 unreachable("No matching state found in fma_lshift_xor_v2i16"); 1386 } 1387} 1388 1389static inline unsigned 1390bi_pack_fma_lshift_xor_v4i8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 1391{ 1392 static uint8_t lanes2_table[] = { ~0, 0, ~0, ~0, 1, 2, 3, 4, ~0, ~0, ~0, ~0, ~0 }; 1393 assert(I->src[2].swizzle < 13); 1394 unsigned lanes2 = lanes2_table[I->src[2].swizzle]; 1395 assert(lanes2 < 8); 1396 unsigned not_result = I->not_result; 1397 assert(not_result < 2); 1398 if (lanes2 != 0) { 1399 unsigned derived_9 = 0; 1400 if (lanes2 == 1) derived_9 = 0; 1401 else if (lanes2 == 2) derived_9 = 1; 1402 else if (lanes2 == 3) derived_9 = 2; 1403 else if (lanes2 == 4) derived_9 = 3; 1404 else unreachable("No pattern match at pos 9"); 1405 1406 return 0x324000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not_result << 13) | (derived_9 << 9); 1407 } else if (lanes2 == 0) { 1408 return 0x325800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not_result << 13); 1409 } else { 1410 unreachable("No matching state found in fma_lshift_xor_v4i8"); 1411 } 1412} 1413 1414static inline unsigned 1415bi_pack_fma_mkvec_v2i16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 1416{ 1417 static uint8_t lane0_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 1418 assert(I->src[0].swizzle < 13); 1419 unsigned lane0 = lane0_table[I->src[0].swizzle]; 1420 assert(lane0 < 2); 1421 static uint8_t lane1_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 1422 assert(I->src[1].swizzle < 13); 1423 unsigned lane1 = lane1_table[I->src[1].swizzle]; 1424 assert(lane1 < 2); 1425 return 0x70f000 | (src0 << 0) | (src1 << 3) | (lane0 << 6) | (lane1 << 7); 1426} 1427 
1428static inline unsigned 1429bi_pack_fma_mkvec_v4i8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 1430{ 1431 static uint8_t lane0_table[] = { ~0, ~0, ~0, ~0, 0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0 }; 1432 assert(I->src[0].swizzle < 13); 1433 unsigned lane0 = lane0_table[I->src[0].swizzle]; 1434 assert(lane0 < 2); 1435 static uint8_t lane1_table[] = { ~0, ~0, ~0, ~0, 0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0 }; 1436 assert(I->src[1].swizzle < 13); 1437 unsigned lane1 = lane1_table[I->src[1].swizzle]; 1438 assert(lane1 < 2); 1439 static uint8_t lane2_table[] = { ~0, ~0, ~0, ~0, 0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0 }; 1440 assert(I->src[2].swizzle < 13); 1441 unsigned lane2 = lane2_table[I->src[2].swizzle]; 1442 assert(lane2 < 2); 1443 static uint8_t lane3_table[] = { ~0, ~0, ~0, ~0, 0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0 }; 1444 assert(I->src[3].swizzle < 13); 1445 unsigned lane3 = lane3_table[I->src[3].swizzle]; 1446 assert(lane3 < 2); 1447 return 0x710000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (lane0 << 12) | (lane1 << 13) | (lane2 << 14) | (lane3 << 15); 1448} 1449 1450static inline unsigned 1451bi_pack_fma_mov_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 1452{ 1453 1454 return 0x701968 | (src0 << 0); 1455} 1456 1457static inline unsigned 1458bi_pack_fma_nop(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 1459{ 1460 1461 return 0x701963; 1462} 1463 1464static inline unsigned 1465bi_pack_fma_popcount_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 1466{ 1467 1468 return 0x73c6d8 | (src0 << 0); 1469} 1470 1471static inline unsigned 1472bi_pack_fma_quiet_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src 
src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 1473{ 1474 1475 return 0x701970 | (src0 << 0); 1476} 1477 1478static inline unsigned 1479bi_pack_fma_quiet_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 1480{ 1481 static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 1482 assert(I->src[0].swizzle < 13); 1483 unsigned swz0 = swz0_table[I->src[0].swizzle]; 1484 assert(swz0 < 4); 1485 return 0x701900 | (src0 << 0) | (swz0 << 4); 1486} 1487 1488static inline unsigned 1489bi_pack_fma_rrot_double_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 1490{ 1491 unsigned bytes2 = I->bytes2; 1492 assert(bytes2 < 2); 1493 static uint8_t lane2_table[] = { ~0, ~0, ~0, ~0, 0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0 }; 1494 assert(I->src[2].swizzle < 13); 1495 unsigned lane2 = lane2_table[I->src[2].swizzle]; 1496 assert(lane2 < 2); 1497 unsigned result_word = I->result_word; 1498 assert(result_word < 2); 1499 return 0x33a000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (bytes2 << 9) | (lane2 << 10) | (result_word << 11); 1500} 1501 1502static inline unsigned 1503bi_pack_fma_rshift_and_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 1504{ 1505 static uint8_t lane2_table[] = { ~0, ~0, ~0, ~0, 0, 1, 2, 3, ~0, ~0, ~0, ~0, ~0 }; 1506 assert(I->src[2].swizzle < 13); 1507 unsigned lane2 = lane2_table[I->src[2].swizzle]; 1508 assert(lane2 < 4); 1509 unsigned not1 = I->src[1].neg; 1510 assert(not1 < 2); 1511 static uint8_t not_result_table[] = { 1, 0 }; 1512 unsigned not_result = not_result_table[I->not_result]; 1513 assert(not_result < 2); 1514 return 0x301000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (lane2 << 9) | (not1 << 14) | (not_result << 15); 1515} 1516 1517static inline unsigned 
1518bi_pack_fma_rshift_and_v2i16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 1519{ 1520 static uint8_t lanes2_table[] = { ~0, ~0, ~0, ~0, 0, 1, 2, 3, 4, 5, ~0, ~0, 6 }; 1521 assert(I->src[2].swizzle < 13); 1522 unsigned lanes2 = lanes2_table[I->src[2].swizzle]; 1523 assert(lanes2 < 8); 1524 unsigned not1 = I->src[1].neg; 1525 assert(not1 < 2); 1526 static uint8_t not_result_table[] = { 1, 0 }; 1527 unsigned not_result = not_result_table[I->not_result]; 1528 assert(not_result < 2); 1529 if ((lanes2 == 0) || (lanes2 == 1) || (lanes2 == 2) || (lanes2 == 3)) { 1530 unsigned derived_9 = 0; 1531 if (lanes2 == 0) derived_9 = 0; 1532 else if (lanes2 == 1) derived_9 = 1; 1533 else if (lanes2 == 2) derived_9 = 2; 1534 else if (lanes2 == 3) derived_9 = 3; 1535 else unreachable("No pattern match at pos 9"); 1536 1537 return 0x300800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9); 1538 } else if ((lanes2 == 4) || (lanes2 == 5) || (lanes2 == 6)) { 1539 unsigned derived_9 = 0; 1540 if (lanes2 == 4) derived_9 = 1; 1541 else if (lanes2 == 5) derived_9 = 2; 1542 else if (lanes2 == 6) derived_9 = 3; 1543 else unreachable("No pattern match at pos 9"); 1544 1545 return 0x301800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9); 1546 } else { 1547 unreachable("No matching state found in fma_rshift_and_v2i16"); 1548 } 1549} 1550 1551static inline unsigned 1552bi_pack_fma_rshift_and_v4i8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 1553{ 1554 static uint8_t lanes2_table[] = { ~0, 0, ~0, ~0, 1, 2, 3, 4, ~0, ~0, ~0, ~0, ~0 }; 1555 assert(I->src[2].swizzle < 13); 1556 unsigned lanes2 = lanes2_table[I->src[2].swizzle]; 1557 assert(lanes2 < 8); 1558 unsigned not1 = I->src[1].neg; 1559 assert(not1 < 2); 1560 static uint8_t 
not_result_table[] = { 1, 0 }; 1561 unsigned not_result = not_result_table[I->not_result]; 1562 assert(not_result < 2); 1563 if (lanes2 != 0) { 1564 unsigned derived_9 = 0; 1565 if (lanes2 == 1) derived_9 = 0; 1566 else if (lanes2 == 2) derived_9 = 1; 1567 else if (lanes2 == 3) derived_9 = 2; 1568 else if (lanes2 == 4) derived_9 = 3; 1569 else unreachable("No pattern match at pos 9"); 1570 1571 return 0x300000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9); 1572 } else if (lanes2 == 0) { 1573 return 0x301800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15); 1574 } else { 1575 unreachable("No matching state found in fma_rshift_and_v4i8"); 1576 } 1577} 1578 1579static inline unsigned 1580bi_pack_fma_rshift_double_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 1581{ 1582 unsigned bytes2 = I->bytes2; 1583 assert(bytes2 < 2); 1584 static uint8_t lane2_table[] = { ~0, ~0, ~0, ~0, 0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0 }; 1585 assert(I->src[2].swizzle < 13); 1586 unsigned lane2 = lane2_table[I->src[2].swizzle]; 1587 assert(lane2 < 2); 1588 unsigned result_word = I->result_word; 1589 assert(result_word < 2); 1590 return 0x33d000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (bytes2 << 9) | (lane2 << 10) | (result_word << 11); 1591} 1592 1593static inline unsigned 1594bi_pack_fma_rshift_or_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 1595{ 1596 static uint8_t lane2_table[] = { ~0, ~0, ~0, ~0, 0, 1, 2, 3, ~0, ~0, ~0, ~0, ~0 }; 1597 assert(I->src[2].swizzle < 13); 1598 unsigned lane2 = lane2_table[I->src[2].swizzle]; 1599 assert(lane2 < 4); 1600 static uint8_t not1_table[] = { 1, 0 }; 1601 unsigned not1 = not1_table[I->src[1].neg]; 1602 assert(not1 < 2); 1603 unsigned not_result = I->not_result; 1604 assert(not_result < 2); 
1605 return 0x303000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (lane2 << 9) | (not1 << 14) | (not_result << 15); 1606} 1607 1608static inline unsigned 1609bi_pack_fma_rshift_or_v2i16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 1610{ 1611 static uint8_t lanes2_table[] = { ~0, ~0, ~0, ~0, 0, 1, 2, 3, 4, 5, ~0, ~0, 6 }; 1612 assert(I->src[2].swizzle < 13); 1613 unsigned lanes2 = lanes2_table[I->src[2].swizzle]; 1614 assert(lanes2 < 8); 1615 static uint8_t not1_table[] = { 1, 0 }; 1616 unsigned not1 = not1_table[I->src[1].neg]; 1617 assert(not1 < 2); 1618 unsigned not_result = I->not_result; 1619 assert(not_result < 2); 1620 if ((lanes2 == 0) || (lanes2 == 1) || (lanes2 == 2) || (lanes2 == 3)) { 1621 unsigned derived_9 = 0; 1622 if (lanes2 == 0) derived_9 = 0; 1623 else if (lanes2 == 1) derived_9 = 1; 1624 else if (lanes2 == 2) derived_9 = 2; 1625 else if (lanes2 == 3) derived_9 = 3; 1626 else unreachable("No pattern match at pos 9"); 1627 1628 return 0x302800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9); 1629 } else if ((lanes2 == 4) || (lanes2 == 5) || (lanes2 == 6)) { 1630 unsigned derived_9 = 0; 1631 if (lanes2 == 4) derived_9 = 1; 1632 else if (lanes2 == 5) derived_9 = 2; 1633 else if (lanes2 == 6) derived_9 = 3; 1634 else unreachable("No pattern match at pos 9"); 1635 1636 return 0x303800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9); 1637 } else { 1638 unreachable("No matching state found in fma_rshift_or_v2i16"); 1639 } 1640} 1641 1642static inline unsigned 1643bi_pack_fma_rshift_or_v4i8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 1644{ 1645 static uint8_t lanes2_table[] = { ~0, 0, ~0, ~0, 1, 2, 3, 4, ~0, ~0, ~0, ~0, ~0 }; 1646 assert(I->src[2].swizzle < 13); 1647 unsigned lanes2 = 
lanes2_table[I->src[2].swizzle]; 1648 assert(lanes2 < 8); 1649 static uint8_t not1_table[] = { 1, 0 }; 1650 unsigned not1 = not1_table[I->src[1].neg]; 1651 assert(not1 < 2); 1652 unsigned not_result = I->not_result; 1653 assert(not_result < 2); 1654 if (lanes2 != 0) { 1655 unsigned derived_9 = 0; 1656 if (lanes2 == 1) derived_9 = 0; 1657 else if (lanes2 == 2) derived_9 = 1; 1658 else if (lanes2 == 3) derived_9 = 2; 1659 else if (lanes2 == 4) derived_9 = 3; 1660 else unreachable("No pattern match at pos 9"); 1661 1662 return 0x302000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9); 1663 } else if (lanes2 == 0) { 1664 return 0x303800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15); 1665 } else { 1666 unreachable("No matching state found in fma_rshift_or_v4i8"); 1667 } 1668} 1669 1670static inline unsigned 1671bi_pack_fma_rshift_xor_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 1672{ 1673 static uint8_t lane2_table[] = { ~0, ~0, ~0, ~0, 0, 1, 2, 3, ~0, ~0, ~0, ~0, ~0 }; 1674 assert(I->src[2].swizzle < 13); 1675 unsigned lane2 = lane2_table[I->src[2].swizzle]; 1676 assert(lane2 < 4); 1677 unsigned not_result = I->not_result; 1678 assert(not_result < 2); 1679 return 0x321000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (lane2 << 9) | (not_result << 13); 1680} 1681 1682static inline unsigned 1683bi_pack_fma_rshift_xor_v2i16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 1684{ 1685 static uint8_t lanes2_table[] = { ~0, ~0, ~0, ~0, 0, 1, 2, 3, 4, 5, ~0, ~0, 6 }; 1686 assert(I->src[2].swizzle < 13); 1687 unsigned lanes2 = lanes2_table[I->src[2].swizzle]; 1688 assert(lanes2 < 8); 1689 unsigned not_result = I->not_result; 1690 assert(not_result < 2); 1691 if ((lanes2 == 0) || (lanes2 == 1) || (lanes2 == 2) || (lanes2 == 3)) { 
1692 unsigned derived_9 = 0; 1693 if (lanes2 == 0) derived_9 = 0; 1694 else if (lanes2 == 1) derived_9 = 1; 1695 else if (lanes2 == 2) derived_9 = 2; 1696 else if (lanes2 == 3) derived_9 = 3; 1697 else unreachable("No pattern match at pos 9"); 1698 1699 return 0x320800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not_result << 13) | (derived_9 << 9); 1700 } else if ((lanes2 == 4) || (lanes2 == 5) || (lanes2 == 6)) { 1701 unsigned derived_9 = 0; 1702 if (lanes2 == 4) derived_9 = 1; 1703 else if (lanes2 == 5) derived_9 = 2; 1704 else if (lanes2 == 6) derived_9 = 3; 1705 else unreachable("No pattern match at pos 9"); 1706 1707 return 0x321800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not_result << 13) | (derived_9 << 9); 1708 } else { 1709 unreachable("No matching state found in fma_rshift_xor_v2i16"); 1710 } 1711} 1712 1713static inline unsigned 1714bi_pack_fma_rshift_xor_v4i8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 1715{ 1716 static uint8_t lanes2_table[] = { ~0, 0, ~0, ~0, 1, 2, 3, 4, ~0, ~0, ~0, ~0, ~0 }; 1717 assert(I->src[2].swizzle < 13); 1718 unsigned lanes2 = lanes2_table[I->src[2].swizzle]; 1719 assert(lanes2 < 8); 1720 unsigned not_result = I->not_result; 1721 assert(not_result < 2); 1722 if (lanes2 != 0) { 1723 unsigned derived_9 = 0; 1724 if (lanes2 == 1) derived_9 = 0; 1725 else if (lanes2 == 2) derived_9 = 1; 1726 else if (lanes2 == 3) derived_9 = 2; 1727 else if (lanes2 == 4) derived_9 = 3; 1728 else unreachable("No pattern match at pos 9"); 1729 1730 return 0x320000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not_result << 13) | (derived_9 << 9); 1731 } else if (lanes2 == 0) { 1732 return 0x321800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not_result << 13); 1733 } else { 1734 unreachable("No matching state found in fma_rshift_xor_v4i8"); 1735 } 1736} 1737 1738static inline unsigned 1739bi_pack_fma_s16_to_s32(bi_instr *I, enum bifrost_packed_src src0, 
enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 1740{ 1741 static uint8_t lane0_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 1742 assert(I->src[0].swizzle < 13); 1743 unsigned lane0 = lane0_table[I->src[0].swizzle]; 1744 assert(lane0 < 2); 1745 return 0x700cc0 | (src0 << 0) | (lane0 << 4); 1746} 1747 1748static inline unsigned 1749bi_pack_fma_s8_to_s32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 1750{ 1751 static uint8_t lane0_table[] = { ~0, ~0, ~0, ~0, 0, 1, 2, 3, ~0, ~0, ~0, ~0, ~0 }; 1752 assert(I->src[0].swizzle < 13); 1753 unsigned lane0 = lane0_table[I->src[0].swizzle]; 1754 assert(lane0 < 4); 1755 return 0x700b40 | (src0 << 0) | (lane0 << 4); 1756} 1757 1758static inline unsigned 1759bi_pack_fma_seg_add(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 1760{ 1761 static uint8_t seg_table[] = { ~0, 2, 0, ~0, ~0, 7 }; 1762 assert(I->seg < 6); 1763 unsigned seg = seg_table[I->seg]; 1764 assert(seg < 8); 1765 unsigned preserve_null = I->preserve_null; 1766 assert(preserve_null < 2); 1767 return 0x701500 | (src0 << 0) | (seg << 3) | (preserve_null << 7); 1768} 1769 1770static inline unsigned 1771bi_pack_fma_shaddxl_i64(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 1772{ 1773 unsigned shift = I->shift; 1774 assert(shift < 0x8); 1775 return 0x70e600 | (src0 << 0) | (src1 << 3) | (shift << 6); 1776} 1777 1778static inline unsigned 1779bi_pack_fma_shaddxl_s32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 1780{ 1781 static uint8_t lane1_table[] = { 0, 2, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 1782 assert(I->src[1].swizzle < 13); 1783 unsigned lane1 = 
lane1_table[I->src[1].swizzle]; 1784 assert(lane1 < 4); 1785 unsigned shift = I->shift; 1786 assert(shift < 0x8); 1787 return 0x70e800 | (src0 << 0) | (src1 << 3) | (lane1 << 9) | (shift << 6); 1788} 1789 1790static inline unsigned 1791bi_pack_fma_shaddxl_u32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 1792{ 1793 static uint8_t lane1_table[] = { 0, 2, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 1794 assert(I->src[1].swizzle < 13); 1795 unsigned lane1 = lane1_table[I->src[1].swizzle]; 1796 assert(lane1 < 4); 1797 unsigned shift = I->shift; 1798 assert(shift < 0x8); 1799 return 0x70e000 | (src0 << 0) | (src1 << 3) | (lane1 << 9) | (shift << 6); 1800} 1801 1802static inline unsigned 1803bi_pack_fma_u16_to_u32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 1804{ 1805 static uint8_t lane0_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 1806 assert(I->src[0].swizzle < 13); 1807 unsigned lane0 = lane0_table[I->src[0].swizzle]; 1808 assert(lane0 < 2); 1809 return 0x700cc8 | (src0 << 0) | (lane0 << 4); 1810} 1811 1812static inline unsigned 1813bi_pack_fma_u8_to_u32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 1814{ 1815 static uint8_t lane0_table[] = { ~0, ~0, ~0, ~0, 0, 1, 2, 3, ~0, ~0, ~0, ~0, ~0 }; 1816 assert(I->src[0].swizzle < 13); 1817 unsigned lane0 = lane0_table[I->src[0].swizzle]; 1818 assert(lane0 < 4); 1819 return 0x700b48 | (src0 << 0) | (lane0 << 4); 1820} 1821 1822static inline unsigned 1823bi_pack_fma_v2f32_to_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 1824{ 1825 unsigned abs0 = I->src[0].abs; 1826 assert(abs0 < 2); 1827 unsigned abs1 = I->src[1].abs; 1828 assert(abs1 < 2); 1829 unsigned 
neg0 = I->src[0].neg; 1830 assert(neg0 < 2); 1831 unsigned neg1 = I->src[1].neg; 1832 assert(neg1 < 2); 1833 unsigned clamp = I->clamp; 1834 assert(clamp < 4); 1835 unsigned round = I->round; 1836 assert(round < 8); 1837 unsigned derived_6 = 0; 1838 if ((abs0 == 0) && (abs1 == 0)) derived_6 = 0; 1839 else if ((abs0 == 1) && (abs1 == 1)) derived_6 = 1; 1840 else unreachable("No pattern match at pos 6"); 1841 1842 unsigned derived_7 = 0; 1843 if ((neg0 == 0) && (neg1 == 0)) derived_7 = 0; 1844 else if ((neg0 == 1) && (neg1 == 1)) derived_7 = 1; 1845 else unreachable("No pattern match at pos 7"); 1846 1847 return 0x6e8000 | (src0 << 0) | (src1 << 3) | (clamp << 8) | (round << 10) | (derived_6 << 6) | (derived_7 << 7); 1848} 1849 1850static inline unsigned 1851bi_pack_fma_vn_asst1_f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 1852{ 1853 unsigned h = I->h; 1854 assert(h < 2); 1855 unsigned l = I->l; 1856 assert(l < 2); 1857 unsigned neg2 = I->src[2].neg; 1858 assert(neg2 < 2); 1859 return 0x6eb000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (h << 9) | (l << 10) | (neg2 << 11); 1860} 1861 1862static inline unsigned 1863bi_pack_fma_vn_asst1_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 1864{ 1865 unsigned neg2 = I->src[2].neg; 1866 assert(neg2 < 2); 1867 return 0x27c000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (neg2 << 12); 1868} 1869 1870static inline unsigned 1871bi_pack_add_acmpstore_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 1872{ 1873 unsigned seg = I->seg; 1874 assert(seg < 2); 1875 return 0x648c0 | (src0 << 0) | (src1 << 3) | (seg << 9); 1876} 1877 1878static inline unsigned 1879bi_pack_add_acmpstore_i64(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, 
enum bifrost_packed_src src2, enum bifrost_packed_src src3) 1880{ 1881 unsigned seg = I->seg; 1882 assert(seg < 2); 1883 return 0x64900 | (src0 << 0) | (src1 << 3) | (seg << 9); 1884} 1885 1886static inline unsigned 1887bi_pack_add_acmpxchg_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 1888{ 1889 unsigned seg = I->seg; 1890 assert(seg < 2); 1891 return 0x644c0 | (src0 << 0) | (src1 << 3) | (seg << 9); 1892} 1893 1894static inline unsigned 1895bi_pack_add_acmpxchg_i64(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 1896{ 1897 unsigned seg = I->seg; 1898 assert(seg < 2); 1899 return 0x64500 | (src0 << 0) | (src1 << 3) | (seg << 9); 1900} 1901 1902static inline unsigned 1903bi_pack_add_atest(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 1904{ 1905 assert((1 << src0) & 0xf7); 1906 assert((1 << src1) & 0xf7); 1907 static uint8_t widen1_table[] = { 2, 1, ~0, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 1908 assert(I->src[1].swizzle < 13); 1909 unsigned widen1 = widen1_table[I->src[1].swizzle]; 1910 assert(widen1 < 4); 1911 return 0xc8f00 | (src0 << 0) | (src1 << 3) | (widen1 << 6); 1912} 1913 1914static inline unsigned 1915bi_pack_add_atom_cx(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 1916{ 1917 1918 return 0xd7400 | (src0 << 0) | (src1 << 3) | (src2 << 6); 1919} 1920 1921static inline unsigned 1922bi_pack_add_axchg_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 1923{ 1924 unsigned seg = I->seg; 1925 assert(seg < 2); 1926 return 0x640c0 | (src0 << 0) | (src1 << 3) | (seg << 9); 1927} 1928 1929static inline unsigned 
1930bi_pack_add_axchg_i64(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 1931{ 1932 unsigned seg = I->seg; 1933 assert(seg < 2); 1934 return 0x64100 | (src0 << 0) | (src1 << 3) | (seg << 9); 1935} 1936 1937static inline unsigned 1938bi_pack_add_barrier(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 1939{ 1940 1941 return 0xd7874; 1942} 1943 1944static inline unsigned 1945bi_pack_add_blend(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 1946{ 1947 assert((1 << src1) & 0xf7); 1948 assert((1 << src2) & 0xf7); 1949 return 0xca800 | (src0 << 0) | (src1 << 3) | (src2 << 6); 1950} 1951 1952static inline unsigned 1953bi_pack_add_branch_f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 1954{ 1955 assert((1 << src2) & 0xf7); 1956 static uint8_t widen0_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 1957 assert(I->src[0].swizzle < 13); 1958 unsigned widen0 = widen0_table[I->src[0].swizzle]; 1959 assert(widen0 < 4); 1960 static uint8_t widen1_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 1961 assert(I->src[1].swizzle < 13); 1962 unsigned widen1 = widen1_table[I->src[1].swizzle]; 1963 assert(widen1 < 4); 1964 unsigned cmpf = I->cmpf; 1965 assert(cmpf < 8); 1966 if (((widen0 == 1) && (widen1 == 2)) || ((widen0 == widen1) && (src0 > src1) && ((cmpf == 0) || (cmpf == 1) || (cmpf == 4))) || ((widen0 == widen1) && (src0 < src1) && ((cmpf == 3) || (cmpf == 2) || (cmpf == 5)))) { 1967 { unsigned temp = src0; src0 = src1; src1 = temp; } 1968 { unsigned temp = widen0; widen0 = widen1; widen1 = temp; } 1969 if (cmpf == 4) cmpf = 1; 1970 else if (cmpf == 5) cmpf = 2; 1971 else if (cmpf == 1) cmpf = 4; 1972 else if (cmpf 
== 2) cmpf = 5; 1973 } 1974 1975 unsigned derived_12 = 0; 1976 if ((widen0 == 1) && (widen1 == 1)) derived_12 = 1; 1977 else if ((widen0 == 2) && (widen1 == 2)) derived_12 = 2; 1978 else if ((widen0 == 2) && (widen1 == 1) && ((cmpf == 3) || (cmpf == 2) || (cmpf == 5))) derived_12 = 3; 1979 else if ((widen0 == 2) && (widen1 == 1) && ((cmpf == 0) || (cmpf == 1) || (cmpf == 4))) derived_12 = 4; 1980 else unreachable("No pattern match at pos 12"); 1981 1982 unsigned derived_9 = 0; 1983 if ((widen0 == widen1) && (src0 == src1) && ((cmpf == 1) || (cmpf == 4))) derived_9 = 4; 1984 else if (((widen0 == 2) && (widen1 == 1) && ((cmpf == 0) || (cmpf == 3))) || ((widen0 == widen1) && (src0 < src1) && (cmpf == 0)) || ((widen0 == widen1) && (src0 >= src1) && (cmpf == 3))) derived_9 = 5; 1985 else if (((widen0 == 2) && (widen1 == 1) && ((cmpf == 1) || (cmpf == 2))) || ((widen0 == widen1) && (src0 < src1) && (cmpf == 1)) || ((widen0 == widen1) && (src0 >= src1) && (cmpf == 2))) derived_9 = 6; 1986 else if (((widen0 == 2) && (widen1 == 1) && ((cmpf == 4) || (cmpf == 5))) || ((widen0 == widen1) && (src0 < src1) && (cmpf == 4)) || ((widen0 == widen1) && (src0 >= src1) && (cmpf == 5)) || ((widen0 == widen1) && (src0 == src1) && (cmpf == 0))) derived_9 = 7; 1987 else unreachable("No pattern match at pos 9"); 1988 1989 return 0x68000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_12 << 12) | (derived_9 << 9); 1990} 1991 1992static inline unsigned 1993bi_pack_add_branch_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 1994{ 1995 assert((1 << src2) & 0xf7); 1996 static uint8_t widen0_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 1997 assert(I->src[0].swizzle < 13); 1998 unsigned widen0 = widen0_table[I->src[0].swizzle]; 1999 assert(widen0 < 4); 2000 static uint8_t widen1_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 2001 assert(I->src[1].swizzle < 13); 2002 
unsigned widen1 = widen1_table[I->src[1].swizzle]; 2003 assert(widen1 < 4); 2004 unsigned cmpf = I->cmpf; 2005 assert(cmpf < 8); 2006 if (((widen0 != 0) && (widen1 == 0)) || ((widen0 == 0) && (widen1 == 0) && (src0 > src1) && ((cmpf == 0) || (cmpf == 1) || (cmpf == 4))) || ((widen0 == 0) && (widen1 == 0) && (src0 < src1) && ((cmpf == 3) || (cmpf == 2) || (cmpf == 5)))) { 2007 { unsigned temp = src0; src0 = src1; src1 = temp; } 2008 { unsigned temp = widen0; widen0 = widen1; widen1 = temp; } 2009 if (cmpf == 4) cmpf = 1; 2010 else if (cmpf == 5) cmpf = 2; 2011 else if (cmpf == 1) cmpf = 4; 2012 else if (cmpf == 2) cmpf = 5; 2013 } 2014 2015 unsigned derived_12 = 0; 2016 if ((widen0 == 0) && (widen1 == 0)) derived_12 = 0; 2017 else if ((widen0 == 0) && (widen1 == 1)) derived_12 = 5; 2018 else if ((widen0 == 0) && (widen1 == 2)) derived_12 = 6; 2019 else unreachable("No pattern match at pos 12"); 2020 2021 unsigned derived_9 = 0; 2022 if ((widen0 == 0) && (widen1 != 0) && (cmpf == 3)) derived_9 = 1; 2023 else if ((widen0 == 0) && (widen1 != 0) && (cmpf == 2)) derived_9 = 2; 2024 else if ((widen0 == 0) && (widen1 != 0) && (cmpf == 5)) derived_9 = 3; 2025 else if ((widen0 == 0) && (widen1 == 0) && (src0 == src1) && ((cmpf == 1) || (cmpf == 4))) derived_9 = 4; 2026 else if (((widen0 == 0) && (widen1 != 0) && (cmpf == 0)) || ((widen0 == 0) && (widen1 == 0) && (src0 < src1) && (cmpf == 0)) || ((widen0 == 0) && (widen1 == 0) && (src0 >= src1) && (cmpf == 3))) derived_9 = 5; 2027 else if (((widen0 == 0) && (widen1 != 0) && (cmpf == 1)) || ((widen0 == 0) && (widen1 == 0) && (src0 < src1) && (cmpf == 1)) || ((widen0 == 0) && (widen1 == 0) && (src0 >= src1) && (cmpf == 2))) derived_9 = 6; 2028 else if (((widen0 == 0) && (widen1 != 0) && (cmpf == 4)) || ((widen0 == 0) && (widen1 == 0) && (src0 < src1) && (cmpf == 4)) || ((widen0 == 0) && (widen1 == 0) && (src0 >= src1) && (cmpf == 5)) || ((widen0 == 0) && (widen1 == 0) && (src0 == src1) && (cmpf == 0))) derived_9 = 7; 2029 else 
unreachable("No pattern match at pos 9"); 2030 2031 return 0x68000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_12 << 12) | (derived_9 << 9); 2032} 2033 2034static inline unsigned 2035bi_pack_add_branch_i16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 2036{ 2037 assert((1 << src2) & 0xf7); 2038 static uint8_t widen0_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 2039 assert(I->src[0].swizzle < 13); 2040 unsigned widen0 = widen0_table[I->src[0].swizzle]; 2041 assert(widen0 < 4); 2042 static uint8_t widen1_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 2043 assert(I->src[1].swizzle < 13); 2044 unsigned widen1 = widen1_table[I->src[1].swizzle]; 2045 assert(widen1 < 4); 2046 static uint8_t cmpf_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0 }; 2047 assert(I->cmpf < 9); 2048 unsigned cmpf = cmpf_table[I->cmpf]; 2049 assert(cmpf < 2); 2050 if (((widen0 == 1) && (widen1 == 2)) || ((widen0 == widen1) && (src0 > src1) && (cmpf == 0)) || ((widen0 == widen1) && (src0 < src1) && (cmpf == 1))) { 2051 { unsigned temp = src0; src0 = src1; src1 = temp; } 2052 { unsigned temp = widen0; widen0 = widen1; widen1 = temp; } 2053 } 2054 2055 unsigned derived_12 = 0; 2056 if ((widen0 == 1) && (widen1 == 1)) derived_12 = 1; 2057 else if ((widen0 == 2) && (widen1 == 2)) derived_12 = 2; 2058 else if ((widen0 == 2) && (widen1 == 1) && (cmpf == 1)) derived_12 = 3; 2059 else if ((widen0 == 2) && (widen1 == 1) && (cmpf == 0)) derived_12 = 4; 2060 else unreachable("No pattern match at pos 12"); 2061 2062 unsigned derived_9 = 0; 2063 if ((widen0 == widen1) && (src0 == src1) && (cmpf == 0)) derived_9 = 1; 2064 else if (((widen0 == 2) && (widen1 == 1)) || ((widen0 == widen1) && (src0 < src1) && (cmpf == 0)) || ((widen0 == widen1) && (src0 >= src1) && (cmpf == 1))) derived_9 = 4; 2065 else unreachable("No pattern match at pos 9"); 2066 2067 return 0x68000 | (src0 << 0) | (src1 
<< 3) | (src2 << 6) | (derived_12 << 12) | (derived_9 << 9); 2068} 2069 2070static inline unsigned 2071bi_pack_add_branch_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 2072{ 2073 assert((1 << src2) & 0xf7); 2074 static uint8_t widen0_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 2075 assert(I->src[0].swizzle < 13); 2076 unsigned widen0 = widen0_table[I->src[0].swizzle]; 2077 assert(widen0 < 4); 2078 static uint8_t widen1_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 2079 assert(I->src[1].swizzle < 13); 2080 unsigned widen1 = widen1_table[I->src[1].swizzle]; 2081 assert(widen1 < 4); 2082 static uint8_t cmpf_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0 }; 2083 assert(I->cmpf < 9); 2084 unsigned cmpf = cmpf_table[I->cmpf]; 2085 assert(cmpf < 2); 2086 if (((src0 > src1) && (cmpf == 0)) || ((src0 < src1) && (cmpf == 1))) { 2087 { unsigned temp = src0; src0 = src1; src1 = temp; } 2088 { unsigned temp = widen0; widen0 = widen1; widen1 = temp; } 2089 } 2090 2091 unsigned derived_12 = 0; 2092 if ((widen0 == 0) && (widen1 == 0)) derived_12 = 0; 2093 else unreachable("No pattern match at pos 12"); 2094 2095 unsigned derived_9 = 0; 2096 if ((src0 == src1) && (cmpf == 0)) derived_9 = 1; 2097 else if (((src0 < src1) && (cmpf == 0)) || ((src0 >= src1) && (cmpf == 1))) derived_9 = 4; 2098 else unreachable("No pattern match at pos 9"); 2099 2100 return 0x68000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_12 << 12) | (derived_9 << 9); 2101} 2102 2103static inline unsigned 2104bi_pack_add_branch_s16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 2105{ 2106 assert((1 << src2) & 0xf7); 2107 static uint8_t widen0_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 2108 assert(I->src[0].swizzle < 13); 2109 unsigned widen0 = widen0_table[I->src[0].swizzle]; 2110 
assert(widen0 < 4); 2111 static uint8_t widen1_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 2112 assert(I->src[1].swizzle < 13); 2113 unsigned widen1 = widen1_table[I->src[1].swizzle]; 2114 assert(widen1 < 4); 2115 static uint8_t cmpf_table[] = { ~0, 0, 1, ~0, 2, 3, ~0, ~0, ~0 }; 2116 assert(I->cmpf < 9); 2117 unsigned cmpf = cmpf_table[I->cmpf]; 2118 assert(cmpf < 4); 2119 if (((widen0 == 1) && (widen1 == 2)) || ((widen0 == widen1) && (src0 > src1))) { 2120 { unsigned temp = src0; src0 = src1; src1 = temp; } 2121 { unsigned temp = widen0; widen0 = widen1; widen1 = temp; } 2122 if (cmpf == 2) cmpf = 0; 2123 else if (cmpf == 3) cmpf = 1; 2124 else if (cmpf == 0) cmpf = 2; 2125 else if (cmpf == 1) cmpf = 3; 2126 } 2127 2128 unsigned derived_12 = 0; 2129 if ((widen0 == 1) && (widen1 == 1)) derived_12 = 1; 2130 else if ((widen0 == 2) && (widen1 == 2)) derived_12 = 2; 2131 else if ((widen0 == 2) && (widen1 == 1)) derived_12 = 4; 2132 else unreachable("No pattern match at pos 12"); 2133 2134 unsigned derived_9 = 0; 2135 if (((widen0 == 2) && (widen1 == 1) && (cmpf == 2)) || ((widen0 == widen1) && (src0 < src1) && (cmpf == 2))) derived_9 = 0; 2136 else if (((widen0 == 2) && (widen1 == 1) && (cmpf == 3)) || ((widen0 == widen1) && (src0 < src1) && (cmpf == 3)) || ((widen0 == widen1) && (src0 == src1) && ((cmpf == 3) || (cmpf == 1)))) derived_9 = 1; 2137 else if (((widen0 == 2) && (widen1 == 1) && (cmpf == 1)) || ((widen0 == widen1) && (src0 < src1) && (cmpf == 1))) derived_9 = 2; 2138 else if (((widen0 == 2) && (widen1 == 1) && (cmpf == 0)) || ((widen0 == widen1) && (src0 < src1) && (cmpf == 0))) derived_9 = 3; 2139 else if ((widen0 == widen1) && (src0 == src1) && ((cmpf == 2) || (cmpf == 0))) derived_9 = 4; 2140 else unreachable("No pattern match at pos 9"); 2141 2142 return 0x68000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_12 << 12) | (derived_9 << 9); 2143} 2144 2145static inline unsigned 2146bi_pack_add_branch_s32(bi_instr *I, enum 
bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 2147{ 2148 assert((1 << src2) & 0xf7); 2149 static uint8_t widen0_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 2150 assert(I->src[0].swizzle < 13); 2151 unsigned widen0 = widen0_table[I->src[0].swizzle]; 2152 assert(widen0 < 4); 2153 static uint8_t widen1_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 2154 assert(I->src[1].swizzle < 13); 2155 unsigned widen1 = widen1_table[I->src[1].swizzle]; 2156 assert(widen1 < 4); 2157 static uint8_t cmpf_table[] = { ~0, 0, 1, ~0, 2, 3, ~0, ~0, ~0 }; 2158 assert(I->cmpf < 9); 2159 unsigned cmpf = cmpf_table[I->cmpf]; 2160 assert(cmpf < 4); 2161 if (src0 > src1) { 2162 { unsigned temp = src0; src0 = src1; src1 = temp; } 2163 { unsigned temp = widen0; widen0 = widen1; widen1 = temp; } 2164 if (cmpf == 2) cmpf = 0; 2165 else if (cmpf == 3) cmpf = 1; 2166 else if (cmpf == 0) cmpf = 2; 2167 else if (cmpf == 1) cmpf = 3; 2168 } 2169 2170 unsigned derived_12 = 0; 2171 if ((widen0 == 0) && (widen1 == 0)) derived_12 = 0; 2172 else unreachable("No pattern match at pos 12"); 2173 2174 unsigned derived_9 = 0; 2175 if ((src0 < src1) && (cmpf == 2)) derived_9 = 0; 2176 else if (((src0 < src1) && (cmpf == 3)) || ((src0 == src1) && ((cmpf == 3) || (cmpf == 1)))) derived_9 = 1; 2177 else if ((src0 < src1) && (cmpf == 1)) derived_9 = 2; 2178 else if ((src0 < src1) && (cmpf == 0)) derived_9 = 3; 2179 else if ((src0 == src1) && ((cmpf == 2) || (cmpf == 0))) derived_9 = 4; 2180 else unreachable("No pattern match at pos 9"); 2181 2182 return 0x68000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_12 << 12) | (derived_9 << 9); 2183} 2184 2185static inline unsigned 2186bi_pack_add_branch_u16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 2187{ 2188 assert((1 << src2) & 0xf7); 2189 static uint8_t widen0_table[] = { 1, 
0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 2190 assert(I->src[0].swizzle < 13); 2191 unsigned widen0 = widen0_table[I->src[0].swizzle]; 2192 assert(widen0 < 4); 2193 static uint8_t widen1_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 2194 assert(I->src[1].swizzle < 13); 2195 unsigned widen1 = widen1_table[I->src[1].swizzle]; 2196 assert(widen1 < 4); 2197 static uint8_t cmpf_table[] = { ~0, 0, 1, ~0, 2, 3, ~0, ~0, ~0 }; 2198 assert(I->cmpf < 9); 2199 unsigned cmpf = cmpf_table[I->cmpf]; 2200 assert(cmpf < 4); 2201 if (((widen0 == 1) && (widen1 == 2)) || ((widen0 == widen1) && (src0 < src1))) { 2202 { unsigned temp = src0; src0 = src1; src1 = temp; } 2203 { unsigned temp = widen0; widen0 = widen1; widen1 = temp; } 2204 if (cmpf == 2) cmpf = 0; 2205 else if (cmpf == 3) cmpf = 1; 2206 else if (cmpf == 0) cmpf = 2; 2207 else if (cmpf == 1) cmpf = 3; 2208 } 2209 2210 unsigned derived_12 = 0; 2211 if ((widen0 == 1) && (widen1 == 1)) derived_12 = 1; 2212 else if ((widen0 == 2) && (widen1 == 2)) derived_12 = 2; 2213 else if ((widen0 == 2) && (widen1 == 1)) derived_12 = 3; 2214 else unreachable("No pattern match at pos 12"); 2215 2216 unsigned derived_9 = 0; 2217 if (((widen0 == 2) && (widen1 == 1) && (cmpf == 2)) || ((widen0 == widen1) && (src0 >= src1) && (cmpf == 2))) derived_9 = 0; 2218 else if (((widen0 == 2) && (widen1 == 1) && (cmpf == 3)) || ((widen0 == widen1) && (src0 >= src1) && (cmpf == 3))) derived_9 = 1; 2219 else if (((widen0 == 2) && (widen1 == 1) && (cmpf == 1)) || ((widen0 == widen1) && (src0 >= src1) && (cmpf == 1))) derived_9 = 2; 2220 else if (((widen0 == 2) && (widen1 == 1) && (cmpf == 0)) || ((widen0 == widen1) && (src0 >= src1) && (cmpf == 0))) derived_9 = 3; 2221 else unreachable("No pattern match at pos 9"); 2222 2223 return 0x68000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_12 << 12) | (derived_9 << 9); 2224}
/* The line above is the tail of a definition that begins before this span;
 * it is carried through unchanged. */

/*
 * Pack add_branch_u32. When src0 < src1 the two sources and their widen
 * fields are exchanged and cmpf is remapped (0<->2, 1<->3) before encoding.
 */
static inline unsigned
bi_pack_add_branch_u32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    static const uint8_t widen_lut[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
    static const uint8_t cmpf_lut[] = { ~0, 0, 1, ~0, 2, 3, ~0, ~0, ~0 };

    assert((1 << src2) & 0xf7);
    assert(I->src[0].swizzle < 13);
    unsigned widen0 = widen_lut[I->src[0].swizzle];
    assert(widen0 < 4);
    assert(I->src[1].swizzle < 13);
    unsigned widen1 = widen_lut[I->src[1].swizzle];
    assert(widen1 < 4);
    assert(I->cmpf < 9);
    unsigned cmpf = cmpf_lut[I->cmpf];
    assert(cmpf < 4);

    if (src0 < src1) {
        unsigned tmp;

        tmp = src0; src0 = src1; src1 = tmp;
        tmp = widen0; widen0 = widen1; widen1 = tmp;

        switch (cmpf) {
        case 2: cmpf = 0; break;
        case 3: cmpf = 1; break;
        case 0: cmpf = 2; break;
        case 1: cmpf = 3; break;
        default: break;
        }
    }

    /* Only the (0, 0) widen pair has an encoding here. */
    unsigned enc_12 = 0;
    if (widen0 != 0 || widen1 != 0)
        unreachable("No pattern match at pos 12");

    unsigned enc_9 = 0;
    if (src0 >= src1) {
        switch (cmpf) {
        case 2: enc_9 = 0; break;
        case 3: enc_9 = 1; break;
        case 1: enc_9 = 2; break;
        case 0: enc_9 = 3; break;
        default: unreachable("No pattern match at pos 9");
        }
    } else {
        unreachable("No pattern match at pos 9");
    }

    return 0x68000 | (src0 << 0) | (src1 << 3) | (src2 << 6) |
           (enc_12 << 12) | (enc_9 << 9);
}

/*
 * Pack add_branchc_i16. The lane of source 0 is written twice: as-is at
 * bit 9 and inverted at bit 3.
 */
static inline unsigned
bi_pack_add_branchc_i16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    static const uint8_t lane_lut[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };

    assert((1 << src1) & 0xf7);
    unsigned combine = I->combine;
    assert(combine < 2);
    assert(I->src[0].swizzle < 13);
    unsigned lane0 = lane_lut[I->src[0].swizzle];
    assert(lane0 < 2);

    unsigned enc_9 = 0;
    switch (lane0) {
    case 0: enc_9 = 0; break;
    case 1: enc_9 = 1; break;
    default: unreachable("No pattern match at pos 9");
    }

    unsigned enc_3 = 0;
    switch (lane0) {
    case 1: enc_3 = 0; break;
    case 0: enc_3 = 1; break;
    default: unreachable("No pattern match at pos 3");
    }

    return 0x6f030 | (src0 << 0) | (src1 << 6) | (combine << 10) |
           (enc_9 << 9) | (enc_3 << 3);
}

/* Pack add_branchc_i32: src0 at bit 0, src1 at bit 6, combine at bit 10. */
static inline unsigned
bi_pack_add_branchc_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    assert((1 << src1) & 0xf7);
    unsigned combine = I->combine;
    assert(combine < 2);

    return 0x6f238 | (src0 << 0) | (src1 << 6) | (combine << 10);
}

/*
 * Pack add_branchz_f16. cmpf is split into a one-bit field at bit 3 and a
 * three-bit field at bit 9; the widen of source 0 goes to bit 4.
 */
static inline unsigned
bi_pack_add_branchz_f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    static const uint8_t widen_lut[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };

    assert((1 << src1) & 0xf7);
    assert(I->src[0].swizzle < 13);
    unsigned widen0 = widen_lut[I->src[0].swizzle];
    assert(widen0 < 4);
    unsigned cmpf = I->cmpf;
    assert(cmpf < 8);

    unsigned enc_4 = 0;
    switch (widen0) {
    case 2: enc_4 = 1; break;
    case 1: enc_4 = 2; break;
    default: unreachable("No pattern match at pos 4");
    }

    unsigned enc_3 = 0;
    switch (cmpf) {
    case 3: case 2: case 5: enc_3 = 0; break;
    case 0: case 1: case 4: enc_3 = 1; break;
    default: unreachable("No pattern match at pos 3");
    }

    unsigned enc_9 = 0;
    switch (cmpf) {
    case 3: case 0: enc_9 = 5; break;
    case 2: case 1: enc_9 = 6; break;
    case 5: case 4: enc_9 = 7; break;
    default: unreachable("No pattern match at pos 9");
    }

    return 0x6f000 | (src0 << 0) | (src1 << 6) | (enc_4 << 4) |
           (enc_3 << 3) | (enc_9 << 9);
}

/* Pack add_branchz_f32: same cmpf split as the f16 form, no widen field. */
static inline unsigned
bi_pack_add_branchz_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    assert((1 << src1) & 0xf7);
    unsigned cmpf = I->cmpf;
    assert(cmpf < 8);

    unsigned enc_3 = 0;
    switch (cmpf) {
    case 3: case 2: case 5: enc_3 = 0; break;
    case 0: case 1: case 4: enc_3 = 1; break;
    default: unreachable("No pattern match at pos 3");
    }

    unsigned enc_9 = 0;
    switch (cmpf) {
    case 3: case 0: enc_9 = 5; break;
    case 2: case 1: enc_9 = 6; break;
    case 5: case 4: enc_9 = 7; break;
    default: unreachable("No pattern match at pos 9");
    }

    return 0x6f000 | (src0 << 0) | (src1 << 6) | (enc_3 << 3) | (enc_9 << 9);
}

/* Pack add_branchz_i16: widen of source 0 at bit 4, inverted cmpf at bit 3. */
static inline unsigned
bi_pack_add_branchz_i16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    static const uint8_t widen_lut[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
    static const uint8_t cmpf_lut[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0 };

    assert((1 << src1) & 0xf7);
    assert(I->src[0].swizzle < 13);
    unsigned widen0 = widen_lut[I->src[0].swizzle];
    assert(widen0 < 4);
    assert(I->cmpf < 9);
    unsigned cmpf = cmpf_lut[I->cmpf];
    assert(cmpf < 2);

    unsigned enc_4 = 0;
    switch (widen0) {
    case 2: enc_4 = 1; break;
    case 1: enc_4 = 2; break;
    default: unreachable("No pattern match at pos 4");
    }

    unsigned enc_3 = 0;
    switch (cmpf) {
    case 1: enc_3 = 0; break;
    case 0: enc_3 = 1; break;
    default: unreachable("No pattern match at pos 3");
    }

    return 0x6f800 | (src0 << 0) | (src1 << 6) | (enc_4 << 4) | (enc_3 << 3);
}

/* Pack add_branchz_i32: inverted cmpf at bit 3. */
static inline unsigned
bi_pack_add_branchz_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    static const uint8_t cmpf_lut[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0 };

    assert((1 << src1) & 0xf7);
    assert(I->cmpf < 9);
    unsigned cmpf = cmpf_lut[I->cmpf];
    assert(cmpf < 2);

    unsigned enc_3 = 0;
    switch (cmpf) {
    case 1: enc_3 = 0; break;
    case 0: enc_3 = 1; break;
    default: unreachable("No pattern match at pos 3");
    }

    return 0x6f800 | (src0 << 0) | (src1 << 6) | (enc_3 << 3);
}

/* Pack add_branchz_s16: widen of source 0 at bit 4, remapped cmpf at bit 9. */
static inline unsigned
bi_pack_add_branchz_s16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    static const uint8_t widen_lut[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
    static const uint8_t cmpf_lut[] = { ~0, 0, 1, ~0, 2, 3, ~0, ~0, ~0 };

    assert((1 << src1) & 0xf7);
    assert(I->src[0].swizzle < 13);
    unsigned widen0 = widen_lut[I->src[0].swizzle];
    assert(widen0 < 4);
    assert(I->cmpf < 9);
    unsigned cmpf = cmpf_lut[I->cmpf];
    assert(cmpf < 4);

    unsigned enc_4 = 0;
    switch (widen0) {
    case 2: enc_4 = 1; break;
    case 1: enc_4 = 2; break;
    default: unreachable("No pattern match at pos 4");
    }

    unsigned enc_9 = 0;
    switch (cmpf) {
    case 2: enc_9 = 0; break;
    case 3: enc_9 = 1; break;
    case 1: enc_9 = 2; break;
    case 0: enc_9 = 3; break;
    default: unreachable("No pattern match at pos 9");
    }

    return 0x6f008 | (src0 << 0) | (src1 << 6) | (enc_4 << 4) | (enc_9 << 9);
}

/* Pack add_branchz_s32: remapped cmpf at bit 9. */
static inline unsigned
bi_pack_add_branchz_s32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    static const uint8_t cmpf_lut[] = { ~0, 0, 1, ~0, 2, 3, ~0, ~0, ~0 };

    assert((1 << src1) & 0xf7);
    assert(I->cmpf < 9);
    unsigned cmpf = cmpf_lut[I->cmpf];
    assert(cmpf < 4);

    unsigned enc_9 = 0;
    switch (cmpf) {
    case 2: enc_9 = 0; break;
    case 3: enc_9 = 1; break;
    case 1: enc_9 = 2; break;
    case 0: enc_9 = 3; break;
    default: unreachable("No pattern match at pos 9");
    }

    return 0x6f008 | (src0 << 0) | (src1 << 6) | (enc_9 << 9);
}

/* Pack add_branchz_u16: as the s16 form but with base word 0x6f000. */
static inline unsigned
bi_pack_add_branchz_u16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    static const uint8_t widen_lut[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
    static const uint8_t cmpf_lut[] = { ~0, 0, 1, ~0, 2, 3, ~0, ~0, ~0 };

    assert((1 << src1) & 0xf7);
    assert(I->src[0].swizzle < 13);
    unsigned widen0 = widen_lut[I->src[0].swizzle];
    assert(widen0 < 4);
    assert(I->cmpf < 9);
    unsigned cmpf = cmpf_lut[I->cmpf];
    assert(cmpf < 4);

    unsigned enc_4 = 0;
    switch (widen0) {
    case 2: enc_4 = 1; break;
    case 1: enc_4 = 2; break;
    default: unreachable("No pattern match at pos 4");
    }

    unsigned enc_9 = 0;
    switch (cmpf) {
    case 2: enc_9 = 0; break;
    case 3: enc_9 = 1; break;
    case 1: enc_9 = 2; break;
    case 0: enc_9 = 3; break;
    default: unreachable("No pattern match at pos 9");
    }

    return 0x6f000 | (src0 << 0) | (src1 << 6) | (enc_4 << 4) | (enc_9 << 9);
}

/* Pack add_branchz_u32: as the s32 form but with base word 0x6f000. */
static inline unsigned
bi_pack_add_branchz_u32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    static const uint8_t cmpf_lut[] = { ~0, 0, 1, ~0, 2, 3, ~0, ~0, ~0 };

    assert((1 << src1) & 0xf7);
    assert(I->cmpf < 9);
    unsigned cmpf = cmpf_lut[I->cmpf];
    assert(cmpf < 4);

    unsigned enc_9 = 0;
    switch (cmpf) {
    case 2: enc_9 = 0; break;
    case 3: enc_9 = 1; break;
    case 1: enc_9 = 2; break;
    case 0: enc_9 = 3; break;
    default: unreachable("No pattern match at pos 9");
    }

    return 0x6f000 | (src0 << 0) | (src1 << 6) | (enc_9 << 9);
}

/* Pack add_branch_diverg: the single source is placed at bit 6. */
static inline unsigned
bi_pack_add_branch_diverg(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    assert((1 << src0) & 0xf7);

    return 0x6f83c | (src0 << 6);
}

/* Pack add_branch_lowbits_f32: src0 at bit 0, src1 at bit 6. */
static inline unsigned
bi_pack_add_branch_lowbits_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    assert((1 << src1) & 0xf7);

    return 0x6fa38 | (src0 << 0) | (src1 << 6);
}

/* Pack add_branch_no_diverg: the single source is placed at bit 6. */
static inline unsigned
bi_pack_add_branch_no_diverg(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    assert((1 << src0) & 0xf7);

    return 0x6fa34 | (src0 << 6);
}

/* Pack add_clper_v6_i32: src0 must satisfy mask 0x7. */
static inline unsigned
bi_pack_add_clper_v6_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    assert((1 << src0) & 0x7);

    return 0x3f0c0 | (src0 << 0) | (src1 << 3);
}

/* Pack add_clper_i32: lane_op, subgroup and inactive_result are stored raw. */
static inline unsigned
bi_pack_add_clper_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    assert((1 << src0) & 0x7);
    unsigned lane_op = I->lane_op;
    assert(lane_op < 4);
    unsigned subgroup = I->subgroup;
    assert(subgroup < 4);
    unsigned inactive_result = I->inactive_result;
    assert(inactive_result < 16);

    return 0x7c000 | (src0 << 0) | (src1 << 3) | (lane_op << 6) |
           (subgroup << 8) | (inactive_result << 10);
}

/* Pack add_cubeface2: only src0 is encoded. */
static inline unsigned
bi_pack_add_cubeface2(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    return 0x3de58 | (src0 << 0);
}

/* Pack add_cube_ssel: the two neg flags must agree; their value goes to bit 9. */
static inline unsigned
bi_pack_add_cube_ssel(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    unsigned neg0 = I->src[0].neg;
    assert(neg0 < 2);
    unsigned neg1 = I->src[1].neg;
    assert(neg1 < 2);

    unsigned enc_9 = 0;
    if (neg0 == 0 && neg1 == 0)
        enc_9 = 0;
    else if (neg0 == 1 && neg1 == 1)
        enc_9 = 1;
    else
        unreachable("No pattern match at pos 9");

    return 0x3e000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (enc_9 << 9);
}

/* Pack add_cube_tsel: as cube_ssel but with base word 0x3e400. */
static inline unsigned
bi_pack_add_cube_tsel(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    unsigned neg0 = I->src[0].neg;
    assert(neg0 < 2);
    unsigned neg1 = I->src[1].neg;
    assert(neg1 < 2);

    unsigned enc_9 = 0;
    if (neg0 == 0 && neg1 == 0)
        enc_9 = 0;
    else if (neg0 == 1 && neg1 == 1)
        enc_9 = 1;
    else
        unreachable("No pattern match at pos 9");

    return 0x3e400 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (enc_9 << 9);
}

/*
 * Pack add_discard_f32. cmpf values 1 and 2 are rewritten to 4 and 5 with
 * the two sources (and their widen fields) exchanged.
 */
static inline unsigned
bi_pack_add_discard_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    static const uint8_t widen_lut[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };

    unsigned cmpf = I->cmpf;
    assert(cmpf < 8);
    assert(I->src[0].swizzle < 13);
    unsigned widen0 = widen_lut[I->src[0].swizzle];
    assert(widen0 < 4);
    assert(I->src[1].swizzle < 13);
    unsigned widen1 = widen_lut[I->src[1].swizzle];
    assert(widen1 < 4);

    if (cmpf == 1 || cmpf == 2) {
        unsigned tmp;

        tmp = src0; src0 = src1; src1 = tmp;
        tmp = widen0; widen0 = widen1; widen1 = tmp;

        cmpf = (cmpf == 1) ? 4 : 5;
    }

    unsigned enc_6 = 0;
    switch (cmpf) {
    case 0: enc_6 = 0; break;
    case 3: enc_6 = 1; break;
    case 4: enc_6 = 2; break;
    case 5: enc_6 = 3; break;
    default: unreachable("No pattern match at pos 6");
    }

    unsigned enc_8 = 0;
    if (widen0 == 1 && widen1 == 1)
        enc_8 = 0;
    else if (widen0 == 2 && widen1 == 1)
        enc_8 = 1;
    else if (widen0 == 1 && widen1 == 2)
        enc_8 = 2;
    else if (widen0 == 2 && widen1 == 2)
        enc_8 = 3;
    else if (widen0 == 0 && widen1 == 0)
        enc_8 = 4;
    else
        unreachable("No pattern match at pos 8");

    return 0xc8800 | (src0 << 0) | (src1 << 3) | (enc_6 << 6) | (enc_8 << 8);
}

/* Pack add_f16_to_f32: lane of source 0 at bit 3. */
static inline unsigned
bi_pack_add_f16_to_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    static const uint8_t lane_lut[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };

    assert(I->src[0].swizzle < 13);
    unsigned lane0 = lane_lut[I->src[0].swizzle];
    assert(lane0 < 2);

    return 0x3cd10 | (src0 << 0) | (lane0 << 3);
}

/*
 * Pack add_f16_to_s32. round == 4 selects a separate encoding; rounds 0..3
 * are stored at bit 4 of the regular one.
 */
static inline unsigned
bi_pack_add_f16_to_s32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    static const uint8_t lane_lut[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };

    unsigned round = I->round;
    assert(round < 8);
    assert(I->src[0].swizzle < 13);
    unsigned lane0 = lane_lut[I->src[0].swizzle];
    assert(lane0 < 2);

    if (round == 4) {
        return 0x3cc40 | (src0 << 0) | (lane0 << 5);
    } else if (round != 4) {
        unsigned enc_4 = 0;
        switch (round) {
        case 0: case 1: case 2: case 3: enc_4 = round; break;
        default: unreachable("No pattern match at pos 4");
        }

        return 0x3c500 | (src0 << 0) | (lane0 << 7) | (enc_4 << 4);
    } else {
        unreachable("No matching state found in add_f16_to_s32");
    }
}

/* Pack add_f16_to_u32: same shape as the s32 form with different base words. */
static inline unsigned
bi_pack_add_f16_to_u32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    static const uint8_t lane_lut[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };

    unsigned round = I->round;
    assert(round < 8);
    assert(I->src[0].swizzle < 13);
    unsigned lane0 = lane_lut[I->src[0].swizzle];
    assert(lane0 < 2);

    if (round == 4) {
        return 0x3cc48 | (src0 << 0) | (lane0 << 5);
    } else if (round != 4) {
        unsigned enc_4 = 0;
        switch (round) {
        case 0: case 1: case 2: case 3: enc_4 = round; break;
        default: unreachable("No pattern match at pos 4");
        }

        return 0x3c508 | (src0 << 0) | (lane0 << 7) | (enc_4 << 4);
    } else {
        unreachable("No matching state found in add_f16_to_u32");
    }
}

/* Pack add_f32_to_s32: round == 4 selects a separate encoding. */
static inline unsigned
bi_pack_add_f32_to_s32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    unsigned round = I->round;
    assert(round < 8);

    if (round == 4) {
        return 0x3cca0 | (src0 << 0);
    } else if (round != 4) {
        unsigned enc_4 = 0;
        switch (round) {
        case 0: case 1: case 2: case 3: enc_4 = round; break;
        default: unreachable("No pattern match at pos 4");
        }

        return 0x3c980 | (src0 << 0) | (enc_4 << 4);
    } else {
        unreachable("No matching state found in add_f32_to_s32");
    }
}

/* Pack add_f32_to_u32: round == 4 selects a separate encoding. */
static inline unsigned
bi_pack_add_f32_to_u32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    unsigned round = I->round;
    assert(round < 8);

    if (round == 4) {
        return 0x3cca8 | (src0 << 0);
    } else if (round != 4) {
        unsigned enc_4 = 0;
        switch (round) {
        case 0: case 1: case 2: case 3: enc_4 = round; break;
        default: unreachable("No pattern match at pos 4");
        }

        return 0x3c988 | (src0 << 0) | (enc_4 << 4);
    } else {
        unreachable("No matching state found in add_f32_to_u32");
    }
}

/*
 * Pack add_fadd_f32. Operands are exchanged when source 1 has widen 0 and
 * source 0 has widen 1 or 2. A translated round of 4 is only encodable with
 * no widen, abs, neg or clamp set.
 */
static inline unsigned
bi_pack_add_fadd_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    static const uint8_t round_lut[] = { 0, 1, 2, 3, ~0, 5, 4, ~0, ~0 };
    static const uint8_t widen_lut[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };

    assert(I->round < 9);
    unsigned round = round_lut[I->round];
    assert(round < 8);
    unsigned abs1 = I->src[1].abs;
    assert(abs1 < 2);
    unsigned neg0 = I->src[0].neg;
    assert(neg0 < 2);
    unsigned neg1 = I->src[1].neg;
    assert(neg1 < 2);
    unsigned clamp = I->clamp;
    assert(clamp < 4);
    unsigned abs0 = I->src[0].abs;
    assert(abs0 < 2);
    assert(I->src[0].swizzle < 13);
    unsigned widen0 = widen_lut[I->src[0].swizzle];
    assert(widen0 < 4);
    assert(I->src[1].swizzle < 13);
    unsigned widen1 = widen_lut[I->src[1].swizzle];
    assert(widen1 < 4);

    if ((widen0 == 1 && widen1 == 0) || (widen0 == 2 && widen1 == 0)) {
        unsigned tmp;

        tmp = src0; src0 = src1; src1 = tmp;
        tmp = neg0; neg0 = neg1; neg1 = tmp;
        tmp = abs0; abs0 = abs1; abs1 = tmp;
        tmp = widen0; widen0 = widen1; widen1 = tmp;
    }

    if (round != 4) {
        unsigned enc_13 = 0;
        switch (round) {
        case 0: case 1: case 2: case 3: enc_13 = round; break;
        default: unreachable("No pattern match at pos 13");
        }

        unsigned enc_9 = 0;
        if (widen0 == 0 && widen1 == 0)
            enc_9 = 0;
        else if (widen0 == 0 && widen1 == 1)
            enc_9 = 1;
        else if (widen0 == 0 && widen1 == 2)
            enc_9 = 2;
        else if (widen0 == 1 && widen1 == 1)
            enc_9 = 3;
        else
            unreachable("No pattern match at pos 9");

        return 0x20000 | (src0 << 0) | (src1 << 3) | (abs1 << 6) |
               (neg0 << 7) | (neg1 << 8) | (clamp << 11) | (abs0 << 15) |
               (enc_13 << 13) | (enc_9 << 9);
    } else if (round == 4 && widen0 == 0 && widen1 == 0 && abs0 == 0 &&
               abs1 == 0 && neg0 == 0 && neg1 == 0 && clamp == 0) {
        return 0x75200 | (src0 << 0) | (src1 << 3);
    } else {
        unreachable("No matching state found in add_fadd_f32");
    }
}

/* Pack add_fadd_v2f16: every modifier is stored directly in its own field. */
static inline unsigned
bi_pack_add_fadd_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    static const uint8_t swz_lut[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };

    unsigned abs1 = I->src[1].abs;
    assert(abs1 < 2);
    unsigned neg0 = I->src[0].neg;
    assert(neg0 < 2);
    unsigned neg1 = I->src[1].neg;
    assert(neg1 < 2);
    assert(I->src[0].swizzle < 13);
    unsigned swz0 = swz_lut[I->src[0].swizzle];
    assert(swz0 < 4);
    assert(I->src[1].swizzle < 13);
    unsigned swz1 = swz_lut[I->src[1].swizzle];
    assert(swz1 < 4);
    unsigned round = I->round;
    assert(round < 4);
    unsigned abs0 = I->src[0].abs;
    assert(abs0 < 2);

    return 0xa0000 | (src0 << 0) | (src1 << 3) | (abs1 << 6) | (neg0 << 7) |
           (neg1 << 8) | (swz0 << 9) | (swz1 << 11) | (round << 13) |
           (abs0 << 15);
}

/*
 * Pack add_fadd_rscale_f32. clamp, special and round are folded into one
 * field at bit 9; only the combinations listed below are encodable.
 */
static inline unsigned
bi_pack_add_fadd_rscale_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    static const uint8_t clamp_lut[] = { 0, ~0, ~0, 1 };

    assert(I->clamp < 4);
    unsigned clamp = clamp_lut[I->clamp];
    assert(clamp < 2);
    unsigned special = I->special;
    assert(special < 2);
    unsigned round = I->round;
    assert(round < 8);
    unsigned abs1 = I->src[1].abs;
    assert(abs1 < 2);
    unsigned neg0 = I->src[0].neg;
    assert(neg0 < 2);
    unsigned neg1 = I->src[1].neg;
    assert(neg1 < 2);
    unsigned abs0 = I->src[0].abs;
    assert(abs0 < 2);

    unsigned enc_9 = 0;
    if (clamp == 0 && special == 0 && round == 0)
        enc_9 = 0;
    else if (clamp == 1 && special == 0 && round == 0)
        enc_9 = 2;
    else if (clamp == 0 && special == 1 && round == 4)
        enc_9 = 3;
    else if (clamp == 0 && special == 1 && round == 0)
        enc_9 = 4;
    else if (clamp == 0 && special == 1 && round == 1)
        enc_9 = 5;
    else if (clamp == 0 && special == 1 && round == 2)
        enc_9 = 6;
    else if (clamp == 0 && special == 1 && round == 3)
        enc_9 = 7;
    else
        unreachable("No pattern match at pos 9");

    return 0x88000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (abs1 << 12) |
           (neg0 << 13) | (neg1 << 14) | (abs0 << 16) | (enc_9 << 9);
}

/*
 * Pack add_fcmp_f32. Operands are exchanged (with cmpf 1<->4 and 2<->5)
 * when only source 1 is negated, or when source 1 has widen 0 and source 0
 * has widen 1 or 2.
 */
static inline unsigned
bi_pack_add_fcmp_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    static const uint8_t widen_lut[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
    static const uint8_t cmpf_lut[] = { 0, 1, 2, 3, 4, 5, ~0, 6, 7 };

    assert(I->src[0].swizzle < 13);
    unsigned widen0 = widen_lut[I->src[0].swizzle];
    assert(widen0 < 4);
    assert(I->src[1].swizzle < 13);
    unsigned widen1 = widen_lut[I->src[1].swizzle];
    assert(widen1 < 4);
    unsigned neg0 = I->src[0].neg;
    assert(neg0 < 2);
    unsigned neg1 = I->src[1].neg;
    assert(neg1 < 2);
    assert(I->cmpf < 9);
    unsigned cmpf = cmpf_lut[I->cmpf];
    assert(cmpf < 8);
    unsigned abs0 = I->src[0].abs;
    assert(abs0 < 2);
    unsigned abs1 = I->src[1].abs;
    assert(abs1 < 2);
    unsigned result_type = I->result_type;
    assert(result_type < 4);

    if ((neg0 == 0 && neg1 == 1) || (widen0 == 1 && widen1 == 0) ||
        (widen0 == 2 && widen1 == 0)) {
        unsigned tmp;

        tmp = src0; src0 = src1; src1 = tmp;
        tmp = widen0; widen0 = widen1; widen1 = tmp;
        tmp = neg0; neg0 = neg1; neg1 = tmp;
        tmp = abs0; abs0 = abs1; abs1 = tmp;

        switch (cmpf) {
        case 4: cmpf = 1; break;
        case 5: cmpf = 2; break;
        case 1: cmpf = 4; break;
        case 2: cmpf = 5; break;
        default: break;
        }
    }

    unsigned enc_9 = 0;
    if (widen0 == 0 && widen1 == 0)
        enc_9 = 0;
    else if (widen0 == 0 && widen1 == 1)
        enc_9 = 1;
    else if (widen0 == 0 && widen1 == 2)
        enc_9 = 2;
    else if (widen0 == 1 && widen1 == 1)
        enc_9 = 3;
    else
        unreachable("No pattern match at pos 9");

    unsigned enc_13 = 0;
    if (neg0 == 0 && neg1 == 0)
        enc_13 = 0;
    else if (neg0 == 1 && neg1 == 0)
        enc_13 = 1;
    else
        unreachable("No pattern match at pos 13");

    return 0x30000 | (src0 << 0) | (src1 << 3) | (cmpf << 6) | (abs0 << 11) |
           (abs1 << 12) | (result_type << 14) | (enc_9 << 9) | (enc_13 << 13);
}

/*
 * Pack add_fcmp_v2f16. Operands are exchanged (with cmpf 1<->4 and 2<->5)
 * when only source 1 is negated.
 */
static inline unsigned
bi_pack_add_fcmp_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    static const uint8_t cmpf_lut[] = { 0, 1, 2, 3, 4, 5, ~0, 6, 7 };
    static const uint8_t swz_lut[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };

    unsigned neg0 = I->src[0].neg;
    assert(neg0 < 2);
    unsigned neg1 = I->src[1].neg;
    assert(neg1 < 2);
    assert(I->cmpf < 9);
    unsigned cmpf = cmpf_lut[I->cmpf];
    assert(cmpf < 8);
    assert(I->src[0].swizzle < 13);
    unsigned swz0 = swz_lut[I->src[0].swizzle];
    assert(swz0 < 4);
    assert(I->src[1].swizzle < 13);
    unsigned swz1 = swz_lut[I->src[1].swizzle];
    assert(swz1 < 4);
    unsigned result_type = I->result_type;
    assert(result_type < 4);

    if (neg0 == 0 && neg1 == 1) {
        unsigned tmp;

        tmp = src0; src0 = src1; src1 = tmp;
        tmp = neg0; neg0 = neg1; neg1 = tmp;
        tmp = swz0; swz0 = swz1; swz1 = tmp;

        switch (cmpf) {
        case 4: cmpf = 1; break;
        case 5: cmpf = 2; break;
        case 1: cmpf = 4; break;
        case 2: cmpf = 5; break;
        default: break;
        }
    }

    unsigned enc_13 = 0;
    if (neg0 == 0 && neg1 == 0)
        enc_13 = 0;
    else if (neg0 == 1 && neg1 == 0)
        enc_13 = 1;
    else
        unreachable("No pattern match at pos 13");

    return 0xb0000 | (src0 << 0) | (src1 << 3) | (cmpf << 6) | (swz0 << 9) |
           (swz1 << 11) | (result_type << 14) | (enc_13 << 13);
}

/* Pack add_fcos_table_u6: offset is stored at bit 4. */
static inline unsigned
bi_pack_add_fcos_table_u6(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    assert((1 << src0) & 0xf7);
    unsigned offset = I->offset;
    assert(offset < 2);

    return 0x67a88 | (src0 << 0) | (offset << 4);
}

/* Pack add_fexp_f32: both sources must satisfy mask 0xf7. */
static inline unsigned
bi_pack_add_fexp_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    assert((1 << src0) & 0xf7);
    assert((1 << src1) & 0xf7);

    return 0x66ac0 | (src0 << 0) | (src1 << 3);
}

/* Pack add_fexp_table_u4: adj is stored at bit 3. */
static inline unsigned
bi_pack_add_fexp_table_u4(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    assert((1 << src0) & 0xf7);
    unsigned adj = I->adj;
    assert(adj < 4);

    return 0x67ac0 | (src0 << 0) | (adj << 3);
}

/* Pack add_flogd_f32: only src0 is encoded. */
static inline unsigned
bi_pack_add_flogd_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    assert((1 << src0) & 0xf7);

    return 0x66340 | (src0 << 0);
}

/*
 * Pack add_flog_table_f32. Five encodings exist, chosen by whether mode,
 * widen0 and precision are zero; the non-zero-mode forms additionally
 * require divzero == 0, and the non-zero-precision form requires abs0 and
 * neg0 to be clear as well.
 */
static inline unsigned
bi_pack_add_flog_table_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    static const uint8_t widen_lut[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };

    assert((1 << src0) & 0xf7);
    assert(I->src[0].swizzle < 13);
    unsigned widen0 = widen_lut[I->src[0].swizzle];
    assert(widen0 < 4);
    unsigned mode = I->mode;
    assert(mode < 4);
    unsigned precision = I->precision;
    assert(precision < 4);
    unsigned neg0 = I->src[0].neg;
    assert(neg0 < 2);
    unsigned abs0 = I->src[0].abs;
    assert(abs0 < 2);
    unsigned divzero = I->divzero;
    assert(divzero < 2);

    if (mode == 0 && widen0 == 0 && precision == 0) {
        return 0x67300 | (src0 << 0) | (neg0 << 3) | (abs0 << 4) |
               (divzero << 5);
    } else if (mode == 0 && widen0 != 0 && precision == 0) {
        unsigned enc_7 = 0;
        switch (widen0) {
        case 1: enc_7 = 0; break;
        case 2: enc_7 = 1; break;
        default: unreachable("No pattern match at pos 7");
        }

        return 0x67340 | (src0 << 0) | (neg0 << 3) | (abs0 << 4) |
               (divzero << 5) | (enc_7 << 7);
    } else if (mode != 0 && widen0 == 0 && precision == 0 && divzero == 0) {
        unsigned enc_5 = 0;
        switch (mode) {
        case 1: enc_5 = 0; break;
        case 2: enc_5 = 1; break;
        default: unreachable("No pattern match at pos 5");
        }

        return 0x67b00 | (src0 << 0) | (neg0 << 3) | (abs0 << 4) |
               (enc_5 << 5);
    } else if (mode != 0 && widen0 != 0 && precision == 0 && divzero == 0) {
        unsigned enc_5 = 0;
        switch (mode) {
        case 1: enc_5 = 0; break;
        case 2: enc_5 = 1; break;
        default: unreachable("No pattern match at pos 5");
        }

        unsigned enc_7 = 0;
        switch (widen0) {
        case 1: enc_7 = 0; break;
        case 2: enc_7 = 1; break;
        default: unreachable("No pattern match at pos 7");
        }

        return 0x67b40 | (src0 << 0) | (neg0 << 3) | (abs0 << 4) |
               (enc_5 << 5) | (enc_7 << 7);
    } else if (mode != 0 && widen0 == 0 && precision != 0 && divzero == 0 &&
               abs0 == 0 && neg0 == 0) {
        unsigned enc_3 = 0;
        switch (mode) {
        case 2: enc_3 = 0; break;
        case 1: enc_3 = 1; break;
        default: unreachable("No pattern match at pos 3");
        }

        unsigned enc_4 = 0;
        switch (precision) {
        case 1: enc_4 = 0; break;
        case 2: enc_4 = 1; break;
        default: unreachable("No pattern match at pos 4");
        }

        return 0x67ae0 | (src0 << 0) | (enc_3 << 3) | (enc_4 << 4);
    } else {
        unreachable("No matching state found in add_flog_table_f32");
    }
}

/* Pack add_fmax_f32: every modifier is stored directly in its own field. */
static inline unsigned
bi_pack_add_fmax_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    unsigned abs1 = I->src[1].abs;
    assert(abs1 < 2);
    unsigned neg0 = I->src[0].neg;
    assert(neg0 < 2);
    unsigned neg1 = I->src[1].neg;
    assert(neg1 < 2);
    unsigned clamp = I->clamp;
    assert(clamp < 4);
    unsigned sem = I->sem;
    assert(sem < 4);
    unsigned abs0 = I->src[0].abs;
    assert(abs0 < 2);

    return 0x0 | (src0 << 0) | (src1 << 3) | (abs1 << 6) | (neg0 << 7) |
           (neg1 << 8) | (clamp << 11) | (sem << 13) | (abs0 << 15);
}

/*
 * Pack add_fmax_v2f16. The abs flags share one bit (bit 6) whose meaning
 * depends on the relative order of src0 and src1, so operands are exchanged
 * first when needed; sem values 2 and 3 trade places on exchange.
 */
static inline unsigned
bi_pack_add_fmax_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    static const uint8_t swz_lut[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };

    unsigned abs0 = I->src[0].abs;
    assert(abs0 < 2);
    unsigned abs1 = I->src[1].abs;
    assert(abs1 < 2);
    unsigned neg0 = I->src[0].neg;
    assert(neg0 < 2);
    unsigned neg1 = I->src[1].neg;
    assert(neg1 < 2);
    assert(I->src[0].swizzle < 13);
    unsigned swz0 = swz_lut[I->src[0].swizzle];
    assert(swz0 < 4);
    assert(I->src[1].swizzle < 13);
    unsigned swz1 = swz_lut[I->src[1].swizzle];
    assert(swz1 < 4);
    unsigned sem = I->sem;
    assert(sem < 4);

    if ((abs0 == 0 && src0 > src1) || (abs1 == 1 && src0 <= src1)) {
        unsigned tmp;

        tmp = src0; src0 = src1; src1 = tmp;
        tmp = abs0; abs0 = abs1; abs1 = tmp;
        tmp = neg0; neg0 = neg1; neg1 = tmp;
        tmp = swz0; swz0 = swz1; swz1 = tmp;

        if (sem == 2)
            sem = 3;
        else if (sem == 3)
            sem = 2;
    }

    unsigned enc_6 = 0;
    if ((abs0 == 1 && abs1 == 0 && src0 > src1) ||
        (abs0 == 0 && abs1 == 0 && src0 <= src1))
        enc_6 = 0;
    else if ((abs0 == 1 && abs1 == 1 && src0 > src1) ||
             (abs0 == 1 && abs1 == 0 && src0 <= src1))
        enc_6 = 1;
    else
        unreachable("No pattern match at pos 6");

    return 0x80000 | (src0 << 0) | (src1 << 3) | (neg0 << 7) | (neg1 << 8) |
           (swz0 << 9) | (swz1 << 11) | (sem << 13) | (enc_6 << 6);
}

/* Pack add_fmin_f32: as add_fmax_f32 but with base word 0x10000. */
static inline unsigned
bi_pack_add_fmin_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    unsigned abs1 = I->src[1].abs;
    assert(abs1 < 2);
    unsigned neg0 = I->src[0].neg;
    assert(neg0 < 2);
    unsigned neg1 = I->src[1].neg;
    assert(neg1 < 2);
    unsigned clamp = I->clamp;
    assert(clamp < 4);
    unsigned sem = I->sem;
    assert(sem < 4);
    unsigned abs0 = I->src[0].abs;
    assert(abs0 < 2);

    return 0x10000 | (src0 << 0) | (src1 << 3) | (abs1 << 6) | (neg0 << 7) |
           (neg1 << 8) | (clamp << 11) | (sem << 13) | (abs0 << 15);
}

/* Pack add_fmin_v2f16: as add_fmax_v2f16 but with base word 0x90000. */
static inline unsigned
bi_pack_add_fmin_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    static const uint8_t swz_lut[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };

    unsigned abs0 = I->src[0].abs;
    assert(abs0 < 2);
    unsigned abs1 = I->src[1].abs;
    assert(abs1 < 2);
    unsigned neg0 = I->src[0].neg;
    assert(neg0 < 2);
    unsigned neg1 = I->src[1].neg;
    assert(neg1 < 2);
    assert(I->src[0].swizzle < 13);
    unsigned swz0 = swz_lut[I->src[0].swizzle];
    assert(swz0 < 4);
    assert(I->src[1].swizzle < 13);
    unsigned swz1 = swz_lut[I->src[1].swizzle];
    assert(swz1 < 4);
    unsigned sem = I->sem;
    assert(sem < 4);

    if ((abs0 == 0 && src0 > src1) || (abs1 == 1 && src0 <= src1)) {
        unsigned tmp;

        tmp = src0; src0 = src1; src1 = tmp;
        tmp = abs0; abs0 = abs1; abs1 = tmp;
        tmp = neg0; neg0 = neg1; neg1 = tmp;
        tmp = swz0; swz0 = swz1; swz1 = tmp;

        if (sem == 2)
            sem = 3;
        else if (sem == 3)
            sem = 2;
    }

    unsigned enc_6 = 0;
    if ((abs0 == 1 && abs1 == 0 && src0 > src1) ||
        (abs0 == 0 && abs1 == 0 && src0 <= src1))
        enc_6 = 0;
    else if ((abs0 == 1 && abs1 == 1 && src0 > src1) ||
             (abs0 == 1 && abs1 == 0 && src0 <= src1))
        enc_6 = 1;
    else
        unreachable("No pattern match at pos 6");

    return 0x90000 | (src0 << 0) | (src1 << 3) | (neg0 << 7) | (neg1 << 8) |
           (swz0 << 9) | (swz1 << 11) | (sem << 13) | (enc_6 << 6);
}

/* Pack add_fpclass_f16: lane of source 0 at bit 3. */
static inline unsigned
bi_pack_add_fpclass_f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    static const uint8_t lane_lut[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };

    assert((1 << src0) & 0xf7);
    assert(I->src[0].swizzle < 13);
    unsigned lane0 = lane_lut[I->src[0].swizzle];
    assert(lane0 < 2);

    return 0x67c40 | (src0 << 0) | (lane0 << 3);
}

/* Pack add_fpclass_f32: only src0 is encoded. */
static inline unsigned
bi_pack_add_fpclass_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    assert((1 << src0) & 0xf7);

    return 0x67c50 | (src0 << 0);
}

/* Pack add_fpow_sc_apply: src0 at bit 0, src1 at bit 3, no modifiers. */
static inline unsigned
bi_pack_add_fpow_sc_apply(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    return 0x75080 | (src0 << 0) | (src1 << 3);
}

/*
 * Pack add_fpow_sc_det_f16. func 0/1 and func 2/3 use different base words;
 * the func 2/3 form is only encodable when lane1 == 2.
 */
static inline unsigned
bi_pack_add_fpow_sc_det_f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    static const uint8_t lane1_lut[] = { 0, 2, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
    static const uint8_t lane0_lut[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };

    assert((1 << src0) & 0xf7);
    assert((1 << src1) & 0xf7);
    unsigned func = I->func;
    assert(func < 4);
    assert(I->src[1].swizzle < 13);
    unsigned lane1 = lane1_lut[I->src[1].swizzle];
    assert(lane1 < 4);
    assert(I->src[0].swizzle < 13);
    unsigned lane0 = lane0_lut[I->src[0].swizzle];
    assert(lane0 < 2);

    if (func == 0 || func == 1) {
        unsigned enc_6 = 0;
        switch (lane1) {
        case 2: case 0: enc_6 = 0; break;
        case 1: enc_6 = 1; break;
        default: unreachable("No pattern match at pos 6");
        }

        unsigned enc_8 = 0;
        switch (func) {
        case 0: enc_8 = 0; break;
        case 1: enc_8 = 1; break;
        default: unreachable("No pattern match at pos 8");
        }

        return 0x67400 | (src0 << 0) | (src1 << 3) | (lane0 << 7) |
               (enc_6 << 6) | (enc_8 << 8);
    } else if ((func == 2 || func == 3) && lane1 == 2) {
        unsigned enc_8 = 0;
        switch (func) {
        case 2: enc_8 = 0; break;
        case 3: enc_8 = 1; break;
        default: unreachable("No pattern match at pos 8");
        }

        return 0x67600 | (src0 << 0) | (src1 << 3) | (lane0 << 7) |
               (enc_8 << 8);
    } else {
        unreachable("No matching state found in add_fpow_sc_det_f16");
    }
}

/* Pack add_fpow_sc_det_f32: func is stored raw at bit 7. */
static inline unsigned
bi_pack_add_fpow_sc_det_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3)
{
    assert((1 << src0) & 0xf7);
    assert((1 << src1) & 0xf7);
    unsigned func = I->func;
    assert(func < 4);

    return 0x67640 | (src0 << 0) | (src1 << 3) | (func << 7);
}

/* The line below is the head of a definition that continues past this span;
 * it is carried through unchanged. */
3178static inline unsigned 3179bi_pack_add_frcp_f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 3180{ 3181 assert((1 << src0) & 0xf7); 3182 unsigned neg0 = I->src[0].neg; 3183 assert(neg0 < 2); 3184 unsigned abs0 = I->src[0].abs; 3185 assert(abs0 < 2); 3186 unsigned divzero = I->divzero; 3187 assert(divzero 
< 2); 3188 static uint8_t lane0_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 3189 assert(I->src[0].swizzle < 13); 3190 unsigned lane0 = lane0_table[I->src[0].swizzle]; 3191 assert(lane0 < 2); 3192 return 0x67080 | (src0 << 0) | (neg0 << 3) | (abs0 << 4) | (divzero << 5) | (lane0 << 8); 3193} 3194 3195static inline unsigned 3196bi_pack_add_frcp_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 3197{ 3198 assert((1 << src0) & 0xf7); 3199 static uint8_t widen0_table[] = { ~0, 0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 3200 assert(I->src[0].swizzle < 13); 3201 unsigned widen0 = widen0_table[I->src[0].swizzle]; 3202 assert(widen0 < 4); 3203 unsigned neg0 = I->src[0].neg; 3204 assert(neg0 < 2); 3205 unsigned abs0 = I->src[0].abs; 3206 assert(abs0 < 2); 3207 unsigned derived_6 = 0; 3208 if (widen0 == 0) derived_6 = 0; 3209 else unreachable("No pattern match at pos 6"); 3210 3211 return 0x66000 | (src0 << 0) | (neg0 << 3) | (abs0 << 4) | (derived_6 << 6); 3212} 3213 3214static inline unsigned 3215bi_pack_add_frcp_approx_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 3216{ 3217 assert((1 << src0) & 0xf7); 3218 static uint8_t widen0_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 3219 assert(I->src[0].swizzle < 13); 3220 unsigned widen0 = widen0_table[I->src[0].swizzle]; 3221 assert(widen0 < 4); 3222 unsigned neg0 = I->src[0].neg; 3223 assert(neg0 < 2); 3224 unsigned abs0 = I->src[0].abs; 3225 assert(abs0 < 2); 3226 unsigned divzero = I->divzero; 3227 assert(divzero < 2); 3228 if (widen0 == 0) { 3229 return 0x67000 | (src0 << 0) | (neg0 << 3) | (abs0 << 4) | (divzero << 5); 3230 } else if (widen0 != 0) { 3231 unsigned derived_7 = 0; 3232 if (widen0 == 1) derived_7 = 0; 3233 else if (widen0 == 2) derived_7 = 1; 3234 else unreachable("No pattern match at pos 7"); 
3235 3236 return 0x67040 | (src0 << 0) | (neg0 << 3) | (abs0 << 4) | (divzero << 5) | (derived_7 << 7); 3237 } else { 3238 unreachable("No matching state found in add_frcp_approx_f32"); 3239 } 3240} 3241 3242static inline unsigned 3243bi_pack_add_frexpe_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 3244{ 3245 unsigned neg0 = I->src[0].neg; 3246 assert(neg0 < 2); 3247 unsigned sqrt = I->sqrt; 3248 assert(sqrt < 2); 3249 unsigned log = I->log; 3250 assert(log < 2); 3251 static uint8_t widen0_table[] = { 2, 1, ~0, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 3252 assert(I->src[0].swizzle < 13); 3253 unsigned widen0 = widen0_table[I->src[0].swizzle]; 3254 assert(widen0 < 4); 3255 if (log == 0) { 3256 return 0x3dc20 | (src0 << 0) | (neg0 << 6) | (sqrt << 8) | (widen0 << 3); 3257 } else if ((log == 1) && (sqrt == 0) && (neg0 == 0)) { 3258 return 0x3de20 | (src0 << 0) | (widen0 << 3); 3259 } else { 3260 unreachable("No matching state found in add_frexpe_f32"); 3261 } 3262} 3263 3264static inline unsigned 3265bi_pack_add_frexpe_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 3266{ 3267 unsigned neg0 = I->src[0].neg; 3268 assert(neg0 < 2); 3269 unsigned sqrt = I->sqrt; 3270 assert(sqrt < 2); 3271 unsigned log = I->log; 3272 assert(log < 2); 3273 static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 3274 assert(I->src[0].swizzle < 13); 3275 unsigned swz0 = swz0_table[I->src[0].swizzle]; 3276 assert(swz0 < 4); 3277 if (log == 0) { 3278 return 0x3dc00 | (src0 << 0) | (neg0 << 6) | (sqrt << 8) | (swz0 << 3); 3279 } else if ((log == 1) && (sqrt == 0) && (neg0 == 0)) { 3280 return 0x3de00 | (src0 << 0) | (swz0 << 3); 3281 } else { 3282 unreachable("No matching state found in add_frexpe_v2f16"); 3283 } 3284} 3285 3286static inline unsigned 3287bi_pack_add_frexpm_f32(bi_instr *I, 
enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 3288{ 3289 unsigned abs0 = I->src[0].abs; 3290 assert(abs0 < 2); 3291 unsigned sqrt = I->sqrt; 3292 assert(sqrt < 2); 3293 unsigned log = I->log; 3294 assert(log < 2); 3295 static uint8_t widen0_table[] = { 2, 1, ~0, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 3296 assert(I->src[0].swizzle < 13); 3297 unsigned widen0 = widen0_table[I->src[0].swizzle]; 3298 assert(widen0 < 4); 3299 unsigned neg0 = I->src[0].neg; 3300 assert(neg0 < 2); 3301 if ((log == 0) && (neg0 == 0)) { 3302 return 0x3db20 | (src0 << 0) | (abs0 << 6) | (sqrt << 7) | (widen0 << 3); 3303 } else if ((log == 1) && (sqrt == 0)) { 3304 return 0x3da20 | (src0 << 0) | (abs0 << 6) | (widen0 << 3) | (neg0 << 7); 3305 } else { 3306 unreachable("No matching state found in add_frexpm_f32"); 3307 } 3308} 3309 3310static inline unsigned 3311bi_pack_add_frexpm_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 3312{ 3313 unsigned abs0 = I->src[0].abs; 3314 assert(abs0 < 2); 3315 unsigned sqrt = I->sqrt; 3316 assert(sqrt < 2); 3317 unsigned log = I->log; 3318 assert(log < 2); 3319 static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 3320 assert(I->src[0].swizzle < 13); 3321 unsigned swz0 = swz0_table[I->src[0].swizzle]; 3322 assert(swz0 < 4); 3323 unsigned neg0 = I->src[0].neg; 3324 assert(neg0 < 2); 3325 if ((log == 0) && (neg0 == 0)) { 3326 return 0x3db00 | (src0 << 0) | (abs0 << 6) | (sqrt << 7) | (swz0 << 3); 3327 } else if ((log == 1) && (sqrt == 0)) { 3328 return 0x3da00 | (src0 << 0) | (abs0 << 6) | (swz0 << 3) | (neg0 << 7); 3329 } else { 3330 unreachable("No matching state found in add_frexpm_v2f16"); 3331 } 3332} 3333 3334static inline unsigned 3335bi_pack_add_fround_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, 
enum bifrost_packed_src src3) 3336{ 3337 unsigned abs0 = I->src[0].abs; 3338 assert(abs0 < 2); 3339 unsigned neg0 = I->src[0].neg; 3340 assert(neg0 < 2); 3341 static uint8_t widen0_table[] = { 2, 1, ~0, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 3342 assert(I->src[0].swizzle < 13); 3343 unsigned widen0 = widen0_table[I->src[0].swizzle]; 3344 assert(widen0 < 4); 3345 unsigned round = I->round; 3346 assert(round < 4); 3347 return 0x3e820 | (src0 << 0) | (abs0 << 7) | (neg0 << 8) | (widen0 << 3) | (round << 9); 3348} 3349 3350static inline unsigned 3351bi_pack_add_fround_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 3352{ 3353 unsigned abs0 = I->src[0].abs; 3354 assert(abs0 < 2); 3355 unsigned neg0 = I->src[0].neg; 3356 assert(neg0 < 2); 3357 static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 3358 assert(I->src[0].swizzle < 13); 3359 unsigned swz0 = swz0_table[I->src[0].swizzle]; 3360 assert(swz0 < 4); 3361 unsigned round = I->round; 3362 assert(round < 4); 3363 return 0x3e800 | (src0 << 0) | (abs0 << 7) | (neg0 << 8) | (swz0 << 3) | (round << 9); 3364} 3365 3366static inline unsigned 3367bi_pack_add_frsq_f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 3368{ 3369 assert((1 << src0) & 0xf7); 3370 unsigned neg0 = I->src[0].neg; 3371 assert(neg0 < 2); 3372 unsigned abs0 = I->src[0].abs; 3373 assert(abs0 < 2); 3374 unsigned divzero = I->divzero; 3375 assert(divzero < 2); 3376 static uint8_t lane0_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 3377 assert(I->src[0].swizzle < 13); 3378 unsigned lane0 = lane0_table[I->src[0].swizzle]; 3379 assert(lane0 < 2); 3380 return 0x67280 | (src0 << 0) | (neg0 << 3) | (abs0 << 4) | (divzero << 5) | (lane0 << 8); 3381} 3382 3383static inline unsigned 3384bi_pack_add_frsq_f32(bi_instr *I, enum bifrost_packed_src 
src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 3385{ 3386 assert((1 << src0) & 0xf7); 3387 static uint8_t widen0_table[] = { ~0, 0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 3388 assert(I->src[0].swizzle < 13); 3389 unsigned widen0 = widen0_table[I->src[0].swizzle]; 3390 assert(widen0 < 4); 3391 unsigned neg0 = I->src[0].neg; 3392 assert(neg0 < 2); 3393 unsigned abs0 = I->src[0].abs; 3394 assert(abs0 < 2); 3395 unsigned derived_6 = 0; 3396 if (widen0 == 0) derived_6 = 0; 3397 else unreachable("No pattern match at pos 6"); 3398 3399 return 0x66100 | (src0 << 0) | (neg0 << 3) | (abs0 << 4) | (derived_6 << 6); 3400} 3401 3402static inline unsigned 3403bi_pack_add_frsq_approx_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 3404{ 3405 assert((1 << src0) & 0xf7); 3406 static uint8_t widen0_table[] = { 1, 0, ~0, 2, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 3407 assert(I->src[0].swizzle < 13); 3408 unsigned widen0 = widen0_table[I->src[0].swizzle]; 3409 assert(widen0 < 4); 3410 unsigned neg0 = I->src[0].neg; 3411 assert(neg0 < 2); 3412 unsigned abs0 = I->src[0].abs; 3413 assert(abs0 < 2); 3414 unsigned divzero = I->divzero; 3415 assert(divzero < 2); 3416 if (widen0 == 0) { 3417 return 0x67100 | (src0 << 0) | (neg0 << 3) | (abs0 << 4) | (divzero << 5); 3418 } else if (widen0 != 0) { 3419 unsigned derived_7 = 0; 3420 if (widen0 == 1) derived_7 = 0; 3421 else if (widen0 == 2) derived_7 = 1; 3422 else unreachable("No pattern match at pos 7"); 3423 3424 return 0x67140 | (src0 << 0) | (neg0 << 3) | (abs0 << 4) | (divzero << 5) | (derived_7 << 7); 3425 } else { 3426 unreachable("No matching state found in add_frsq_approx_f32"); 3427 } 3428} 3429 3430static inline unsigned 3431bi_pack_add_fsincos_offset_u6(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 3432{ 
3433 assert((1 << src0) & 0xf7); 3434 unsigned scale = I->scale; 3435 assert(scale < 2); 3436 return 0x67aa0 | (src0 << 0) | (scale << 3); 3437} 3438 3439static inline unsigned 3440bi_pack_add_fsin_table_u6(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 3441{ 3442 assert((1 << src0) & 0xf7); 3443 unsigned offset = I->offset; 3444 assert(offset < 2); 3445 return 0x67a80 | (src0 << 0) | (offset << 4); 3446} 3447 3448static inline unsigned 3449bi_pack_add_hadd_s32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 3450{ 3451 static uint8_t round_table[] = { ~0, 1, 0, ~0, ~0, ~0, ~0, ~0, ~0 }; 3452 assert(I->round < 9); 3453 unsigned round = round_table[I->round]; 3454 assert(round < 2); 3455 return 0xbc640 | (src0 << 0) | (src1 << 3) | (round << 12); 3456} 3457 3458static inline unsigned 3459bi_pack_add_hadd_u32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 3460{ 3461 static uint8_t round_table[] = { ~0, 1, 0, ~0, ~0, ~0, ~0, ~0, ~0 }; 3462 assert(I->round < 9); 3463 unsigned round = round_table[I->round]; 3464 assert(round < 2); 3465 return 0xbc6c0 | (src0 << 0) | (src1 << 3) | (round << 12); 3466} 3467 3468static inline unsigned 3469bi_pack_add_hadd_v2s16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 3470{ 3471 static uint8_t round_table[] = { ~0, 1, 0, ~0, ~0, ~0, ~0, ~0, ~0 }; 3472 assert(I->round < 9); 3473 unsigned round = round_table[I->round]; 3474 assert(round < 2); 3475 static uint8_t swap1_table[] = { ~0, 0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 3476 assert(I->src[1].swizzle < 13); 3477 unsigned swap1 = swap1_table[I->src[1].swizzle]; 3478 assert(swap1 < 2); 3479 static uint8_t swap0_table[] = { ~0, 0, 1, ~0, ~0, 
~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 3480 assert(I->src[0].swizzle < 13); 3481 unsigned swap0 = swap0_table[I->src[0].swizzle]; 3482 assert(swap0 < 2); 3483 return 0xbc840 | (src0 << 0) | (src1 << 3) | (round << 12) | (swap1 << 9) | (swap0 << 10); 3484} 3485 3486static inline unsigned 3487bi_pack_add_hadd_v2u16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 3488{ 3489 static uint8_t round_table[] = { ~0, 1, 0, ~0, ~0, ~0, ~0, ~0, ~0 }; 3490 assert(I->round < 9); 3491 unsigned round = round_table[I->round]; 3492 assert(round < 2); 3493 static uint8_t swap1_table[] = { ~0, 0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 3494 assert(I->src[1].swizzle < 13); 3495 unsigned swap1 = swap1_table[I->src[1].swizzle]; 3496 assert(swap1 < 2); 3497 static uint8_t swap0_table[] = { ~0, 0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 3498 assert(I->src[0].swizzle < 13); 3499 unsigned swap0 = swap0_table[I->src[0].swizzle]; 3500 assert(swap0 < 2); 3501 return 0xbc8c0 | (src0 << 0) | (src1 << 3) | (round << 12) | (swap1 << 9) | (swap0 << 10); 3502} 3503 3504static inline unsigned 3505bi_pack_add_hadd_v4s8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 3506{ 3507 static uint8_t round_table[] = { ~0, 1, 0, ~0, ~0, ~0, ~0, ~0, ~0 }; 3508 assert(I->round < 9); 3509 unsigned round = round_table[I->round]; 3510 assert(round < 2); 3511 return 0xbc440 | (src0 << 0) | (src1 << 3) | (round << 12); 3512} 3513 3514static inline unsigned 3515bi_pack_add_hadd_v4u8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 3516{ 3517 static uint8_t round_table[] = { ~0, 1, 0, ~0, ~0, ~0, ~0, ~0, ~0 }; 3518 assert(I->round < 9); 3519 unsigned round = round_table[I->round]; 3520 assert(round < 2); 3521 return 0xbc4c0 | (src0 << 0) | (src1 << 3) | (round << 
12); 3522} 3523 3524static inline unsigned 3525bi_pack_add_iabs_s32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 3526{ 3527 3528 return 0x3dea0 | (src0 << 0); 3529} 3530 3531static inline unsigned 3532bi_pack_add_iabs_v2s16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 3533{ 3534 static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 3535 assert(I->src[0].swizzle < 13); 3536 unsigned swz0 = swz0_table[I->src[0].swizzle]; 3537 assert(swz0 < 4); 3538 return 0x3de88 | (src0 << 0) | (swz0 << 4); 3539} 3540 3541static inline unsigned 3542bi_pack_add_iabs_v4s8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 3543{ 3544 3545 return 0x3deb0 | (src0 << 0); 3546} 3547 3548static inline unsigned 3549bi_pack_add_iadd_s32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 3550{ 3551 unsigned saturate = I->saturate; 3552 assert(saturate < 2); 3553 static uint8_t lanes1_table[] = { 1, 0, ~0, 2, 3, 4, 5, 6, ~0, ~0, ~0, ~0, ~0 }; 3554 assert(I->src[1].swizzle < 13); 3555 unsigned lanes1 = lanes1_table[I->src[1].swizzle]; 3556 assert(lanes1 < 8); 3557 if (lanes1 == 0) { 3558 return 0xbc600 | (src0 << 0) | (src1 << 3) | (saturate << 8); 3559 } else if ((lanes1 == 1) || (lanes1 == 2)) { 3560 unsigned derived_9 = 0; 3561 if (lanes1 == 1) derived_9 = 0; 3562 else if (lanes1 == 2) derived_9 = 1; 3563 else unreachable("No pattern match at pos 9"); 3564 3565 return 0xbec00 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9); 3566 } else if ((lanes1 == 3) || (lanes1 == 4) || (lanes1 == 5) || (lanes1 == 6)) { 3567 unsigned derived_9 = 0; 3568 if (lanes1 == 3) derived_9 = 0; 3569 else if (lanes1 == 4) derived_9 = 1; 
3570 else if (lanes1 == 5) derived_9 = 2; 3571 else if (lanes1 == 6) derived_9 = 3; 3572 else unreachable("No pattern match at pos 9"); 3573 3574 return 0xbe000 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9); 3575 } else { 3576 unreachable("No matching state found in add_iadd_s32"); 3577 } 3578} 3579 3580static inline unsigned 3581bi_pack_add_iadd_u32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 3582{ 3583 unsigned saturate = I->saturate; 3584 assert(saturate < 2); 3585 static uint8_t lanes1_table[] = { 1, 0, ~0, 2, 3, 4, 5, 6, ~0, ~0, ~0, ~0, ~0 }; 3586 assert(I->src[1].swizzle < 13); 3587 unsigned lanes1 = lanes1_table[I->src[1].swizzle]; 3588 assert(lanes1 < 8); 3589 if (lanes1 == 0) { 3590 unsigned derived_7 = 0; 3591 if ((saturate == 0) && (lanes1 == 0)) derived_7 = 0; 3592 else if ((saturate == 1) || (lanes1 != 0)) derived_7 = 1; 3593 else unreachable("No pattern match at pos 7"); 3594 3595 return 0xbc600 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7); 3596 } else if ((lanes1 == 1) || (lanes1 == 2)) { 3597 unsigned derived_7 = 0; 3598 if ((saturate == 0) && (lanes1 == 0)) derived_7 = 0; 3599 else if ((saturate == 1) || (lanes1 != 0)) derived_7 = 1; 3600 else unreachable("No pattern match at pos 7"); 3601 3602 unsigned derived_9 = 0; 3603 if (lanes1 == 1) derived_9 = 0; 3604 else if (lanes1 == 2) derived_9 = 1; 3605 else unreachable("No pattern match at pos 9"); 3606 3607 return 0xbec00 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9); 3608 } else if ((lanes1 == 3) || (lanes1 == 4) || (lanes1 == 5) || (lanes1 == 6)) { 3609 unsigned derived_7 = 0; 3610 if ((saturate == 0) && (lanes1 == 0)) derived_7 = 0; 3611 else if ((saturate == 1) || (lanes1 != 0)) derived_7 = 1; 3612 else unreachable("No pattern match at pos 7"); 3613 3614 unsigned derived_9 = 0; 3615 if (lanes1 == 3) derived_9 = 0; 3616 else if 
(lanes1 == 4) derived_9 = 1; 3617 else if (lanes1 == 5) derived_9 = 2; 3618 else if (lanes1 == 6) derived_9 = 3; 3619 else unreachable("No pattern match at pos 9"); 3620 3621 return 0xbe000 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9); 3622 } else { 3623 unreachable("No matching state found in add_iadd_u32"); 3624 } 3625} 3626 3627static inline unsigned 3628bi_pack_add_iadd_v2s16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 3629{ 3630 unsigned saturate = I->saturate; 3631 assert(saturate < 2); 3632 static uint8_t lanes0_table[] = { ~0, 0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 3633 assert(I->src[0].swizzle < 13); 3634 unsigned lanes0 = lanes0_table[I->src[0].swizzle]; 3635 assert(lanes0 < 2); 3636 static uint8_t lanes1_table[] = { 2, 0, 1, 3, ~0, ~0, ~0, ~0, 4, 5, ~0, ~0, ~0 }; 3637 assert(I->src[1].swizzle < 13); 3638 unsigned lanes1 = lanes1_table[I->src[1].swizzle]; 3639 assert(lanes1 < 8); 3640 if (((lanes0 == 0) || (lanes0 == 1)) && ((lanes1 == 0) || (lanes1 == 1))) { 3641 unsigned derived_9 = 0; 3642 if (lanes1 == 0) derived_9 = 0; 3643 else if (lanes1 == 1) derived_9 = 1; 3644 else unreachable("No pattern match at pos 9"); 3645 3646 unsigned derived_10 = 0; 3647 if (lanes0 == 0) derived_10 = 0; 3648 else if (lanes0 == 1) derived_10 = 1; 3649 else unreachable("No pattern match at pos 10"); 3650 3651 return 0xbc800 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9) | (derived_10 << 10); 3652 } else if ((lanes0 == 0) && ((lanes1 == 2) || (lanes1 == 3))) { 3653 unsigned derived_9 = 0; 3654 if (lanes1 == 2) derived_9 = 0; 3655 else if (lanes1 == 3) derived_9 = 1; 3656 else unreachable("No pattern match at pos 9"); 3657 3658 return 0xbec40 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9); 3659 } else if ((lanes0 == 0) && ((lanes1 == 4) || (lanes1 == 5))) { 3660 unsigned derived_9 = 0; 3661 if (lanes1 == 
4) derived_9 = 0; 3662 else if (lanes1 == 5) derived_9 = 1; 3663 else unreachable("No pattern match at pos 9"); 3664 3665 return 0xbe800 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9); 3666 } else { 3667 unreachable("No matching state found in add_iadd_v2s16"); 3668 } 3669} 3670 3671static inline unsigned 3672bi_pack_add_iadd_v2u16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 3673{ 3674 unsigned saturate = I->saturate; 3675 assert(saturate < 2); 3676 static uint8_t lanes0_table[] = { ~0, 0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 3677 assert(I->src[0].swizzle < 13); 3678 unsigned lanes0 = lanes0_table[I->src[0].swizzle]; 3679 assert(lanes0 < 2); 3680 static uint8_t lanes1_table[] = { 2, 0, 1, 3, ~0, ~0, ~0, ~0, 4, 5, ~0, ~0, ~0 }; 3681 assert(I->src[1].swizzle < 13); 3682 unsigned lanes1 = lanes1_table[I->src[1].swizzle]; 3683 assert(lanes1 < 8); 3684 if (((lanes0 == 0) || (lanes0 == 1)) && ((lanes1 == 0) || (lanes1 == 1))) { 3685 unsigned derived_7 = 0; 3686 if ((saturate == 0) && (lanes1 != 4) && (lanes1 != 5)) derived_7 = 0; 3687 else if ((saturate == 1) || (lanes1 == 4) || (lanes1 == 5)) derived_7 = 1; 3688 else unreachable("No pattern match at pos 7"); 3689 3690 unsigned derived_9 = 0; 3691 if (lanes1 == 0) derived_9 = 0; 3692 else if (lanes1 == 1) derived_9 = 1; 3693 else unreachable("No pattern match at pos 9"); 3694 3695 unsigned derived_10 = 0; 3696 if (lanes0 == 0) derived_10 = 0; 3697 else if (lanes0 == 1) derived_10 = 1; 3698 else unreachable("No pattern match at pos 10"); 3699 3700 return 0xbc800 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9) | (derived_10 << 10); 3701 } else if ((lanes0 == 0) && ((lanes1 == 2) || (lanes1 == 3))) { 3702 unsigned derived_7 = 0; 3703 if ((saturate == 0) && (lanes1 != 4) && (lanes1 != 5)) derived_7 = 0; 3704 else if ((saturate == 1) || (lanes1 == 4) || (lanes1 == 5)) derived_7 
= 1; 3705 else unreachable("No pattern match at pos 7"); 3706 3707 unsigned derived_9 = 0; 3708 if (lanes1 == 2) derived_9 = 0; 3709 else if (lanes1 == 3) derived_9 = 1; 3710 else unreachable("No pattern match at pos 9"); 3711 3712 return 0xbec40 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9); 3713 } else if ((lanes0 == 0) && ((lanes1 == 4) || (lanes1 == 5))) { 3714 unsigned derived_7 = 0; 3715 if ((saturate == 0) && (lanes1 != 4) && (lanes1 != 5)) derived_7 = 0; 3716 else if ((saturate == 1) || (lanes1 == 4) || (lanes1 == 5)) derived_7 = 1; 3717 else unreachable("No pattern match at pos 7"); 3718 3719 unsigned derived_9 = 0; 3720 if (lanes1 == 4) derived_9 = 0; 3721 else if (lanes1 == 5) derived_9 = 1; 3722 else unreachable("No pattern match at pos 9"); 3723 3724 return 0xbe800 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9); 3725 } else { 3726 unreachable("No matching state found in add_iadd_v2u16"); 3727 } 3728} 3729 3730static inline unsigned 3731bi_pack_add_iadd_v4s8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 3732{ 3733 unsigned saturate = I->saturate; 3734 assert(saturate < 2); 3735 static uint8_t lanes0_table[] = { ~0, 0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 3736 assert(I->src[0].swizzle < 13); 3737 unsigned lanes0 = lanes0_table[I->src[0].swizzle]; 3738 assert(lanes0 < 8); 3739 static uint8_t lanes1_table[] = { ~0, 0, ~0, ~0, 1, 2, 3, 4, ~0, ~0, ~0, ~0, ~0 }; 3740 assert(I->src[1].swizzle < 13); 3741 unsigned lanes1 = lanes1_table[I->src[1].swizzle]; 3742 assert(lanes1 < 8); 3743 if ((lanes0 == 0) && (lanes1 == 0)) { 3744 return 0xbc400 | (src0 << 0) | (src1 << 3) | (saturate << 8); 3745 } else if ((lanes0 == 0) && ((lanes1 == 1) || (lanes1 == 2) || (lanes1 == 3) || (lanes1 == 4))) { 3746 unsigned derived_9 = 0; 3747 if (lanes1 == 1) derived_9 = 0; 3748 else if (lanes1 == 2) derived_9 = 
1; 3749 else if (lanes1 == 3) derived_9 = 2; 3750 else if (lanes1 == 4) derived_9 = 3; 3751 else unreachable("No pattern match at pos 9"); 3752 3753 return 0xbe040 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9); 3754 } else if ((lanes0 == 0) && ((lanes1 == 5) || (lanes1 == 6))) { 3755 unsigned derived_9 = 0; 3756 if (lanes1 == 5) derived_9 = 0; 3757 else if (lanes1 == 6) derived_9 = 1; 3758 else unreachable("No pattern match at pos 9"); 3759 3760 return 0xbe840 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9); 3761 } else { 3762 unreachable("No matching state found in add_iadd_v4s8"); 3763 } 3764} 3765 3766static inline unsigned 3767bi_pack_add_iadd_v4u8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 3768{ 3769 unsigned saturate = I->saturate; 3770 assert(saturate < 2); 3771 static uint8_t lanes0_table[] = { ~0, 0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 3772 assert(I->src[0].swizzle < 13); 3773 unsigned lanes0 = lanes0_table[I->src[0].swizzle]; 3774 assert(lanes0 < 8); 3775 static uint8_t lanes1_table[] = { ~0, 0, ~0, ~0, 1, 2, 3, 4, ~0, ~0, ~0, ~0, ~0 }; 3776 assert(I->src[1].swizzle < 13); 3777 unsigned lanes1 = lanes1_table[I->src[1].swizzle]; 3778 assert(lanes1 < 8); 3779 if ((lanes0 == 0) && (lanes1 == 0)) { 3780 unsigned derived_7 = 0; 3781 if (saturate == 0) derived_7 = 0; 3782 else if (saturate == 1) derived_7 = 1; 3783 else unreachable("No pattern match at pos 7"); 3784 3785 return 0xbc400 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7); 3786 } else if ((lanes0 == 0) && ((lanes1 == 1) || (lanes1 == 2) || (lanes1 == 3) || (lanes1 == 4))) { 3787 unsigned derived_7 = 0; 3788 if (saturate == 0) derived_7 = 0; 3789 else if (saturate == 1) derived_7 = 1; 3790 else unreachable("No pattern match at pos 7"); 3791 3792 unsigned derived_9 = 0; 3793 if (lanes1 == 1) derived_9 = 0; 3794 else if (lanes1 == 2) derived_9 = 1; 3795 
else if (lanes1 == 3) derived_9 = 2; 3796 else if (lanes1 == 4) derived_9 = 3; 3797 else unreachable("No pattern match at pos 9"); 3798 3799 return 0xbe040 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9); 3800 } else if ((lanes0 == 0) && ((lanes1 == 5) || (lanes1 == 6))) { 3801 unsigned derived_7 = 0; 3802 if (saturate == 0) derived_7 = 0; 3803 else if (saturate == 1) derived_7 = 1; 3804 else unreachable("No pattern match at pos 7"); 3805 3806 unsigned derived_9 = 0; 3807 if (lanes1 == 5) derived_9 = 0; 3808 else if (lanes1 == 6) derived_9 = 1; 3809 else unreachable("No pattern match at pos 9"); 3810 3811 return 0xbe840 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9); 3812 } else { 3813 unreachable("No matching state found in add_iadd_v4u8"); 3814 } 3815} 3816 3817static inline unsigned 3818bi_pack_add_icmp_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 3819{ 3820 static uint8_t result_type_table[] = { 0, ~0, 1, ~0 }; 3821 assert(I->result_type < 4); 3822 unsigned result_type = result_type_table[I->result_type]; 3823 assert(result_type < 2); 3824 static uint8_t cmpf_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0 }; 3825 assert(I->cmpf < 9); 3826 unsigned cmpf = cmpf_table[I->cmpf]; 3827 assert(cmpf < 2); 3828 return 0x7b300 | (src0 << 0) | (src1 << 3) | (result_type << 10) | (cmpf << 6); 3829} 3830 3831static inline unsigned 3832bi_pack_add_icmp_s32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 3833{ 3834 static uint8_t result_type_table[] = { 0, ~0, 1, ~0 }; 3835 assert(I->result_type < 4); 3836 unsigned result_type = result_type_table[I->result_type]; 3837 assert(result_type < 2); 3838 static uint8_t cmpf_table[] = { ~0, 0, 1, ~0, 2, 3, ~0, ~0, ~0 }; 3839 assert(I->cmpf < 9); 3840 unsigned cmpf = cmpf_table[I->cmpf]; 
3841 assert(cmpf < 4); 3842 if ((cmpf == 2) || (cmpf == 3)) { 3843 { unsigned temp = src0; src0 = src1; src1 = temp; } 3844 if (cmpf == 2) cmpf = 0; 3845 else if (cmpf == 3) cmpf = 1; 3846 } 3847 3848 unsigned derived_6 = 0; 3849 if (cmpf == 0) derived_6 = 0; 3850 else if (cmpf == 1) derived_6 = 1; 3851 else unreachable("No pattern match at pos 6"); 3852 3853 return 0x7b200 | (src0 << 0) | (src1 << 3) | (result_type << 10) | (derived_6 << 6); 3854} 3855 3856static inline unsigned 3857bi_pack_add_icmp_u32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 3858{ 3859 static uint8_t result_type_table[] = { 0, ~0, 1, ~0 }; 3860 assert(I->result_type < 4); 3861 unsigned result_type = result_type_table[I->result_type]; 3862 assert(result_type < 2); 3863 static uint8_t cmpf_table[] = { ~0, 0, 1, ~0, 2, 3, ~0, ~0, ~0 }; 3864 assert(I->cmpf < 9); 3865 unsigned cmpf = cmpf_table[I->cmpf]; 3866 assert(cmpf < 4); 3867 if ((cmpf == 2) || (cmpf == 3)) { 3868 { unsigned temp = src0; src0 = src1; src1 = temp; } 3869 if (cmpf == 2) cmpf = 0; 3870 else if (cmpf == 3) cmpf = 1; 3871 } 3872 3873 unsigned derived_6 = 0; 3874 if (cmpf == 0) derived_6 = 0; 3875 else if (cmpf == 1) derived_6 = 1; 3876 else unreachable("No pattern match at pos 6"); 3877 3878 return 0x7b280 | (src0 << 0) | (src1 << 3) | (result_type << 10) | (derived_6 << 6); 3879} 3880 3881static inline unsigned 3882bi_pack_add_icmp_v2i16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 3883{ 3884 static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 3885 assert(I->src[0].swizzle < 13); 3886 unsigned swz0 = swz0_table[I->src[0].swizzle]; 3887 assert(swz0 < 4); 3888 static uint8_t swz1_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 3889 assert(I->src[1].swizzle < 13); 3890 unsigned swz1 = swz1_table[I->src[1].swizzle]; 
3891 assert(swz1 < 4); 3892 static uint8_t result_type_table[] = { 0, ~0, 1, ~0 }; 3893 assert(I->result_type < 4); 3894 unsigned result_type = result_type_table[I->result_type]; 3895 assert(result_type < 2); 3896 static uint8_t cmpf_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0 }; 3897 assert(I->cmpf < 9); 3898 unsigned cmpf = cmpf_table[I->cmpf]; 3899 assert(cmpf < 2); 3900 return 0x7a000 | (src0 << 0) | (src1 << 3) | (swz0 << 6) | (swz1 << 8) | (result_type << 10) | (cmpf << 11); 3901} 3902 3903static inline unsigned 3904bi_pack_add_icmp_v2s16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 3905{ 3906 static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 3907 assert(I->src[0].swizzle < 13); 3908 unsigned swz0 = swz0_table[I->src[0].swizzle]; 3909 assert(swz0 < 4); 3910 static uint8_t swz1_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 3911 assert(I->src[1].swizzle < 13); 3912 unsigned swz1 = swz1_table[I->src[1].swizzle]; 3913 assert(swz1 < 4); 3914 static uint8_t result_type_table[] = { 0, ~0, 1, ~0 }; 3915 assert(I->result_type < 4); 3916 unsigned result_type = result_type_table[I->result_type]; 3917 assert(result_type < 2); 3918 static uint8_t cmpf_table[] = { ~0, 0, 1, ~0, 2, 3, ~0, ~0, ~0 }; 3919 assert(I->cmpf < 9); 3920 unsigned cmpf = cmpf_table[I->cmpf]; 3921 assert(cmpf < 4); 3922 if ((cmpf == 2) || (cmpf == 3)) { 3923 { unsigned temp = src0; src0 = src1; src1 = temp; } 3924 { unsigned temp = swz0; swz0 = swz1; swz1 = temp; } 3925 if (cmpf == 2) cmpf = 0; 3926 else if (cmpf == 3) cmpf = 1; 3927 } 3928 3929 unsigned derived_12 = 0; 3930 if (cmpf == 0) derived_12 = 0; 3931 else if (cmpf == 1) derived_12 = 1; 3932 else unreachable("No pattern match at pos 12"); 3933 3934 return 0x78000 | (src0 << 0) | (src1 << 3) | (swz0 << 6) | (swz1 << 8) | (result_type << 10) | (derived_12 << 12); 3935} 3936 3937static inline unsigned 
3938bi_pack_add_icmp_v2u16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 3939{ 3940 static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 3941 assert(I->src[0].swizzle < 13); 3942 unsigned swz0 = swz0_table[I->src[0].swizzle]; 3943 assert(swz0 < 4); 3944 static uint8_t swz1_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 3945 assert(I->src[1].swizzle < 13); 3946 unsigned swz1 = swz1_table[I->src[1].swizzle]; 3947 assert(swz1 < 4); 3948 static uint8_t result_type_table[] = { 0, ~0, 1, ~0 }; 3949 assert(I->result_type < 4); 3950 unsigned result_type = result_type_table[I->result_type]; 3951 assert(result_type < 2); 3952 static uint8_t cmpf_table[] = { ~0, 0, 1, ~0, 2, 3, ~0, ~0, ~0 }; 3953 assert(I->cmpf < 9); 3954 unsigned cmpf = cmpf_table[I->cmpf]; 3955 assert(cmpf < 4); 3956 if ((cmpf == 2) || (cmpf == 3)) { 3957 { unsigned temp = src0; src0 = src1; src1 = temp; } 3958 { unsigned temp = swz0; swz0 = swz1; swz1 = temp; } 3959 if (cmpf == 2) cmpf = 0; 3960 else if (cmpf == 3) cmpf = 1; 3961 } 3962 3963 unsigned derived_12 = 0; 3964 if (cmpf == 0) derived_12 = 0; 3965 else if (cmpf == 1) derived_12 = 1; 3966 else unreachable("No pattern match at pos 12"); 3967 3968 return 0x78800 | (src0 << 0) | (src1 << 3) | (swz0 << 6) | (swz1 << 8) | (result_type << 10) | (derived_12 << 12); 3969} 3970 3971static inline unsigned 3972bi_pack_add_icmp_v4i8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 3973{ 3974 static uint8_t result_type_table[] = { 0, ~0, 1, ~0 }; 3975 assert(I->result_type < 4); 3976 unsigned result_type = result_type_table[I->result_type]; 3977 assert(result_type < 2); 3978 static uint8_t cmpf_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0 }; 3979 assert(I->cmpf < 9); 3980 unsigned cmpf = cmpf_table[I->cmpf]; 3981 assert(cmpf < 2); 3982 return 0x7b100 
| (src0 << 0) | (src1 << 3) | (result_type << 10) | (cmpf << 6); 3983} 3984 3985static inline unsigned 3986bi_pack_add_icmp_v4s8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 3987{ 3988 static uint8_t result_type_table[] = { 0, ~0, 1, ~0 }; 3989 assert(I->result_type < 4); 3990 unsigned result_type = result_type_table[I->result_type]; 3991 assert(result_type < 2); 3992 static uint8_t cmpf_table[] = { ~0, 0, 1, ~0, 2, 3, ~0, ~0, ~0 }; 3993 assert(I->cmpf < 9); 3994 unsigned cmpf = cmpf_table[I->cmpf]; 3995 assert(cmpf < 4); 3996 if ((cmpf == 2) || (cmpf == 3)) { 3997 { unsigned temp = src0; src0 = src1; src1 = temp; } 3998 if (cmpf == 2) cmpf = 0; 3999 else if (cmpf == 3) cmpf = 1; 4000 } 4001 4002 unsigned derived_6 = 0; 4003 if (cmpf == 0) derived_6 = 0; 4004 else if (cmpf == 1) derived_6 = 1; 4005 else unreachable("No pattern match at pos 6"); 4006 4007 return 0x7b000 | (src0 << 0) | (src1 << 3) | (result_type << 10) | (derived_6 << 6); 4008} 4009 4010static inline unsigned 4011bi_pack_add_icmp_v4u8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 4012{ 4013 static uint8_t result_type_table[] = { 0, ~0, 1, ~0 }; 4014 assert(I->result_type < 4); 4015 unsigned result_type = result_type_table[I->result_type]; 4016 assert(result_type < 2); 4017 static uint8_t cmpf_table[] = { ~0, 0, 1, ~0, 2, 3, ~0, ~0, ~0 }; 4018 assert(I->cmpf < 9); 4019 unsigned cmpf = cmpf_table[I->cmpf]; 4020 assert(cmpf < 4); 4021 if ((cmpf == 2) || (cmpf == 3)) { 4022 { unsigned temp = src0; src0 = src1; src1 = temp; } 4023 if (cmpf == 2) cmpf = 0; 4024 else if (cmpf == 3) cmpf = 1; 4025 } 4026 4027 unsigned derived_6 = 0; 4028 if (cmpf == 0) derived_6 = 0; 4029 else if (cmpf == 1) derived_6 = 1; 4030 else unreachable("No pattern match at pos 6"); 4031 4032 return 0x7b080 | (src0 << 0) | (src1 << 3) | (result_type << 10) | 
(derived_6 << 6); 4033} 4034 4035static inline unsigned 4036bi_pack_add_icmpf_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 4037{ 4038 4039 return 0x7be00 | (src0 << 0) | (src1 << 3) | (src2 << 6); 4040} 4041 4042static inline unsigned 4043bi_pack_add_icmpi_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 4044{ 4045 static uint8_t result_type_table[] = { 0, ~0, 1, ~0 }; 4046 assert(I->result_type < 4); 4047 unsigned result_type = result_type_table[I->result_type]; 4048 assert(result_type < 2); 4049 static uint8_t cmpf_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0 }; 4050 assert(I->cmpf < 9); 4051 unsigned cmpf = cmpf_table[I->cmpf]; 4052 assert(cmpf < 2); 4053 return 0x7b900 | (src0 << 0) | (src1 << 3) | (result_type << 10) | (cmpf << 6); 4054} 4055 4056static inline unsigned 4057bi_pack_add_icmpi_s32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 4058{ 4059 static uint8_t result_type_table[] = { 0, ~0, 1, ~0 }; 4060 assert(I->result_type < 4); 4061 unsigned result_type = result_type_table[I->result_type]; 4062 assert(result_type < 2); 4063 static uint8_t cmpf_table[] = { ~0, 0, 1, ~0, ~0, ~0, ~0, ~0, ~0 }; 4064 assert(I->cmpf < 9); 4065 unsigned cmpf = cmpf_table[I->cmpf]; 4066 assert(cmpf < 2); 4067 return 0x7b800 | (src0 << 0) | (src1 << 3) | (result_type << 10) | (cmpf << 6); 4068} 4069 4070static inline unsigned 4071bi_pack_add_icmpi_u32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 4072{ 4073 static uint8_t result_type_table[] = { 0, ~0, 1, ~0 }; 4074 assert(I->result_type < 4); 4075 unsigned result_type = result_type_table[I->result_type]; 4076 assert(result_type < 2); 4077 static uint8_t cmpf_table[] = { ~0, 0, 
1, ~0, ~0, ~0, ~0, ~0, ~0 }; 4078 assert(I->cmpf < 9); 4079 unsigned cmpf = cmpf_table[I->cmpf]; 4080 assert(cmpf < 2); 4081 return 0x7b880 | (src0 << 0) | (src1 << 3) | (result_type << 10) | (cmpf << 6); 4082} 4083 4084static inline unsigned 4085bi_pack_add_icmpm_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 4086{ 4087 4088 return 0x7ba00 | (src0 << 0) | (src1 << 3) | (src2 << 6); 4089} 4090 4091static inline unsigned 4092bi_pack_add_ilogb_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 4093{ 4094 static uint8_t widen0_table[] = { 2, 1, ~0, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 4095 assert(I->src[0].swizzle < 13); 4096 unsigned widen0 = widen0_table[I->src[0].swizzle]; 4097 assert(widen0 < 4); 4098 return 0x3d9e0 | (src0 << 0) | (widen0 << 3); 4099} 4100 4101static inline unsigned 4102bi_pack_add_ilogb_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 4103{ 4104 static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 4105 assert(I->src[0].swizzle < 13); 4106 unsigned swz0 = swz0_table[I->src[0].swizzle]; 4107 assert(swz0 < 4); 4108 return 0x3d9c0 | (src0 << 0) | (swz0 << 3); 4109} 4110 4111static inline unsigned 4112bi_pack_add_imov_fma(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 4113{ 4114 unsigned threads = I->threads; 4115 assert(threads < 2); 4116 return 0xd7820 | (threads << 3); 4117} 4118 4119static inline unsigned 4120bi_pack_add_isub_s32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 4121{ 4122 unsigned saturate = I->saturate; 4123 assert(saturate < 2); 4124 static uint8_t lanes1_table[] = { 
1, 0, ~0, 2, 3, 4, 5, 6, ~0, ~0, ~0, ~0, ~0 }; 4125 assert(I->src[1].swizzle < 13); 4126 unsigned lanes1 = lanes1_table[I->src[1].swizzle]; 4127 assert(lanes1 < 8); 4128 if (lanes1 == 0) { 4129 return 0xbd600 | (src0 << 0) | (src1 << 3) | (saturate << 8); 4130 } else if ((lanes1 == 1) || (lanes1 == 2)) { 4131 unsigned derived_9 = 0; 4132 if (lanes1 == 1) derived_9 = 0; 4133 else if (lanes1 == 2) derived_9 = 1; 4134 else unreachable("No pattern match at pos 9"); 4135 4136 return 0xbfc00 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9); 4137 } else if ((lanes1 == 3) || (lanes1 == 4) || (lanes1 == 5) || (lanes1 == 6)) { 4138 unsigned derived_9 = 0; 4139 if (lanes1 == 3) derived_9 = 0; 4140 else if (lanes1 == 4) derived_9 = 1; 4141 else if (lanes1 == 5) derived_9 = 2; 4142 else if (lanes1 == 6) derived_9 = 3; 4143 else unreachable("No pattern match at pos 9"); 4144 4145 return 0xbf000 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9); 4146 } else { 4147 unreachable("No matching state found in add_isub_s32"); 4148 } 4149} 4150 4151static inline unsigned 4152bi_pack_add_isub_u32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 4153{ 4154 unsigned saturate = I->saturate; 4155 assert(saturate < 2); 4156 static uint8_t lanes1_table[] = { 1, 0, ~0, 2, 3, 4, 5, 6, ~0, ~0, ~0, ~0, ~0 }; 4157 assert(I->src[1].swizzle < 13); 4158 unsigned lanes1 = lanes1_table[I->src[1].swizzle]; 4159 assert(lanes1 < 8); 4160 if (lanes1 == 0) { 4161 unsigned derived_7 = 0; 4162 if ((saturate == 0) && (lanes1 == 0)) derived_7 = 0; 4163 else if ((saturate == 1) || (lanes1 != 0)) derived_7 = 1; 4164 else unreachable("No pattern match at pos 7"); 4165 4166 return 0xbd600 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7); 4167 } else if ((lanes1 == 1) || (lanes1 == 2)) { 4168 unsigned derived_7 = 0; 4169 if ((saturate == 0) && (lanes1 == 0)) derived_7 = 0; 4170 else if 
((saturate == 1) || (lanes1 != 0)) derived_7 = 1; 4171 else unreachable("No pattern match at pos 7"); 4172 4173 unsigned derived_9 = 0; 4174 if (lanes1 == 1) derived_9 = 0; 4175 else if (lanes1 == 2) derived_9 = 1; 4176 else unreachable("No pattern match at pos 9"); 4177 4178 return 0xbfc00 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9); 4179 } else if ((lanes1 == 3) || (lanes1 == 4) || (lanes1 == 5) || (lanes1 == 6)) { 4180 unsigned derived_7 = 0; 4181 if ((saturate == 0) && (lanes1 == 0)) derived_7 = 0; 4182 else if ((saturate == 1) || (lanes1 != 0)) derived_7 = 1; 4183 else unreachable("No pattern match at pos 7"); 4184 4185 unsigned derived_9 = 0; 4186 if (lanes1 == 3) derived_9 = 0; 4187 else if (lanes1 == 4) derived_9 = 1; 4188 else if (lanes1 == 5) derived_9 = 2; 4189 else if (lanes1 == 6) derived_9 = 3; 4190 else unreachable("No pattern match at pos 9"); 4191 4192 return 0xbf000 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9); 4193 } else { 4194 unreachable("No matching state found in add_isub_u32"); 4195 } 4196} 4197 4198static inline unsigned 4199bi_pack_add_isub_v2s16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 4200{ 4201 unsigned saturate = I->saturate; 4202 assert(saturate < 2); 4203 static uint8_t lanes0_table[] = { ~0, 0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 4204 assert(I->src[0].swizzle < 13); 4205 unsigned lanes0 = lanes0_table[I->src[0].swizzle]; 4206 assert(lanes0 < 2); 4207 static uint8_t lanes1_table[] = { 2, 0, 1, 3, ~0, ~0, ~0, ~0, 4, 5, ~0, ~0, ~0 }; 4208 assert(I->src[1].swizzle < 13); 4209 unsigned lanes1 = lanes1_table[I->src[1].swizzle]; 4210 assert(lanes1 < 8); 4211 if (((lanes0 == 0) || (lanes0 == 1)) && ((lanes1 == 0) || (lanes1 == 1))) { 4212 unsigned derived_9 = 0; 4213 if (lanes1 == 0) derived_9 = 0; 4214 else if (lanes1 == 1) derived_9 = 1; 4215 else 
unreachable("No pattern match at pos 9"); 4216 4217 unsigned derived_10 = 0; 4218 if (lanes0 == 0) derived_10 = 0; 4219 else if (lanes0 == 1) derived_10 = 1; 4220 else unreachable("No pattern match at pos 10"); 4221 4222 return 0xbd800 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9) | (derived_10 << 10); 4223 } else if ((lanes0 == 0) && ((lanes1 == 2) || (lanes1 == 3))) { 4224 unsigned derived_9 = 0; 4225 if (lanes1 == 2) derived_9 = 0; 4226 else if (lanes1 == 3) derived_9 = 1; 4227 else unreachable("No pattern match at pos 9"); 4228 4229 return 0xbfc40 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9); 4230 } else if ((lanes0 == 0) && ((lanes1 == 4) || (lanes1 == 5))) { 4231 unsigned derived_9 = 0; 4232 if (lanes1 == 4) derived_9 = 0; 4233 else if (lanes1 == 5) derived_9 = 1; 4234 else unreachable("No pattern match at pos 9"); 4235 4236 return 0xbf800 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9); 4237 } else { 4238 unreachable("No matching state found in add_isub_v2s16"); 4239 } 4240} 4241 4242static inline unsigned 4243bi_pack_add_isub_v2u16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 4244{ 4245 unsigned saturate = I->saturate; 4246 assert(saturate < 2); 4247 static uint8_t lanes0_table[] = { ~0, 0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 4248 assert(I->src[0].swizzle < 13); 4249 unsigned lanes0 = lanes0_table[I->src[0].swizzle]; 4250 assert(lanes0 < 2); 4251 static uint8_t lanes1_table[] = { 2, 0, 1, 3, ~0, ~0, ~0, ~0, 4, 5, ~0, ~0, ~0 }; 4252 assert(I->src[1].swizzle < 13); 4253 unsigned lanes1 = lanes1_table[I->src[1].swizzle]; 4254 assert(lanes1 < 8); 4255 if (((lanes0 == 0) || (lanes0 == 1)) && ((lanes1 == 0) || (lanes1 == 1))) { 4256 unsigned derived_7 = 0; 4257 if ((saturate == 0) && (lanes1 != 4) && (lanes1 != 5)) derived_7 = 0; 4258 else if ((saturate == 1) || (lanes1 == 4) || (lanes1 == 5)) derived_7 = 1; 4259 
else unreachable("No pattern match at pos 7"); 4260 4261 unsigned derived_9 = 0; 4262 if (lanes1 == 0) derived_9 = 0; 4263 else if (lanes1 == 1) derived_9 = 1; 4264 else unreachable("No pattern match at pos 9"); 4265 4266 unsigned derived_10 = 0; 4267 if (lanes0 == 0) derived_10 = 0; 4268 else if (lanes0 == 1) derived_10 = 1; 4269 else unreachable("No pattern match at pos 10"); 4270 4271 return 0xbd800 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9) | (derived_10 << 10); 4272 } else if ((lanes0 == 0) && ((lanes1 == 2) || (lanes1 == 3))) { 4273 unsigned derived_7 = 0; 4274 if ((saturate == 0) && (lanes1 != 4) && (lanes1 != 5)) derived_7 = 0; 4275 else if ((saturate == 1) || (lanes1 == 4) || (lanes1 == 5)) derived_7 = 1; 4276 else unreachable("No pattern match at pos 7"); 4277 4278 unsigned derived_9 = 0; 4279 if (lanes1 == 2) derived_9 = 0; 4280 else if (lanes1 == 3) derived_9 = 1; 4281 else unreachable("No pattern match at pos 9"); 4282 4283 return 0xbfc40 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9); 4284 } else if ((lanes0 == 0) && ((lanes1 == 4) || (lanes1 == 5))) { 4285 unsigned derived_7 = 0; 4286 if ((saturate == 0) && (lanes1 != 4) && (lanes1 != 5)) derived_7 = 0; 4287 else if ((saturate == 1) || (lanes1 == 4) || (lanes1 == 5)) derived_7 = 1; 4288 else unreachable("No pattern match at pos 7"); 4289 4290 unsigned derived_9 = 0; 4291 if (lanes1 == 4) derived_9 = 0; 4292 else if (lanes1 == 5) derived_9 = 1; 4293 else unreachable("No pattern match at pos 9"); 4294 4295 return 0xbf800 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9); 4296 } else { 4297 unreachable("No matching state found in add_isub_v2u16"); 4298 } 4299} 4300 4301static inline unsigned 4302bi_pack_add_isub_v4s8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 4303{ 4304 unsigned saturate = I->saturate; 
4305 assert(saturate < 2); 4306 static uint8_t lanes0_table[] = { ~0, 0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 4307 assert(I->src[0].swizzle < 13); 4308 unsigned lanes0 = lanes0_table[I->src[0].swizzle]; 4309 assert(lanes0 < 8); 4310 static uint8_t lanes1_table[] = { ~0, 0, ~0, ~0, 1, 2, 3, 4, ~0, ~0, ~0, ~0, ~0 }; 4311 assert(I->src[1].swizzle < 13); 4312 unsigned lanes1 = lanes1_table[I->src[1].swizzle]; 4313 assert(lanes1 < 8); 4314 if ((lanes0 == 0) && (lanes1 == 0)) { 4315 return 0xbd400 | (src0 << 0) | (src1 << 3) | (saturate << 8); 4316 } else if ((lanes0 == 0) && ((lanes1 == 1) || (lanes1 == 2) || (lanes1 == 3) || (lanes1 == 4))) { 4317 unsigned derived_9 = 0; 4318 if (lanes1 == 1) derived_9 = 0; 4319 else if (lanes1 == 2) derived_9 = 1; 4320 else if (lanes1 == 3) derived_9 = 2; 4321 else if (lanes1 == 4) derived_9 = 3; 4322 else unreachable("No pattern match at pos 9"); 4323 4324 return 0xbf040 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9); 4325 } else if ((lanes0 == 0) && ((lanes1 == 5) || (lanes1 == 6))) { 4326 unsigned derived_9 = 0; 4327 if (lanes1 == 5) derived_9 = 0; 4328 else if (lanes1 == 6) derived_9 = 1; 4329 else unreachable("No pattern match at pos 9"); 4330 4331 return 0xbf840 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9); 4332 } else { 4333 unreachable("No matching state found in add_isub_v4s8"); 4334 } 4335} 4336 4337static inline unsigned 4338bi_pack_add_isub_v4u8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 4339{ 4340 unsigned saturate = I->saturate; 4341 assert(saturate < 2); 4342 static uint8_t lanes0_table[] = { ~0, 0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 4343 assert(I->src[0].swizzle < 13); 4344 unsigned lanes0 = lanes0_table[I->src[0].swizzle]; 4345 assert(lanes0 < 8); 4346 static uint8_t lanes1_table[] = { ~0, 0, ~0, ~0, 1, 2, 3, 4, ~0, ~0, ~0, ~0, ~0 }; 4347 assert(I->src[1].swizzle < 13); 
4348 unsigned lanes1 = lanes1_table[I->src[1].swizzle]; 4349 assert(lanes1 < 8); 4350 if ((lanes0 == 0) && (lanes1 == 0)) { 4351 unsigned derived_7 = 0; 4352 if (saturate == 0) derived_7 = 0; 4353 else if (saturate == 1) derived_7 = 1; 4354 else unreachable("No pattern match at pos 7"); 4355 4356 return 0xbd400 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7); 4357 } else if ((lanes0 == 0) && ((lanes1 == 1) || (lanes1 == 2) || (lanes1 == 3) || (lanes1 == 4))) { 4358 unsigned derived_7 = 0; 4359 if (saturate == 0) derived_7 = 0; 4360 else if (saturate == 1) derived_7 = 1; 4361 else unreachable("No pattern match at pos 7"); 4362 4363 unsigned derived_9 = 0; 4364 if (lanes1 == 1) derived_9 = 0; 4365 else if (lanes1 == 2) derived_9 = 1; 4366 else if (lanes1 == 3) derived_9 = 2; 4367 else if (lanes1 == 4) derived_9 = 3; 4368 else unreachable("No pattern match at pos 9"); 4369 4370 return 0xbf040 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9); 4371 } else if ((lanes0 == 0) && ((lanes1 == 5) || (lanes1 == 6))) { 4372 unsigned derived_7 = 0; 4373 if (saturate == 0) derived_7 = 0; 4374 else if (saturate == 1) derived_7 = 1; 4375 else unreachable("No pattern match at pos 7"); 4376 4377 unsigned derived_9 = 0; 4378 if (lanes1 == 5) derived_9 = 0; 4379 else if (lanes1 == 6) derived_9 = 1; 4380 else unreachable("No pattern match at pos 9"); 4381 4382 return 0xbf840 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9); 4383 } else { 4384 unreachable("No matching state found in add_isub_v4u8"); 4385 } 4386} 4387 4388static inline unsigned 4389bi_pack_add_jump(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 4390{ 4391 assert((1 << src0) & 0xf7); 4392 return 0x6fe34 | (src0 << 6); 4393} 4394 4395static inline unsigned 4396bi_pack_add_ldexp_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, 
enum bifrost_packed_src src2, enum bifrost_packed_src src3) 4397{ 4398 static uint8_t round_table[] = { 0, 1, 2, 3, 4, 5, ~0, 6, 7 }; 4399 assert(I->round < 9); 4400 unsigned round = round_table[I->round]; 4401 assert(round < 8); 4402 return 0x74c00 | (src0 << 0) | (src1 << 3) | (round << 6); 4403} 4404 4405static inline unsigned 4406bi_pack_add_ldexp_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 4407{ 4408 static uint8_t round_table[] = { 0, 1, 2, 3, 4, 5, ~0, 6, 7 }; 4409 assert(I->round < 9); 4410 unsigned round = round_table[I->round]; 4411 assert(round < 8); 4412 return 0x74e00 | (src0 << 0) | (src1 << 3) | (round << 6); 4413} 4414 4415static inline unsigned 4416bi_pack_add_ld_attr(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 4417{ 4418 unsigned register_format = I->register_format; 4419 assert(register_format < 16); 4420 unsigned vecsize = I->vecsize; 4421 assert(vecsize < 4); 4422 if (register_format != 8) { 4423 unsigned derived_13 = 0; 4424 if (register_format == 0) derived_13 = 0; 4425 else if (register_format == 1) derived_13 = 1; 4426 else if (register_format == 2) derived_13 = 2; 4427 else if (register_format == 3) derived_13 = 3; 4428 else if (register_format == 4) derived_13 = 4; 4429 else if (register_format == 5) derived_13 = 5; 4430 else if (register_format == 6) derived_13 = 6; 4431 else if (register_format == 7) derived_13 = 7; 4432 else unreachable("No pattern match at pos 13"); 4433 4434 return 0x40400 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (vecsize << 11) | (derived_13 << 13); 4435 } else if (register_format == 8) { 4436 return 0xc4400 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (vecsize << 11); 4437 } else { 4438 unreachable("No matching state found in add_ld_attr"); 4439 } 4440} 4441 4442static inline unsigned 4443bi_pack_add_ld_attr_imm(bi_instr *I, enum 
bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 4444{ 4445 unsigned register_format = I->register_format; 4446 assert(register_format < 16); 4447 unsigned vecsize = I->vecsize; 4448 assert(vecsize < 4); 4449 unsigned attribute_index = I->attribute_index; 4450 assert(attribute_index < 0x10); 4451 if (register_format != 8) { 4452 unsigned derived_13 = 0; 4453 if (register_format == 0) derived_13 = 0; 4454 else if (register_format == 1) derived_13 = 1; 4455 else if (register_format == 2) derived_13 = 2; 4456 else if (register_format == 3) derived_13 = 3; 4457 else if (register_format == 4) derived_13 = 4; 4458 else if (register_format == 5) derived_13 = 5; 4459 else if (register_format == 6) derived_13 = 6; 4460 else if (register_format == 7) derived_13 = 7; 4461 else unreachable("No pattern match at pos 13"); 4462 4463 return 0x40000 | (src0 << 0) | (src1 << 3) | (vecsize << 11) | (attribute_index << 6) | (derived_13 << 13); 4464 } else if (register_format == 8) { 4465 return 0xc4000 | (src0 << 0) | (src1 << 3) | (vecsize << 11) | (attribute_index << 6); 4466 } else { 4467 unreachable("No matching state found in add_ld_attr_imm"); 4468 } 4469} 4470 4471static inline unsigned 4472bi_pack_add_ld_attr_tex(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 4473{ 4474 unsigned register_format = I->register_format; 4475 assert(register_format < 16); 4476 unsigned vecsize = I->vecsize; 4477 assert(vecsize < 4); 4478 if (register_format != 8) { 4479 unsigned derived_13 = 0; 4480 if (register_format == 0) derived_13 = 0; 4481 else if (register_format == 1) derived_13 = 1; 4482 else if (register_format == 2) derived_13 = 2; 4483 else if (register_format == 3) derived_13 = 3; 4484 else if (register_format == 4) derived_13 = 4; 4485 else if (register_format == 5) derived_13 = 5; 4486 else if (register_format == 6) derived_13 = 6; 
4487 else if (register_format == 7) derived_13 = 7; 4488 else unreachable("No pattern match at pos 13"); 4489 4490 return 0x40600 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (vecsize << 11) | (derived_13 << 13); 4491 } else if (register_format == 8) { 4492 return 0xc4600 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (vecsize << 11); 4493 } else { 4494 unreachable("No matching state found in add_ld_attr_tex"); 4495 } 4496} 4497 4498static inline unsigned 4499bi_pack_add_ld_cvt(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 4500{ 4501 assert((1 << src2) & 0xf7); 4502 unsigned vecsize = I->vecsize; 4503 assert(vecsize < 4); 4504 return 0xc9000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (vecsize << 9); 4505} 4506 4507static inline unsigned 4508bi_pack_add_ld_gclk_u64(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 4509{ 4510 static uint8_t source_table[] = { 0, 6, 7 }; 4511 assert(I->source < 3); 4512 unsigned source = source_table[I->source]; 4513 assert(source < 8); 4514 return 0xd7800 | (source << 0); 4515} 4516 4517static inline unsigned 4518bi_pack_add_ld_tile(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 4519{ 4520 assert((1 << src2) & 0xf7); 4521 unsigned vecsize = I->vecsize; 4522 assert(vecsize < 4); 4523 return 0xcb000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (vecsize << 9); 4524} 4525 4526static inline unsigned 4527bi_pack_add_ld_var(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 4528{ 4529 unsigned vecsize = I->vecsize; 4530 assert(vecsize < 4); 4531 unsigned update = I->update; 4532 assert(update < 4); 4533 static uint8_t register_format_table[] = { 1, 0, ~0, ~0, ~0, ~0, ~0, ~0, 2, 3 }; 4534 assert(I->register_format 
< 10); 4535 unsigned register_format = register_format_table[I->register_format]; 4536 assert(register_format < 4); 4537 unsigned sample = I->sample; 4538 assert(sample < 8); 4539 if (register_format != 2) { 4540 unsigned derived_19 = 0; 4541 if (register_format == 0) derived_19 = 0; 4542 else if (register_format == 1) derived_19 = 1; 4543 else unreachable("No pattern match at pos 19"); 4544 4545 unsigned derived_10 = 0; 4546 if ((sample == 0) && (update == 0)) derived_10 = 0; 4547 else if ((sample == 1) && (update == 0)) derived_10 = 1; 4548 else if ((sample == 2) && (update == 0)) derived_10 = 2; 4549 else if ((sample == 3) && (update == 0)) derived_10 = 3; 4550 else if ((sample == 4) && (update == 1)) derived_10 = 4; 4551 else if ((sample == 0) && (update == 2)) derived_10 = 8; 4552 else if ((sample == 1) && (update == 2)) derived_10 = 9; 4553 else if ((sample == 0) && (update == 3)) derived_10 = 10; 4554 else if ((sample == 1) && (update == 3)) derived_10 = 11; 4555 else if ((sample == 2) && (update == 3)) derived_10 = 12; 4556 else if ((sample == 3) && (update == 3)) derived_10 = 13; 4557 else unreachable("No pattern match at pos 10"); 4558 4559 return 0x500c0 | (src0 << 0) | (src1 << 3) | (vecsize << 8) | (derived_19 << 19) | (derived_10 << 10); 4560 } else if (register_format == 2) { 4561 unsigned derived_10 = 0; 4562 if ((sample == 0) && (update == 0)) derived_10 = 0; 4563 else if ((sample == 1) && (update == 0)) derived_10 = 1; 4564 else if ((sample == 2) && (update == 0)) derived_10 = 2; 4565 else if ((sample == 3) && (update == 0)) derived_10 = 3; 4566 else if ((sample == 4) && (update == 1)) derived_10 = 4; 4567 else if ((sample == 0) && (update == 2)) derived_10 = 8; 4568 else if ((sample == 1) && (update == 2)) derived_10 = 9; 4569 else if ((sample == 0) && (update == 3)) derived_10 = 10; 4570 else if ((sample == 1) && (update == 3)) derived_10 = 11; 4571 else if ((sample == 2) && (update == 3)) derived_10 = 12; 4572 else if ((sample == 3) && (update 
== 3)) derived_10 = 13; 4573 else unreachable("No pattern match at pos 10"); 4574 4575 return 0xcc0c0 | (src0 << 0) | (src1 << 3) | (vecsize << 8) | (derived_10 << 10); 4576 } else { 4577 unreachable("No matching state found in add_ld_var"); 4578 } 4579} 4580 4581static inline unsigned 4582bi_pack_add_ld_var_flat(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 4583{ 4584 unsigned vecsize = I->vecsize; 4585 assert(vecsize < 4); 4586 static uint8_t register_format_table[] = { 1, 0, 3, 2, ~0, ~0, ~0, ~0, 4, 5 }; 4587 assert(I->register_format < 10); 4588 unsigned register_format = register_format_table[I->register_format]; 4589 assert(register_format < 8); 4590 static uint8_t function_table[] = { 0, 3, 6, 7 }; 4591 assert(I->function < 4); 4592 unsigned function = function_table[I->function]; 4593 assert(function < 8); 4594 if (register_format != 4) { 4595 unsigned derived_10 = 0; 4596 if ((register_format == 0) || (register_format == 1)) derived_10 = 0; 4597 else if ((register_format == 2) || (register_format == 3)) derived_10 = 1; 4598 else unreachable("No pattern match at pos 10"); 4599 4600 unsigned derived_19 = 0; 4601 if ((register_format == 0) || (register_format == 2)) derived_19 = 0; 4602 else if ((register_format == 1) || (register_format == 3)) derived_19 = 1; 4603 else unreachable("No pattern match at pos 19"); 4604 4605 return 0x538c0 | (src0 << 3) | (vecsize << 8) | (function << 0) | (derived_10 << 10) | (derived_19 << 19); 4606 } else if (register_format == 4) { 4607 return 0xcf8c0 | (src0 << 3) | (vecsize << 8) | (function << 0); 4608 } else { 4609 unreachable("No matching state found in add_ld_var_flat"); 4610 } 4611} 4612 4613static inline unsigned 4614bi_pack_add_ld_var_flat_imm(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 4615{ 4616 unsigned vecsize = I->vecsize; 4617 
assert(vecsize < 4); 4618 static uint8_t register_format_table[] = { 1, 0, 3, 2, ~0, ~0, ~0, ~0, 4, 5 }; 4619 assert(I->register_format < 10); 4620 unsigned register_format = register_format_table[I->register_format]; 4621 assert(register_format < 8); 4622 static uint8_t function_table[] = { 0, 3, 6, 7 }; 4623 assert(I->function < 4); 4624 unsigned function = function_table[I->function]; 4625 assert(function < 8); 4626 unsigned index = I->index; 4627 assert(index < 0x20); 4628 if (register_format != 4) { 4629 unsigned derived_10 = 0; 4630 if ((register_format == 0) || (register_format == 1)) derived_10 = 0; 4631 else if ((register_format == 2) || (register_format == 3)) derived_10 = 1; 4632 else unreachable("No pattern match at pos 10"); 4633 4634 unsigned derived_19 = 0; 4635 if ((register_format == 0) || (register_format == 2)) derived_19 = 0; 4636 else if ((register_format == 1) || (register_format == 3)) derived_19 = 1; 4637 else unreachable("No pattern match at pos 19"); 4638 4639 return 0x53800 | (vecsize << 8) | (function << 0) | (index << 3) | (derived_10 << 10) | (derived_19 << 19); 4640 } else if (register_format == 4) { 4641 return 0xcf800 | (vecsize << 8) | (function << 0) | (index << 3); 4642 } else { 4643 unreachable("No matching state found in add_ld_var_flat_imm"); 4644 } 4645} 4646 4647static inline unsigned 4648bi_pack_add_ld_var_imm(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 4649{ 4650 unsigned vecsize = I->vecsize; 4651 assert(vecsize < 4); 4652 unsigned update = I->update; 4653 assert(update < 4); 4654 static uint8_t register_format_table[] = { 1, 0, ~0, ~0, ~0, ~0, ~0, ~0, 2, 3 }; 4655 assert(I->register_format < 10); 4656 unsigned register_format = register_format_table[I->register_format]; 4657 assert(register_format < 4); 4658 unsigned sample = I->sample; 4659 assert(sample < 8); 4660 unsigned index = I->index; 4661 assert(index < 0x20); 4662 if 
(register_format != 2) { 4663 unsigned derived_19 = 0; 4664 if (register_format == 0) derived_19 = 0; 4665 else if (register_format == 1) derived_19 = 1; 4666 else unreachable("No pattern match at pos 19"); 4667 4668 unsigned derived_10 = 0; 4669 if ((sample == 0) && (update == 0)) derived_10 = 0; 4670 else if ((sample == 1) && (update == 0)) derived_10 = 1; 4671 else if ((sample == 2) && (update == 0)) derived_10 = 2; 4672 else if ((sample == 3) && (update == 0)) derived_10 = 3; 4673 else if ((sample == 4) && (update == 1)) derived_10 = 4; 4674 else if ((sample == 0) && (update == 2)) derived_10 = 8; 4675 else if ((sample == 1) && (update == 2)) derived_10 = 9; 4676 else if ((sample == 0) && (update == 3)) derived_10 = 10; 4677 else if ((sample == 1) && (update == 3)) derived_10 = 11; 4678 else if ((sample == 2) && (update == 3)) derived_10 = 12; 4679 else if ((sample == 3) && (update == 3)) derived_10 = 13; 4680 else unreachable("No pattern match at pos 10"); 4681 4682 return 0x50000 | (src0 << 0) | (vecsize << 8) | (index << 3) | (derived_19 << 19) | (derived_10 << 10); 4683 } else if (register_format == 2) { 4684 unsigned derived_10 = 0; 4685 if ((sample == 0) && (update == 0)) derived_10 = 0; 4686 else if ((sample == 1) && (update == 0)) derived_10 = 1; 4687 else if ((sample == 2) && (update == 0)) derived_10 = 2; 4688 else if ((sample == 3) && (update == 0)) derived_10 = 3; 4689 else if ((sample == 4) && (update == 1)) derived_10 = 4; 4690 else if ((sample == 0) && (update == 2)) derived_10 = 8; 4691 else if ((sample == 1) && (update == 2)) derived_10 = 9; 4692 else if ((sample == 0) && (update == 3)) derived_10 = 10; 4693 else if ((sample == 1) && (update == 3)) derived_10 = 11; 4694 else if ((sample == 2) && (update == 3)) derived_10 = 12; 4695 else if ((sample == 3) && (update == 3)) derived_10 = 13; 4696 else unreachable("No pattern match at pos 10"); 4697 4698 return 0xcc000 | (src0 << 0) | (vecsize << 8) | (index << 3) | (derived_10 << 10); 4699 } else 
{ 4700 unreachable("No matching state found in add_ld_var_imm"); 4701 } 4702} 4703 4704static inline unsigned 4705bi_pack_add_ld_var_special(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 4706{ 4707 unsigned varying_name = I->varying_name; 4708 assert(varying_name < 32); 4709 unsigned vecsize = I->vecsize; 4710 assert(vecsize < 4); 4711 unsigned update = I->update; 4712 assert(update < 4); 4713 static uint8_t register_format_table[] = { 1, 0, ~0, ~0, ~0, ~0, ~0, ~0, 2, 3 }; 4714 assert(I->register_format < 10); 4715 unsigned register_format = register_format_table[I->register_format]; 4716 assert(register_format < 4); 4717 unsigned sample = I->sample; 4718 assert(sample < 8); 4719 if (register_format != 2) { 4720 unsigned derived_3 = 0; 4721 if ((varying_name == 0) && (vecsize == 1) && (update == 3)) derived_3 = 0; 4722 else if ((varying_name == 2) && (vecsize == 0) && (update == 3)) derived_3 = 2; 4723 else if ((varying_name == 3) && (vecsize == 0) && (update == 3) && (sample != 3) && (register_format != 2)) derived_3 = 3; 4724 else unreachable("No pattern match at pos 3"); 4725 4726 unsigned derived_19 = 0; 4727 if (register_format == 0) derived_19 = 0; 4728 else if (register_format == 1) derived_19 = 1; 4729 else unreachable("No pattern match at pos 19"); 4730 4731 unsigned derived_10 = 0; 4732 if ((sample == 0) && (update == 0)) derived_10 = 0; 4733 else if ((sample == 1) && (update == 0)) derived_10 = 1; 4734 else if ((sample == 2) && (update == 0)) derived_10 = 2; 4735 else if ((sample == 3) && (update == 0)) derived_10 = 3; 4736 else if ((sample == 4) && (update == 1)) derived_10 = 4; 4737 else if ((sample == 0) && (update == 2)) derived_10 = 8; 4738 else if ((sample == 1) && (update == 2)) derived_10 = 9; 4739 else if ((sample == 0) && (update == 3)) derived_10 = 10; 4740 else if ((sample == 1) && (update == 3)) derived_10 = 11; 4741 else if ((sample == 2) && (update == 
3)) derived_10 = 12; 4742 else if ((sample == 3) && (update == 3)) derived_10 = 13; 4743 else unreachable("No pattern match at pos 10"); 4744 4745 return 0x500a0 | (src0 << 0) | (derived_3 << 3) | (derived_19 << 19) | (derived_10 << 10); 4746 } else if (register_format == 2) { 4747 unsigned derived_3 = 0; 4748 if ((varying_name == 0) && (vecsize == 1) && (update == 3)) derived_3 = 0; 4749 else if ((varying_name == 2) && (vecsize == 0) && (update == 3)) derived_3 = 2; 4750 else if ((varying_name == 3) && (vecsize == 0) && (update == 3) && (sample != 3) && (register_format != 2)) derived_3 = 3; 4751 else unreachable("No pattern match at pos 3"); 4752 4753 unsigned derived_10 = 0; 4754 if ((sample == 0) && (update == 0)) derived_10 = 0; 4755 else if ((sample == 1) && (update == 0)) derived_10 = 1; 4756 else if ((sample == 2) && (update == 0)) derived_10 = 2; 4757 else if ((sample == 3) && (update == 0)) derived_10 = 3; 4758 else if ((sample == 4) && (update == 1)) derived_10 = 4; 4759 else if ((sample == 0) && (update == 2)) derived_10 = 8; 4760 else if ((sample == 1) && (update == 2)) derived_10 = 9; 4761 else if ((sample == 0) && (update == 3)) derived_10 = 10; 4762 else if ((sample == 1) && (update == 3)) derived_10 = 11; 4763 else if ((sample == 2) && (update == 3)) derived_10 = 12; 4764 else if ((sample == 3) && (update == 3)) derived_10 = 13; 4765 else unreachable("No pattern match at pos 10"); 4766 4767 return 0xcc0a0 | (src0 << 0) | (derived_3 << 3) | (derived_10 << 10); 4768 } else { 4769 unreachable("No matching state found in add_ld_var_special"); 4770 } 4771} 4772 4773static inline unsigned 4774bi_pack_add_lea_attr(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 4775{ 4776 unsigned register_format = I->register_format; 4777 assert(register_format < 16); 4778 if (register_format != 8) { 4779 unsigned derived_11 = 0; 4780 if (register_format == 0) derived_11 = 0; 4781 else 
if (register_format == 1) derived_11 = 1; 4782 else if (register_format == 2) derived_11 = 2; 4783 else if (register_format == 3) derived_11 = 3; 4784 else if (register_format == 4) derived_11 = 4; 4785 else if (register_format == 5) derived_11 = 5; 4786 else if (register_format == 6) derived_11 = 6; 4787 else if (register_format == 7) derived_11 = 7; 4788 else unreachable("No pattern match at pos 11"); 4789 4790 return 0xc0400 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_11 << 11); 4791 } else if (register_format == 8) { 4792 return 0xc8400 | (src0 << 0) | (src1 << 3) | (src2 << 6); 4793 } else { 4794 unreachable("No matching state found in add_lea_attr"); 4795 } 4796} 4797 4798static inline unsigned 4799bi_pack_add_lea_attr_imm(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 4800{ 4801 unsigned register_format = I->register_format; 4802 assert(register_format < 16); 4803 unsigned attribute_index = I->attribute_index; 4804 assert(attribute_index < 0x10); 4805 if (register_format != 8) { 4806 unsigned derived_11 = 0; 4807 if (register_format == 0) derived_11 = 0; 4808 else if (register_format == 1) derived_11 = 1; 4809 else if (register_format == 2) derived_11 = 2; 4810 else if (register_format == 3) derived_11 = 3; 4811 else if (register_format == 4) derived_11 = 4; 4812 else if (register_format == 5) derived_11 = 5; 4813 else if (register_format == 6) derived_11 = 6; 4814 else if (register_format == 7) derived_11 = 7; 4815 else unreachable("No pattern match at pos 11"); 4816 4817 return 0xc0000 | (src0 << 0) | (src1 << 3) | (attribute_index << 6) | (derived_11 << 11); 4818 } else if (register_format == 8) { 4819 return 0xc8000 | (src0 << 0) | (src1 << 3) | (attribute_index << 6); 4820 } else { 4821 unreachable("No matching state found in add_lea_attr_imm"); 4822 } 4823} 4824 4825static inline unsigned 4826bi_pack_add_lea_attr_tex(bi_instr *I, enum bifrost_packed_src src0, 
enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 4827{ 4828 unsigned register_format = I->register_format; 4829 assert(register_format < 16); 4830 if (register_format != 8) { 4831 unsigned derived_11 = 0; 4832 if (register_format == 0) derived_11 = 0; 4833 else if (register_format == 1) derived_11 = 1; 4834 else if (register_format == 2) derived_11 = 2; 4835 else if (register_format == 3) derived_11 = 3; 4836 else if (register_format == 4) derived_11 = 4; 4837 else if (register_format == 5) derived_11 = 5; 4838 else if (register_format == 6) derived_11 = 6; 4839 else if (register_format == 7) derived_11 = 7; 4840 else unreachable("No pattern match at pos 11"); 4841 4842 return 0xc0600 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_11 << 11); 4843 } else if (register_format == 8) { 4844 return 0xc8600 | (src0 << 0) | (src1 << 3) | (src2 << 6); 4845 } else { 4846 unreachable("No matching state found in add_lea_attr_tex"); 4847 } 4848} 4849 4850static inline unsigned 4851bi_pack_add_lea_tex(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 4852{ 4853 unsigned format = I->format; 4854 assert(format < 2); 4855 return 0xd6600 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (format << 11); 4856} 4857 4858static inline unsigned 4859bi_pack_add_lea_tex_imm(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 4860{ 4861 unsigned format = I->format; 4862 assert(format < 2); 4863 unsigned texture_index = I->texture_index; 4864 assert(texture_index < 0x20); 4865 return 0xd6000 | (src0 << 0) | (src1 << 3) | (format << 11) | (texture_index << 6); 4866} 4867 4868static inline unsigned 4869bi_pack_add_load_i128(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 4870{ 4871 static uint8_t seg_table[] 
= { 1, 2, 0, 3, 4, 7 }; 4872 assert(I->seg < 6); 4873 unsigned seg = seg_table[I->seg]; 4874 assert(seg < 8); 4875 return 0x61000 | (src0 << 0) | (src1 << 3) | (seg << 6); 4876} 4877 4878static inline unsigned 4879bi_pack_add_load_i16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 4880{ 4881 static uint8_t seg_table[] = { 1, 2, 0, 3, 4, 7 }; 4882 assert(I->seg < 6); 4883 unsigned seg = seg_table[I->seg]; 4884 assert(seg < 8); 4885 static uint8_t lane_dest_table[] = { 0, 2, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 4886 assert(I->dest->swizzle < 13); 4887 unsigned lane_dest = lane_dest_table[I->dest->swizzle]; 4888 assert(lane_dest < 4); 4889 unsigned extend = I->extend; 4890 assert(extend < 4); 4891 if ((extend == 0) && ((lane_dest == 0) || (lane_dest == 1))) { 4892 unsigned derived_9 = 0; 4893 if (lane_dest == 0) derived_9 = 0; 4894 else if (lane_dest == 1) derived_9 = 1; 4895 else unreachable("No pattern match at pos 9"); 4896 4897 return 0x60800 | (src0 << 0) | (src1 << 3) | (seg << 6) | (derived_9 << 9); 4898 } else if ((extend != 0) && (lane_dest == 2)) { 4899 unsigned derived_9 = 0; 4900 if (extend == 1) derived_9 = 0; 4901 else if (extend == 2) derived_9 = 1; 4902 else unreachable("No pattern match at pos 9"); 4903 4904 return 0x63000 | (src0 << 0) | (src1 << 3) | (seg << 6) | (derived_9 << 9); 4905 } else if ((extend != 0) && (lane_dest == 3)) { 4906 unsigned derived_9 = 0; 4907 if (extend == 1) derived_9 = 0; 4908 else if (extend == 2) derived_9 = 1; 4909 else unreachable("No pattern match at pos 9"); 4910 4911 return 0x61800 | (src0 << 0) | (src1 << 3) | (seg << 6) | (derived_9 << 9); 4912 } else { 4913 unreachable("No matching state found in add_load_i16"); 4914 } 4915} 4916 4917static inline unsigned 4918bi_pack_add_load_i24(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 4919{ 4920 
static uint8_t seg_table[] = { 1, 2, 0, 3, 4, 7 }; 4921 assert(I->seg < 6); 4922 unsigned seg = seg_table[I->seg]; 4923 assert(seg < 8); 4924 return 0x65000 | (src0 << 0) | (src1 << 3) | (seg << 6); 4925} 4926 4927static inline unsigned 4928bi_pack_add_load_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 4929{ 4930 static uint8_t seg_table[] = { 1, 2, 0, 3, 4, 7 }; 4931 assert(I->seg < 6); 4932 unsigned seg = seg_table[I->seg]; 4933 assert(seg < 8); 4934 static uint8_t lane_dest_table[] = { ~0, 0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 4935 assert(I->dest->swizzle < 13); 4936 unsigned lane_dest = lane_dest_table[I->dest->swizzle]; 4937 assert(lane_dest < 2); 4938 unsigned extend = I->extend; 4939 assert(extend < 4); 4940 if ((extend == 0) && (lane_dest == 0)) { 4941 return 0x60c00 | (src0 << 0) | (src1 << 3) | (seg << 6); 4942 } else if ((extend != 0) && (lane_dest == 1)) { 4943 unsigned derived_9 = 0; 4944 if (extend == 1) derived_9 = 0; 4945 else if (extend == 2) derived_9 = 1; 4946 else unreachable("No pattern match at pos 9"); 4947 4948 return 0x61c00 | (src0 << 0) | (src1 << 3) | (seg << 6) | (derived_9 << 9); 4949 } else { 4950 unreachable("No matching state found in add_load_i32"); 4951 } 4952} 4953 4954static inline unsigned 4955bi_pack_add_load_i48(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 4956{ 4957 static uint8_t seg_table[] = { 1, 2, 0, 3, 4, 7 }; 4958 assert(I->seg < 6); 4959 unsigned seg = seg_table[I->seg]; 4960 assert(seg < 8); 4961 return 0x65200 | (src0 << 0) | (src1 << 3) | (seg << 6); 4962} 4963 4964static inline unsigned 4965bi_pack_add_load_i64(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 4966{ 4967 static uint8_t seg_table[] = { 1, 2, 0, 3, 4, 7 }; 4968 assert(I->seg < 6); 
4969 unsigned seg = seg_table[I->seg]; 4970 assert(seg < 8); 4971 return 0x60e00 | (src0 << 0) | (src1 << 3) | (seg << 6); 4972} 4973 4974static inline unsigned 4975bi_pack_add_load_i8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 4976{ 4977 static uint8_t seg_table[] = { 1, 2, 0, 3, 4, 7 }; 4978 assert(I->seg < 6); 4979 unsigned seg = seg_table[I->seg]; 4980 assert(seg < 8); 4981 static uint8_t lane_dest_table[] = { 4, 6, ~0, 5, 0, 1, 2, 3, ~0, ~0, ~0, ~0, ~0 }; 4982 assert(I->dest->swizzle < 13); 4983 unsigned lane_dest = lane_dest_table[I->dest->swizzle]; 4984 assert(lane_dest < 8); 4985 unsigned extend = I->extend; 4986 assert(extend < 4); 4987 if ((extend == 0) && ((lane_dest == 0) || (lane_dest == 1) || (lane_dest == 2) || (lane_dest == 3))) { 4988 unsigned derived_9 = 0; 4989 if (lane_dest == 0) derived_9 = 0; 4990 else if (lane_dest == 1) derived_9 = 1; 4991 else if (lane_dest == 2) derived_9 = 2; 4992 else if (lane_dest == 3) derived_9 = 3; 4993 else unreachable("No pattern match at pos 9"); 4994 4995 return 0x60000 | (src0 << 0) | (src1 << 3) | (seg << 6) | (derived_9 << 9); 4996 } else if ((extend != 0) && ((lane_dest == 4) || (lane_dest == 5))) { 4997 unsigned derived_9 = 0; 4998 if (extend == 1) derived_9 = 0; 4999 else if (extend == 2) derived_9 = 1; 5000 else unreachable("No pattern match at pos 9"); 5001 5002 unsigned derived_10 = 0; 5003 if (lane_dest == 4) derived_10 = 0; 5004 else if (lane_dest == 5) derived_10 = 1; 5005 else unreachable("No pattern match at pos 10"); 5006 5007 return 0x63800 | (src0 << 0) | (src1 << 3) | (seg << 6) | (derived_9 << 9) | (derived_10 << 10); 5008 } else if ((extend != 0) && (lane_dest == 6)) { 5009 unsigned derived_9 = 0; 5010 if (extend == 1) derived_9 = 0; 5011 else if (extend == 2) derived_9 = 1; 5012 else unreachable("No pattern match at pos 9"); 5013 5014 return 0x63400 | (src0 << 0) | (src1 << 3) | (seg << 6) | (derived_9 
<< 9); 5015 } else if ((extend != 0) && (lane_dest == 7)) { 5016 unsigned derived_9 = 0; 5017 if (extend == 1) derived_9 = 0; 5018 else if (extend == 2) derived_9 = 1; 5019 else unreachable("No pattern match at pos 9"); 5020 5021 return 0x61400 | (src0 << 0) | (src1 << 3) | (seg << 6) | (derived_9 << 9); 5022 } else { 5023 unreachable("No matching state found in add_load_i8"); 5024 } 5025} 5026 5027static inline unsigned 5028bi_pack_add_load_i96(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 5029{ 5030 static uint8_t seg_table[] = { 1, 2, 0, 3, 4, 7 }; 5031 assert(I->seg < 6); 5032 unsigned seg = seg_table[I->seg]; 5033 assert(seg < 8); 5034 return 0x65400 | (src0 << 0) | (src1 << 3) | (seg << 6); 5035} 5036 5037static inline unsigned 5038bi_pack_add_logb_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 5039{ 5040 static uint8_t widen0_table[] = { 2, 1, ~0, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 5041 assert(I->src[0].swizzle < 13); 5042 unsigned widen0 = widen0_table[I->src[0].swizzle]; 5043 assert(widen0 < 4); 5044 return 0x3d9a0 | (src0 << 0) | (widen0 << 3); 5045} 5046 5047static inline unsigned 5048bi_pack_add_logb_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 5049{ 5050 static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 5051 assert(I->src[0].swizzle < 13); 5052 unsigned swz0 = swz0_table[I->src[0].swizzle]; 5053 assert(swz0 < 4); 5054 return 0x3d980 | (src0 << 0) | (swz0 << 3); 5055} 5056 5057static inline unsigned 5058bi_pack_add_mkvec_v2i16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 5059{ 5060 static uint8_t lane0_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 
}; 5061 assert(I->src[0].swizzle < 13); 5062 unsigned lane0 = lane0_table[I->src[0].swizzle]; 5063 assert(lane0 < 2); 5064 static uint8_t lane1_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 5065 assert(I->src[1].swizzle < 13); 5066 unsigned lane1 = lane1_table[I->src[1].swizzle]; 5067 assert(lane1 < 2); 5068 return 0x75300 | (src0 << 0) | (src1 << 3) | (lane0 << 6) | (lane1 << 7); 5069} 5070 5071static inline unsigned 5072bi_pack_add_mov_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 5073{ 5074 5075 return 0x3d968 | (src0 << 0); 5076} 5077 5078static inline unsigned 5079bi_pack_add_mux_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 5080{ 5081 unsigned mux = I->mux; 5082 assert(mux < 4); 5083 return 0x74000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (mux << 9); 5084} 5085 5086static inline unsigned 5087bi_pack_add_mux_v2i16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 5088{ 5089 unsigned mux = I->mux; 5090 assert(mux < 4); 5091 static uint8_t swap2_table[] = { ~0, 0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 5092 assert(I->src[2].swizzle < 13); 5093 unsigned swap2 = swap2_table[I->src[2].swizzle]; 5094 assert(swap2 < 2); 5095 static uint8_t swap1_table[] = { ~0, 0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 5096 assert(I->src[1].swizzle < 13); 5097 unsigned swap1 = swap1_table[I->src[1].swizzle]; 5098 assert(swap1 < 2); 5099 static uint8_t swap0_table[] = { ~0, 0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 5100 assert(I->src[0].swizzle < 13); 5101 unsigned swap0 = swap0_table[I->src[0].swizzle]; 5102 assert(swap0 < 2); 5103 return 0x70000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (mux << 9) | (swap2 << 11) | (swap1 << 12) | (swap0 << 13); 5104} 5105 5106static inline unsigned 
5107bi_pack_add_mux_v4i8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 5108{ 5109 unsigned mux = I->mux; 5110 assert(mux < 2); 5111 return 0x74800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (mux << 9); 5112} 5113 5114static inline unsigned 5115bi_pack_add_nop(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 5116{ 5117 5118 return 0x3d964; 5119} 5120 5121static inline unsigned 5122bi_pack_add_quiet_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 5123{ 5124 5125 return 0x3d970 | (src0 << 0); 5126} 5127 5128static inline unsigned 5129bi_pack_add_quiet_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 5130{ 5131 static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 5132 assert(I->src[0].swizzle < 13); 5133 unsigned swz0 = swz0_table[I->src[0].swizzle]; 5134 assert(swz0 < 4); 5135 return 0x3d900 | (src0 << 0) | (swz0 << 4); 5136} 5137 5138static inline unsigned 5139bi_pack_add_s16_to_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 5140{ 5141 static uint8_t lane0_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 5142 assert(I->src[0].swizzle < 13); 5143 unsigned lane0 = lane0_table[I->src[0].swizzle]; 5144 assert(lane0 < 2); 5145 return 0x3cce0 | (src0 << 0) | (lane0 << 4); 5146} 5147 5148static inline unsigned 5149bi_pack_add_s16_to_s32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 5150{ 5151 static uint8_t lane0_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 5152 assert(I->src[0].swizzle < 13); 
5153 unsigned lane0 = lane0_table[I->src[0].swizzle]; 5154 assert(lane0 < 2); 5155 return 0x3ccc0 | (src0 << 0) | (lane0 << 4); 5156} 5157 5158static inline unsigned 5159bi_pack_add_s32_to_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 5160{ 5161 unsigned round = I->round; 5162 assert(round < 8); 5163 if (round != 4) { 5164 unsigned derived_4 = 0; 5165 if (round == 0) derived_4 = 0; 5166 else if (round == 1) derived_4 = 1; 5167 else if (round == 2) derived_4 = 2; 5168 else if (round == 3) derived_4 = 3; 5169 else unreachable("No pattern match at pos 4"); 5170 5171 return 0x3cbc0 | (src0 << 0) | (derived_4 << 4); 5172 } else if (round == 4) { 5173 return 0x3cd00 | (src0 << 0); 5174 } else { 5175 unreachable("No matching state found in add_s32_to_f32"); 5176 } 5177} 5178 5179static inline unsigned 5180bi_pack_add_s8_to_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 5181{ 5182 static uint8_t lane0_table[] = { ~0, ~0, ~0, ~0, 0, 1, 2, 3, ~0, ~0, ~0, ~0, ~0 }; 5183 assert(I->src[0].swizzle < 13); 5184 unsigned lane0 = lane0_table[I->src[0].swizzle]; 5185 assert(lane0 < 4); 5186 return 0x3cb80 | (src0 << 0) | (lane0 << 4); 5187} 5188 5189static inline unsigned 5190bi_pack_add_s8_to_s32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 5191{ 5192 static uint8_t lane0_table[] = { ~0, ~0, ~0, ~0, 0, 1, 2, 3, ~0, ~0, ~0, ~0, ~0 }; 5193 assert(I->src[0].swizzle < 13); 5194 unsigned lane0 = lane0_table[I->src[0].swizzle]; 5195 assert(lane0 < 4); 5196 return 0x3cb40 | (src0 << 0) | (lane0 << 4); 5197} 5198 5199static inline unsigned 5200bi_pack_add_seg_add(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 5201{ 5202 static uint8_t 
seg_table[] = { ~0, 2, 0, ~0, ~0, 7 }; 5203 assert(I->seg < 6); 5204 unsigned seg = seg_table[I->seg]; 5205 assert(seg < 8); 5206 unsigned preserve_null = I->preserve_null; 5207 assert(preserve_null < 2); 5208 return 0x3d500 | (src0 << 0) | (seg << 3) | (preserve_null << 7); 5209} 5210 5211static inline unsigned 5212bi_pack_add_shaddxh_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 5213{ 5214 5215 return 0x3f8c0 | (src0 << 0) | (src1 << 3); 5216} 5217 5218static inline unsigned 5219bi_pack_add_shift_double_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 5220{ 5221 5222 return 0xefe00 | (src0 << 0) | (src1 << 3) | (src2 << 6); 5223} 5224 5225static inline unsigned 5226bi_pack_add_store_i128(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 5227{ 5228 static uint8_t seg_table[] = { 1, 2, 0, 3, ~0, 7 }; 5229 assert(I->seg < 6); 5230 unsigned seg = seg_table[I->seg]; 5231 assert(seg < 8); 5232 return 0x61200 | (src0 << 0) | (src1 << 3) | (seg << 6); 5233} 5234 5235static inline unsigned 5236bi_pack_add_store_i16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 5237{ 5238 static uint8_t seg_table[] = { 1, 2, 0, 3, ~0, 7 }; 5239 assert(I->seg < 6); 5240 unsigned seg = seg_table[I->seg]; 5241 assert(seg < 8); 5242 return 0x62800 | (src0 << 0) | (src1 << 3) | (seg << 6); 5243} 5244 5245static inline unsigned 5246bi_pack_add_store_i24(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 5247{ 5248 static uint8_t seg_table[] = { 1, 2, 0, 3, ~0, 7 }; 5249 assert(I->seg < 6); 5250 unsigned seg = seg_table[I->seg]; 5251 assert(seg < 8); 5252 return 0x65800 | 
(src0 << 0) | (src1 << 3) | (seg << 6); 5253} 5254 5255static inline unsigned 5256bi_pack_add_store_i32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 5257{ 5258 static uint8_t seg_table[] = { 1, 2, 0, 3, ~0, 7 }; 5259 assert(I->seg < 6); 5260 unsigned seg = seg_table[I->seg]; 5261 assert(seg < 8); 5262 return 0x62c00 | (src0 << 0) | (src1 << 3) | (seg << 6); 5263} 5264 5265static inline unsigned 5266bi_pack_add_store_i48(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 5267{ 5268 static uint8_t seg_table[] = { 1, 2, 0, 3, ~0, 7 }; 5269 assert(I->seg < 6); 5270 unsigned seg = seg_table[I->seg]; 5271 assert(seg < 8); 5272 return 0x65a00 | (src0 << 0) | (src1 << 3) | (seg << 6); 5273} 5274 5275static inline unsigned 5276bi_pack_add_store_i64(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 5277{ 5278 static uint8_t seg_table[] = { 1, 2, 0, 3, ~0, 7 }; 5279 assert(I->seg < 6); 5280 unsigned seg = seg_table[I->seg]; 5281 assert(seg < 8); 5282 return 0x62e00 | (src0 << 0) | (src1 << 3) | (seg << 6); 5283} 5284 5285static inline unsigned 5286bi_pack_add_store_i8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 5287{ 5288 static uint8_t seg_table[] = { 1, 2, 0, 3, ~0, 7 }; 5289 assert(I->seg < 6); 5290 unsigned seg = seg_table[I->seg]; 5291 assert(seg < 8); 5292 return 0x62000 | (src0 << 0) | (src1 << 3) | (seg << 6); 5293} 5294 5295static inline unsigned 5296bi_pack_add_store_i96(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 5297{ 5298 static uint8_t seg_table[] = { 1, 2, 0, 3, ~0, 7 }; 5299 assert(I->seg < 6); 5300 unsigned seg = seg_table[I->seg]; 5301 
assert(seg < 8); 5302 return 0x65c00 | (src0 << 0) | (src1 << 3) | (seg << 6); 5303} 5304 5305static inline unsigned 5306bi_pack_add_st_cvt(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 5307{ 5308 assert((1 << src2) & 0xf7); 5309 unsigned vecsize = I->vecsize; 5310 assert(vecsize < 4); 5311 return 0xc9800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (vecsize << 9); 5312} 5313 5314static inline unsigned 5315bi_pack_add_st_tile(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 5316{ 5317 assert((1 << src2) & 0xf7); 5318 unsigned vecsize = I->vecsize; 5319 assert(vecsize < 4); 5320 return 0xcb800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (vecsize << 9); 5321} 5322 5323static inline unsigned 5324bi_pack_add_swz_v2i16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 5325{ 5326 static uint8_t swz0_table[] = { 0, ~0, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 5327 assert(I->src[0].swizzle < 13); 5328 unsigned swz0 = swz0_table[I->src[0].swizzle]; 5329 assert(swz0 < 4); 5330 return 0x3d948 | (src0 << 0) | (swz0 << 4); 5331} 5332 5333static inline unsigned 5334bi_pack_add_swz_v4i8(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 5335{ 5336 static uint8_t swz0_table[] = { ~0, ~0, ~0, ~0, 0, 1, 2, 3, 4, 5, 6, 7, ~0 }; 5337 assert(I->src[0].swizzle < 13); 5338 unsigned swz0 = swz0_table[I->src[0].swizzle]; 5339 assert(swz0 < 8); 5340 return 0x3df40 | (src0 << 0) | (swz0 << 3); 5341} 5342 5343static inline unsigned 5344bi_pack_add_texc(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 5345{ 5346 assert((1 << src2) & 0xf7); 5347 unsigned skip = I->skip; 5348 
assert(skip < 2); 5349 return 0xd7000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (skip << 9); 5350} 5351 5352static inline unsigned 5353bi_pack_add_texs_2d_f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 5354{ 5355 unsigned skip = I->skip; 5356 assert(skip < 2); 5357 unsigned lod_mode = I->lod_mode; 5358 assert(lod_mode < 2); 5359 unsigned texture_index = I->texture_index; 5360 assert(texture_index < 0x8); 5361 unsigned sampler_index = I->sampler_index; 5362 assert(sampler_index < 0x8); 5363 return 0xd8000 | (src0 << 0) | (src1 << 3) | (skip << 9) | (lod_mode << 13) | (texture_index << 6) | (sampler_index << 10); 5364} 5365 5366static inline unsigned 5367bi_pack_add_texs_2d_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 5368{ 5369 unsigned skip = I->skip; 5370 assert(skip < 2); 5371 unsigned lod_mode = I->lod_mode; 5372 assert(lod_mode < 2); 5373 unsigned texture_index = I->texture_index; 5374 assert(texture_index < 0x8); 5375 unsigned sampler_index = I->sampler_index; 5376 assert(sampler_index < 0x8); 5377 return 0x58000 | (src0 << 0) | (src1 << 3) | (skip << 9) | (lod_mode << 13) | (texture_index << 6) | (sampler_index << 10); 5378} 5379 5380static inline unsigned 5381bi_pack_add_texs_cube_f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 5382{ 5383 unsigned skip = I->skip; 5384 assert(skip < 2); 5385 unsigned sampler_index = I->sampler_index; 5386 assert(sampler_index < 0x4); 5387 unsigned texture_index = I->texture_index; 5388 assert(texture_index < 0x4); 5389 return 0xdc000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (skip << 9) | (sampler_index << 10) | (texture_index << 12); 5390} 5391 5392static inline unsigned 5393bi_pack_add_texs_cube_f32(bi_instr *I, enum bifrost_packed_src src0, enum 
bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 5394{ 5395 unsigned skip = I->skip; 5396 assert(skip < 2); 5397 unsigned sampler_index = I->sampler_index; 5398 assert(sampler_index < 0x4); 5399 unsigned texture_index = I->texture_index; 5400 assert(texture_index < 0x4); 5401 return 0x5c000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (skip << 9) | (sampler_index << 10) | (texture_index << 12); 5402} 5403 5404static inline unsigned 5405bi_pack_add_u16_to_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 5406{ 5407 static uint8_t lane0_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 5408 assert(I->src[0].swizzle < 13); 5409 unsigned lane0 = lane0_table[I->src[0].swizzle]; 5410 assert(lane0 < 2); 5411 return 0x3cce8 | (src0 << 0) | (lane0 << 4); 5412} 5413 5414static inline unsigned 5415bi_pack_add_u16_to_u32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 5416{ 5417 static uint8_t lane0_table[] = { 0, ~0, ~0, 1, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 5418 assert(I->src[0].swizzle < 13); 5419 unsigned lane0 = lane0_table[I->src[0].swizzle]; 5420 assert(lane0 < 2); 5421 return 0x3ccc8 | (src0 << 0) | (lane0 << 4); 5422} 5423 5424static inline unsigned 5425bi_pack_add_u32_to_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 5426{ 5427 unsigned round = I->round; 5428 assert(round < 8); 5429 if (round != 4) { 5430 unsigned derived_4 = 0; 5431 if (round == 0) derived_4 = 0; 5432 else if (round == 1) derived_4 = 1; 5433 else if (round == 2) derived_4 = 2; 5434 else if (round == 3) derived_4 = 3; 5435 else unreachable("No pattern match at pos 4"); 5436 5437 return 0x3cbc8 | (src0 << 0) | (derived_4 << 4); 5438 } else if (round == 4) { 5439 return 0x3cd08 | (src0 << 0); 
5440 } else { 5441 unreachable("No matching state found in add_u32_to_f32"); 5442 } 5443} 5444 5445static inline unsigned 5446bi_pack_add_u8_to_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 5447{ 5448 static uint8_t lane0_table[] = { ~0, ~0, ~0, ~0, 0, 1, 2, 3, ~0, ~0, ~0, ~0, ~0 }; 5449 assert(I->src[0].swizzle < 13); 5450 unsigned lane0 = lane0_table[I->src[0].swizzle]; 5451 assert(lane0 < 4); 5452 return 0x3cb88 | (src0 << 0) | (lane0 << 4); 5453} 5454 5455static inline unsigned 5456bi_pack_add_u8_to_u32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 5457{ 5458 static uint8_t lane0_table[] = { ~0, ~0, ~0, ~0, 0, 1, 2, 3, ~0, ~0, ~0, ~0, ~0 }; 5459 assert(I->src[0].swizzle < 13); 5460 unsigned lane0 = lane0_table[I->src[0].swizzle]; 5461 assert(lane0 < 4); 5462 return 0x3cb48 | (src0 << 0) | (lane0 << 4); 5463} 5464 5465static inline unsigned 5466bi_pack_add_v2f16_to_v2s16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 5467{ 5468 unsigned round = I->round; 5469 assert(round < 8); 5470 static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 5471 assert(I->src[0].swizzle < 13); 5472 unsigned swz0 = swz0_table[I->src[0].swizzle]; 5473 assert(swz0 < 4); 5474 if (round != 4) { 5475 unsigned derived_4 = 0; 5476 if (round == 0) derived_4 = 0; 5477 else if (round == 1) derived_4 = 1; 5478 else if (round == 2) derived_4 = 2; 5479 else if (round == 3) derived_4 = 3; 5480 else unreachable("No pattern match at pos 4"); 5481 5482 return 0x3c200 | (src0 << 0) | (swz0 << 6) | (derived_4 << 4); 5483 } else if (round == 4) { 5484 return 0x3ca80 | (src0 << 0) | (swz0 << 4); 5485 } else { 5486 unreachable("No matching state found in add_v2f16_to_v2s16"); 5487 } 5488} 5489 5490static inline unsigned 
5491bi_pack_add_v2f16_to_v2u16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 5492{ 5493 unsigned round = I->round; 5494 assert(round < 8); 5495 static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 5496 assert(I->src[0].swizzle < 13); 5497 unsigned swz0 = swz0_table[I->src[0].swizzle]; 5498 assert(swz0 < 4); 5499 if (round != 4) { 5500 unsigned derived_4 = 0; 5501 if (round == 0) derived_4 = 0; 5502 else if (round == 1) derived_4 = 1; 5503 else if (round == 2) derived_4 = 2; 5504 else if (round == 3) derived_4 = 3; 5505 else unreachable("No pattern match at pos 4"); 5506 5507 return 0x3c208 | (src0 << 0) | (swz0 << 6) | (derived_4 << 4); 5508 } else if (round == 4) { 5509 return 0x3ca88 | (src0 << 0) | (swz0 << 4); 5510 } else { 5511 unreachable("No matching state found in add_v2f16_to_v2u16"); 5512 } 5513} 5514 5515static inline unsigned 5516bi_pack_add_v2f32_to_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 5517{ 5518 unsigned abs0 = I->src[0].abs; 5519 assert(abs0 < 2); 5520 unsigned abs1 = I->src[1].abs; 5521 assert(abs1 < 2); 5522 unsigned neg0 = I->src[0].neg; 5523 assert(neg0 < 2); 5524 unsigned neg1 = I->src[1].neg; 5525 assert(neg1 < 2); 5526 unsigned clamp = I->clamp; 5527 assert(clamp < 4); 5528 unsigned round = I->round; 5529 assert(round < 8); 5530 unsigned derived_6 = 0; 5531 if ((abs0 == 0) && (abs1 == 0)) derived_6 = 0; 5532 else if ((abs0 == 1) && (abs1 == 1)) derived_6 = 1; 5533 else unreachable("No pattern match at pos 6"); 5534 5535 unsigned derived_7 = 0; 5536 if ((neg0 == 0) && (neg1 == 0)) derived_7 = 0; 5537 else if ((neg0 == 1) && (neg1 == 1)) derived_7 = 1; 5538 else unreachable("No pattern match at pos 7"); 5539 5540 return 0x76000 | (src0 << 0) | (src1 << 3) | (clamp << 8) | (round << 10) | (derived_6 << 6) | (derived_7 << 7); 5541} 
5542 5543static inline unsigned 5544bi_pack_add_v2s16_to_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 5545{ 5546 unsigned round = I->round; 5547 assert(round < 8); 5548 static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 5549 assert(I->src[0].swizzle < 13); 5550 unsigned swz0 = swz0_table[I->src[0].swizzle]; 5551 assert(swz0 < 4); 5552 if (round != 4) { 5553 unsigned derived_4 = 0; 5554 if (round == 0) derived_4 = 0; 5555 else if (round == 1) derived_4 = 1; 5556 else if (round == 2) derived_4 = 2; 5557 else if (round == 3) derived_4 = 3; 5558 else unreachable("No pattern match at pos 4"); 5559 5560 return 0x3c600 | (src0 << 0) | (swz0 << 6) | (derived_4 << 4); 5561 } else if (round == 4) { 5562 return 0x3cb00 | (src0 << 0) | (swz0 << 4); 5563 } else { 5564 unreachable("No matching state found in add_v2s16_to_v2f16"); 5565 } 5566} 5567 5568static inline unsigned 5569bi_pack_add_v2s8_to_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 5570{ 5571 static uint8_t swz0_table[] = { ~0, ~0, ~0, ~0, 0, 5, 10, 15, 4, 14, ~0, ~0, 8 }; 5572 assert(I->src[0].swizzle < 13); 5573 unsigned swz0 = swz0_table[I->src[0].swizzle]; 5574 assert(swz0 < 16); 5575 return 0x3c800 | (src0 << 0) | (swz0 << 4); 5576} 5577 5578static inline unsigned 5579bi_pack_add_v2s8_to_v2s16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 5580{ 5581 static uint8_t swz0_table[] = { ~0, ~0, ~0, ~0, 0, 5, 10, 15, 4, 14, ~0, ~0, 8 }; 5582 assert(I->src[0].swizzle < 13); 5583 unsigned swz0 = swz0_table[I->src[0].swizzle]; 5584 assert(swz0 < 16); 5585 return 0x3c700 | (src0 << 0) | (swz0 << 4); 5586} 5587 5588static inline unsigned 5589bi_pack_add_v2u16_to_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum 
bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 5590{ 5591 unsigned round = I->round; 5592 assert(round < 8); 5593 static uint8_t swz0_table[] = { 0, 2, 1, 3, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }; 5594 assert(I->src[0].swizzle < 13); 5595 unsigned swz0 = swz0_table[I->src[0].swizzle]; 5596 assert(swz0 < 4); 5597 if (round != 4) { 5598 unsigned derived_4 = 0; 5599 if (round == 0) derived_4 = 0; 5600 else if (round == 1) derived_4 = 1; 5601 else if (round == 2) derived_4 = 2; 5602 else if (round == 3) derived_4 = 3; 5603 else unreachable("No pattern match at pos 4"); 5604 5605 return 0x3c608 | (src0 << 0) | (swz0 << 6) | (derived_4 << 4); 5606 } else if (round == 4) { 5607 return 0x3cb08 | (src0 << 0) | (swz0 << 4); 5608 } else { 5609 unreachable("No matching state found in add_v2u16_to_v2f16"); 5610 } 5611} 5612 5613static inline unsigned 5614bi_pack_add_v2u8_to_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 5615{ 5616 static uint8_t swz0_table[] = { ~0, ~0, ~0, ~0, 0, 5, 10, 15, 4, 14, ~0, ~0, 8 }; 5617 assert(I->src[0].swizzle < 13); 5618 unsigned swz0 = swz0_table[I->src[0].swizzle]; 5619 assert(swz0 < 16); 5620 return 0x3c808 | (src0 << 0) | (swz0 << 4); 5621} 5622 5623static inline unsigned 5624bi_pack_add_v2u8_to_v2u16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 5625{ 5626 static uint8_t swz0_table[] = { ~0, ~0, ~0, ~0, 0, 5, 10, 15, 4, 14, ~0, ~0, 8 }; 5627 assert(I->src[0].swizzle < 13); 5628 unsigned swz0 = swz0_table[I->src[0].swizzle]; 5629 assert(swz0 < 16); 5630 return 0x3c708 | (src0 << 0) | (swz0 << 4); 5631} 5632 5633static inline unsigned 5634bi_pack_add_var_tex_f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 5635{ 5636 unsigned update = I->update; 
5637 assert(update < 2); 5638 unsigned skip = I->skip; 5639 assert(skip < 2); 5640 unsigned lod_mode = I->lod_mode; 5641 assert(lod_mode < 2); 5642 static uint8_t sample_table[] = { 0, ~0, ~0, ~0, 1, ~0 }; 5643 assert(I->sample < 6); 5644 unsigned sample = sample_table[I->sample]; 5645 assert(sample < 2); 5646 unsigned varying_index = I->varying_index; 5647 assert(varying_index < 0x8); 5648 unsigned texture_index = I->texture_index; 5649 assert(texture_index < 0x4); 5650 unsigned derived_5 = 0; 5651 if ((sample == 0) && (update == 0)) derived_5 = 0; 5652 else if ((sample == 1) && (update == 1)) derived_5 = 1; 5653 else unreachable("No pattern match at pos 5"); 5654 5655 return 0xca100 | (skip << 7) | (lod_mode << 9) | (varying_index << 0) | (texture_index << 3) | (derived_5 << 5); 5656} 5657 5658static inline unsigned 5659bi_pack_add_var_tex_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 5660{ 5661 unsigned update = I->update; 5662 assert(update < 2); 5663 unsigned skip = I->skip; 5664 assert(skip < 2); 5665 unsigned lod_mode = I->lod_mode; 5666 assert(lod_mode < 2); 5667 static uint8_t sample_table[] = { 0, ~0, ~0, ~0, 1, ~0 }; 5668 assert(I->sample < 6); 5669 unsigned sample = sample_table[I->sample]; 5670 assert(sample < 2); 5671 unsigned varying_index = I->varying_index; 5672 assert(varying_index < 0x8); 5673 unsigned texture_index = I->texture_index; 5674 assert(texture_index < 0x4); 5675 unsigned derived_5 = 0; 5676 if ((sample == 0) && (update == 0)) derived_5 = 0; 5677 else if ((sample == 1) && (update == 1)) derived_5 = 1; 5678 else unreachable("No pattern match at pos 5"); 5679 5680 return 0xca000 | (skip << 7) | (lod_mode << 9) | (varying_index << 0) | (texture_index << 3) | (derived_5 << 5); 5681} 5682 5683static inline unsigned 5684bi_pack_add_vn_asst2_f32(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum 
bifrost_packed_src src3) 5685{ 5686 unsigned scale = I->scale; 5687 assert(scale < 2); 5688 unsigned neg0 = I->src[0].neg; 5689 assert(neg0 < 2); 5690 if (scale == 0) { 5691 return 0x3df80 | (src0 << 0) | (neg0 << 3); 5692 } else if (scale == 1) { 5693 return 0x3de80 | (src0 << 0) | (neg0 << 4); 5694 } else { 5695 unreachable("No matching state found in add_vn_asst2_f32"); 5696 } 5697} 5698 5699static inline unsigned 5700bi_pack_add_vn_asst2_v2f16(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 5701{ 5702 unsigned neg0 = I->src[0].neg; 5703 assert(neg0 < 2); 5704 return 0x3dfa0 | (src0 << 0) | (neg0 << 3); 5705} 5706 5707static inline unsigned 5708bi_pack_add_wmask(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 5709{ 5710 unsigned subgroup = I->subgroup; 5711 assert(subgroup < 4); 5712 unsigned fill = I->fill; 5713 assert(fill < 0x2); 5714 return 0x3d700 | (src0 << 0) | (subgroup << 4) | (fill << 3); 5715} 5716 5717static inline unsigned 5718bi_pack_add_zs_emit(bi_instr *I, enum bifrost_packed_src src0, enum bifrost_packed_src src1, enum bifrost_packed_src src2, enum bifrost_packed_src src3) 5719{ 5720 unsigned stencil = I->stencil; 5721 assert(stencil < 2); 5722 unsigned z = I->z; 5723 assert(z < 2); 5724 unsigned derived_9 = 0; 5725 if ((stencil == 1) && (z == 0)) derived_9 = 1; 5726 else if ((stencil == 0) && (z == 1)) derived_9 = 2; 5727 else if ((stencil == 1) && (z == 1)) derived_9 = 3; 5728 else unreachable("No pattern match at pos 9"); 5729 5730 return 0xd7800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_9 << 9); 5731} 5732 5733unsigned 5734bi_pack_fma(bi_instr *I, 5735 enum bifrost_packed_src src0, 5736 enum bifrost_packed_src src1, 5737 enum bifrost_packed_src src2, 5738 enum bifrost_packed_src src3) 5739{ 5740 if (!I) 5741 return bi_pack_fma_nop(I, src0, src1, src2, src3); 5742 
5743 assert((1 << src0) & 0xfb); 5744 assert((1 << src1) & 0xfb); 5745 5746 switch (I->op) { 5747 case BI_OPCODE_ARSHIFT_I32: 5748 return bi_pack_fma_arshift_i32(I, src0, src1, src2, src3); 5749 case BI_OPCODE_ARSHIFT_V2I16: 5750 return bi_pack_fma_arshift_v2i16(I, src0, src1, src2, src3); 5751 case BI_OPCODE_ARSHIFT_V4I8: 5752 return bi_pack_fma_arshift_v4i8(I, src0, src1, src2, src3); 5753 case BI_OPCODE_ARSHIFT_DOUBLE_I32: 5754 return bi_pack_fma_arshift_double_i32(I, src0, src1, src2, src3); 5755 case BI_OPCODE_ATOM_C_I32: 5756 return bi_pack_fma_atom_c_i32(I, src0, src1, src2, src3); 5757 case BI_OPCODE_ATOM_C_I64: 5758 return bi_pack_fma_atom_c_i64(I, src0, src1, src2, src3); 5759 case BI_OPCODE_ATOM_C1_I32: 5760 return bi_pack_fma_atom_c1_i32(I, src0, src1, src2, src3); 5761 case BI_OPCODE_ATOM_C1_I64: 5762 return bi_pack_fma_atom_c1_i64(I, src0, src1, src2, src3); 5763 case BI_OPCODE_ATOM_C1_RETURN_I32: 5764 return bi_pack_fma_atom_c1_return_i32(I, src0, src1, src2, src3); 5765 case BI_OPCODE_ATOM_C1_RETURN_I64: 5766 return bi_pack_fma_atom_c1_return_i64(I, src0, src1, src2, src3); 5767 case BI_OPCODE_ATOM_C_RETURN_I32: 5768 return bi_pack_fma_atom_c_return_i32(I, src0, src1, src2, src3); 5769 case BI_OPCODE_ATOM_C_RETURN_I64: 5770 return bi_pack_fma_atom_c_return_i64(I, src0, src1, src2, src3); 5771 case BI_OPCODE_ATOM_POST_I32: 5772 return bi_pack_fma_atom_post_i32(I, src0, src1, src2, src3); 5773 case BI_OPCODE_ATOM_POST_I64: 5774 return bi_pack_fma_atom_post_i64(I, src0, src1, src2, src3); 5775 case BI_OPCODE_ATOM_PRE_I64: 5776 return bi_pack_fma_atom_pre_i64(I, src0, src1, src2, src3); 5777 case BI_OPCODE_BITREV_I32: 5778 return bi_pack_fma_bitrev_i32(I, src0, src1, src2, src3); 5779 case BI_OPCODE_CLZ_U32: 5780 return bi_pack_fma_clz_u32(I, src0, src1, src2, src3); 5781 case BI_OPCODE_CLZ_V2U16: 5782 return bi_pack_fma_clz_v2u16(I, src0, src1, src2, src3); 5783 case BI_OPCODE_CLZ_V4U8: 5784 return bi_pack_fma_clz_v4u8(I, src0, src1, src2, src3); 5785 
case BI_OPCODE_CSEL_F32: 5786 return bi_pack_fma_csel_f32(I, src0, src1, src2, src3); 5787 case BI_OPCODE_CSEL_I32: 5788 return bi_pack_fma_csel_i32(I, src0, src1, src2, src3); 5789 case BI_OPCODE_CSEL_S32: 5790 return bi_pack_fma_csel_s32(I, src0, src1, src2, src3); 5791 case BI_OPCODE_CSEL_U32: 5792 return bi_pack_fma_csel_u32(I, src0, src1, src2, src3); 5793 case BI_OPCODE_CSEL_V2F16: 5794 return bi_pack_fma_csel_v2f16(I, src0, src1, src2, src3); 5795 case BI_OPCODE_CSEL_V2I16: 5796 return bi_pack_fma_csel_v2i16(I, src0, src1, src2, src3); 5797 case BI_OPCODE_CSEL_V2S16: 5798 return bi_pack_fma_csel_v2s16(I, src0, src1, src2, src3); 5799 case BI_OPCODE_CSEL_V2U16: 5800 return bi_pack_fma_csel_v2u16(I, src0, src1, src2, src3); 5801 case BI_OPCODE_CUBEFACE1: 5802 return bi_pack_fma_cubeface1(I, src0, src1, src2, src3); 5803 case BI_OPCODE_DTSEL_IMM: 5804 return bi_pack_fma_dtsel_imm(I, src0, src1, src2, src3); 5805 case BI_OPCODE_F16_TO_F32: 5806 return bi_pack_fma_f16_to_f32(I, src0, src1, src2, src3); 5807 case BI_OPCODE_FADD_F32: 5808 return bi_pack_fma_fadd_f32(I, src0, src1, src2, src3); 5809 case BI_OPCODE_FADD_V2F16: 5810 return bi_pack_fma_fadd_v2f16(I, src0, src1, src2, src3); 5811 case BI_OPCODE_FADD_LSCALE_F32: 5812 return bi_pack_fma_fadd_lscale_f32(I, src0, src1, src2, src3); 5813 case BI_OPCODE_FCMP_F32: 5814 return bi_pack_fma_fcmp_f32(I, src0, src1, src2, src3); 5815 case BI_OPCODE_FCMP_V2F16: 5816 return bi_pack_fma_fcmp_v2f16(I, src0, src1, src2, src3); 5817 case BI_OPCODE_FLSHIFT_DOUBLE_I32: 5818 return bi_pack_fma_flshift_double_i32(I, src0, src1, src2, src3); 5819 case BI_OPCODE_FMA_F32: 5820 return bi_pack_fma_fma_f32(I, src0, src1, src2, src3); 5821 case BI_OPCODE_FMA_V2F16: 5822 return bi_pack_fma_fma_v2f16(I, src0, src1, src2, src3); 5823 case BI_OPCODE_FMA_RSCALE_F32: 5824 return bi_pack_fma_fma_rscale_f32(I, src0, src1, src2, src3); 5825 case BI_OPCODE_FMA_RSCALE_V2F16: 5826 return bi_pack_fma_fma_rscale_v2f16(I, src0, src1, src2, src3); 
5827 case BI_OPCODE_FMUL_CSLICE: 5828 return bi_pack_fma_fmul_cslice(I, src0, src1, src2, src3); 5829 case BI_OPCODE_FMUL_SLICE_F32: 5830 return bi_pack_fma_fmul_slice_f32(I, src0, src1, src2, src3); 5831 case BI_OPCODE_FREXPE_F32: 5832 return bi_pack_fma_frexpe_f32(I, src0, src1, src2, src3); 5833 case BI_OPCODE_FREXPE_V2F16: 5834 return bi_pack_fma_frexpe_v2f16(I, src0, src1, src2, src3); 5835 case BI_OPCODE_FREXPM_F32: 5836 return bi_pack_fma_frexpm_f32(I, src0, src1, src2, src3); 5837 case BI_OPCODE_FREXPM_V2F16: 5838 return bi_pack_fma_frexpm_v2f16(I, src0, src1, src2, src3); 5839 case BI_OPCODE_FROUND_F32: 5840 return bi_pack_fma_fround_f32(I, src0, src1, src2, src3); 5841 case BI_OPCODE_FROUND_V2F16: 5842 return bi_pack_fma_fround_v2f16(I, src0, src1, src2, src3); 5843 case BI_OPCODE_FRSHIFT_DOUBLE_I32: 5844 return bi_pack_fma_frshift_double_i32(I, src0, src1, src2, src3); 5845 case BI_OPCODE_IADDC_I32: 5846 return bi_pack_fma_iaddc_i32(I, src0, src1, src2, src3); 5847 case BI_OPCODE_IDP_V4I8: 5848 return bi_pack_fma_idp_v4i8(I, src0, src1, src2, src3); 5849 case BI_OPCODE_IMUL_I32: 5850 return bi_pack_fma_imul_i32(I, src0, src1, src2, src3); 5851 case BI_OPCODE_IMUL_V2I16: 5852 return bi_pack_fma_imul_v2i16(I, src0, src1, src2, src3); 5853 case BI_OPCODE_IMUL_V4I8: 5854 return bi_pack_fma_imul_v4i8(I, src0, src1, src2, src3); 5855 case BI_OPCODE_IMULD: 5856 return bi_pack_fma_imuld(I, src0, src1, src2, src3); 5857 case BI_OPCODE_ISUBB_I32: 5858 return bi_pack_fma_isubb_i32(I, src0, src1, src2, src3); 5859 case BI_OPCODE_JUMP_EX: 5860 return bi_pack_fma_jump_ex(I, src0, src1, src2, src3); 5861 case BI_OPCODE_LROT_DOUBLE_I32: 5862 return bi_pack_fma_lrot_double_i32(I, src0, src1, src2, src3); 5863 case BI_OPCODE_LSHIFT_AND_I32: 5864 return bi_pack_fma_lshift_and_i32(I, src0, src1, src2, src3); 5865 case BI_OPCODE_LSHIFT_AND_V2I16: 5866 return bi_pack_fma_lshift_and_v2i16(I, src0, src1, src2, src3); 5867 case BI_OPCODE_LSHIFT_AND_V4I8: 5868 return 
bi_pack_fma_lshift_and_v4i8(I, src0, src1, src2, src3); 5869 case BI_OPCODE_LSHIFT_DOUBLE_I32: 5870 return bi_pack_fma_lshift_double_i32(I, src0, src1, src2, src3); 5871 case BI_OPCODE_LSHIFT_OR_I32: 5872 return bi_pack_fma_lshift_or_i32(I, src0, src1, src2, src3); 5873 case BI_OPCODE_LSHIFT_OR_V2I16: 5874 return bi_pack_fma_lshift_or_v2i16(I, src0, src1, src2, src3); 5875 case BI_OPCODE_LSHIFT_OR_V4I8: 5876 return bi_pack_fma_lshift_or_v4i8(I, src0, src1, src2, src3); 5877 case BI_OPCODE_LSHIFT_XOR_I32: 5878 return bi_pack_fma_lshift_xor_i32(I, src0, src1, src2, src3); 5879 case BI_OPCODE_LSHIFT_XOR_V2I16: 5880 return bi_pack_fma_lshift_xor_v2i16(I, src0, src1, src2, src3); 5881 case BI_OPCODE_LSHIFT_XOR_V4I8: 5882 return bi_pack_fma_lshift_xor_v4i8(I, src0, src1, src2, src3); 5883 case BI_OPCODE_MKVEC_V2I16: 5884 return bi_pack_fma_mkvec_v2i16(I, src0, src1, src2, src3); 5885 case BI_OPCODE_MKVEC_V4I8: 5886 return bi_pack_fma_mkvec_v4i8(I, src0, src1, src2, src3); 5887 case BI_OPCODE_MOV_I32: 5888 return bi_pack_fma_mov_i32(I, src0, src1, src2, src3); 5889 case BI_OPCODE_NOP: 5890 return bi_pack_fma_nop(I, src0, src1, src2, src3); 5891 case BI_OPCODE_POPCOUNT_I32: 5892 return bi_pack_fma_popcount_i32(I, src0, src1, src2, src3); 5893 case BI_OPCODE_QUIET_F32: 5894 return bi_pack_fma_quiet_f32(I, src0, src1, src2, src3); 5895 case BI_OPCODE_QUIET_V2F16: 5896 return bi_pack_fma_quiet_v2f16(I, src0, src1, src2, src3); 5897 case BI_OPCODE_RROT_DOUBLE_I32: 5898 return bi_pack_fma_rrot_double_i32(I, src0, src1, src2, src3); 5899 case BI_OPCODE_RSHIFT_AND_I32: 5900 return bi_pack_fma_rshift_and_i32(I, src0, src1, src2, src3); 5901 case BI_OPCODE_RSHIFT_AND_V2I16: 5902 return bi_pack_fma_rshift_and_v2i16(I, src0, src1, src2, src3); 5903 case BI_OPCODE_RSHIFT_AND_V4I8: 5904 return bi_pack_fma_rshift_and_v4i8(I, src0, src1, src2, src3); 5905 case BI_OPCODE_RSHIFT_DOUBLE_I32: 5906 return bi_pack_fma_rshift_double_i32(I, src0, src1, src2, src3); 5907 case 
BI_OPCODE_RSHIFT_OR_I32: 5908 return bi_pack_fma_rshift_or_i32(I, src0, src1, src2, src3); 5909 case BI_OPCODE_RSHIFT_OR_V2I16: 5910 return bi_pack_fma_rshift_or_v2i16(I, src0, src1, src2, src3); 5911 case BI_OPCODE_RSHIFT_OR_V4I8: 5912 return bi_pack_fma_rshift_or_v4i8(I, src0, src1, src2, src3); 5913 case BI_OPCODE_RSHIFT_XOR_I32: 5914 return bi_pack_fma_rshift_xor_i32(I, src0, src1, src2, src3); 5915 case BI_OPCODE_RSHIFT_XOR_V2I16: 5916 return bi_pack_fma_rshift_xor_v2i16(I, src0, src1, src2, src3); 5917 case BI_OPCODE_RSHIFT_XOR_V4I8: 5918 return bi_pack_fma_rshift_xor_v4i8(I, src0, src1, src2, src3); 5919 case BI_OPCODE_S16_TO_S32: 5920 return bi_pack_fma_s16_to_s32(I, src0, src1, src2, src3); 5921 case BI_OPCODE_S8_TO_S32: 5922 return bi_pack_fma_s8_to_s32(I, src0, src1, src2, src3); 5923 case BI_OPCODE_SEG_ADD: 5924 return bi_pack_fma_seg_add(I, src0, src1, src2, src3); 5925 case BI_OPCODE_SHADDXL_I64: 5926 return bi_pack_fma_shaddxl_i64(I, src0, src1, src2, src3); 5927 case BI_OPCODE_SHADDXL_S32: 5928 return bi_pack_fma_shaddxl_s32(I, src0, src1, src2, src3); 5929 case BI_OPCODE_SHADDXL_U32: 5930 return bi_pack_fma_shaddxl_u32(I, src0, src1, src2, src3); 5931 case BI_OPCODE_U16_TO_U32: 5932 return bi_pack_fma_u16_to_u32(I, src0, src1, src2, src3); 5933 case BI_OPCODE_U8_TO_U32: 5934 return bi_pack_fma_u8_to_u32(I, src0, src1, src2, src3); 5935 case BI_OPCODE_V2F32_TO_V2F16: 5936 return bi_pack_fma_v2f32_to_v2f16(I, src0, src1, src2, src3); 5937 case BI_OPCODE_VN_ASST1_F16: 5938 return bi_pack_fma_vn_asst1_f16(I, src0, src1, src2, src3); 5939 case BI_OPCODE_VN_ASST1_F32: 5940 return bi_pack_fma_vn_asst1_f32(I, src0, src1, src2, src3); 5941 default: 5942#ifndef NDEBUG 5943 bi_print_instr(I, stderr); 5944#endif 5945 unreachable("Cannot pack instruction as *"); 5946 } 5947} 5948 5949unsigned 5950bi_pack_add(bi_instr *I, 5951 enum bifrost_packed_src src0, 5952 enum bifrost_packed_src src1, 5953 enum bifrost_packed_src src2, 5954 enum bifrost_packed_src src3) 
5955{ 5956 if (!I) 5957 return bi_pack_add_nop(I, src0, src1, src2, src3); 5958 5959 switch (I->op) { 5960 case BI_OPCODE_ACMPSTORE_I32: 5961 return bi_pack_add_acmpstore_i32(I, src0, src1, src2, src3); 5962 case BI_OPCODE_ACMPSTORE_I64: 5963 return bi_pack_add_acmpstore_i64(I, src0, src1, src2, src3); 5964 case BI_OPCODE_ACMPXCHG_I32: 5965 return bi_pack_add_acmpxchg_i32(I, src0, src1, src2, src3); 5966 case BI_OPCODE_ACMPXCHG_I64: 5967 return bi_pack_add_acmpxchg_i64(I, src0, src1, src2, src3); 5968 case BI_OPCODE_ATEST: 5969 return bi_pack_add_atest(I, src0, src1, src2, src3); 5970 case BI_OPCODE_ATOM_CX: 5971 return bi_pack_add_atom_cx(I, src0, src1, src2, src3); 5972 case BI_OPCODE_AXCHG_I32: 5973 return bi_pack_add_axchg_i32(I, src0, src1, src2, src3); 5974 case BI_OPCODE_AXCHG_I64: 5975 return bi_pack_add_axchg_i64(I, src0, src1, src2, src3); 5976 case BI_OPCODE_BARRIER: 5977 return bi_pack_add_barrier(I, src0, src1, src2, src3); 5978 case BI_OPCODE_BLEND: 5979 return bi_pack_add_blend(I, src0, src1, src2, src3); 5980 case BI_OPCODE_BRANCH_F16: 5981 return bi_pack_add_branch_f16(I, src0, src1, src2, src3); 5982 case BI_OPCODE_BRANCH_F32: 5983 return bi_pack_add_branch_f32(I, src0, src1, src2, src3); 5984 case BI_OPCODE_BRANCH_I16: 5985 return bi_pack_add_branch_i16(I, src0, src1, src2, src3); 5986 case BI_OPCODE_BRANCH_I32: 5987 return bi_pack_add_branch_i32(I, src0, src1, src2, src3); 5988 case BI_OPCODE_BRANCH_S16: 5989 return bi_pack_add_branch_s16(I, src0, src1, src2, src3); 5990 case BI_OPCODE_BRANCH_S32: 5991 return bi_pack_add_branch_s32(I, src0, src1, src2, src3); 5992 case BI_OPCODE_BRANCH_U16: 5993 return bi_pack_add_branch_u16(I, src0, src1, src2, src3); 5994 case BI_OPCODE_BRANCH_U32: 5995 return bi_pack_add_branch_u32(I, src0, src1, src2, src3); 5996 case BI_OPCODE_BRANCHC_I16: 5997 return bi_pack_add_branchc_i16(I, src0, src1, src2, src3); 5998 case BI_OPCODE_BRANCHC_I32: 5999 return bi_pack_add_branchc_i32(I, src0, src1, src2, src3); 6000 case 
BI_OPCODE_BRANCHZ_F16: 6001 return bi_pack_add_branchz_f16(I, src0, src1, src2, src3); 6002 case BI_OPCODE_BRANCHZ_F32: 6003 return bi_pack_add_branchz_f32(I, src0, src1, src2, src3); 6004 case BI_OPCODE_BRANCHZ_I16: 6005 return bi_pack_add_branchz_i16(I, src0, src1, src2, src3); 6006 case BI_OPCODE_BRANCHZ_I32: 6007 return bi_pack_add_branchz_i32(I, src0, src1, src2, src3); 6008 case BI_OPCODE_BRANCHZ_S16: 6009 return bi_pack_add_branchz_s16(I, src0, src1, src2, src3); 6010 case BI_OPCODE_BRANCHZ_S32: 6011 return bi_pack_add_branchz_s32(I, src0, src1, src2, src3); 6012 case BI_OPCODE_BRANCHZ_U16: 6013 return bi_pack_add_branchz_u16(I, src0, src1, src2, src3); 6014 case BI_OPCODE_BRANCHZ_U32: 6015 return bi_pack_add_branchz_u32(I, src0, src1, src2, src3); 6016 case BI_OPCODE_BRANCH_DIVERG: 6017 return bi_pack_add_branch_diverg(I, src0, src1, src2, src3); 6018 case BI_OPCODE_BRANCH_LOWBITS_F32: 6019 return bi_pack_add_branch_lowbits_f32(I, src0, src1, src2, src3); 6020 case BI_OPCODE_BRANCH_NO_DIVERG: 6021 return bi_pack_add_branch_no_diverg(I, src0, src1, src2, src3); 6022 case BI_OPCODE_CLPER_I32: 6023 return bi_pack_add_clper_i32(I, src0, src1, src2, src3); 6024 case BI_OPCODE_CLPER_V6_I32: 6025 return bi_pack_add_clper_v6_i32(I, src0, src1, src2, src3); 6026 case BI_OPCODE_CUBEFACE2: 6027 return bi_pack_add_cubeface2(I, src0, src1, src2, src3); 6028 case BI_OPCODE_CUBE_SSEL: 6029 return bi_pack_add_cube_ssel(I, src0, src1, src2, src3); 6030 case BI_OPCODE_CUBE_TSEL: 6031 return bi_pack_add_cube_tsel(I, src0, src1, src2, src3); 6032 case BI_OPCODE_DISCARD_F32: 6033 return bi_pack_add_discard_f32(I, src0, src1, src2, src3); 6034 case BI_OPCODE_F16_TO_F32: 6035 return bi_pack_add_f16_to_f32(I, src0, src1, src2, src3); 6036 case BI_OPCODE_F16_TO_S32: 6037 return bi_pack_add_f16_to_s32(I, src0, src1, src2, src3); 6038 case BI_OPCODE_F16_TO_U32: 6039 return bi_pack_add_f16_to_u32(I, src0, src1, src2, src3); 6040 case BI_OPCODE_F32_TO_S32: 6041 return 
bi_pack_add_f32_to_s32(I, src0, src1, src2, src3); 6042 case BI_OPCODE_F32_TO_U32: 6043 return bi_pack_add_f32_to_u32(I, src0, src1, src2, src3); 6044 case BI_OPCODE_FADD_F32: 6045 return bi_pack_add_fadd_f32(I, src0, src1, src2, src3); 6046 case BI_OPCODE_FADD_V2F16: 6047 return bi_pack_add_fadd_v2f16(I, src0, src1, src2, src3); 6048 case BI_OPCODE_FADD_RSCALE_F32: 6049 return bi_pack_add_fadd_rscale_f32(I, src0, src1, src2, src3); 6050 case BI_OPCODE_FCMP_F32: 6051 return bi_pack_add_fcmp_f32(I, src0, src1, src2, src3); 6052 case BI_OPCODE_FCMP_V2F16: 6053 return bi_pack_add_fcmp_v2f16(I, src0, src1, src2, src3); 6054 case BI_OPCODE_FCOS_TABLE_U6: 6055 return bi_pack_add_fcos_table_u6(I, src0, src1, src2, src3); 6056 case BI_OPCODE_FEXP_F32: 6057 return bi_pack_add_fexp_f32(I, src0, src1, src2, src3); 6058 case BI_OPCODE_FEXP_TABLE_U4: 6059 return bi_pack_add_fexp_table_u4(I, src0, src1, src2, src3); 6060 case BI_OPCODE_FLOGD_F32: 6061 return bi_pack_add_flogd_f32(I, src0, src1, src2, src3); 6062 case BI_OPCODE_FLOG_TABLE_F32: 6063 return bi_pack_add_flog_table_f32(I, src0, src1, src2, src3); 6064 case BI_OPCODE_FMAX_F32: 6065 return bi_pack_add_fmax_f32(I, src0, src1, src2, src3); 6066 case BI_OPCODE_FMAX_V2F16: 6067 return bi_pack_add_fmax_v2f16(I, src0, src1, src2, src3); 6068 case BI_OPCODE_FMIN_F32: 6069 return bi_pack_add_fmin_f32(I, src0, src1, src2, src3); 6070 case BI_OPCODE_FMIN_V2F16: 6071 return bi_pack_add_fmin_v2f16(I, src0, src1, src2, src3); 6072 case BI_OPCODE_FPCLASS_F16: 6073 return bi_pack_add_fpclass_f16(I, src0, src1, src2, src3); 6074 case BI_OPCODE_FPCLASS_F32: 6075 return bi_pack_add_fpclass_f32(I, src0, src1, src2, src3); 6076 case BI_OPCODE_FPOW_SC_APPLY: 6077 return bi_pack_add_fpow_sc_apply(I, src0, src1, src2, src3); 6078 case BI_OPCODE_FPOW_SC_DET_F16: 6079 return bi_pack_add_fpow_sc_det_f16(I, src0, src1, src2, src3); 6080 case BI_OPCODE_FPOW_SC_DET_F32: 6081 return bi_pack_add_fpow_sc_det_f32(I, src0, src1, src2, src3); 6082 case 
BI_OPCODE_FRCP_F16: 6083 return bi_pack_add_frcp_f16(I, src0, src1, src2, src3); 6084 case BI_OPCODE_FRCP_F32: 6085 return bi_pack_add_frcp_f32(I, src0, src1, src2, src3); 6086 case BI_OPCODE_FRCP_APPROX_F32: 6087 return bi_pack_add_frcp_approx_f32(I, src0, src1, src2, src3); 6088 case BI_OPCODE_FREXPE_F32: 6089 return bi_pack_add_frexpe_f32(I, src0, src1, src2, src3); 6090 case BI_OPCODE_FREXPE_V2F16: 6091 return bi_pack_add_frexpe_v2f16(I, src0, src1, src2, src3); 6092 case BI_OPCODE_FREXPM_F32: 6093 return bi_pack_add_frexpm_f32(I, src0, src1, src2, src3); 6094 case BI_OPCODE_FREXPM_V2F16: 6095 return bi_pack_add_frexpm_v2f16(I, src0, src1, src2, src3); 6096 case BI_OPCODE_FROUND_F32: 6097 return bi_pack_add_fround_f32(I, src0, src1, src2, src3); 6098 case BI_OPCODE_FROUND_V2F16: 6099 return bi_pack_add_fround_v2f16(I, src0, src1, src2, src3); 6100 case BI_OPCODE_FRSQ_F16: 6101 return bi_pack_add_frsq_f16(I, src0, src1, src2, src3); 6102 case BI_OPCODE_FRSQ_F32: 6103 return bi_pack_add_frsq_f32(I, src0, src1, src2, src3); 6104 case BI_OPCODE_FRSQ_APPROX_F32: 6105 return bi_pack_add_frsq_approx_f32(I, src0, src1, src2, src3); 6106 case BI_OPCODE_FSINCOS_OFFSET_U6: 6107 return bi_pack_add_fsincos_offset_u6(I, src0, src1, src2, src3); 6108 case BI_OPCODE_FSIN_TABLE_U6: 6109 return bi_pack_add_fsin_table_u6(I, src0, src1, src2, src3); 6110 case BI_OPCODE_HADD_S32: 6111 return bi_pack_add_hadd_s32(I, src0, src1, src2, src3); 6112 case BI_OPCODE_HADD_U32: 6113 return bi_pack_add_hadd_u32(I, src0, src1, src2, src3); 6114 case BI_OPCODE_HADD_V2S16: 6115 return bi_pack_add_hadd_v2s16(I, src0, src1, src2, src3); 6116 case BI_OPCODE_HADD_V2U16: 6117 return bi_pack_add_hadd_v2u16(I, src0, src1, src2, src3); 6118 case BI_OPCODE_HADD_V4S8: 6119 return bi_pack_add_hadd_v4s8(I, src0, src1, src2, src3); 6120 case BI_OPCODE_HADD_V4U8: 6121 return bi_pack_add_hadd_v4u8(I, src0, src1, src2, src3); 6122 case BI_OPCODE_IABS_S32: 6123 return bi_pack_add_iabs_s32(I, src0, src1, src2, 
src3); 6124 case BI_OPCODE_IABS_V2S16: 6125 return bi_pack_add_iabs_v2s16(I, src0, src1, src2, src3); 6126 case BI_OPCODE_IABS_V4S8: 6127 return bi_pack_add_iabs_v4s8(I, src0, src1, src2, src3); 6128 case BI_OPCODE_IADD_S32: 6129 return bi_pack_add_iadd_s32(I, src0, src1, src2, src3); 6130 case BI_OPCODE_IADD_U32: 6131 return bi_pack_add_iadd_u32(I, src0, src1, src2, src3); 6132 case BI_OPCODE_IADD_V2S16: 6133 return bi_pack_add_iadd_v2s16(I, src0, src1, src2, src3); 6134 case BI_OPCODE_IADD_V2U16: 6135 return bi_pack_add_iadd_v2u16(I, src0, src1, src2, src3); 6136 case BI_OPCODE_IADD_V4S8: 6137 return bi_pack_add_iadd_v4s8(I, src0, src1, src2, src3); 6138 case BI_OPCODE_IADD_V4U8: 6139 return bi_pack_add_iadd_v4u8(I, src0, src1, src2, src3); 6140 case BI_OPCODE_ICMP_I32: 6141 return bi_pack_add_icmp_i32(I, src0, src1, src2, src3); 6142 case BI_OPCODE_ICMP_S32: 6143 return bi_pack_add_icmp_s32(I, src0, src1, src2, src3); 6144 case BI_OPCODE_ICMP_U32: 6145 return bi_pack_add_icmp_u32(I, src0, src1, src2, src3); 6146 case BI_OPCODE_ICMP_V2I16: 6147 return bi_pack_add_icmp_v2i16(I, src0, src1, src2, src3); 6148 case BI_OPCODE_ICMP_V2S16: 6149 return bi_pack_add_icmp_v2s16(I, src0, src1, src2, src3); 6150 case BI_OPCODE_ICMP_V2U16: 6151 return bi_pack_add_icmp_v2u16(I, src0, src1, src2, src3); 6152 case BI_OPCODE_ICMP_V4I8: 6153 return bi_pack_add_icmp_v4i8(I, src0, src1, src2, src3); 6154 case BI_OPCODE_ICMP_V4S8: 6155 return bi_pack_add_icmp_v4s8(I, src0, src1, src2, src3); 6156 case BI_OPCODE_ICMP_V4U8: 6157 return bi_pack_add_icmp_v4u8(I, src0, src1, src2, src3); 6158 case BI_OPCODE_ICMPF_I32: 6159 return bi_pack_add_icmpf_i32(I, src0, src1, src2, src3); 6160 case BI_OPCODE_ICMPI_I32: 6161 return bi_pack_add_icmpi_i32(I, src0, src1, src2, src3); 6162 case BI_OPCODE_ICMPI_S32: 6163 return bi_pack_add_icmpi_s32(I, src0, src1, src2, src3); 6164 case BI_OPCODE_ICMPI_U32: 6165 return bi_pack_add_icmpi_u32(I, src0, src1, src2, src3); 6166 case BI_OPCODE_ICMPM_I32: 6167 
return bi_pack_add_icmpm_i32(I, src0, src1, src2, src3); 6168 case BI_OPCODE_ILOGB_F32: 6169 return bi_pack_add_ilogb_f32(I, src0, src1, src2, src3); 6170 case BI_OPCODE_ILOGB_V2F16: 6171 return bi_pack_add_ilogb_v2f16(I, src0, src1, src2, src3); 6172 case BI_OPCODE_IMOV_FMA: 6173 return bi_pack_add_imov_fma(I, src0, src1, src2, src3); 6174 case BI_OPCODE_ISUB_S32: 6175 return bi_pack_add_isub_s32(I, src0, src1, src2, src3); 6176 case BI_OPCODE_ISUB_U32: 6177 return bi_pack_add_isub_u32(I, src0, src1, src2, src3); 6178 case BI_OPCODE_ISUB_V2S16: 6179 return bi_pack_add_isub_v2s16(I, src0, src1, src2, src3); 6180 case BI_OPCODE_ISUB_V2U16: 6181 return bi_pack_add_isub_v2u16(I, src0, src1, src2, src3); 6182 case BI_OPCODE_ISUB_V4S8: 6183 return bi_pack_add_isub_v4s8(I, src0, src1, src2, src3); 6184 case BI_OPCODE_ISUB_V4U8: 6185 return bi_pack_add_isub_v4u8(I, src0, src1, src2, src3); 6186 case BI_OPCODE_JUMP: 6187 return bi_pack_add_jump(I, src0, src1, src2, src3); 6188 case BI_OPCODE_LDEXP_F32: 6189 return bi_pack_add_ldexp_f32(I, src0, src1, src2, src3); 6190 case BI_OPCODE_LDEXP_V2F16: 6191 return bi_pack_add_ldexp_v2f16(I, src0, src1, src2, src3); 6192 case BI_OPCODE_LD_ATTR: 6193 return bi_pack_add_ld_attr(I, src0, src1, src2, src3); 6194 case BI_OPCODE_LD_ATTR_IMM: 6195 return bi_pack_add_ld_attr_imm(I, src0, src1, src2, src3); 6196 case BI_OPCODE_LD_ATTR_TEX: 6197 return bi_pack_add_ld_attr_tex(I, src0, src1, src2, src3); 6198 case BI_OPCODE_LD_CVT: 6199 return bi_pack_add_ld_cvt(I, src0, src1, src2, src3); 6200 case BI_OPCODE_LD_GCLK_U64: 6201 return bi_pack_add_ld_gclk_u64(I, src0, src1, src2, src3); 6202 case BI_OPCODE_LD_TILE: 6203 return bi_pack_add_ld_tile(I, src0, src1, src2, src3); 6204 case BI_OPCODE_LD_VAR: 6205 return bi_pack_add_ld_var(I, src0, src1, src2, src3); 6206 case BI_OPCODE_LD_VAR_FLAT: 6207 return bi_pack_add_ld_var_flat(I, src0, src1, src2, src3); 6208 case BI_OPCODE_LD_VAR_FLAT_IMM: 6209 return bi_pack_add_ld_var_flat_imm(I, src0, 
src1, src2, src3); 6210 case BI_OPCODE_LD_VAR_IMM: 6211 return bi_pack_add_ld_var_imm(I, src0, src1, src2, src3); 6212 case BI_OPCODE_LD_VAR_SPECIAL: 6213 return bi_pack_add_ld_var_special(I, src0, src1, src2, src3); 6214 case BI_OPCODE_LEA_ATTR: 6215 return bi_pack_add_lea_attr(I, src0, src1, src2, src3); 6216 case BI_OPCODE_LEA_ATTR_IMM: 6217 return bi_pack_add_lea_attr_imm(I, src0, src1, src2, src3); 6218 case BI_OPCODE_LEA_ATTR_TEX: 6219 return bi_pack_add_lea_attr_tex(I, src0, src1, src2, src3); 6220 case BI_OPCODE_LEA_TEX: 6221 return bi_pack_add_lea_tex(I, src0, src1, src2, src3); 6222 case BI_OPCODE_LEA_TEX_IMM: 6223 return bi_pack_add_lea_tex_imm(I, src0, src1, src2, src3); 6224 case BI_OPCODE_LOAD_I128: 6225 return bi_pack_add_load_i128(I, src0, src1, src2, src3); 6226 case BI_OPCODE_LOAD_I16: 6227 return bi_pack_add_load_i16(I, src0, src1, src2, src3); 6228 case BI_OPCODE_LOAD_I24: 6229 return bi_pack_add_load_i24(I, src0, src1, src2, src3); 6230 case BI_OPCODE_LOAD_I32: 6231 return bi_pack_add_load_i32(I, src0, src1, src2, src3); 6232 case BI_OPCODE_LOAD_I48: 6233 return bi_pack_add_load_i48(I, src0, src1, src2, src3); 6234 case BI_OPCODE_LOAD_I64: 6235 return bi_pack_add_load_i64(I, src0, src1, src2, src3); 6236 case BI_OPCODE_LOAD_I8: 6237 return bi_pack_add_load_i8(I, src0, src1, src2, src3); 6238 case BI_OPCODE_LOAD_I96: 6239 return bi_pack_add_load_i96(I, src0, src1, src2, src3); 6240 case BI_OPCODE_LOGB_F32: 6241 return bi_pack_add_logb_f32(I, src0, src1, src2, src3); 6242 case BI_OPCODE_LOGB_V2F16: 6243 return bi_pack_add_logb_v2f16(I, src0, src1, src2, src3); 6244 case BI_OPCODE_MKVEC_V2I16: 6245 return bi_pack_add_mkvec_v2i16(I, src0, src1, src2, src3); 6246 case BI_OPCODE_MOV_I32: 6247 return bi_pack_add_mov_i32(I, src0, src1, src2, src3); 6248 case BI_OPCODE_MUX_I32: 6249 return bi_pack_add_mux_i32(I, src0, src1, src2, src3); 6250 case BI_OPCODE_MUX_V2I16: 6251 return bi_pack_add_mux_v2i16(I, src0, src1, src2, src3); 6252 case 
BI_OPCODE_MUX_V4I8: 6253 return bi_pack_add_mux_v4i8(I, src0, src1, src2, src3); 6254 case BI_OPCODE_NOP: 6255 return bi_pack_add_nop(I, src0, src1, src2, src3); 6256 case BI_OPCODE_QUIET_F32: 6257 return bi_pack_add_quiet_f32(I, src0, src1, src2, src3); 6258 case BI_OPCODE_QUIET_V2F16: 6259 return bi_pack_add_quiet_v2f16(I, src0, src1, src2, src3); 6260 case BI_OPCODE_S16_TO_F32: 6261 return bi_pack_add_s16_to_f32(I, src0, src1, src2, src3); 6262 case BI_OPCODE_S16_TO_S32: 6263 return bi_pack_add_s16_to_s32(I, src0, src1, src2, src3); 6264 case BI_OPCODE_S32_TO_F32: 6265 return bi_pack_add_s32_to_f32(I, src0, src1, src2, src3); 6266 case BI_OPCODE_S8_TO_F32: 6267 return bi_pack_add_s8_to_f32(I, src0, src1, src2, src3); 6268 case BI_OPCODE_S8_TO_S32: 6269 return bi_pack_add_s8_to_s32(I, src0, src1, src2, src3); 6270 case BI_OPCODE_SEG_ADD: 6271 return bi_pack_add_seg_add(I, src0, src1, src2, src3); 6272 case BI_OPCODE_SHADDXH_I32: 6273 return bi_pack_add_shaddxh_i32(I, src0, src1, src2, src3); 6274 case BI_OPCODE_SHIFT_DOUBLE_I32: 6275 return bi_pack_add_shift_double_i32(I, src0, src1, src2, src3); 6276 case BI_OPCODE_STORE_I128: 6277 return bi_pack_add_store_i128(I, src0, src1, src2, src3); 6278 case BI_OPCODE_STORE_I16: 6279 return bi_pack_add_store_i16(I, src0, src1, src2, src3); 6280 case BI_OPCODE_STORE_I24: 6281 return bi_pack_add_store_i24(I, src0, src1, src2, src3); 6282 case BI_OPCODE_STORE_I32: 6283 return bi_pack_add_store_i32(I, src0, src1, src2, src3); 6284 case BI_OPCODE_STORE_I48: 6285 return bi_pack_add_store_i48(I, src0, src1, src2, src3); 6286 case BI_OPCODE_STORE_I64: 6287 return bi_pack_add_store_i64(I, src0, src1, src2, src3); 6288 case BI_OPCODE_STORE_I8: 6289 return bi_pack_add_store_i8(I, src0, src1, src2, src3); 6290 case BI_OPCODE_STORE_I96: 6291 return bi_pack_add_store_i96(I, src0, src1, src2, src3); 6292 case BI_OPCODE_ST_CVT: 6293 return bi_pack_add_st_cvt(I, src0, src1, src2, src3); 6294 case BI_OPCODE_ST_TILE: 6295 return 
bi_pack_add_st_tile(I, src0, src1, src2, src3); 6296 case BI_OPCODE_SWZ_V2I16: 6297 return bi_pack_add_swz_v2i16(I, src0, src1, src2, src3); 6298 case BI_OPCODE_SWZ_V4I8: 6299 return bi_pack_add_swz_v4i8(I, src0, src1, src2, src3); 6300 case BI_OPCODE_TEXC: 6301 return bi_pack_add_texc(I, src0, src1, src2, src3); 6302 case BI_OPCODE_TEXS_2D_F16: 6303 return bi_pack_add_texs_2d_f16(I, src0, src1, src2, src3); 6304 case BI_OPCODE_TEXS_2D_F32: 6305 return bi_pack_add_texs_2d_f32(I, src0, src1, src2, src3); 6306 case BI_OPCODE_TEXS_CUBE_F16: 6307 return bi_pack_add_texs_cube_f16(I, src0, src1, src2, src3); 6308 case BI_OPCODE_TEXS_CUBE_F32: 6309 return bi_pack_add_texs_cube_f32(I, src0, src1, src2, src3); 6310 case BI_OPCODE_U16_TO_F32: 6311 return bi_pack_add_u16_to_f32(I, src0, src1, src2, src3); 6312 case BI_OPCODE_U16_TO_U32: 6313 return bi_pack_add_u16_to_u32(I, src0, src1, src2, src3); 6314 case BI_OPCODE_U32_TO_F32: 6315 return bi_pack_add_u32_to_f32(I, src0, src1, src2, src3); 6316 case BI_OPCODE_U8_TO_F32: 6317 return bi_pack_add_u8_to_f32(I, src0, src1, src2, src3); 6318 case BI_OPCODE_U8_TO_U32: 6319 return bi_pack_add_u8_to_u32(I, src0, src1, src2, src3); 6320 case BI_OPCODE_V2F16_TO_V2S16: 6321 return bi_pack_add_v2f16_to_v2s16(I, src0, src1, src2, src3); 6322 case BI_OPCODE_V2F16_TO_V2U16: 6323 return bi_pack_add_v2f16_to_v2u16(I, src0, src1, src2, src3); 6324 case BI_OPCODE_V2F32_TO_V2F16: 6325 return bi_pack_add_v2f32_to_v2f16(I, src0, src1, src2, src3); 6326 case BI_OPCODE_V2S16_TO_V2F16: 6327 return bi_pack_add_v2s16_to_v2f16(I, src0, src1, src2, src3); 6328 case BI_OPCODE_V2S8_TO_V2F16: 6329 return bi_pack_add_v2s8_to_v2f16(I, src0, src1, src2, src3); 6330 case BI_OPCODE_V2S8_TO_V2S16: 6331 return bi_pack_add_v2s8_to_v2s16(I, src0, src1, src2, src3); 6332 case BI_OPCODE_V2U16_TO_V2F16: 6333 return bi_pack_add_v2u16_to_v2f16(I, src0, src1, src2, src3); 6334 case BI_OPCODE_V2U8_TO_V2F16: 6335 return bi_pack_add_v2u8_to_v2f16(I, src0, src1, src2, src3); 
6336 case BI_OPCODE_V2U8_TO_V2U16: 6337 return bi_pack_add_v2u8_to_v2u16(I, src0, src1, src2, src3); 6338 case BI_OPCODE_VAR_TEX_F16: 6339 return bi_pack_add_var_tex_f16(I, src0, src1, src2, src3); 6340 case BI_OPCODE_VAR_TEX_F32: 6341 return bi_pack_add_var_tex_f32(I, src0, src1, src2, src3); 6342 case BI_OPCODE_VN_ASST2_F32: 6343 return bi_pack_add_vn_asst2_f32(I, src0, src1, src2, src3); 6344 case BI_OPCODE_VN_ASST2_V2F16: 6345 return bi_pack_add_vn_asst2_v2f16(I, src0, src1, src2, src3); 6346 case BI_OPCODE_WMASK: 6347 return bi_pack_add_wmask(I, src0, src1, src2, src3); 6348 case BI_OPCODE_ZS_EMIT: 6349 return bi_pack_add_zs_emit(I, src0, src1, src2, src3); 6350 default: 6351#ifndef NDEBUG 6352 bi_print_instr(I, stderr); 6353#endif 6354 unreachable("Cannot pack instruction as +"); 6355 } 6356} 6357 6358