1/************************************************************************** 2 * 3 * Copyright 2009 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28/** 29 * @file 30 * Helper functions for swizzling/shuffling. 31 * 32 * @author Jose Fonseca <jfonseca@vmware.com> 33 */ 34 35#include <inttypes.h> /* for PRIx64 macro */ 36#include "util/compiler.h" 37#include "util/u_debug.h" 38 39#include "lp_bld_type.h" 40#include "lp_bld_const.h" 41#include "lp_bld_init.h" 42#include "lp_bld_logic.h" 43#include "lp_bld_swizzle.h" 44#include "lp_bld_pack.h" 45 46 47LLVMValueRef 48lp_build_broadcast(struct gallivm_state *gallivm, 49 LLVMTypeRef vec_type, 50 LLVMValueRef scalar) 51{ 52 LLVMValueRef res; 53 54 if (LLVMGetTypeKind(vec_type) != LLVMVectorTypeKind) { 55 /* scalar */ 56 assert(vec_type == LLVMTypeOf(scalar)); 57 res = scalar; 58 } else { 59 LLVMBuilderRef builder = gallivm->builder; 60 const unsigned length = LLVMGetVectorSize(vec_type); 61 LLVMValueRef undef = LLVMGetUndef(vec_type); 62 /* The shuffle vector is always made of int32 elements */ 63 LLVMTypeRef i32_type = LLVMInt32TypeInContext(gallivm->context); 64 LLVMTypeRef i32_vec_type = LLVMVectorType(i32_type, length); 65 66 assert(LLVMGetElementType(vec_type) == LLVMTypeOf(scalar)); 67 68 res = LLVMBuildInsertElement(builder, undef, scalar, LLVMConstNull(i32_type), ""); 69 res = LLVMBuildShuffleVector(builder, res, undef, LLVMConstNull(i32_vec_type), ""); 70 } 71 72 return res; 73} 74 75 76/** 77 * Broadcast 78 */ 79LLVMValueRef 80lp_build_broadcast_scalar(struct lp_build_context *bld, 81 LLVMValueRef scalar) 82{ 83 assert(lp_check_elem_type(bld->type, LLVMTypeOf(scalar))); 84 85 return lp_build_broadcast(bld->gallivm, bld->vec_type, scalar); 86} 87 88 89/** 90 * Combined extract and broadcast (mere shuffle in most cases) 91 */ 92LLVMValueRef 93lp_build_extract_broadcast(struct gallivm_state *gallivm, 94 struct lp_type src_type, 95 struct lp_type dst_type, 96 LLVMValueRef vector, 97 LLVMValueRef index) 98{ 99 LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); 100 LLVMValueRef res; 101 102 assert(src_type.floating == dst_type.floating); 103 assert(src_type.width == dst_type.width); 104 105 assert(lp_check_value(src_type, vector)); 106 assert(LLVMTypeOf(index) == i32t); 107 108 if (src_type.length == 1) { 109 if (dst_type.length == 1) { 110 /* 111 * Trivial scalar -> scalar. 112 */ 113 114 res = vector; 115 } 116 else { 117 /* 118 * Broadcast scalar -> vector. 119 */ 120 121 res = lp_build_broadcast(gallivm, 122 lp_build_vec_type(gallivm, dst_type), 123 vector); 124 } 125 } 126 else { 127 if (dst_type.length > 1) { 128 /* 129 * shuffle - result can be of different length. 130 */ 131 132 LLVMValueRef shuffle; 133 shuffle = lp_build_broadcast(gallivm, 134 LLVMVectorType(i32t, dst_type.length), 135 index); 136 res = LLVMBuildShuffleVector(gallivm->builder, vector, 137 LLVMGetUndef(lp_build_vec_type(gallivm, src_type)), 138 shuffle, ""); 139 } 140 else { 141 /* 142 * Trivial extract scalar from vector. 143 */ 144 res = LLVMBuildExtractElement(gallivm->builder, vector, index, ""); 145 } 146 } 147 148 return res; 149} 150 151 152/** 153 * Swizzle one channel into other channels. 154 */ 155LLVMValueRef 156lp_build_swizzle_scalar_aos(struct lp_build_context *bld, 157 LLVMValueRef a, 158 unsigned channel, 159 unsigned num_channels) 160{ 161 LLVMBuilderRef builder = bld->gallivm->builder; 162 const struct lp_type type = bld->type; 163 const unsigned n = type.length; 164 unsigned i, j; 165 166 if(a == bld->undef || a == bld->zero || a == bld->one || num_channels == 1) 167 return a; 168 169 assert(num_channels == 2 || num_channels == 4); 170 171 /* XXX: SSE3 has PSHUFB which should be better than bitmasks, but forcing 172 * using shuffles here actually causes worst results. More investigation is 173 * needed. */ 174 if (LLVMIsConstant(a) || 175 type.width >= 16) { 176 /* 177 * Shuffle. 178 */ 179 LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context); 180 LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; 181 182 for(j = 0; j < n; j += num_channels) 183 for(i = 0; i < num_channels; ++i) 184 shuffles[j + i] = LLVMConstInt(elem_type, j + channel, 0); 185 186 return LLVMBuildShuffleVector(builder, a, bld->undef, LLVMConstVector(shuffles, n), ""); 187 } 188 else if (num_channels == 2) { 189 /* 190 * Bit mask and shifts 191 * 192 * XY XY .... XY <= input 193 * 0Y 0Y .... 0Y 194 * YY YY .... YY 195 * YY YY .... YY <= output 196 */ 197 struct lp_type type2; 198 LLVMValueRef tmp = NULL; 199 int shift; 200 201 a = LLVMBuildAnd(builder, a, 202 lp_build_const_mask_aos(bld->gallivm, 203 type, 1 << channel, num_channels), ""); 204 205 type2 = type; 206 type2.floating = FALSE; 207 type2.width *= 2; 208 type2.length /= 2; 209 210 a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type2), ""); 211 212 /* 213 * Vector element 0 is always channel X. 214 * 215 * 76 54 32 10 (array numbering) 216 * Little endian reg in: YX YX YX YX 217 * Little endian reg out: YY YY YY YY if shift right (shift == -1) 218 * XX XX XX XX if shift left (shift == 1) 219 * 220 * 01 23 45 67 (array numbering) 221 * Big endian reg in: XY XY XY XY 222 * Big endian reg out: YY YY YY YY if shift left (shift == 1) 223 * XX XX XX XX if shift right (shift == -1) 224 * 225 */ 226#if UTIL_ARCH_LITTLE_ENDIAN 227 shift = channel == 0 ? 1 : -1; 228#else 229 shift = channel == 0 ? -1 : 1; 230#endif 231 232 if (shift > 0) { 233 tmp = LLVMBuildShl(builder, a, lp_build_const_int_vec(bld->gallivm, type2, shift * type.width), ""); 234 } else if (shift < 0) { 235 tmp = LLVMBuildLShr(builder, a, lp_build_const_int_vec(bld->gallivm, type2, -shift * type.width), ""); 236 } 237 238 assert(tmp); 239 if (tmp) { 240 a = LLVMBuildOr(builder, a, tmp, ""); 241 } 242 243 return LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type), ""); 244 } 245 else { 246 /* 247 * Bit mask and recursive shifts 248 * 249 * Little-endian registers: 250 * 251 * 7654 3210 252 * WZYX WZYX .... WZYX <= input 253 * 00Y0 00Y0 .... 00Y0 <= mask 254 * 00YY 00YY .... 00YY <= shift right 1 (shift amount -1) 255 * YYYY YYYY .... YYYY <= shift left 2 (shift amount 2) 256 * 257 * Big-endian registers: 258 * 259 * 0123 4567 260 * XYZW XYZW .... XYZW <= input 261 * 0Y00 0Y00 .... 0Y00 <= mask 262 * YY00 YY00 .... YY00 <= shift left 1 (shift amount 1) 263 * YYYY YYYY .... YYYY <= shift right 2 (shift amount -2) 264 * 265 * shifts[] gives little-endian shift amounts; we need to negate for big-endian. 266 */ 267 struct lp_type type4; 268 const int shifts[4][2] = { 269 { 1, 2}, 270 {-1, 2}, 271 { 1, -2}, 272 {-1, -2} 273 }; 274 unsigned i; 275 276 a = LLVMBuildAnd(builder, a, 277 lp_build_const_mask_aos(bld->gallivm, 278 type, 1 << channel, 4), ""); 279 280 /* 281 * Build a type where each element is an integer that cover the four 282 * channels. 283 */ 284 285 type4 = type; 286 type4.floating = FALSE; 287 type4.width *= 4; 288 type4.length /= 4; 289 290 a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type4), ""); 291 292 for(i = 0; i < 2; ++i) { 293 LLVMValueRef tmp = NULL; 294 int shift = shifts[channel][i]; 295 296 /* See endianness diagram above */ 297#if UTIL_ARCH_BIG_ENDIAN 298 shift = -shift; 299#endif 300 301 if(shift > 0) 302 tmp = LLVMBuildShl(builder, a, lp_build_const_int_vec(bld->gallivm, type4, shift*type.width), ""); 303 if(shift < 0) 304 tmp = LLVMBuildLShr(builder, a, lp_build_const_int_vec(bld->gallivm, type4, -shift*type.width), ""); 305 306 assert(tmp); 307 if(tmp) 308 a = LLVMBuildOr(builder, a, tmp, ""); 309 } 310 311 return LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type), ""); 312 } 313} 314 315 316/** 317 * Swizzle a vector consisting of an array of XYZW structs. 318 * 319 * This fills a vector of dst_len length with the swizzled channels from src. 320 * 321 * e.g. with swizzles = { 2, 1, 0 } and swizzle_count = 6 results in 322 * RGBA RGBA = BGR BGR BG 323 * 324 * @param swizzles the swizzle array 325 * @param num_swizzles the number of elements in swizzles 326 * @param dst_len the length of the result 327 */ 328LLVMValueRef 329lp_build_swizzle_aos_n(struct gallivm_state* gallivm, 330 LLVMValueRef src, 331 const unsigned char* swizzles, 332 unsigned num_swizzles, 333 unsigned dst_len) 334{ 335 LLVMBuilderRef builder = gallivm->builder; 336 LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH]; 337 unsigned i; 338 339 assert(dst_len < LP_MAX_VECTOR_WIDTH); 340 341 for (i = 0; i < dst_len; ++i) { 342 int swizzle = swizzles[i % num_swizzles]; 343 344 if (swizzle == LP_BLD_SWIZZLE_DONTCARE) { 345 shuffles[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context)); 346 } else { 347 shuffles[i] = lp_build_const_int32(gallivm, swizzle); 348 } 349 } 350 351 return LLVMBuildShuffleVector(builder, src, LLVMGetUndef(LLVMTypeOf(src)), LLVMConstVector(shuffles, dst_len), ""); 352} 353 354 355LLVMValueRef 356lp_build_swizzle_aos(struct lp_build_context *bld, 357 LLVMValueRef a, 358 const unsigned char swizzles[4]) 359{ 360 LLVMBuilderRef builder = bld->gallivm->builder; 361 const struct lp_type type = bld->type; 362 const unsigned n = type.length; 363 unsigned i, j; 364 365 if (swizzles[0] == PIPE_SWIZZLE_X && 366 swizzles[1] == PIPE_SWIZZLE_Y && 367 swizzles[2] == PIPE_SWIZZLE_Z && 368 swizzles[3] == PIPE_SWIZZLE_W) { 369 return a; 370 } 371 372 if (swizzles[0] == swizzles[1] && 373 swizzles[1] == swizzles[2] && 374 swizzles[2] == swizzles[3]) { 375 switch (swizzles[0]) { 376 case PIPE_SWIZZLE_X: 377 case PIPE_SWIZZLE_Y: 378 case PIPE_SWIZZLE_Z: 379 case PIPE_SWIZZLE_W: 380 return lp_build_swizzle_scalar_aos(bld, a, swizzles[0], 4); 381 case PIPE_SWIZZLE_0: 382 return bld->zero; 383 case PIPE_SWIZZLE_1: 384 return bld->one; 385 case LP_BLD_SWIZZLE_DONTCARE: 386 return bld->undef; 387 default: 388 assert(0); 389 return bld->undef; 390 } 391 } 392 393 if (LLVMIsConstant(a) || 394 type.width >= 16) { 395 /* 396 * Shuffle. 397 */ 398 LLVMValueRef undef = LLVMGetUndef(lp_build_elem_type(bld->gallivm, type)); 399 LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context); 400 LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; 401 LLVMValueRef aux[LP_MAX_VECTOR_LENGTH]; 402 403 memset(aux, 0, sizeof aux); 404 405 for(j = 0; j < n; j += 4) { 406 for(i = 0; i < 4; ++i) { 407 unsigned shuffle; 408 switch (swizzles[i]) { 409 default: 410 assert(0); 411#if defined(NDEBUG) || defined(DEBUG) 412 FALLTHROUGH; 413#endif 414 case PIPE_SWIZZLE_X: 415 case PIPE_SWIZZLE_Y: 416 case PIPE_SWIZZLE_Z: 417 case PIPE_SWIZZLE_W: 418 shuffle = j + swizzles[i]; 419 shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0); 420 break; 421 case PIPE_SWIZZLE_0: 422 shuffle = type.length + 0; 423 shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0); 424 if (!aux[0]) { 425 aux[0] = lp_build_const_elem(bld->gallivm, type, 0.0); 426 } 427 break; 428 case PIPE_SWIZZLE_1: 429 shuffle = type.length + 1; 430 shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0); 431 if (!aux[1]) { 432 aux[1] = lp_build_const_elem(bld->gallivm, type, 1.0); 433 } 434 break; 435 case LP_BLD_SWIZZLE_DONTCARE: 436 shuffles[j + i] = LLVMGetUndef(i32t); 437 break; 438 } 439 } 440 } 441 442 for (i = 0; i < n; ++i) { 443 if (!aux[i]) { 444 aux[i] = undef; 445 } 446 } 447 448 return LLVMBuildShuffleVector(builder, a, 449 LLVMConstVector(aux, n), 450 LLVMConstVector(shuffles, n), ""); 451 } else { 452 /* 453 * Bit mask and shifts. 454 * 455 * For example, this will convert BGRA to RGBA by doing 456 * 457 * Little endian: 458 * rgba = (bgra & 0x00ff0000) >> 16 459 * | (bgra & 0xff00ff00) 460 * | (bgra & 0x000000ff) << 16 461 * 462 * Big endian:A 463 * rgba = (bgra & 0x0000ff00) << 16 464 * | (bgra & 0x00ff00ff) 465 * | (bgra & 0xff000000) >> 16 466 * 467 * This is necessary not only for faster cause, but because X86 backend 468 * will refuse shuffles of <4 x i8> vectors 469 */ 470 LLVMValueRef res; 471 struct lp_type type4; 472 unsigned cond = 0; 473 int chan; 474 int shift; 475 476 /* 477 * Start with a mixture of 1 and 0. 478 */ 479 for (chan = 0; chan < 4; ++chan) { 480 if (swizzles[chan] == PIPE_SWIZZLE_1) { 481 cond |= 1 << chan; 482 } 483 } 484 res = lp_build_select_aos(bld, cond, bld->one, bld->zero, 4); 485 486 /* 487 * Build a type where each element is an integer that cover the four 488 * channels. 489 */ 490 type4 = type; 491 type4.floating = FALSE; 492 type4.width *= 4; 493 type4.length /= 4; 494 495 a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type4), ""); 496 res = LLVMBuildBitCast(builder, res, lp_build_vec_type(bld->gallivm, type4), ""); 497 498 /* 499 * Mask and shift the channels, trying to group as many channels in the 500 * same shift as possible. The shift amount is positive for shifts left 501 * and negative for shifts right. 502 */ 503 for (shift = -3; shift <= 3; ++shift) { 504 uint64_t mask = 0; 505 506 assert(type4.width <= sizeof(mask)*8); 507 508 /* 509 * Vector element numbers follow the XYZW order, so 0 is always X, etc. 510 * After widening 4 times we have: 511 * 512 * 3210 513 * Little-endian register layout: WZYX 514 * 515 * 0123 516 * Big-endian register layout: XYZW 517 * 518 * For little-endian, higher-numbered channels are obtained by a shift right 519 * (negative shift amount) and lower-numbered channels by a shift left 520 * (positive shift amount). The opposite is true for big-endian. 521 */ 522 for (chan = 0; chan < 4; ++chan) { 523 if (swizzles[chan] < 4) { 524 /* We need to move channel swizzles[chan] into channel chan */ 525#if UTIL_ARCH_LITTLE_ENDIAN 526 if (swizzles[chan] - chan == -shift) { 527 mask |= ((1ULL << type.width) - 1) << (swizzles[chan] * type.width); 528 } 529#else 530 if (swizzles[chan] - chan == shift) { 531 mask |= ((1ULL << type.width) - 1) << (type4.width - type.width) >> (swizzles[chan] * type.width); 532 } 533#endif 534 } 535 } 536 537 if (mask) { 538 LLVMValueRef masked; 539 LLVMValueRef shifted; 540 if (0) 541 debug_printf("shift = %i, mask = %" PRIx64 "\n", shift, mask); 542 543 masked = LLVMBuildAnd(builder, a, 544 lp_build_const_int_vec(bld->gallivm, type4, mask), ""); 545 if (shift > 0) { 546 shifted = LLVMBuildShl(builder, masked, 547 lp_build_const_int_vec(bld->gallivm, type4, shift*type.width), ""); 548 } else if (shift < 0) { 549 shifted = LLVMBuildLShr(builder, masked, 550 lp_build_const_int_vec(bld->gallivm, type4, -shift*type.width), ""); 551 } else { 552 shifted = masked; 553 } 554 555 res = LLVMBuildOr(builder, res, shifted, ""); 556 } 557 } 558 559 return LLVMBuildBitCast(builder, res, 560 lp_build_vec_type(bld->gallivm, type), ""); 561 } 562} 563 564 565/** 566 * Extended swizzle of a single channel of a SoA vector. 567 * 568 * @param bld building context 569 * @param unswizzled array with the 4 unswizzled values 570 * @param swizzle one of the PIPE_SWIZZLE_* 571 * 572 * @return the swizzled value. 573 */ 574LLVMValueRef 575lp_build_swizzle_soa_channel(struct lp_build_context *bld, 576 const LLVMValueRef *unswizzled, 577 unsigned swizzle) 578{ 579 switch (swizzle) { 580 case PIPE_SWIZZLE_X: 581 case PIPE_SWIZZLE_Y: 582 case PIPE_SWIZZLE_Z: 583 case PIPE_SWIZZLE_W: 584 return unswizzled[swizzle]; 585 case PIPE_SWIZZLE_0: 586 return bld->zero; 587 case PIPE_SWIZZLE_1: 588 return bld->one; 589 default: 590 assert(0); 591 return bld->undef; 592 } 593} 594 595 596/** 597 * Extended swizzle of a SoA vector. 598 * 599 * @param bld building context 600 * @param unswizzled array with the 4 unswizzled values 601 * @param swizzles array of PIPE_SWIZZLE_* 602 * @param swizzled output swizzled values 603 */ 604void 605lp_build_swizzle_soa(struct lp_build_context *bld, 606 const LLVMValueRef *unswizzled, 607 const unsigned char swizzles[4], 608 LLVMValueRef *swizzled) 609{ 610 unsigned chan; 611 612 for (chan = 0; chan < 4; ++chan) { 613 swizzled[chan] = lp_build_swizzle_soa_channel(bld, unswizzled, 614 swizzles[chan]); 615 } 616} 617 618 619/** 620 * Do an extended swizzle of a SoA vector inplace. 621 * 622 * @param bld building context 623 * @param values intput/output array with the 4 values 624 * @param swizzles array of PIPE_SWIZZLE_* 625 */ 626void 627lp_build_swizzle_soa_inplace(struct lp_build_context *bld, 628 LLVMValueRef *values, 629 const unsigned char swizzles[4]) 630{ 631 LLVMValueRef unswizzled[4]; 632 unsigned chan; 633 634 for (chan = 0; chan < 4; ++chan) { 635 unswizzled[chan] = values[chan]; 636 } 637 638 lp_build_swizzle_soa(bld, unswizzled, swizzles, values); 639} 640 641 642/** 643 * Transpose from AOS <-> SOA 644 * 645 * @param single_type_lp type of pixels 646 * @param src the 4 * n pixel input 647 * @param dst the 4 * n pixel output 648 */ 649void 650lp_build_transpose_aos(struct gallivm_state *gallivm, 651 struct lp_type single_type_lp, 652 const LLVMValueRef src[4], 653 LLVMValueRef dst[4]) 654{ 655 struct lp_type double_type_lp = single_type_lp; 656 LLVMTypeRef single_type; 657 LLVMTypeRef double_type; 658 LLVMValueRef t0 = NULL, t1 = NULL, t2 = NULL, t3 = NULL; 659 660 double_type_lp.length >>= 1; 661 double_type_lp.width <<= 1; 662 663 double_type = lp_build_vec_type(gallivm, double_type_lp); 664 single_type = lp_build_vec_type(gallivm, single_type_lp); 665 666 LLVMValueRef double_type_zero = LLVMConstNull(double_type); 667 /* Interleave x, y, z, w -> xy and zw */ 668 if (src[0] || src[1]) { 669 LLVMValueRef src0 = src[0]; 670 LLVMValueRef src1 = src[1]; 671 if (!src0) 672 src0 = LLVMConstNull(single_type); 673 if (!src1) 674 src1 = LLVMConstNull(single_type); 675 t0 = lp_build_interleave2_half(gallivm, single_type_lp, src0, src1, 0); 676 t2 = lp_build_interleave2_half(gallivm, single_type_lp, src0, src1, 1); 677 678 /* Cast to double width type for second interleave */ 679 t0 = LLVMBuildBitCast(gallivm->builder, t0, double_type, "t0"); 680 t2 = LLVMBuildBitCast(gallivm->builder, t2, double_type, "t2"); 681 } 682 if (src[2] || src[3]) { 683 LLVMValueRef src2 = src[2]; 684 LLVMValueRef src3 = src[3]; 685 if (!src2) 686 src2 = LLVMConstNull(single_type); 687 if (!src3) 688 src3 = LLVMConstNull(single_type); 689 t1 = lp_build_interleave2_half(gallivm, single_type_lp, src2, src3, 0); 690 t3 = lp_build_interleave2_half(gallivm, single_type_lp, src2, src3, 1); 691 692 /* Cast to double width type for second interleave */ 693 t1 = LLVMBuildBitCast(gallivm->builder, t1, double_type, "t1"); 694 t3 = LLVMBuildBitCast(gallivm->builder, t3, double_type, "t3"); 695 } 696 697 if (!t0) 698 t0 = double_type_zero; 699 if (!t1) 700 t1 = double_type_zero; 701 if (!t2) 702 t2 = double_type_zero; 703 if (!t3) 704 t3 = double_type_zero; 705 706 /* Interleave xy, zw -> xyzw */ 707 dst[0] = lp_build_interleave2_half(gallivm, double_type_lp, t0, t1, 0); 708 dst[1] = lp_build_interleave2_half(gallivm, double_type_lp, t0, t1, 1); 709 dst[2] = lp_build_interleave2_half(gallivm, double_type_lp, t2, t3, 0); 710 dst[3] = lp_build_interleave2_half(gallivm, double_type_lp, t2, t3, 1); 711 712 /* Cast back to original single width type */ 713 dst[0] = LLVMBuildBitCast(gallivm->builder, dst[0], single_type, "dst0"); 714 dst[1] = LLVMBuildBitCast(gallivm->builder, dst[1], single_type, "dst1"); 715 dst[2] = LLVMBuildBitCast(gallivm->builder, dst[2], single_type, "dst2"); 716 dst[3] = LLVMBuildBitCast(gallivm->builder, dst[3], single_type, "dst3"); 717} 718 719 720/** 721 * Transpose from AOS <-> SOA for num_srcs 722 */ 723void 724lp_build_transpose_aos_n(struct gallivm_state *gallivm, 725 struct lp_type type, 726 const LLVMValueRef* src, 727 unsigned num_srcs, 728 LLVMValueRef* dst) 729{ 730 switch (num_srcs) { 731 case 1: 732 dst[0] = src[0]; 733 break; 734 735 case 2: 736 { 737 /* Note: we must use a temporary incase src == dst */ 738 LLVMValueRef lo, hi; 739 740 lo = lp_build_interleave2_half(gallivm, type, src[0], src[1], 0); 741 hi = lp_build_interleave2_half(gallivm, type, src[0], src[1], 1); 742 743 dst[0] = lo; 744 dst[1] = hi; 745 break; 746 } 747 748 case 4: 749 lp_build_transpose_aos(gallivm, type, src, dst); 750 break; 751 752 default: 753 assert(0); 754 } 755} 756 757 758/** 759 * Pack n-th element of aos values, 760 * pad out to destination size. 761 * i.e. x1 y1 _ _ x2 y2 _ _ will become x1 x2 _ _ 762 */ 763LLVMValueRef 764lp_build_pack_aos_scalars(struct gallivm_state *gallivm, 765 struct lp_type src_type, 766 struct lp_type dst_type, 767 const LLVMValueRef src, 768 unsigned channel) 769{ 770 LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); 771 LLVMValueRef undef = LLVMGetUndef(i32t); 772 LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; 773 unsigned num_src = src_type.length / 4; 774 unsigned num_dst = dst_type.length; 775 unsigned i; 776 777 assert(num_src <= num_dst); 778 779 for (i = 0; i < num_src; i++) { 780 shuffles[i] = LLVMConstInt(i32t, i * 4 + channel, 0); 781 } 782 for (i = num_src; i < num_dst; i++) { 783 shuffles[i] = undef; 784 } 785 786 if (num_dst == 1) { 787 return LLVMBuildExtractElement(gallivm->builder, src, shuffles[0], ""); 788 } 789 else { 790 return LLVMBuildShuffleVector(gallivm->builder, src, src, 791 LLVMConstVector(shuffles, num_dst), ""); 792 } 793} 794 795 796/** 797 * Unpack and broadcast packed aos values consisting of only the 798 * first value, i.e. x1 x2 _ _ will become x1 x1 x1 x1 x2 x2 x2 x2 799 */ 800LLVMValueRef 801lp_build_unpack_broadcast_aos_scalars(struct gallivm_state *gallivm, 802 struct lp_type src_type, 803 struct lp_type dst_type, 804 const LLVMValueRef src) 805{ 806 LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); 807 LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; 808 unsigned num_dst = dst_type.length; 809 unsigned num_src = dst_type.length / 4; 810 unsigned i; 811 812 assert(num_dst / 4 <= src_type.length); 813 814 for (i = 0; i < num_src; i++) { 815 shuffles[i*4] = LLVMConstInt(i32t, i, 0); 816 shuffles[i*4+1] = LLVMConstInt(i32t, i, 0); 817 shuffles[i*4+2] = LLVMConstInt(i32t, i, 0); 818 shuffles[i*4+3] = LLVMConstInt(i32t, i, 0); 819 } 820 821 if (num_src == 1) { 822 return lp_build_extract_broadcast(gallivm, src_type, dst_type, 823 src, shuffles[0]); 824 } 825 else { 826 return LLVMBuildShuffleVector(gallivm->builder, src, src, 827 LLVMConstVector(shuffles, num_dst), ""); 828 } 829} 830 831