nir_constant_expressions.c revision 10e230b6
1/* 2 * Copyright (C) 2014 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Jason Ekstrand (jason@jlekstrand.net) 25 */ 26 27#include <math.h> 28#include "util/rounding.h" /* for _mesa_roundeven */ 29#include "util/half_float.h" 30#include "util/bigmath.h" 31#include "nir_constant_expressions.h" 32 33#define MAX_UINT_FOR_SIZE(bits) (UINT64_MAX >> (64 - (bits))) 34 35/** 36 * Evaluate one component of packSnorm4x8. 37 */ 38static uint8_t 39pack_snorm_1x8(float x) 40{ 41 /* From section 8.4 of the GLSL 4.30 spec: 42 * 43 * packSnorm4x8 44 * ------------ 45 * The conversion for component c of v to fixed point is done as 46 * follows: 47 * 48 * packSnorm4x8: round(clamp(c, -1, +1) * 127.0) 49 * 50 * We must first cast the float to an int, because casting a negative 51 * float to a uint is undefined. 
52 */ 53 return (uint8_t) (int) 54 _mesa_roundevenf(CLAMP(x, -1.0f, +1.0f) * 127.0f); 55} 56 57/** 58 * Evaluate one component of packSnorm2x16. 59 */ 60static uint16_t 61pack_snorm_1x16(float x) 62{ 63 /* From section 8.4 of the GLSL ES 3.00 spec: 64 * 65 * packSnorm2x16 66 * ------------- 67 * The conversion for component c of v to fixed point is done as 68 * follows: 69 * 70 * packSnorm2x16: round(clamp(c, -1, +1) * 32767.0) 71 * 72 * We must first cast the float to an int, because casting a negative 73 * float to a uint is undefined. 74 */ 75 return (uint16_t) (int) 76 _mesa_roundevenf(CLAMP(x, -1.0f, +1.0f) * 32767.0f); 77} 78 79/** 80 * Evaluate one component of unpackSnorm4x8. 81 */ 82static float 83unpack_snorm_1x8(uint8_t u) 84{ 85 /* From section 8.4 of the GLSL 4.30 spec: 86 * 87 * unpackSnorm4x8 88 * -------------- 89 * The conversion for unpacked fixed-point value f to floating point is 90 * done as follows: 91 * 92 * unpackSnorm4x8: clamp(f / 127.0, -1, +1) 93 */ 94 return CLAMP((int8_t) u / 127.0f, -1.0f, +1.0f); 95} 96 97/** 98 * Evaluate one component of unpackSnorm2x16. 99 */ 100static float 101unpack_snorm_1x16(uint16_t u) 102{ 103 /* From section 8.4 of the GLSL ES 3.00 spec: 104 * 105 * unpackSnorm2x16 106 * --------------- 107 * The conversion for unpacked fixed-point value f to floating point is 108 * done as follows: 109 * 110 * unpackSnorm2x16: clamp(f / 32767.0, -1, +1) 111 */ 112 return CLAMP((int16_t) u / 32767.0f, -1.0f, +1.0f); 113} 114 115/** 116 * Evaluate one component packUnorm4x8. 117 */ 118static uint8_t 119pack_unorm_1x8(float x) 120{ 121 /* From section 8.4 of the GLSL 4.30 spec: 122 * 123 * packUnorm4x8 124 * ------------ 125 * The conversion for component c of v to fixed point is done as 126 * follows: 127 * 128 * packUnorm4x8: round(clamp(c, 0, +1) * 255.0) 129 */ 130 return (uint8_t) (int) 131 _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 255.0f); 132} 133 134/** 135 * Evaluate one component packUnorm2x16. 
136 */ 137static uint16_t 138pack_unorm_1x16(float x) 139{ 140 /* From section 8.4 of the GLSL ES 3.00 spec: 141 * 142 * packUnorm2x16 143 * ------------- 144 * The conversion for component c of v to fixed point is done as 145 * follows: 146 * 147 * packUnorm2x16: round(clamp(c, 0, +1) * 65535.0) 148 */ 149 return (uint16_t) (int) 150 _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 65535.0f); 151} 152 153/** 154 * Evaluate one component of unpackUnorm4x8. 155 */ 156static float 157unpack_unorm_1x8(uint8_t u) 158{ 159 /* From section 8.4 of the GLSL 4.30 spec: 160 * 161 * unpackUnorm4x8 162 * -------------- 163 * The conversion for unpacked fixed-point value f to floating point is 164 * done as follows: 165 * 166 * unpackUnorm4x8: f / 255.0 167 */ 168 return (float) u / 255.0f; 169} 170 171/** 172 * Evaluate one component of unpackUnorm2x16. 173 */ 174static float 175unpack_unorm_1x16(uint16_t u) 176{ 177 /* From section 8.4 of the GLSL ES 3.00 spec: 178 * 179 * unpackUnorm2x16 180 * --------------- 181 * The conversion for unpacked fixed-point value f to floating point is 182 * done as follows: 183 * 184 * unpackUnorm2x16: f / 65535.0 185 */ 186 return (float) u / 65535.0f; 187} 188 189/** 190 * Evaluate one component of packHalf2x16. 191 */ 192static uint16_t 193pack_half_1x16(float x) 194{ 195 return _mesa_float_to_half(x); 196} 197 198/** 199 * Evaluate one component of unpackHalf2x16. 
200 */ 201static float 202unpack_half_1x16(uint16_t u) 203{ 204 return _mesa_half_to_float(u); 205} 206 207/* Some typed vector structures to make things like src0.y work */ 208typedef int8_t int1_t; 209typedef uint8_t uint1_t; 210typedef float float16_t; 211typedef float float32_t; 212typedef double float64_t; 213typedef bool bool1_t; 214typedef bool bool8_t; 215typedef bool bool16_t; 216typedef bool bool32_t; 217typedef bool bool64_t; 218struct float16_vec { 219 float16_t x; 220 float16_t y; 221 float16_t z; 222 float16_t w; 223}; 224struct float32_vec { 225 float32_t x; 226 float32_t y; 227 float32_t z; 228 float32_t w; 229}; 230struct float64_vec { 231 float64_t x; 232 float64_t y; 233 float64_t z; 234 float64_t w; 235}; 236struct int1_vec { 237 int1_t x; 238 int1_t y; 239 int1_t z; 240 int1_t w; 241}; 242struct int8_vec { 243 int8_t x; 244 int8_t y; 245 int8_t z; 246 int8_t w; 247}; 248struct int16_vec { 249 int16_t x; 250 int16_t y; 251 int16_t z; 252 int16_t w; 253}; 254struct int32_vec { 255 int32_t x; 256 int32_t y; 257 int32_t z; 258 int32_t w; 259}; 260struct int64_vec { 261 int64_t x; 262 int64_t y; 263 int64_t z; 264 int64_t w; 265}; 266struct uint1_vec { 267 uint1_t x; 268 uint1_t y; 269 uint1_t z; 270 uint1_t w; 271}; 272struct uint8_vec { 273 uint8_t x; 274 uint8_t y; 275 uint8_t z; 276 uint8_t w; 277}; 278struct uint16_vec { 279 uint16_t x; 280 uint16_t y; 281 uint16_t z; 282 uint16_t w; 283}; 284struct uint32_vec { 285 uint32_t x; 286 uint32_t y; 287 uint32_t z; 288 uint32_t w; 289}; 290struct uint64_vec { 291 uint64_t x; 292 uint64_t y; 293 uint64_t z; 294 uint64_t w; 295}; 296struct bool1_vec { 297 bool1_t x; 298 bool1_t y; 299 bool1_t z; 300 bool1_t w; 301}; 302struct bool32_vec { 303 bool32_t x; 304 bool32_t y; 305 bool32_t z; 306 bool32_t w; 307}; 308 309 310 311static void 312evaluate_b2f16(nir_const_value *_dst_val, 313 MAYBE_UNUSED unsigned num_components, 314 unsigned bit_size, 315 MAYBE_UNUSED nir_const_value **_src) 316{ 317 switch 
(bit_size) { 318 case 1: { 319 320 321 322 323 for (unsigned _i = 0; _i < num_components; _i++) { 324 const bool1_t src0 = 325 _src[0][_i].b; 326 327 float16_t dst = src0; 328 329 _dst_val[_i].u16 = _mesa_float_to_half(dst); 330 } 331 332 break; 333 } 334 case 32: { 335 336 337 338 339 for (unsigned _i = 0; _i < num_components; _i++) { 340 const bool32_t src0 = 341 _src[0][_i].i32; 342 343 float16_t dst = src0; 344 345 _dst_val[_i].u16 = _mesa_float_to_half(dst); 346 } 347 348 break; 349 } 350 351 default: 352 unreachable("unknown bit width"); 353 } 354} 355static void 356evaluate_b2f32(nir_const_value *_dst_val, 357 MAYBE_UNUSED unsigned num_components, 358 unsigned bit_size, 359 MAYBE_UNUSED nir_const_value **_src) 360{ 361 switch (bit_size) { 362 case 1: { 363 364 365 366 367 for (unsigned _i = 0; _i < num_components; _i++) { 368 const bool1_t src0 = 369 _src[0][_i].b; 370 371 float32_t dst = src0; 372 373 _dst_val[_i].f32 = dst; 374 } 375 376 break; 377 } 378 case 32: { 379 380 381 382 383 for (unsigned _i = 0; _i < num_components; _i++) { 384 const bool32_t src0 = 385 _src[0][_i].i32; 386 387 float32_t dst = src0; 388 389 _dst_val[_i].f32 = dst; 390 } 391 392 break; 393 } 394 395 default: 396 unreachable("unknown bit width"); 397 } 398} 399static void 400evaluate_b2f64(nir_const_value *_dst_val, 401 MAYBE_UNUSED unsigned num_components, 402 unsigned bit_size, 403 MAYBE_UNUSED nir_const_value **_src) 404{ 405 switch (bit_size) { 406 case 1: { 407 408 409 410 411 for (unsigned _i = 0; _i < num_components; _i++) { 412 const bool1_t src0 = 413 _src[0][_i].b; 414 415 float64_t dst = src0; 416 417 _dst_val[_i].f64 = dst; 418 } 419 420 break; 421 } 422 case 32: { 423 424 425 426 427 for (unsigned _i = 0; _i < num_components; _i++) { 428 const bool32_t src0 = 429 _src[0][_i].i32; 430 431 float64_t dst = src0; 432 433 _dst_val[_i].f64 = dst; 434 } 435 436 break; 437 } 438 439 default: 440 unreachable("unknown bit width"); 441 } 442} 443static void 
444evaluate_b2i1(nir_const_value *_dst_val, 445 MAYBE_UNUSED unsigned num_components, 446 unsigned bit_size, 447 MAYBE_UNUSED nir_const_value **_src) 448{ 449 switch (bit_size) { 450 case 1: { 451 452 453 454 455 for (unsigned _i = 0; _i < num_components; _i++) { 456 const bool1_t src0 = 457 _src[0][_i].b; 458 459 int1_t dst = src0; 460 461 /* 1-bit integers get truncated */ 462 _dst_val[_i].b = dst & 1; 463 } 464 465 break; 466 } 467 case 32: { 468 469 470 471 472 for (unsigned _i = 0; _i < num_components; _i++) { 473 const bool32_t src0 = 474 _src[0][_i].i32; 475 476 int1_t dst = src0; 477 478 /* 1-bit integers get truncated */ 479 _dst_val[_i].b = dst & 1; 480 } 481 482 break; 483 } 484 485 default: 486 unreachable("unknown bit width"); 487 } 488} 489static void 490evaluate_b2i16(nir_const_value *_dst_val, 491 MAYBE_UNUSED unsigned num_components, 492 unsigned bit_size, 493 MAYBE_UNUSED nir_const_value **_src) 494{ 495 switch (bit_size) { 496 case 1: { 497 498 499 500 501 for (unsigned _i = 0; _i < num_components; _i++) { 502 const bool1_t src0 = 503 _src[0][_i].b; 504 505 int16_t dst = src0; 506 507 _dst_val[_i].i16 = dst; 508 } 509 510 break; 511 } 512 case 32: { 513 514 515 516 517 for (unsigned _i = 0; _i < num_components; _i++) { 518 const bool32_t src0 = 519 _src[0][_i].i32; 520 521 int16_t dst = src0; 522 523 _dst_val[_i].i16 = dst; 524 } 525 526 break; 527 } 528 529 default: 530 unreachable("unknown bit width"); 531 } 532} 533static void 534evaluate_b2i32(nir_const_value *_dst_val, 535 MAYBE_UNUSED unsigned num_components, 536 unsigned bit_size, 537 MAYBE_UNUSED nir_const_value **_src) 538{ 539 switch (bit_size) { 540 case 1: { 541 542 543 544 545 for (unsigned _i = 0; _i < num_components; _i++) { 546 const bool1_t src0 = 547 _src[0][_i].b; 548 549 int32_t dst = src0; 550 551 _dst_val[_i].i32 = dst; 552 } 553 554 break; 555 } 556 case 32: { 557 558 559 560 561 for (unsigned _i = 0; _i < num_components; _i++) { 562 const bool32_t src0 = 563 
_src[0][_i].i32; 564 565 int32_t dst = src0; 566 567 _dst_val[_i].i32 = dst; 568 } 569 570 break; 571 } 572 573 default: 574 unreachable("unknown bit width"); 575 } 576} 577static void 578evaluate_b2i64(nir_const_value *_dst_val, 579 MAYBE_UNUSED unsigned num_components, 580 unsigned bit_size, 581 MAYBE_UNUSED nir_const_value **_src) 582{ 583 switch (bit_size) { 584 case 1: { 585 586 587 588 589 for (unsigned _i = 0; _i < num_components; _i++) { 590 const bool1_t src0 = 591 _src[0][_i].b; 592 593 int64_t dst = src0; 594 595 _dst_val[_i].i64 = dst; 596 } 597 598 break; 599 } 600 case 32: { 601 602 603 604 605 for (unsigned _i = 0; _i < num_components; _i++) { 606 const bool32_t src0 = 607 _src[0][_i].i32; 608 609 int64_t dst = src0; 610 611 _dst_val[_i].i64 = dst; 612 } 613 614 break; 615 } 616 617 default: 618 unreachable("unknown bit width"); 619 } 620} 621static void 622evaluate_b2i8(nir_const_value *_dst_val, 623 MAYBE_UNUSED unsigned num_components, 624 unsigned bit_size, 625 MAYBE_UNUSED nir_const_value **_src) 626{ 627 switch (bit_size) { 628 case 1: { 629 630 631 632 633 for (unsigned _i = 0; _i < num_components; _i++) { 634 const bool1_t src0 = 635 _src[0][_i].b; 636 637 int8_t dst = src0; 638 639 _dst_val[_i].i8 = dst; 640 } 641 642 break; 643 } 644 case 32: { 645 646 647 648 649 for (unsigned _i = 0; _i < num_components; _i++) { 650 const bool32_t src0 = 651 _src[0][_i].i32; 652 653 int8_t dst = src0; 654 655 _dst_val[_i].i8 = dst; 656 } 657 658 break; 659 } 660 661 default: 662 unreachable("unknown bit width"); 663 } 664} 665static void 666evaluate_b32all_fequal2(nir_const_value *_dst_val, 667 MAYBE_UNUSED unsigned num_components, 668 unsigned bit_size, 669 MAYBE_UNUSED nir_const_value **_src) 670{ 671 switch (bit_size) { 672 case 16: { 673 674 675 676 677 const struct float16_vec src0 = { 678 _mesa_half_to_float(_src[0][0].u16), 679 _mesa_half_to_float(_src[0][1].u16), 680 0, 681 0, 682 }; 683 684 const struct float16_vec src1 = { 685 
_mesa_half_to_float(_src[1][0].u16), 686 _mesa_half_to_float(_src[1][1].u16), 687 0, 688 0, 689 }; 690 691 struct bool32_vec dst; 692 693 dst.x = dst.y = dst.z = dst.w = ((src0.x == src1.x) && (src0.y == src1.y)); 694 695 _dst_val[0].i32 = -(int)dst.x; 696 697 break; 698 } 699 case 32: { 700 701 702 703 704 const struct float32_vec src0 = { 705 _src[0][0].f32, 706 _src[0][1].f32, 707 0, 708 0, 709 }; 710 711 const struct float32_vec src1 = { 712 _src[1][0].f32, 713 _src[1][1].f32, 714 0, 715 0, 716 }; 717 718 struct bool32_vec dst; 719 720 dst.x = dst.y = dst.z = dst.w = ((src0.x == src1.x) && (src0.y == src1.y)); 721 722 _dst_val[0].i32 = -(int)dst.x; 723 724 break; 725 } 726 case 64: { 727 728 729 730 731 const struct float64_vec src0 = { 732 _src[0][0].f64, 733 _src[0][1].f64, 734 0, 735 0, 736 }; 737 738 const struct float64_vec src1 = { 739 _src[1][0].f64, 740 _src[1][1].f64, 741 0, 742 0, 743 }; 744 745 struct bool32_vec dst; 746 747 dst.x = dst.y = dst.z = dst.w = ((src0.x == src1.x) && (src0.y == src1.y)); 748 749 _dst_val[0].i32 = -(int)dst.x; 750 751 break; 752 } 753 754 default: 755 unreachable("unknown bit width"); 756 } 757} 758static void 759evaluate_b32all_fequal3(nir_const_value *_dst_val, 760 MAYBE_UNUSED unsigned num_components, 761 unsigned bit_size, 762 MAYBE_UNUSED nir_const_value **_src) 763{ 764 switch (bit_size) { 765 case 16: { 766 767 768 769 770 const struct float16_vec src0 = { 771 _mesa_half_to_float(_src[0][0].u16), 772 _mesa_half_to_float(_src[0][1].u16), 773 _mesa_half_to_float(_src[0][2].u16), 774 0, 775 }; 776 777 const struct float16_vec src1 = { 778 _mesa_half_to_float(_src[1][0].u16), 779 _mesa_half_to_float(_src[1][1].u16), 780 _mesa_half_to_float(_src[1][2].u16), 781 0, 782 }; 783 784 struct bool32_vec dst; 785 786 dst.x = dst.y = dst.z = dst.w = ((src0.x == src1.x) && (src0.y == src1.y) && (src0.z == src1.z)); 787 788 _dst_val[0].i32 = -(int)dst.x; 789 790 break; 791 } 792 case 32: { 793 794 795 796 797 const struct 
float32_vec src0 = { 798 _src[0][0].f32, 799 _src[0][1].f32, 800 _src[0][2].f32, 801 0, 802 }; 803 804 const struct float32_vec src1 = { 805 _src[1][0].f32, 806 _src[1][1].f32, 807 _src[1][2].f32, 808 0, 809 }; 810 811 struct bool32_vec dst; 812 813 dst.x = dst.y = dst.z = dst.w = ((src0.x == src1.x) && (src0.y == src1.y) && (src0.z == src1.z)); 814 815 _dst_val[0].i32 = -(int)dst.x; 816 817 break; 818 } 819 case 64: { 820 821 822 823 824 const struct float64_vec src0 = { 825 _src[0][0].f64, 826 _src[0][1].f64, 827 _src[0][2].f64, 828 0, 829 }; 830 831 const struct float64_vec src1 = { 832 _src[1][0].f64, 833 _src[1][1].f64, 834 _src[1][2].f64, 835 0, 836 }; 837 838 struct bool32_vec dst; 839 840 dst.x = dst.y = dst.z = dst.w = ((src0.x == src1.x) && (src0.y == src1.y) && (src0.z == src1.z)); 841 842 _dst_val[0].i32 = -(int)dst.x; 843 844 break; 845 } 846 847 default: 848 unreachable("unknown bit width"); 849 } 850} 851static void 852evaluate_b32all_fequal4(nir_const_value *_dst_val, 853 MAYBE_UNUSED unsigned num_components, 854 unsigned bit_size, 855 MAYBE_UNUSED nir_const_value **_src) 856{ 857 switch (bit_size) { 858 case 16: { 859 860 861 862 863 const struct float16_vec src0 = { 864 _mesa_half_to_float(_src[0][0].u16), 865 _mesa_half_to_float(_src[0][1].u16), 866 _mesa_half_to_float(_src[0][2].u16), 867 _mesa_half_to_float(_src[0][3].u16), 868 }; 869 870 const struct float16_vec src1 = { 871 _mesa_half_to_float(_src[1][0].u16), 872 _mesa_half_to_float(_src[1][1].u16), 873 _mesa_half_to_float(_src[1][2].u16), 874 _mesa_half_to_float(_src[1][3].u16), 875 }; 876 877 struct bool32_vec dst; 878 879 dst.x = dst.y = dst.z = dst.w = ((src0.x == src1.x) && (src0.y == src1.y) && (src0.z == src1.z) && (src0.w == src1.w)); 880 881 _dst_val[0].i32 = -(int)dst.x; 882 883 break; 884 } 885 case 32: { 886 887 888 889 890 const struct float32_vec src0 = { 891 _src[0][0].f32, 892 _src[0][1].f32, 893 _src[0][2].f32, 894 _src[0][3].f32, 895 }; 896 897 const struct float32_vec src1 
= { 898 _src[1][0].f32, 899 _src[1][1].f32, 900 _src[1][2].f32, 901 _src[1][3].f32, 902 }; 903 904 struct bool32_vec dst; 905 906 dst.x = dst.y = dst.z = dst.w = ((src0.x == src1.x) && (src0.y == src1.y) && (src0.z == src1.z) && (src0.w == src1.w)); 907 908 _dst_val[0].i32 = -(int)dst.x; 909 910 break; 911 } 912 case 64: { 913 914 915 916 917 const struct float64_vec src0 = { 918 _src[0][0].f64, 919 _src[0][1].f64, 920 _src[0][2].f64, 921 _src[0][3].f64, 922 }; 923 924 const struct float64_vec src1 = { 925 _src[1][0].f64, 926 _src[1][1].f64, 927 _src[1][2].f64, 928 _src[1][3].f64, 929 }; 930 931 struct bool32_vec dst; 932 933 dst.x = dst.y = dst.z = dst.w = ((src0.x == src1.x) && (src0.y == src1.y) && (src0.z == src1.z) && (src0.w == src1.w)); 934 935 _dst_val[0].i32 = -(int)dst.x; 936 937 break; 938 } 939 940 default: 941 unreachable("unknown bit width"); 942 } 943} 944static void 945evaluate_b32all_iequal2(nir_const_value *_dst_val, 946 MAYBE_UNUSED unsigned num_components, 947 unsigned bit_size, 948 MAYBE_UNUSED nir_const_value **_src) 949{ 950 switch (bit_size) { 951 case 1: { 952 953 954 955 956 const struct int1_vec src0 = { 957 /* 1-bit integers use a 0/-1 convention */ 958 -(int1_t)_src[0][0].b, 959 /* 1-bit integers use a 0/-1 convention */ 960 -(int1_t)_src[0][1].b, 961 0, 962 0, 963 }; 964 965 const struct int1_vec src1 = { 966 /* 1-bit integers use a 0/-1 convention */ 967 -(int1_t)_src[1][0].b, 968 /* 1-bit integers use a 0/-1 convention */ 969 -(int1_t)_src[1][1].b, 970 0, 971 0, 972 }; 973 974 struct bool32_vec dst; 975 976 dst.x = dst.y = dst.z = dst.w = ((src0.x == src1.x) && (src0.y == src1.y)); 977 978 _dst_val[0].i32 = -(int)dst.x; 979 980 break; 981 } 982 case 8: { 983 984 985 986 987 const struct int8_vec src0 = { 988 _src[0][0].i8, 989 _src[0][1].i8, 990 0, 991 0, 992 }; 993 994 const struct int8_vec src1 = { 995 _src[1][0].i8, 996 _src[1][1].i8, 997 0, 998 0, 999 }; 1000 1001 struct bool32_vec dst; 1002 1003 dst.x = dst.y = dst.z = dst.w = 
((src0.x == src1.x) && (src0.y == src1.y)); 1004 1005 _dst_val[0].i32 = -(int)dst.x; 1006 1007 break; 1008 } 1009 case 16: { 1010 1011 1012 1013 1014 const struct int16_vec src0 = { 1015 _src[0][0].i16, 1016 _src[0][1].i16, 1017 0, 1018 0, 1019 }; 1020 1021 const struct int16_vec src1 = { 1022 _src[1][0].i16, 1023 _src[1][1].i16, 1024 0, 1025 0, 1026 }; 1027 1028 struct bool32_vec dst; 1029 1030 dst.x = dst.y = dst.z = dst.w = ((src0.x == src1.x) && (src0.y == src1.y)); 1031 1032 _dst_val[0].i32 = -(int)dst.x; 1033 1034 break; 1035 } 1036 case 32: { 1037 1038 1039 1040 1041 const struct int32_vec src0 = { 1042 _src[0][0].i32, 1043 _src[0][1].i32, 1044 0, 1045 0, 1046 }; 1047 1048 const struct int32_vec src1 = { 1049 _src[1][0].i32, 1050 _src[1][1].i32, 1051 0, 1052 0, 1053 }; 1054 1055 struct bool32_vec dst; 1056 1057 dst.x = dst.y = dst.z = dst.w = ((src0.x == src1.x) && (src0.y == src1.y)); 1058 1059 _dst_val[0].i32 = -(int)dst.x; 1060 1061 break; 1062 } 1063 case 64: { 1064 1065 1066 1067 1068 const struct int64_vec src0 = { 1069 _src[0][0].i64, 1070 _src[0][1].i64, 1071 0, 1072 0, 1073 }; 1074 1075 const struct int64_vec src1 = { 1076 _src[1][0].i64, 1077 _src[1][1].i64, 1078 0, 1079 0, 1080 }; 1081 1082 struct bool32_vec dst; 1083 1084 dst.x = dst.y = dst.z = dst.w = ((src0.x == src1.x) && (src0.y == src1.y)); 1085 1086 _dst_val[0].i32 = -(int)dst.x; 1087 1088 break; 1089 } 1090 1091 default: 1092 unreachable("unknown bit width"); 1093 } 1094} 1095static void 1096evaluate_b32all_iequal3(nir_const_value *_dst_val, 1097 MAYBE_UNUSED unsigned num_components, 1098 unsigned bit_size, 1099 MAYBE_UNUSED nir_const_value **_src) 1100{ 1101 switch (bit_size) { 1102 case 1: { 1103 1104 1105 1106 1107 const struct int1_vec src0 = { 1108 /* 1-bit integers use a 0/-1 convention */ 1109 -(int1_t)_src[0][0].b, 1110 /* 1-bit integers use a 0/-1 convention */ 1111 -(int1_t)_src[0][1].b, 1112 /* 1-bit integers use a 0/-1 convention */ 1113 -(int1_t)_src[0][2].b, 1114 0, 1115 }; 
1116 1117 const struct int1_vec src1 = { 1118 /* 1-bit integers use a 0/-1 convention */ 1119 -(int1_t)_src[1][0].b, 1120 /* 1-bit integers use a 0/-1 convention */ 1121 -(int1_t)_src[1][1].b, 1122 /* 1-bit integers use a 0/-1 convention */ 1123 -(int1_t)_src[1][2].b, 1124 0, 1125 }; 1126 1127 struct bool32_vec dst; 1128 1129 dst.x = dst.y = dst.z = dst.w = ((src0.x == src1.x) && (src0.y == src1.y) && (src0.z == src1.z)); 1130 1131 _dst_val[0].i32 = -(int)dst.x; 1132 1133 break; 1134 } 1135 case 8: { 1136 1137 1138 1139 1140 const struct int8_vec src0 = { 1141 _src[0][0].i8, 1142 _src[0][1].i8, 1143 _src[0][2].i8, 1144 0, 1145 }; 1146 1147 const struct int8_vec src1 = { 1148 _src[1][0].i8, 1149 _src[1][1].i8, 1150 _src[1][2].i8, 1151 0, 1152 }; 1153 1154 struct bool32_vec dst; 1155 1156 dst.x = dst.y = dst.z = dst.w = ((src0.x == src1.x) && (src0.y == src1.y) && (src0.z == src1.z)); 1157 1158 _dst_val[0].i32 = -(int)dst.x; 1159 1160 break; 1161 } 1162 case 16: { 1163 1164 1165 1166 1167 const struct int16_vec src0 = { 1168 _src[0][0].i16, 1169 _src[0][1].i16, 1170 _src[0][2].i16, 1171 0, 1172 }; 1173 1174 const struct int16_vec src1 = { 1175 _src[1][0].i16, 1176 _src[1][1].i16, 1177 _src[1][2].i16, 1178 0, 1179 }; 1180 1181 struct bool32_vec dst; 1182 1183 dst.x = dst.y = dst.z = dst.w = ((src0.x == src1.x) && (src0.y == src1.y) && (src0.z == src1.z)); 1184 1185 _dst_val[0].i32 = -(int)dst.x; 1186 1187 break; 1188 } 1189 case 32: { 1190 1191 1192 1193 1194 const struct int32_vec src0 = { 1195 _src[0][0].i32, 1196 _src[0][1].i32, 1197 _src[0][2].i32, 1198 0, 1199 }; 1200 1201 const struct int32_vec src1 = { 1202 _src[1][0].i32, 1203 _src[1][1].i32, 1204 _src[1][2].i32, 1205 0, 1206 }; 1207 1208 struct bool32_vec dst; 1209 1210 dst.x = dst.y = dst.z = dst.w = ((src0.x == src1.x) && (src0.y == src1.y) && (src0.z == src1.z)); 1211 1212 _dst_val[0].i32 = -(int)dst.x; 1213 1214 break; 1215 } 1216 case 64: { 1217 1218 1219 1220 1221 const struct int64_vec src0 = { 1222 
_src[0][0].i64, 1223 _src[0][1].i64, 1224 _src[0][2].i64, 1225 0, 1226 }; 1227 1228 const struct int64_vec src1 = { 1229 _src[1][0].i64, 1230 _src[1][1].i64, 1231 _src[1][2].i64, 1232 0, 1233 }; 1234 1235 struct bool32_vec dst; 1236 1237 dst.x = dst.y = dst.z = dst.w = ((src0.x == src1.x) && (src0.y == src1.y) && (src0.z == src1.z)); 1238 1239 _dst_val[0].i32 = -(int)dst.x; 1240 1241 break; 1242 } 1243 1244 default: 1245 unreachable("unknown bit width"); 1246 } 1247} 1248static void 1249evaluate_b32all_iequal4(nir_const_value *_dst_val, 1250 MAYBE_UNUSED unsigned num_components, 1251 unsigned bit_size, 1252 MAYBE_UNUSED nir_const_value **_src) 1253{ 1254 switch (bit_size) { 1255 case 1: { 1256 1257 1258 1259 1260 const struct int1_vec src0 = { 1261 /* 1-bit integers use a 0/-1 convention */ 1262 -(int1_t)_src[0][0].b, 1263 /* 1-bit integers use a 0/-1 convention */ 1264 -(int1_t)_src[0][1].b, 1265 /* 1-bit integers use a 0/-1 convention */ 1266 -(int1_t)_src[0][2].b, 1267 /* 1-bit integers use a 0/-1 convention */ 1268 -(int1_t)_src[0][3].b, 1269 }; 1270 1271 const struct int1_vec src1 = { 1272 /* 1-bit integers use a 0/-1 convention */ 1273 -(int1_t)_src[1][0].b, 1274 /* 1-bit integers use a 0/-1 convention */ 1275 -(int1_t)_src[1][1].b, 1276 /* 1-bit integers use a 0/-1 convention */ 1277 -(int1_t)_src[1][2].b, 1278 /* 1-bit integers use a 0/-1 convention */ 1279 -(int1_t)_src[1][3].b, 1280 }; 1281 1282 struct bool32_vec dst; 1283 1284 dst.x = dst.y = dst.z = dst.w = ((src0.x == src1.x) && (src0.y == src1.y) && (src0.z == src1.z) && (src0.w == src1.w)); 1285 1286 _dst_val[0].i32 = -(int)dst.x; 1287 1288 break; 1289 } 1290 case 8: { 1291 1292 1293 1294 1295 const struct int8_vec src0 = { 1296 _src[0][0].i8, 1297 _src[0][1].i8, 1298 _src[0][2].i8, 1299 _src[0][3].i8, 1300 }; 1301 1302 const struct int8_vec src1 = { 1303 _src[1][0].i8, 1304 _src[1][1].i8, 1305 _src[1][2].i8, 1306 _src[1][3].i8, 1307 }; 1308 1309 struct bool32_vec dst; 1310 1311 dst.x = dst.y = dst.z 
= dst.w = ((src0.x == src1.x) && (src0.y == src1.y) && (src0.z == src1.z) && (src0.w == src1.w)); 1312 1313 _dst_val[0].i32 = -(int)dst.x; 1314 1315 break; 1316 } 1317 case 16: { 1318 1319 1320 1321 1322 const struct int16_vec src0 = { 1323 _src[0][0].i16, 1324 _src[0][1].i16, 1325 _src[0][2].i16, 1326 _src[0][3].i16, 1327 }; 1328 1329 const struct int16_vec src1 = { 1330 _src[1][0].i16, 1331 _src[1][1].i16, 1332 _src[1][2].i16, 1333 _src[1][3].i16, 1334 }; 1335 1336 struct bool32_vec dst; 1337 1338 dst.x = dst.y = dst.z = dst.w = ((src0.x == src1.x) && (src0.y == src1.y) && (src0.z == src1.z) && (src0.w == src1.w)); 1339 1340 _dst_val[0].i32 = -(int)dst.x; 1341 1342 break; 1343 } 1344 case 32: { 1345 1346 1347 1348 1349 const struct int32_vec src0 = { 1350 _src[0][0].i32, 1351 _src[0][1].i32, 1352 _src[0][2].i32, 1353 _src[0][3].i32, 1354 }; 1355 1356 const struct int32_vec src1 = { 1357 _src[1][0].i32, 1358 _src[1][1].i32, 1359 _src[1][2].i32, 1360 _src[1][3].i32, 1361 }; 1362 1363 struct bool32_vec dst; 1364 1365 dst.x = dst.y = dst.z = dst.w = ((src0.x == src1.x) && (src0.y == src1.y) && (src0.z == src1.z) && (src0.w == src1.w)); 1366 1367 _dst_val[0].i32 = -(int)dst.x; 1368 1369 break; 1370 } 1371 case 64: { 1372 1373 1374 1375 1376 const struct int64_vec src0 = { 1377 _src[0][0].i64, 1378 _src[0][1].i64, 1379 _src[0][2].i64, 1380 _src[0][3].i64, 1381 }; 1382 1383 const struct int64_vec src1 = { 1384 _src[1][0].i64, 1385 _src[1][1].i64, 1386 _src[1][2].i64, 1387 _src[1][3].i64, 1388 }; 1389 1390 struct bool32_vec dst; 1391 1392 dst.x = dst.y = dst.z = dst.w = ((src0.x == src1.x) && (src0.y == src1.y) && (src0.z == src1.z) && (src0.w == src1.w)); 1393 1394 _dst_val[0].i32 = -(int)dst.x; 1395 1396 break; 1397 } 1398 1399 default: 1400 unreachable("unknown bit width"); 1401 } 1402} 1403static void 1404evaluate_b32any_fnequal2(nir_const_value *_dst_val, 1405 MAYBE_UNUSED unsigned num_components, 1406 unsigned bit_size, 1407 MAYBE_UNUSED nir_const_value **_src) 
1408{ 1409 switch (bit_size) { 1410 case 16: { 1411 1412 1413 1414 1415 const struct float16_vec src0 = { 1416 _mesa_half_to_float(_src[0][0].u16), 1417 _mesa_half_to_float(_src[0][1].u16), 1418 0, 1419 0, 1420 }; 1421 1422 const struct float16_vec src1 = { 1423 _mesa_half_to_float(_src[1][0].u16), 1424 _mesa_half_to_float(_src[1][1].u16), 1425 0, 1426 0, 1427 }; 1428 1429 struct bool32_vec dst; 1430 1431 dst.x = dst.y = dst.z = dst.w = ((src0.x != src1.x) || (src0.y != src1.y)); 1432 1433 _dst_val[0].i32 = -(int)dst.x; 1434 1435 break; 1436 } 1437 case 32: { 1438 1439 1440 1441 1442 const struct float32_vec src0 = { 1443 _src[0][0].f32, 1444 _src[0][1].f32, 1445 0, 1446 0, 1447 }; 1448 1449 const struct float32_vec src1 = { 1450 _src[1][0].f32, 1451 _src[1][1].f32, 1452 0, 1453 0, 1454 }; 1455 1456 struct bool32_vec dst; 1457 1458 dst.x = dst.y = dst.z = dst.w = ((src0.x != src1.x) || (src0.y != src1.y)); 1459 1460 _dst_val[0].i32 = -(int)dst.x; 1461 1462 break; 1463 } 1464 case 64: { 1465 1466 1467 1468 1469 const struct float64_vec src0 = { 1470 _src[0][0].f64, 1471 _src[0][1].f64, 1472 0, 1473 0, 1474 }; 1475 1476 const struct float64_vec src1 = { 1477 _src[1][0].f64, 1478 _src[1][1].f64, 1479 0, 1480 0, 1481 }; 1482 1483 struct bool32_vec dst; 1484 1485 dst.x = dst.y = dst.z = dst.w = ((src0.x != src1.x) || (src0.y != src1.y)); 1486 1487 _dst_val[0].i32 = -(int)dst.x; 1488 1489 break; 1490 } 1491 1492 default: 1493 unreachable("unknown bit width"); 1494 } 1495} 1496static void 1497evaluate_b32any_fnequal3(nir_const_value *_dst_val, 1498 MAYBE_UNUSED unsigned num_components, 1499 unsigned bit_size, 1500 MAYBE_UNUSED nir_const_value **_src) 1501{ 1502 switch (bit_size) { 1503 case 16: { 1504 1505 1506 1507 1508 const struct float16_vec src0 = { 1509 _mesa_half_to_float(_src[0][0].u16), 1510 _mesa_half_to_float(_src[0][1].u16), 1511 _mesa_half_to_float(_src[0][2].u16), 1512 0, 1513 }; 1514 1515 const struct float16_vec src1 = { 1516 
_mesa_half_to_float(_src[1][0].u16), 1517 _mesa_half_to_float(_src[1][1].u16), 1518 _mesa_half_to_float(_src[1][2].u16), 1519 0, 1520 }; 1521 1522 struct bool32_vec dst; 1523 1524 dst.x = dst.y = dst.z = dst.w = ((src0.x != src1.x) || (src0.y != src1.y) || (src0.z != src1.z)); 1525 1526 _dst_val[0].i32 = -(int)dst.x; 1527 1528 break; 1529 } 1530 case 32: { 1531 1532 1533 1534 1535 const struct float32_vec src0 = { 1536 _src[0][0].f32, 1537 _src[0][1].f32, 1538 _src[0][2].f32, 1539 0, 1540 }; 1541 1542 const struct float32_vec src1 = { 1543 _src[1][0].f32, 1544 _src[1][1].f32, 1545 _src[1][2].f32, 1546 0, 1547 }; 1548 1549 struct bool32_vec dst; 1550 1551 dst.x = dst.y = dst.z = dst.w = ((src0.x != src1.x) || (src0.y != src1.y) || (src0.z != src1.z)); 1552 1553 _dst_val[0].i32 = -(int)dst.x; 1554 1555 break; 1556 } 1557 case 64: { 1558 1559 1560 1561 1562 const struct float64_vec src0 = { 1563 _src[0][0].f64, 1564 _src[0][1].f64, 1565 _src[0][2].f64, 1566 0, 1567 }; 1568 1569 const struct float64_vec src1 = { 1570 _src[1][0].f64, 1571 _src[1][1].f64, 1572 _src[1][2].f64, 1573 0, 1574 }; 1575 1576 struct bool32_vec dst; 1577 1578 dst.x = dst.y = dst.z = dst.w = ((src0.x != src1.x) || (src0.y != src1.y) || (src0.z != src1.z)); 1579 1580 _dst_val[0].i32 = -(int)dst.x; 1581 1582 break; 1583 } 1584 1585 default: 1586 unreachable("unknown bit width"); 1587 } 1588}
/**
 * Evaluate b32any_fnequal4 on constant operands.
 *
 * Compares the first four components of _src[0] and _src[1] as floats of
 * the given bit_size (16-bit inputs are widened with _mesa_half_to_float)
 * and stores -1 in _dst_val[0].i32 if any pair differs, 0 otherwise.
 */
static void
evaluate_b32any_fnequal4(nir_const_value *_dst_val,
                         MAYBE_UNUSED unsigned num_components,
                         unsigned bit_size,
                         MAYBE_UNUSED nir_const_value **_src)
{
   switch (bit_size) {
   case 16: {
      const struct float16_vec lhs = {
         _mesa_half_to_float(_src[0][0].u16),
         _mesa_half_to_float(_src[0][1].u16),
         _mesa_half_to_float(_src[0][2].u16),
         _mesa_half_to_float(_src[0][3].u16),
      };
      const struct float16_vec rhs = {
         _mesa_half_to_float(_src[1][0].u16),
         _mesa_half_to_float(_src[1][1].u16),
         _mesa_half_to_float(_src[1][2].u16),
         _mesa_half_to_float(_src[1][3].u16),
      };
      const int any_ne = lhs.x != rhs.x || lhs.y != rhs.y ||
                         lhs.z != rhs.z || lhs.w != rhs.w;

      _dst_val[0].i32 = -any_ne;
      return;
   }
   case 32: {
      const struct float32_vec lhs = {
         _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, _src[0][3].f32,
      };
      const struct float32_vec rhs = {
         _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, _src[1][3].f32,
      };
      const int any_ne = lhs.x != rhs.x || lhs.y != rhs.y ||
                         lhs.z != rhs.z || lhs.w != rhs.w;

      _dst_val[0].i32 = -any_ne;
      return;
   }
   case 64: {
      const struct float64_vec lhs = {
         _src[0][0].f64, _src[0][1].f64, _src[0][2].f64, _src[0][3].f64,
      };
      const struct float64_vec rhs = {
         _src[1][0].f64, _src[1][1].f64, _src[1][2].f64, _src[1][3].f64,
      };
      const int any_ne = lhs.x != rhs.x || lhs.y != rhs.y ||
                         lhs.z != rhs.z || lhs.w != rhs.w;

      _dst_val[0].i32 = -any_ne;
      return;
   }
   default:
      unreachable("unknown bit width");
   }
}

/**
 * Evaluate b32any_inequal2 on constant operands.
 *
 * Compares the first two components of _src[0] and _src[1] as signed
 * integers of the given bit_size (1-bit inputs are read from .b and mapped
 * to 0/-1) and stores -1 in _dst_val[0].i32 if any pair differs, 0
 * otherwise.
 */
static void
evaluate_b32any_inequal2(nir_const_value *_dst_val,
                         MAYBE_UNUSED unsigned num_components,
                         unsigned bit_size,
                         MAYBE_UNUSED nir_const_value **_src)
{
   switch (bit_size) {
   case 1: {
      /* 1-bit integers use a 0/-1 convention */
      const struct int1_vec lhs = {
         -(int1_t)_src[0][0].b, -(int1_t)_src[0][1].b, 0, 0,
      };
      const struct int1_vec rhs = {
         -(int1_t)_src[1][0].b, -(int1_t)_src[1][1].b, 0, 0,
      };
      const int any_ne = lhs.x != rhs.x || lhs.y != rhs.y;

      _dst_val[0].i32 = -any_ne;
      return;
   }
   case 8: {
      const struct int8_vec lhs = { _src[0][0].i8, _src[0][1].i8, 0, 0, };
      const struct int8_vec rhs = { _src[1][0].i8, _src[1][1].i8, 0, 0, };
      const int any_ne = lhs.x != rhs.x || lhs.y != rhs.y;

      _dst_val[0].i32 = -any_ne;
      return;
   }
   case 16: {
      const struct int16_vec lhs = { _src[0][0].i16, _src[0][1].i16, 0, 0, };
      const struct int16_vec rhs = { _src[1][0].i16, _src[1][1].i16, 0, 0, };
      const int any_ne = lhs.x != rhs.x || lhs.y != rhs.y;

      _dst_val[0].i32 = -any_ne;
      return;
   }
   case 32: {
      const struct int32_vec lhs = { _src[0][0].i32, _src[0][1].i32, 0, 0, };
      const struct int32_vec rhs = { _src[1][0].i32, _src[1][1].i32, 0, 0, };
      const int any_ne = lhs.x != rhs.x || lhs.y != rhs.y;

      _dst_val[0].i32 = -any_ne;
      return;
   }
   case 64: {
      const struct int64_vec lhs = { _src[0][0].i64, _src[0][1].i64, 0, 0, };
      const struct int64_vec rhs = { _src[1][0].i64, _src[1][1].i64, 0, 0, };
      const int any_ne = lhs.x != rhs.x || lhs.y != rhs.y;

      _dst_val[0].i32 = -any_ne;
      return;
   }
   default:
      unreachable("unknown bit width");
   }
}

/**
 * Evaluate b32any_inequal3 on constant operands.
 *
 * Compares the first three components of _src[0] and _src[1] as signed
 * integers of the given bit_size (1-bit inputs are read from .b and mapped
 * to 0/-1) and stores -1 in _dst_val[0].i32 if any pair differs, 0
 * otherwise.
 */
static void
evaluate_b32any_inequal3(nir_const_value *_dst_val,
                         MAYBE_UNUSED unsigned num_components,
                         unsigned bit_size,
                         MAYBE_UNUSED nir_const_value **_src)
{
   switch (bit_size) {
   case 1: {
      /* 1-bit integers use a 0/-1 convention */
      const struct int1_vec lhs = {
         -(int1_t)_src[0][0].b, -(int1_t)_src[0][1].b,
         -(int1_t)_src[0][2].b, 0,
      };
      const struct int1_vec rhs = {
         -(int1_t)_src[1][0].b, -(int1_t)_src[1][1].b,
         -(int1_t)_src[1][2].b, 0,
      };
      const int any_ne = lhs.x != rhs.x || lhs.y != rhs.y || lhs.z != rhs.z;

      _dst_val[0].i32 = -any_ne;
      return;
   }
   case 8: {
      const struct int8_vec lhs = {
         _src[0][0].i8, _src[0][1].i8, _src[0][2].i8, 0,
      };
      const struct int8_vec rhs = {
         _src[1][0].i8, _src[1][1].i8, _src[1][2].i8, 0,
      };
      const int any_ne = lhs.x != rhs.x || lhs.y != rhs.y || lhs.z != rhs.z;

      _dst_val[0].i32 = -any_ne;
      return;
   }
   case 16: {
      const struct int16_vec lhs = {
         _src[0][0].i16, _src[0][1].i16, _src[0][2].i16, 0,
      };
      const struct int16_vec rhs = {
         _src[1][0].i16, _src[1][1].i16, _src[1][2].i16, 0,
      };
      const int any_ne = lhs.x != rhs.x || lhs.y != rhs.y || lhs.z != rhs.z;

      _dst_val[0].i32 = -any_ne;
      return;
   }
   case 32: {
      const struct int32_vec lhs = {
         _src[0][0].i32, _src[0][1].i32, _src[0][2].i32, 0,
      };
      const struct int32_vec rhs = {
         _src[1][0].i32, _src[1][1].i32, _src[1][2].i32, 0,
      };
      const int any_ne = lhs.x != rhs.x || lhs.y != rhs.y || lhs.z != rhs.z;

      _dst_val[0].i32 = -any_ne;
      return;
   }
   case 64: {
      const struct int64_vec lhs = {
         _src[0][0].i64, _src[0][1].i64, _src[0][2].i64, 0,
      };
      const struct int64_vec rhs = {
         _src[1][0].i64, _src[1][1].i64, _src[1][2].i64, 0,
      };
      const int any_ne = lhs.x != rhs.x || lhs.y != rhs.y || lhs.z != rhs.z;

      _dst_val[0].i32 = -any_ne;
      return;
   }
   default:
      unreachable("unknown bit width");
   }
}

/**
 * Evaluate b32any_inequal4 on constant operands.
 *
 * Compares the first four components of _src[0] and _src[1] as signed
 * integers of the given bit_size (1-bit inputs are read from .b and mapped
 * to 0/-1) and stores -1 in _dst_val[0].i32 if any pair differs, 0
 * otherwise.
 */
static void
evaluate_b32any_inequal4(nir_const_value *_dst_val,
                         MAYBE_UNUSED unsigned num_components,
                         unsigned bit_size,
                         MAYBE_UNUSED nir_const_value **_src)
{
   switch (bit_size) {
   case 1: {
      /* 1-bit integers use a 0/-1 convention */
      const struct int1_vec lhs = {
         -(int1_t)_src[0][0].b, -(int1_t)_src[0][1].b,
         -(int1_t)_src[0][2].b, -(int1_t)_src[0][3].b,
      };
      const struct int1_vec rhs = {
         -(int1_t)_src[1][0].b, -(int1_t)_src[1][1].b,
         -(int1_t)_src[1][2].b, -(int1_t)_src[1][3].b,
      };
      const int any_ne = lhs.x != rhs.x || lhs.y != rhs.y ||
                         lhs.z != rhs.z || lhs.w != rhs.w;

      _dst_val[0].i32 = -any_ne;
      return;
   }
   case 8: {
      const struct int8_vec lhs = {
         _src[0][0].i8, _src[0][1].i8, _src[0][2].i8, _src[0][3].i8,
      };
      const struct int8_vec rhs = {
         _src[1][0].i8, _src[1][1].i8, _src[1][2].i8, _src[1][3].i8,
      };
      const int any_ne = lhs.x != rhs.x || lhs.y != rhs.y ||
                         lhs.z != rhs.z || lhs.w != rhs.w;

      _dst_val[0].i32 = -any_ne;
      return;
   }
   case 16: {
      const struct int16_vec lhs = {
         _src[0][0].i16, _src[0][1].i16, _src[0][2].i16, _src[0][3].i16,
      };
      const struct int16_vec rhs = {
         _src[1][0].i16, _src[1][1].i16, _src[1][2].i16, _src[1][3].i16,
      };
      const int any_ne = lhs.x != rhs.x || lhs.y != rhs.y ||
                         lhs.z != rhs.z || lhs.w != rhs.w;

      _dst_val[0].i32 = -any_ne;
      return;
   }
   case 32: {
      const struct int32_vec lhs = {
         _src[0][0].i32, _src[0][1].i32, _src[0][2].i32, _src[0][3].i32,
      };
      const struct int32_vec rhs = {
         _src[1][0].i32, _src[1][1].i32, _src[1][2].i32, _src[1][3].i32,
      };
      const int any_ne = lhs.x != rhs.x || lhs.y != rhs.y ||
                         lhs.z != rhs.z || lhs.w != rhs.w;

      _dst_val[0].i32 = -any_ne;
      return;
   }
   case 64: {
      const struct int64_vec lhs = {
         _src[0][0].i64, _src[0][1].i64, _src[0][2].i64, _src[0][3].i64,
      };
      const struct int64_vec rhs = {
         _src[1][0].i64, _src[1][1].i64, _src[1][2].i64, _src[1][3].i64,
      };
      const int any_ne = lhs.x != rhs.x || lhs.y != rhs.y ||
                         lhs.z != rhs.z || lhs.w != rhs.w;

      _dst_val[0].i32 = -any_ne;
      return;
   }
   default:
      unreachable("unknown bit width");
   }
}

/**
 * Evaluate b32csel on constant operands.
 *
 * For each of the num_components components, picks the value from _src[1]
 * when the 32-bit condition read from _src[0] is non-zero and the value
 * from _src[2] otherwise.  bit_size selects the width of the selected
 * values; 1-bit results are masked down to their low bit before being
 * stored.
 */
static void
evaluate_b32csel(nir_const_value *_dst_val,
                 MAYBE_UNUSED unsigned num_components,
                 unsigned bit_size,
                 MAYBE_UNUSED nir_const_value **_src)
{
   switch (bit_size) {
   case 1:
      for (unsigned c = 0; c < num_components; c++) {
         const bool32_t cond = _src[0][c].i32;
         const uint1_t if_true = _src[1][c].b;
         const uint1_t if_false = _src[2][c].b;
         uint1_t picked;

         if (cond)
            picked = if_true;
         else
            picked = if_false;

         /* 1-bit integers get truncated */
         _dst_val[c].b = picked & 1;
      }
      return;
   case 8:
      for (unsigned c = 0; c < num_components; c++) {
         const bool32_t cond = _src[0][c].i32;
         const uint8_t if_true = _src[1][c].u8;
         const uint8_t if_false = _src[2][c].u8;

         if (cond)
            _dst_val[c].u8 = if_true;
         else
            _dst_val[c].u8 = if_false;
      }
      return;
   case 16:
      for (unsigned c = 0; c < num_components; c++) {
         const bool32_t cond = _src[0][c].i32;
         const uint16_t if_true = _src[1][c].u16;
         const uint16_t if_false = _src[2][c].u16;

         if (cond)
            _dst_val[c].u16 = if_true;
         else
            _dst_val[c].u16 = if_false;
      }
      return;
   case 32:
      for (unsigned c = 0; c < num_components; c++) {
         const bool32_t cond = _src[0][c].i32;
         const uint32_t if_true = _src[1][c].u32;
         const uint32_t if_false = _src[2][c].u32;

         if (cond)
            _dst_val[c].u32 = if_true;
         else
            _dst_val[c].u32 = if_false;
      }
      return;
   case 64:
      for (unsigned c = 0; c < num_components; c++) {
         const bool32_t cond = _src[0][c].i32;
         const uint64_t if_true = _src[1][c].u64;
         const uint64_t if_false = _src[2][c].u64;

         if (cond)
            _dst_val[c].u64 = if_true;
         else
            _dst_val[c].u64 = if_false;
      }
      return;
   default:
      unreachable("unknown bit width");
   }
}

/**
 * Evaluate ball_fequal2 on constant operands.
 *
 * Compares the first two components of _src[0] and _src[1] as floats of
 * the given bit_size (16-bit inputs are widened with _mesa_half_to_float).
 * The 0/1 "all pairs equal" result is negated and stored in _dst_val[0].b.
 */
static void
evaluate_ball_fequal2(nir_const_value *_dst_val,
                      MAYBE_UNUSED unsigned num_components,
                      unsigned bit_size,
                      MAYBE_UNUSED nir_const_value **_src)
{
   switch (bit_size) {
   case 16: {
      const struct float16_vec lhs = {
         _mesa_half_to_float(_src[0][0].u16),
         _mesa_half_to_float(_src[0][1].u16),
         0, 0,
      };
      const struct float16_vec rhs = {
         _mesa_half_to_float(_src[1][0].u16),
         _mesa_half_to_float(_src[1][1].u16),
         0, 0,
      };
      const int all_eq = lhs.x == rhs.x && lhs.y == rhs.y;

      _dst_val[0].b = -all_eq;
      return;
   }
   case 32: {
      const struct float32_vec lhs = { _src[0][0].f32, _src[0][1].f32, 0, 0, };
      const struct float32_vec rhs = { _src[1][0].f32, _src[1][1].f32, 0, 0, };
      const int all_eq = lhs.x == rhs.x && lhs.y == rhs.y;

      _dst_val[0].b = -all_eq;
      return;
   }
   case 64: {
      const struct float64_vec lhs = { _src[0][0].f64, _src[0][1].f64, 0, 0, };
      const struct float64_vec rhs = { _src[1][0].f64, _src[1][1].f64, 0, 0, };
      const int all_eq = lhs.x == rhs.x && lhs.y == rhs.y;

      _dst_val[0].b = -all_eq;
      return;
   }
   default:
      unreachable("unknown bit width");
   }
}

/**
 * Evaluate ball_fequal3 on constant operands.
 *
 * Compares the first three components of _src[0] and _src[1] as floats of
 * the given bit_size (16-bit inputs are widened with _mesa_half_to_float).
 * The 0/1 "all pairs equal" result is negated and stored in _dst_val[0].b.
 */
static void
evaluate_ball_fequal3(nir_const_value *_dst_val,
                      MAYBE_UNUSED unsigned num_components,
                      unsigned bit_size,
                      MAYBE_UNUSED nir_const_value **_src)
{
   switch (bit_size) {
   case 16: {
      const struct float16_vec lhs = {
         _mesa_half_to_float(_src[0][0].u16),
         _mesa_half_to_float(_src[0][1].u16),
         _mesa_half_to_float(_src[0][2].u16),
         0,
      };
      const struct float16_vec rhs = {
         _mesa_half_to_float(_src[1][0].u16),
         _mesa_half_to_float(_src[1][1].u16),
         _mesa_half_to_float(_src[1][2].u16),
         0,
      };
      const int all_eq = lhs.x == rhs.x && lhs.y == rhs.y && lhs.z == rhs.z;

      _dst_val[0].b = -all_eq;
      return;
   }
   case 32: {
      const struct float32_vec lhs = {
         _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, 0,
      };
      const struct float32_vec rhs = {
         _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, 0,
      };
      const int all_eq = lhs.x == rhs.x && lhs.y == rhs.y && lhs.z == rhs.z;

      _dst_val[0].b = -all_eq;
      return;
   }
   case 64: {
      const struct float64_vec lhs = {
         _src[0][0].f64, _src[0][1].f64, _src[0][2].f64, 0,
      };
      const struct float64_vec rhs = {
         _src[1][0].f64, _src[1][1].f64, _src[1][2].f64, 0,
      };
      const int all_eq = lhs.x == rhs.x && lhs.y == rhs.y && lhs.z == rhs.z;

      _dst_val[0].b = -all_eq;
      return;
   }
   default:
      unreachable("unknown bit width");
   }
}

/**
 * Evaluate ball_fequal4 on constant operands.
 *
 * Compares the first four components of _src[0] and _src[1] as floats of
 * the given bit_size (16-bit inputs are widened with _mesa_half_to_float).
 * The 0/1 "all pairs equal" result is negated and stored in _dst_val[0].b.
 */
static void
evaluate_ball_fequal4(nir_const_value *_dst_val,
                      MAYBE_UNUSED unsigned num_components,
                      unsigned bit_size,
                      MAYBE_UNUSED nir_const_value **_src)
{
   switch (bit_size) {
   case 16: {
      const struct float16_vec lhs = {
         _mesa_half_to_float(_src[0][0].u16),
         _mesa_half_to_float(_src[0][1].u16),
         _mesa_half_to_float(_src[0][2].u16),
         _mesa_half_to_float(_src[0][3].u16),
      };
      const struct float16_vec rhs = {
         _mesa_half_to_float(_src[1][0].u16),
         _mesa_half_to_float(_src[1][1].u16),
         _mesa_half_to_float(_src[1][2].u16),
         _mesa_half_to_float(_src[1][3].u16),
      };
      const int all_eq = lhs.x == rhs.x && lhs.y == rhs.y &&
                         lhs.z == rhs.z && lhs.w == rhs.w;

      _dst_val[0].b = -all_eq;
      return;
   }
   case 32: {
      const struct float32_vec lhs = {
         _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, _src[0][3].f32,
      };
      const struct float32_vec rhs = {
         _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, _src[1][3].f32,
      };
      const int all_eq = lhs.x == rhs.x && lhs.y == rhs.y &&
                         lhs.z == rhs.z && lhs.w == rhs.w;

      _dst_val[0].b = -all_eq;
      return;
   }
   case 64: {
      const struct float64_vec lhs = {
         _src[0][0].f64, _src[0][1].f64, _src[0][2].f64, _src[0][3].f64,
      };
      const struct float64_vec rhs = {
         _src[1][0].f64, _src[1][1].f64, _src[1][2].f64, _src[1][3].f64,
      };
      const int all_eq = lhs.x == rhs.x && lhs.y == rhs.y &&
                         lhs.z == rhs.z && lhs.w == rhs.w;

      _dst_val[0].b = -all_eq;
      return;
   }
   default:
      unreachable("unknown bit width");
   }
}

/**
 * Evaluate ball_iequal2 on constant operands.
 *
 * Compares the first two components of _src[0] and _src[1] as signed
 * integers of the given bit_size (1-bit inputs are read from .b and mapped
 * to 0/-1).  The 0/1 "all pairs equal" result is negated and stored in
 * _dst_val[0].b.
 */
static void
evaluate_ball_iequal2(nir_const_value *_dst_val,
                      MAYBE_UNUSED unsigned num_components,
                      unsigned bit_size,
                      MAYBE_UNUSED nir_const_value **_src)
{
   switch (bit_size) {
   case 1: {
      /* 1-bit integers use a 0/-1 convention */
      const struct int1_vec lhs = {
         -(int1_t)_src[0][0].b, -(int1_t)_src[0][1].b, 0, 0,
      };
      const struct int1_vec rhs = {
         -(int1_t)_src[1][0].b, -(int1_t)_src[1][1].b, 0, 0,
      };
      const int all_eq = lhs.x == rhs.x && lhs.y == rhs.y;

      _dst_val[0].b = -all_eq;
      return;
   }
   case 8: {
      const struct int8_vec lhs = { _src[0][0].i8, _src[0][1].i8, 0, 0, };
      const struct int8_vec rhs = { _src[1][0].i8, _src[1][1].i8, 0, 0, };
      const int all_eq = lhs.x == rhs.x && lhs.y == rhs.y;

      _dst_val[0].b = -all_eq;
      return;
   }
   case 16: {
      const struct int16_vec lhs = { _src[0][0].i16, _src[0][1].i16, 0, 0, };
      const struct int16_vec rhs = { _src[1][0].i16, _src[1][1].i16, 0, 0, };
      const int all_eq = lhs.x == rhs.x && lhs.y == rhs.y;

      _dst_val[0].b = -all_eq;
      return;
   }
   case 32: {
      const struct int32_vec lhs = { _src[0][0].i32, _src[0][1].i32, 0, 0, };
      const struct int32_vec rhs = { _src[1][0].i32, _src[1][1].i32, 0, 0, };
      const int all_eq = lhs.x == rhs.x && lhs.y == rhs.y;

      _dst_val[0].b = -all_eq;
      return;
   }
   case 64: {
      const struct int64_vec lhs = { _src[0][0].i64, _src[0][1].i64, 0, 0, };
      const struct int64_vec rhs = { _src[1][0].i64, _src[1][1].i64, 0, 0, };
      const int all_eq = lhs.x == rhs.x && lhs.y == rhs.y;

      _dst_val[0].b = -all_eq;
      return;
   }
   default:
      unreachable("unknown bit width");
   }
}

/**
 * Evaluate ball_iequal3 on constant operands.
 *
 * Compares the first three components of _src[0] and _src[1] as signed
 * integers of the given bit_size (1-bit inputs are read from .b and mapped
 * to 0/-1).  The 0/1 "all pairs equal" result is negated and stored in
 * _dst_val[0].b.
 */
static void
evaluate_ball_iequal3(nir_const_value *_dst_val,
                      MAYBE_UNUSED unsigned num_components,
                      unsigned bit_size,
                      MAYBE_UNUSED nir_const_value **_src)
{
   switch (bit_size) {
   case 1: {
      /* 1-bit integers use a 0/-1 convention */
      const struct int1_vec lhs = {
         -(int1_t)_src[0][0].b, -(int1_t)_src[0][1].b,
         -(int1_t)_src[0][2].b, 0,
      };
      const struct int1_vec rhs = {
         -(int1_t)_src[1][0].b, -(int1_t)_src[1][1].b,
         -(int1_t)_src[1][2].b, 0,
      };
      const int all_eq = lhs.x == rhs.x && lhs.y == rhs.y && lhs.z == rhs.z;

      _dst_val[0].b = -all_eq;
      return;
   }
   case 8: {
      const struct int8_vec lhs = {
         _src[0][0].i8, _src[0][1].i8, _src[0][2].i8, 0,
      };
      const struct int8_vec rhs = {
         _src[1][0].i8, _src[1][1].i8, _src[1][2].i8, 0,
      };
      const int all_eq = lhs.x == rhs.x && lhs.y == rhs.y && lhs.z == rhs.z;

      _dst_val[0].b = -all_eq;
      return;
   }
   case 16: {
      const struct int16_vec lhs = {
         _src[0][0].i16, _src[0][1].i16, _src[0][2].i16, 0,
      };
      const struct int16_vec rhs = {
         _src[1][0].i16, _src[1][1].i16, _src[1][2].i16, 0,
      };
      const int all_eq = lhs.x == rhs.x && lhs.y == rhs.y && lhs.z == rhs.z;

      _dst_val[0].b = -all_eq;
      return;
   }
   case 32: {
      const struct int32_vec lhs = {
         _src[0][0].i32, _src[0][1].i32, _src[0][2].i32, 0,
      };
      const struct int32_vec rhs = {
         _src[1][0].i32, _src[1][1].i32, _src[1][2].i32, 0,
      };
      const int all_eq = lhs.x == rhs.x && lhs.y == rhs.y && lhs.z == rhs.z;

      _dst_val[0].b = -all_eq;
      return;
   }
   case 64: {
      const struct int64_vec lhs = {
         _src[0][0].i64, _src[0][1].i64, _src[0][2].i64, 0,
      };
      const struct int64_vec rhs = {
         _src[1][0].i64, _src[1][1].i64, _src[1][2].i64, 0,
      };
      const int all_eq = lhs.x == rhs.x && lhs.y == rhs.y && lhs.z == rhs.z;

      _dst_val[0].b = -all_eq;
      return;
   }
   default:
      unreachable("unknown bit width");
   }
}

/**
 * Evaluate ball_iequal4 on constant operands.
 *
 * Compares the first four components of _src[0] and _src[1] as signed
 * integers of the given bit_size (1-bit inputs are read from .b and mapped
 * to 0/-1).  The 0/1 "all pairs equal" result is negated and stored in
 * _dst_val[0].b.
 */
static void
evaluate_ball_iequal4(nir_const_value *_dst_val,
                      MAYBE_UNUSED unsigned num_components,
                      unsigned bit_size,
                      MAYBE_UNUSED nir_const_value **_src)
{
   switch (bit_size) {
   case 1: {
      /* 1-bit integers use a 0/-1 convention */
      const struct int1_vec lhs = {
         -(int1_t)_src[0][0].b, -(int1_t)_src[0][1].b,
         -(int1_t)_src[0][2].b, -(int1_t)_src[0][3].b,
      };
      const struct int1_vec rhs = {
         -(int1_t)_src[1][0].b, -(int1_t)_src[1][1].b,
         -(int1_t)_src[1][2].b, -(int1_t)_src[1][3].b,
      };
      const int all_eq = lhs.x == rhs.x && lhs.y == rhs.y &&
                         lhs.z == rhs.z && lhs.w == rhs.w;

      _dst_val[0].b = -all_eq;
      return;
   }
   case 8: {
      const struct int8_vec lhs = {
         _src[0][0].i8, _src[0][1].i8, _src[0][2].i8, _src[0][3].i8,
      };
      const struct int8_vec rhs = {
         _src[1][0].i8, _src[1][1].i8, _src[1][2].i8, _src[1][3].i8,
      };
      const int all_eq = lhs.x == rhs.x && lhs.y == rhs.y &&
                         lhs.z == rhs.z && lhs.w == rhs.w;

      _dst_val[0].b = -all_eq;
      return;
   }
   case 16: {
      const struct int16_vec lhs = {
         _src[0][0].i16, _src[0][1].i16, _src[0][2].i16, _src[0][3].i16,
      };
      const struct int16_vec rhs = {
         _src[1][0].i16, _src[1][1].i16, _src[1][2].i16, _src[1][3].i16,
      };
      const int all_eq = lhs.x == rhs.x && lhs.y == rhs.y &&
                         lhs.z == rhs.z && lhs.w == rhs.w;

      _dst_val[0].b = -all_eq;
      return;
   }
   case 32: {
      const struct int32_vec lhs = {
         _src[0][0].i32, _src[0][1].i32, _src[0][2].i32, _src[0][3].i32,
      };
      const struct int32_vec rhs = {
         _src[1][0].i32, _src[1][1].i32, _src[1][2].i32, _src[1][3].i32,
      };
      const int all_eq = lhs.x == rhs.x && lhs.y == rhs.y &&
                         lhs.z == rhs.z && lhs.w == rhs.w;

      _dst_val[0].b = -all_eq;
      return;
   }
   case 64: {
      const struct int64_vec lhs = {
         _src[0][0].i64, _src[0][1].i64, _src[0][2].i64, _src[0][3].i64,
      };
      const struct int64_vec rhs = {
         _src[1][0].i64, _src[1][1].i64, _src[1][2].i64, _src[1][3].i64,
      };
      const int all_eq = lhs.x == rhs.x && lhs.y == rhs.y &&
                         lhs.z == rhs.z && lhs.w == rhs.w;

      _dst_val[0].b = -all_eq;
      return;
   }
   default:
      unreachable("unknown bit width");
   }
}

/**
 * Evaluate bany_fnequal2 on constant operands.
 *
 * Compares the first two components of _src[0] and _src[1] as floats of
 * the given bit_size (16-bit inputs are widened with _mesa_half_to_float).
 * The 0/1 "any pair differs" result is negated and stored in
 * _dst_val[0].b.
 */
static void
evaluate_bany_fnequal2(nir_const_value *_dst_val,
                       MAYBE_UNUSED unsigned num_components,
                       unsigned bit_size,
                       MAYBE_UNUSED nir_const_value **_src)
{
   switch (bit_size) {
   case 16: {
      const struct float16_vec lhs = {
         _mesa_half_to_float(_src[0][0].u16),
         _mesa_half_to_float(_src[0][1].u16),
         0, 0,
      };
      const struct float16_vec rhs = {
         _mesa_half_to_float(_src[1][0].u16),
         _mesa_half_to_float(_src[1][1].u16),
         0, 0,
      };
      const int any_ne = lhs.x != rhs.x || lhs.y != rhs.y;

      _dst_val[0].b = -any_ne;
      return;
   }
   case 32: {
      const struct float32_vec lhs = { _src[0][0].f32, _src[0][1].f32, 0, 0, };
      const struct float32_vec rhs = { _src[1][0].f32, _src[1][1].f32, 0, 0, };
      const int any_ne = lhs.x != rhs.x || lhs.y != rhs.y;

      _dst_val[0].b = -any_ne;
      return;
   }
   case 64: {
      const struct float64_vec lhs = { _src[0][0].f64, _src[0][1].f64, 0, 0, };
      const struct float64_vec rhs = { _src[1][0].f64, _src[1][1].f64, 0, 0, };
      const int any_ne = lhs.x != rhs.x || lhs.y != rhs.y;

      _dst_val[0].b = -any_ne;
      return;
   }
   default:
      unreachable("unknown bit width");
   }
}

/**
 * Evaluate bany_fnequal3 on constant operands.
 *
 * Compares the first three components of _src[0] and _src[1] as floats of
 * the given bit_size (16-bit inputs are widened with _mesa_half_to_float).
 * The 0/1 "any pair differs" result is negated and stored in
 * _dst_val[0].b.
 */
static void
evaluate_bany_fnequal3(nir_const_value *_dst_val,
                       MAYBE_UNUSED unsigned num_components,
                       unsigned bit_size,
                       MAYBE_UNUSED nir_const_value **_src)
{
   switch (bit_size) {
   case 16: {
      const struct float16_vec lhs = {
         _mesa_half_to_float(_src[0][0].u16),
         _mesa_half_to_float(_src[0][1].u16),
         _mesa_half_to_float(_src[0][2].u16),
         0,
      };
      const struct float16_vec rhs = {
         _mesa_half_to_float(_src[1][0].u16),
         _mesa_half_to_float(_src[1][1].u16),
         _mesa_half_to_float(_src[1][2].u16),
         0,
      };
      const int any_ne = lhs.x != rhs.x || lhs.y != rhs.y || lhs.z != rhs.z;

      _dst_val[0].b = -any_ne;
      return;
   }
   case 32: {
      const struct float32_vec lhs = {
         _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, 0,
      };
      const struct float32_vec rhs = {
         _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, 0,
      };
      const int any_ne = lhs.x != rhs.x || lhs.y != rhs.y || lhs.z != rhs.z;

      _dst_val[0].b = -any_ne;
      return;
   }
   case 64: {
      const struct float64_vec lhs = {
         _src[0][0].f64, _src[0][1].f64, _src[0][2].f64, 0,
      };
      const struct float64_vec rhs = {
         _src[1][0].f64, _src[1][1].f64, _src[1][2].f64, 0,
      };
      const int any_ne = lhs.x != rhs.x || lhs.y != rhs.y || lhs.z != rhs.z;

      _dst_val[0].b = -any_ne;
      return;
   }
   default:
      unreachable("unknown bit width");
   }
}

/**
 * Evaluate bany_fnequal4 on constant operands.
 *
 * Compares the first four components of _src[0] and _src[1] as floats of
 * the given bit_size (16-bit inputs are widened with _mesa_half_to_float).
 * The 0/1 "any pair differs" result is negated and stored in
 * _dst_val[0].b.
 */
static void
evaluate_bany_fnequal4(nir_const_value *_dst_val,
                       MAYBE_UNUSED unsigned num_components,
                       unsigned bit_size,
                       MAYBE_UNUSED nir_const_value **_src)
{
   switch (bit_size) {
   case 16: {
      const struct float16_vec lhs = {
         _mesa_half_to_float(_src[0][0].u16),
         _mesa_half_to_float(_src[0][1].u16),
         _mesa_half_to_float(_src[0][2].u16),
         _mesa_half_to_float(_src[0][3].u16),
      };
      const struct float16_vec rhs = {
         _mesa_half_to_float(_src[1][0].u16),
         _mesa_half_to_float(_src[1][1].u16),
         _mesa_half_to_float(_src[1][2].u16),
         _mesa_half_to_float(_src[1][3].u16),
      };
      const int any_ne = lhs.x != rhs.x || lhs.y != rhs.y ||
                         lhs.z != rhs.z || lhs.w != rhs.w;

      _dst_val[0].b = -any_ne;
      return;
   }
   case 32: {
      const struct float32_vec lhs = {
         _src[0][0].f32, _src[0][1].f32, _src[0][2].f32, _src[0][3].f32,
      };
      const struct float32_vec rhs = {
         _src[1][0].f32, _src[1][1].f32, _src[1][2].f32, _src[1][3].f32,
      };
      const int any_ne = lhs.x != rhs.x || lhs.y != rhs.y ||
                         lhs.z != rhs.z || lhs.w != rhs.w;

      _dst_val[0].b = -any_ne;
      return;
   }
   case 64: {
      const struct float64_vec lhs = {
         _src[0][0].f64, _src[0][1].f64, _src[0][2].f64, _src[0][3].f64,
      };
      const struct float64_vec rhs = {
         _src[1][0].f64, _src[1][1].f64, _src[1][2].f64, _src[1][3].f64,
      };
      const int any_ne = lhs.x != rhs.x || lhs.y != rhs.y ||
                         lhs.z != rhs.z || lhs.w != rhs.w;

      _dst_val[0].b = -any_ne;
      return;
   }
   default:
      unreachable("unknown bit width");
   }
}

/**
 * Evaluate bany_inequal2 on constant operands.
 *
 * Compares the first two components of _src[0] and _src[1] as signed
 * integers of the given bit_size (1-bit inputs are read from .b and mapped
 * to 0/-1).  The 0/1 "any pair differs" result is negated and stored in
 * _dst_val[0].b.
 */
static void
evaluate_bany_inequal2(nir_const_value *_dst_val,
                       MAYBE_UNUSED unsigned num_components,
                       unsigned bit_size,
                       MAYBE_UNUSED nir_const_value **_src)
{
   switch (bit_size) {
   case 1: {
      /* 1-bit integers use a 0/-1 convention */
      const struct int1_vec lhs = {
         -(int1_t)_src[0][0].b, -(int1_t)_src[0][1].b, 0, 0,
      };
      const struct int1_vec rhs = {
         -(int1_t)_src[1][0].b, -(int1_t)_src[1][1].b, 0, 0,
      };
      const int any_ne = lhs.x != rhs.x || lhs.y != rhs.y;

      _dst_val[0].b = -any_ne;
      return;
   }
   case 8: {
      const struct int8_vec lhs = { _src[0][0].i8, _src[0][1].i8, 0, 0, };
      const struct int8_vec rhs = { _src[1][0].i8, _src[1][1].i8, 0, 0, };
      const int any_ne = lhs.x != rhs.x || lhs.y != rhs.y;

      _dst_val[0].b = -any_ne;
      return;
   }
   case 16: {
      const struct int16_vec lhs = { _src[0][0].i16, _src[0][1].i16, 0, 0, };
      const struct int16_vec rhs = { _src[1][0].i16, _src[1][1].i16, 0, 0, };
      const int any_ne = lhs.x != rhs.x || lhs.y != rhs.y;

      _dst_val[0].b = -any_ne;
      return;
   }
   case 32: {
      const struct int32_vec lhs = { _src[0][0].i32, _src[0][1].i32, 0, 0, };
      const struct int32_vec rhs = { _src[1][0].i32, _src[1][1].i32, 0, 0, };
      const int any_ne = lhs.x != rhs.x || lhs.y != rhs.y;

      _dst_val[0].b = -any_ne;
      return;
   }
   case 64: {
      const struct int64_vec lhs = { _src[0][0].i64, _src[0][1].i64, 0, 0, };
      const struct int64_vec rhs = { _src[1][0].i64, _src[1][1].i64, 0, 0, };
      const int any_ne = lhs.x != rhs.x || lhs.y != rhs.y;

      _dst_val[0].b = -any_ne;
      return;
   }
   default:
      unreachable("unknown bit width");
   }
}
3422static void 3423evaluate_bany_inequal3(nir_const_value *_dst_val, 3424 MAYBE_UNUSED unsigned num_components, 3425 unsigned bit_size, 3426 MAYBE_UNUSED nir_const_value **_src) 3427{ 3428 switch (bit_size) { 3429 case 1: { 3430 3431 3432 3433 3434 const struct int1_vec src0 = { 3435 /* 1-bit integers use a 0/-1 convention */ 3436 -(int1_t)_src[0][0].b, 3437 /* 1-bit integers use a 0/-1 convention */ 3438 -(int1_t)_src[0][1].b, 3439 /* 1-bit integers use a 0/-1
convention */ 3440 -(int1_t)_src[0][2].b, 3441 0, 3442 }; 3443 3444 const struct int1_vec src1 = { 3445 /* 1-bit integers use a 0/-1 convention */ 3446 -(int1_t)_src[1][0].b, 3447 /* 1-bit integers use a 0/-1 convention */ 3448 -(int1_t)_src[1][1].b, 3449 /* 1-bit integers use a 0/-1 convention */ 3450 -(int1_t)_src[1][2].b, 3451 0, 3452 }; 3453 3454 struct bool1_vec dst; 3455 3456 dst.x = dst.y = dst.z = dst.w = ((src0.x != src1.x) || (src0.y != src1.y) || (src0.z != src1.z)); 3457 3458 _dst_val[0].b = -(int)dst.x; 3459 3460 break; 3461 } 3462 case 8: { 3463 3464 3465 3466 3467 const struct int8_vec src0 = { 3468 _src[0][0].i8, 3469 _src[0][1].i8, 3470 _src[0][2].i8, 3471 0, 3472 }; 3473 3474 const struct int8_vec src1 = { 3475 _src[1][0].i8, 3476 _src[1][1].i8, 3477 _src[1][2].i8, 3478 0, 3479 }; 3480 3481 struct bool1_vec dst; 3482 3483 dst.x = dst.y = dst.z = dst.w = ((src0.x != src1.x) || (src0.y != src1.y) || (src0.z != src1.z)); 3484 3485 _dst_val[0].b = -(int)dst.x; 3486 3487 break; 3488 } 3489 case 16: { 3490 3491 3492 3493 3494 const struct int16_vec src0 = { 3495 _src[0][0].i16, 3496 _src[0][1].i16, 3497 _src[0][2].i16, 3498 0, 3499 }; 3500 3501 const struct int16_vec src1 = { 3502 _src[1][0].i16, 3503 _src[1][1].i16, 3504 _src[1][2].i16, 3505 0, 3506 }; 3507 3508 struct bool1_vec dst; 3509 3510 dst.x = dst.y = dst.z = dst.w = ((src0.x != src1.x) || (src0.y != src1.y) || (src0.z != src1.z)); 3511 3512 _dst_val[0].b = -(int)dst.x; 3513 3514 break; 3515 } 3516 case 32: { 3517 3518 3519 3520 3521 const struct int32_vec src0 = { 3522 _src[0][0].i32, 3523 _src[0][1].i32, 3524 _src[0][2].i32, 3525 0, 3526 }; 3527 3528 const struct int32_vec src1 = { 3529 _src[1][0].i32, 3530 _src[1][1].i32, 3531 _src[1][2].i32, 3532 0, 3533 }; 3534 3535 struct bool1_vec dst; 3536 3537 dst.x = dst.y = dst.z = dst.w = ((src0.x != src1.x) || (src0.y != src1.y) || (src0.z != src1.z)); 3538 3539 _dst_val[0].b = -(int)dst.x; 3540 3541 break; 3542 } 3543 case 64: { 3544 3545 3546 
3547 3548 const struct int64_vec src0 = { 3549 _src[0][0].i64, 3550 _src[0][1].i64, 3551 _src[0][2].i64, 3552 0, 3553 }; 3554 3555 const struct int64_vec src1 = { 3556 _src[1][0].i64, 3557 _src[1][1].i64, 3558 _src[1][2].i64, 3559 0, 3560 }; 3561 3562 struct bool1_vec dst; 3563 3564 dst.x = dst.y = dst.z = dst.w = ((src0.x != src1.x) || (src0.y != src1.y) || (src0.z != src1.z)); 3565 3566 _dst_val[0].b = -(int)dst.x; 3567 3568 break; 3569 } 3570 3571 default: 3572 unreachable("unknown bit width"); 3573 } 3574} 3575static void 3576evaluate_bany_inequal4(nir_const_value *_dst_val, 3577 MAYBE_UNUSED unsigned num_components, 3578 unsigned bit_size, 3579 MAYBE_UNUSED nir_const_value **_src) 3580{ 3581 switch (bit_size) { 3582 case 1: { 3583 3584 3585 3586 3587 const struct int1_vec src0 = { 3588 /* 1-bit integers use a 0/-1 convention */ 3589 -(int1_t)_src[0][0].b, 3590 /* 1-bit integers use a 0/-1 convention */ 3591 -(int1_t)_src[0][1].b, 3592 /* 1-bit integers use a 0/-1 convention */ 3593 -(int1_t)_src[0][2].b, 3594 /* 1-bit integers use a 0/-1 convention */ 3595 -(int1_t)_src[0][3].b, 3596 }; 3597 3598 const struct int1_vec src1 = { 3599 /* 1-bit integers use a 0/-1 convention */ 3600 -(int1_t)_src[1][0].b, 3601 /* 1-bit integers use a 0/-1 convention */ 3602 -(int1_t)_src[1][1].b, 3603 /* 1-bit integers use a 0/-1 convention */ 3604 -(int1_t)_src[1][2].b, 3605 /* 1-bit integers use a 0/-1 convention */ 3606 -(int1_t)_src[1][3].b, 3607 }; 3608 3609 struct bool1_vec dst; 3610 3611 dst.x = dst.y = dst.z = dst.w = ((src0.x != src1.x) || (src0.y != src1.y) || (src0.z != src1.z) || (src0.w != src1.w)); 3612 3613 _dst_val[0].b = -(int)dst.x; 3614 3615 break; 3616 } 3617 case 8: { 3618 3619 3620 3621 3622 const struct int8_vec src0 = { 3623 _src[0][0].i8, 3624 _src[0][1].i8, 3625 _src[0][2].i8, 3626 _src[0][3].i8, 3627 }; 3628 3629 const struct int8_vec src1 = { 3630 _src[1][0].i8, 3631 _src[1][1].i8, 3632 _src[1][2].i8, 3633 _src[1][3].i8, 3634 }; 3635 3636 struct 
bool1_vec dst; 3637 3638 dst.x = dst.y = dst.z = dst.w = ((src0.x != src1.x) || (src0.y != src1.y) || (src0.z != src1.z) || (src0.w != src1.w)); 3639 3640 _dst_val[0].b = -(int)dst.x; 3641 3642 break; 3643 } 3644 case 16: { 3645 3646 3647 3648 3649 const struct int16_vec src0 = { 3650 _src[0][0].i16, 3651 _src[0][1].i16, 3652 _src[0][2].i16, 3653 _src[0][3].i16, 3654 }; 3655 3656 const struct int16_vec src1 = { 3657 _src[1][0].i16, 3658 _src[1][1].i16, 3659 _src[1][2].i16, 3660 _src[1][3].i16, 3661 }; 3662 3663 struct bool1_vec dst; 3664 3665 dst.x = dst.y = dst.z = dst.w = ((src0.x != src1.x) || (src0.y != src1.y) || (src0.z != src1.z) || (src0.w != src1.w)); 3666 3667 _dst_val[0].b = -(int)dst.x; 3668 3669 break; 3670 } 3671 case 32: { 3672 3673 3674 3675 3676 const struct int32_vec src0 = { 3677 _src[0][0].i32, 3678 _src[0][1].i32, 3679 _src[0][2].i32, 3680 _src[0][3].i32, 3681 }; 3682 3683 const struct int32_vec src1 = { 3684 _src[1][0].i32, 3685 _src[1][1].i32, 3686 _src[1][2].i32, 3687 _src[1][3].i32, 3688 }; 3689 3690 struct bool1_vec dst; 3691 3692 dst.x = dst.y = dst.z = dst.w = ((src0.x != src1.x) || (src0.y != src1.y) || (src0.z != src1.z) || (src0.w != src1.w)); 3693 3694 _dst_val[0].b = -(int)dst.x; 3695 3696 break; 3697 } 3698 case 64: { 3699 3700 3701 3702 3703 const struct int64_vec src0 = { 3704 _src[0][0].i64, 3705 _src[0][1].i64, 3706 _src[0][2].i64, 3707 _src[0][3].i64, 3708 }; 3709 3710 const struct int64_vec src1 = { 3711 _src[1][0].i64, 3712 _src[1][1].i64, 3713 _src[1][2].i64, 3714 _src[1][3].i64, 3715 }; 3716 3717 struct bool1_vec dst; 3718 3719 dst.x = dst.y = dst.z = dst.w = ((src0.x != src1.x) || (src0.y != src1.y) || (src0.z != src1.z) || (src0.w != src1.w)); 3720 3721 _dst_val[0].b = -(int)dst.x; 3722 3723 break; 3724 } 3725 3726 default: 3727 unreachable("unknown bit width"); 3728 } 3729} 3730static void 3731evaluate_bcsel(nir_const_value *_dst_val, 3732 MAYBE_UNUSED unsigned num_components, 3733 unsigned bit_size, 3734 MAYBE_UNUSED 
nir_const_value **_src) 3735{ 3736 switch (bit_size) { 3737 case 1: { 3738 3739 3740 3741 3742 for (unsigned _i = 0; _i < num_components; _i++) { 3743 const bool1_t src0 = 3744 _src[0][_i].b; 3745 const uint1_t src1 = 3746 _src[1][_i].b; 3747 const uint1_t src2 = 3748 _src[2][_i].b; 3749 3750 uint1_t dst = src0 ? src1 : src2; 3751 3752 /* 1-bit integers get truncated */ 3753 _dst_val[_i].b = dst & 1; 3754 } 3755 3756 break; 3757 } 3758 case 8: { 3759 3760 3761 3762 3763 for (unsigned _i = 0; _i < num_components; _i++) { 3764 const bool1_t src0 = 3765 _src[0][_i].b; 3766 const uint8_t src1 = 3767 _src[1][_i].u8; 3768 const uint8_t src2 = 3769 _src[2][_i].u8; 3770 3771 uint8_t dst = src0 ? src1 : src2; 3772 3773 _dst_val[_i].u8 = dst; 3774 } 3775 3776 break; 3777 } 3778 case 16: { 3779 3780 3781 3782 3783 for (unsigned _i = 0; _i < num_components; _i++) { 3784 const bool1_t src0 = 3785 _src[0][_i].b; 3786 const uint16_t src1 = 3787 _src[1][_i].u16; 3788 const uint16_t src2 = 3789 _src[2][_i].u16; 3790 3791 uint16_t dst = src0 ? src1 : src2; 3792 3793 _dst_val[_i].u16 = dst; 3794 } 3795 3796 break; 3797 } 3798 case 32: { 3799 3800 3801 3802 3803 for (unsigned _i = 0; _i < num_components; _i++) { 3804 const bool1_t src0 = 3805 _src[0][_i].b; 3806 const uint32_t src1 = 3807 _src[1][_i].u32; 3808 const uint32_t src2 = 3809 _src[2][_i].u32; 3810 3811 uint32_t dst = src0 ? src1 : src2; 3812 3813 _dst_val[_i].u32 = dst; 3814 } 3815 3816 break; 3817 } 3818 case 64: { 3819 3820 3821 3822 3823 for (unsigned _i = 0; _i < num_components; _i++) { 3824 const bool1_t src0 = 3825 _src[0][_i].b; 3826 const uint64_t src1 = 3827 _src[1][_i].u64; 3828 const uint64_t src2 = 3829 _src[2][_i].u64; 3830 3831 uint64_t dst = src0 ? 
src1 : src2; 3832 3833 _dst_val[_i].u64 = dst; 3834 } 3835 3836 break; 3837 } 3838 3839 default: 3840 unreachable("unknown bit width"); 3841 } 3842} 3843static void 3844evaluate_bfi(nir_const_value *_dst_val, 3845 MAYBE_UNUSED unsigned num_components, 3846 UNUSED unsigned bit_size, 3847 MAYBE_UNUSED nir_const_value **_src) 3848{ 3849 3850 3851 3852 3853 for (unsigned _i = 0; _i < num_components; _i++) { 3854 const uint32_t src0 = 3855 _src[0][_i].u32; 3856 const uint32_t src1 = 3857 _src[1][_i].u32; 3858 const uint32_t src2 = 3859 _src[2][_i].u32; 3860 3861 uint32_t dst; 3862 3863 3864unsigned mask = src0, insert = src1, base = src2; 3865if (mask == 0) { 3866 dst = base; 3867} else { 3868 unsigned tmp = mask; 3869 while (!(tmp & 1)) { 3870 tmp >>= 1; 3871 insert <<= 1; 3872 } 3873 dst = (base & ~mask) | (insert & mask); 3874} 3875 3876 3877 _dst_val[_i].u32 = dst; 3878 } 3879 3880} 3881static void 3882evaluate_bfm(nir_const_value *_dst_val, 3883 MAYBE_UNUSED unsigned num_components, 3884 UNUSED unsigned bit_size, 3885 MAYBE_UNUSED nir_const_value **_src) 3886{ 3887 3888 3889 3890 3891 for (unsigned _i = 0; _i < num_components; _i++) { 3892 const int32_t src0 = 3893 _src[0][_i].i32; 3894 const int32_t src1 = 3895 _src[1][_i].i32; 3896 3897 uint32_t dst; 3898 3899 3900int bits = src0, offset = src1; 3901if (offset < 0 || bits < 0 || offset > 31 || bits > 31 || offset + bits > 32) 3902 dst = 0; /* undefined */ 3903else 3904 dst = ((1u << bits) - 1) << offset; 3905 3906 3907 _dst_val[_i].u32 = dst; 3908 } 3909 3910} 3911static void 3912evaluate_bit_count(nir_const_value *_dst_val, 3913 MAYBE_UNUSED unsigned num_components, 3914 unsigned bit_size, 3915 MAYBE_UNUSED nir_const_value **_src) 3916{ 3917 switch (bit_size) { 3918 case 1: { 3919 3920 3921 3922 3923 for (unsigned _i = 0; _i < num_components; _i++) { 3924 const uint1_t src0 = 3925 _src[0][_i].b; 3926 3927 uint32_t dst; 3928 3929 3930dst = 0; 3931for (unsigned bit = 0; bit < bit_size; bit++) { 3932 if ((src0 >> 
bit) & 1) 3933 dst++; 3934} 3935 3936 3937 _dst_val[_i].u32 = dst; 3938 } 3939 3940 break; 3941 } 3942 case 8: { 3943 3944 3945 3946 3947 for (unsigned _i = 0; _i < num_components; _i++) { 3948 const uint8_t src0 = 3949 _src[0][_i].u8; 3950 3951 uint32_t dst; 3952 3953 3954dst = 0; 3955for (unsigned bit = 0; bit < bit_size; bit++) { 3956 if ((src0 >> bit) & 1) 3957 dst++; 3958} 3959 3960 3961 _dst_val[_i].u32 = dst; 3962 } 3963 3964 break; 3965 } 3966 case 16: { 3967 3968 3969 3970 3971 for (unsigned _i = 0; _i < num_components; _i++) { 3972 const uint16_t src0 = 3973 _src[0][_i].u16; 3974 3975 uint32_t dst; 3976 3977 3978dst = 0; 3979for (unsigned bit = 0; bit < bit_size; bit++) { 3980 if ((src0 >> bit) & 1) 3981 dst++; 3982} 3983 3984 3985 _dst_val[_i].u32 = dst; 3986 } 3987 3988 break; 3989 } 3990 case 32: { 3991 3992 3993 3994 3995 for (unsigned _i = 0; _i < num_components; _i++) { 3996 const uint32_t src0 = 3997 _src[0][_i].u32; 3998 3999 uint32_t dst; 4000 4001 4002dst = 0; 4003for (unsigned bit = 0; bit < bit_size; bit++) { 4004 if ((src0 >> bit) & 1) 4005 dst++; 4006} 4007 4008 4009 _dst_val[_i].u32 = dst; 4010 } 4011 4012 break; 4013 } 4014 case 64: { 4015 4016 4017 4018 4019 for (unsigned _i = 0; _i < num_components; _i++) { 4020 const uint64_t src0 = 4021 _src[0][_i].u64; 4022 4023 uint32_t dst; 4024 4025 4026dst = 0; 4027for (unsigned bit = 0; bit < bit_size; bit++) { 4028 if ((src0 >> bit) & 1) 4029 dst++; 4030} 4031 4032 4033 _dst_val[_i].u32 = dst; 4034 } 4035 4036 break; 4037 } 4038 4039 default: 4040 unreachable("unknown bit width"); 4041 } 4042} 4043static void 4044evaluate_bitfield_insert(nir_const_value *_dst_val, 4045 MAYBE_UNUSED unsigned num_components, 4046 UNUSED unsigned bit_size, 4047 MAYBE_UNUSED nir_const_value **_src) 4048{ 4049 4050 4051 4052 4053 for (unsigned _i = 0; _i < num_components; _i++) { 4054 const uint32_t src0 = 4055 _src[0][_i].u32; 4056 const uint32_t src1 = 4057 _src[1][_i].u32; 4058 const int32_t src2 = 4059 
_src[2][_i].i32; 4060 const int32_t src3 = 4061 _src[3][_i].i32; 4062 4063 uint32_t dst; 4064 4065 4066unsigned base = src0, insert = src1; 4067int offset = src2, bits = src3; 4068if (bits == 0) { 4069 dst = base; 4070} else if (offset < 0 || bits < 0 || bits + offset > 32) { 4071 dst = 0; 4072} else { 4073 unsigned mask = ((1ull << bits) - 1) << offset; 4074 dst = (base & ~mask) | ((insert << offset) & mask); 4075} 4076 4077 4078 _dst_val[_i].u32 = dst; 4079 } 4080 4081} 4082static void 4083evaluate_bitfield_reverse(nir_const_value *_dst_val, 4084 MAYBE_UNUSED unsigned num_components, 4085 UNUSED unsigned bit_size, 4086 MAYBE_UNUSED nir_const_value **_src) 4087{ 4088 4089 4090 4091 4092 for (unsigned _i = 0; _i < num_components; _i++) { 4093 const uint32_t src0 = 4094 _src[0][_i].u32; 4095 4096 uint32_t dst; 4097 4098 4099/* we're not winning any awards for speed here, but that's ok */ 4100dst = 0; 4101for (unsigned bit = 0; bit < 32; bit++) 4102 dst |= ((src0 >> bit) & 1) << (31 - bit); 4103 4104 4105 _dst_val[_i].u32 = dst; 4106 } 4107 4108} 4109static void 4110evaluate_cube_face_coord(nir_const_value *_dst_val, 4111 MAYBE_UNUSED unsigned num_components, 4112 UNUSED unsigned bit_size, 4113 MAYBE_UNUSED nir_const_value **_src) 4114{ 4115 4116 4117 4118 4119 const struct float32_vec src0 = { 4120 _src[0][0].f32, 4121 _src[0][1].f32, 4122 _src[0][2].f32, 4123 0, 4124 }; 4125 4126 struct float32_vec dst; 4127 4128 4129dst.x = dst.y = 0.0; 4130float absX = fabs(src0.x); 4131float absY = fabs(src0.y); 4132float absZ = fabs(src0.z); 4133 4134float ma = 0.0; 4135if (absX >= absY && absX >= absZ) { ma = 2 * src0.x; } 4136if (absY >= absX && absY >= absZ) { ma = 2 * src0.y; } 4137if (absZ >= absX && absZ >= absY) { ma = 2 * src0.z; } 4138 4139if (src0.x >= 0 && absX >= absY && absX >= absZ) { dst.x = -src0.z; dst.y = -src0.y; } 4140if (src0.x < 0 && absX >= absY && absX >= absZ) { dst.x = src0.z; dst.y = -src0.y; } 4141if (src0.y >= 0 && absY >= absX && absY >= absZ) { 
dst.x = src0.x; dst.y = src0.z; } 4142if (src0.y < 0 && absY >= absX && absY >= absZ) { dst.x = src0.x; dst.y = -src0.z; } 4143if (src0.z >= 0 && absZ >= absX && absZ >= absY) { dst.x = src0.x; dst.y = -src0.y; } 4144if (src0.z < 0 && absZ >= absX && absZ >= absY) { dst.x = -src0.x; dst.y = -src0.y; } 4145 4146dst.x = dst.x / ma + 0.5; 4147dst.y = dst.y / ma + 0.5; 4148 4149 4150 _dst_val[0].f32 = dst.x; 4151 _dst_val[1].f32 = dst.y; 4152 4153} 4154static void 4155evaluate_cube_face_index(nir_const_value *_dst_val, 4156 MAYBE_UNUSED unsigned num_components, 4157 UNUSED unsigned bit_size, 4158 MAYBE_UNUSED nir_const_value **_src) 4159{ 4160 4161 4162 4163 4164 const struct float32_vec src0 = { 4165 _src[0][0].f32, 4166 _src[0][1].f32, 4167 _src[0][2].f32, 4168 0, 4169 }; 4170 4171 struct float32_vec dst; 4172 4173 4174float absX = fabs(src0.x); 4175float absY = fabs(src0.y); 4176float absZ = fabs(src0.z); 4177if (src0.x >= 0 && absX >= absY && absX >= absZ) dst.x = 0; 4178if (src0.x < 0 && absX >= absY && absX >= absZ) dst.x = 1; 4179if (src0.y >= 0 && absY >= absX && absY >= absZ) dst.x = 2; 4180if (src0.y < 0 && absY >= absX && absY >= absZ) dst.x = 3; 4181if (src0.z >= 0 && absZ >= absX && absZ >= absY) dst.x = 4; 4182if (src0.z < 0 && absZ >= absX && absZ >= absY) dst.x = 5; 4183 4184 4185 _dst_val[0].f32 = dst.x; 4186 4187} 4188static void 4189evaluate_extract_i16(nir_const_value *_dst_val, 4190 MAYBE_UNUSED unsigned num_components, 4191 unsigned bit_size, 4192 MAYBE_UNUSED nir_const_value **_src) 4193{ 4194 switch (bit_size) { 4195 case 1: { 4196 4197 4198 4199 4200 for (unsigned _i = 0; _i < num_components; _i++) { 4201 /* 1-bit integers use a 0/-1 convention */ 4202 const int1_t src0 = -(int1_t)_src[0][_i].b; 4203 /* 1-bit integers use a 0/-1 convention */ 4204 const int1_t src1 = -(int1_t)_src[1][_i].b; 4205 4206 int1_t dst = (int16_t)(src0 >> (src1 * 16)); 4207 4208 /* 1-bit integers get truncated */ 4209 _dst_val[_i].b = dst & 1; 4210 } 4211 4212 break; 
4213 } 4214 case 8: { 4215 4216 4217 4218 4219 for (unsigned _i = 0; _i < num_components; _i++) { 4220 const int8_t src0 = 4221 _src[0][_i].i8; 4222 const int8_t src1 = 4223 _src[1][_i].i8; 4224 4225 int8_t dst = (int16_t)(src0 >> (src1 * 16)); 4226 4227 _dst_val[_i].i8 = dst; 4228 } 4229 4230 break; 4231 } 4232 case 16: { 4233 4234 4235 4236 4237 for (unsigned _i = 0; _i < num_components; _i++) { 4238 const int16_t src0 = 4239 _src[0][_i].i16; 4240 const int16_t src1 = 4241 _src[1][_i].i16; 4242 4243 int16_t dst = (int16_t)(src0 >> (src1 * 16)); 4244 4245 _dst_val[_i].i16 = dst; 4246 } 4247 4248 break; 4249 } 4250 case 32: { 4251 4252 4253 4254 4255 for (unsigned _i = 0; _i < num_components; _i++) { 4256 const int32_t src0 = 4257 _src[0][_i].i32; 4258 const int32_t src1 = 4259 _src[1][_i].i32; 4260 4261 int32_t dst = (int16_t)(src0 >> (src1 * 16)); 4262 4263 _dst_val[_i].i32 = dst; 4264 } 4265 4266 break; 4267 } 4268 case 64: { 4269 4270 4271 4272 4273 for (unsigned _i = 0; _i < num_components; _i++) { 4274 const int64_t src0 = 4275 _src[0][_i].i64; 4276 const int64_t src1 = 4277 _src[1][_i].i64; 4278 4279 int64_t dst = (int16_t)(src0 >> (src1 * 16)); 4280 4281 _dst_val[_i].i64 = dst; 4282 } 4283 4284 break; 4285 } 4286 4287 default: 4288 unreachable("unknown bit width"); 4289 } 4290} 4291static void 4292evaluate_extract_i8(nir_const_value *_dst_val, 4293 MAYBE_UNUSED unsigned num_components, 4294 unsigned bit_size, 4295 MAYBE_UNUSED nir_const_value **_src) 4296{ 4297 switch (bit_size) { 4298 case 1: { 4299 4300 4301 4302 4303 for (unsigned _i = 0; _i < num_components; _i++) { 4304 /* 1-bit integers use a 0/-1 convention */ 4305 const int1_t src0 = -(int1_t)_src[0][_i].b; 4306 /* 1-bit integers use a 0/-1 convention */ 4307 const int1_t src1 = -(int1_t)_src[1][_i].b; 4308 4309 int1_t dst = (int8_t)(src0 >> (src1 * 8)); 4310 4311 /* 1-bit integers get truncated */ 4312 _dst_val[_i].b = dst & 1; 4313 } 4314 4315 break; 4316 } 4317 case 8: { 4318 4319 4320 4321 4322 
for (unsigned _i = 0; _i < num_components; _i++) { 4323 const int8_t src0 = 4324 _src[0][_i].i8; 4325 const int8_t src1 = 4326 _src[1][_i].i8; 4327 4328 int8_t dst = (int8_t)(src0 >> (src1 * 8)); 4329 4330 _dst_val[_i].i8 = dst; 4331 } 4332 4333 break; 4334 } 4335 case 16: { 4336 4337 4338 4339 4340 for (unsigned _i = 0; _i < num_components; _i++) { 4341 const int16_t src0 = 4342 _src[0][_i].i16; 4343 const int16_t src1 = 4344 _src[1][_i].i16; 4345 4346 int16_t dst = (int8_t)(src0 >> (src1 * 8)); 4347 4348 _dst_val[_i].i16 = dst; 4349 } 4350 4351 break; 4352 } 4353 case 32: { 4354 4355 4356 4357 4358 for (unsigned _i = 0; _i < num_components; _i++) { 4359 const int32_t src0 = 4360 _src[0][_i].i32; 4361 const int32_t src1 = 4362 _src[1][_i].i32; 4363 4364 int32_t dst = (int8_t)(src0 >> (src1 * 8)); 4365 4366 _dst_val[_i].i32 = dst; 4367 } 4368 4369 break; 4370 } 4371 case 64: { 4372 4373 4374 4375 4376 for (unsigned _i = 0; _i < num_components; _i++) { 4377 const int64_t src0 = 4378 _src[0][_i].i64; 4379 const int64_t src1 = 4380 _src[1][_i].i64; 4381 4382 int64_t dst = (int8_t)(src0 >> (src1 * 8)); 4383 4384 _dst_val[_i].i64 = dst; 4385 } 4386 4387 break; 4388 } 4389 4390 default: 4391 unreachable("unknown bit width"); 4392 } 4393} 4394static void 4395evaluate_extract_u16(nir_const_value *_dst_val, 4396 MAYBE_UNUSED unsigned num_components, 4397 unsigned bit_size, 4398 MAYBE_UNUSED nir_const_value **_src) 4399{ 4400 switch (bit_size) { 4401 case 1: { 4402 4403 4404 4405 4406 for (unsigned _i = 0; _i < num_components; _i++) { 4407 const uint1_t src0 = 4408 _src[0][_i].b; 4409 const uint1_t src1 = 4410 _src[1][_i].b; 4411 4412 uint1_t dst = (uint16_t)(src0 >> (src1 * 16)); 4413 4414 /* 1-bit integers get truncated */ 4415 _dst_val[_i].b = dst & 1; 4416 } 4417 4418 break; 4419 } 4420 case 8: { 4421 4422 4423 4424 4425 for (unsigned _i = 0; _i < num_components; _i++) { 4426 const uint8_t src0 = 4427 _src[0][_i].u8; 4428 const uint8_t src1 = 4429 _src[1][_i].u8; 4430 
4431 uint8_t dst = (uint16_t)(src0 >> (src1 * 16)); 4432 4433 _dst_val[_i].u8 = dst; 4434 } 4435 4436 break; 4437 } 4438 case 16: { 4439 4440 4441 4442 4443 for (unsigned _i = 0; _i < num_components; _i++) { 4444 const uint16_t src0 = 4445 _src[0][_i].u16; 4446 const uint16_t src1 = 4447 _src[1][_i].u16; 4448 4449 uint16_t dst = (uint16_t)(src0 >> (src1 * 16)); 4450 4451 _dst_val[_i].u16 = dst; 4452 } 4453 4454 break; 4455 } 4456 case 32: { 4457 4458 4459 4460 4461 for (unsigned _i = 0; _i < num_components; _i++) { 4462 const uint32_t src0 = 4463 _src[0][_i].u32; 4464 const uint32_t src1 = 4465 _src[1][_i].u32; 4466 4467 uint32_t dst = (uint16_t)(src0 >> (src1 * 16)); 4468 4469 _dst_val[_i].u32 = dst; 4470 } 4471 4472 break; 4473 } 4474 case 64: { 4475 4476 4477 4478 4479 for (unsigned _i = 0; _i < num_components; _i++) { 4480 const uint64_t src0 = 4481 _src[0][_i].u64; 4482 const uint64_t src1 = 4483 _src[1][_i].u64; 4484 4485 uint64_t dst = (uint16_t)(src0 >> (src1 * 16)); 4486 4487 _dst_val[_i].u64 = dst; 4488 } 4489 4490 break; 4491 } 4492 4493 default: 4494 unreachable("unknown bit width"); 4495 } 4496} 4497static void 4498evaluate_extract_u8(nir_const_value *_dst_val, 4499 MAYBE_UNUSED unsigned num_components, 4500 unsigned bit_size, 4501 MAYBE_UNUSED nir_const_value **_src) 4502{ 4503 switch (bit_size) { 4504 case 1: { 4505 4506 4507 4508 4509 for (unsigned _i = 0; _i < num_components; _i++) { 4510 const uint1_t src0 = 4511 _src[0][_i].b; 4512 const uint1_t src1 = 4513 _src[1][_i].b; 4514 4515 uint1_t dst = (uint8_t)(src0 >> (src1 * 8)); 4516 4517 /* 1-bit integers get truncated */ 4518 _dst_val[_i].b = dst & 1; 4519 } 4520 4521 break; 4522 } 4523 case 8: { 4524 4525 4526 4527 4528 for (unsigned _i = 0; _i < num_components; _i++) { 4529 const uint8_t src0 = 4530 _src[0][_i].u8; 4531 const uint8_t src1 = 4532 _src[1][_i].u8; 4533 4534 uint8_t dst = (uint8_t)(src0 >> (src1 * 8)); 4535 4536 _dst_val[_i].u8 = dst; 4537 } 4538 4539 break; 4540 } 4541 case 16: { 
4542 4543 4544 4545 4546 for (unsigned _i = 0; _i < num_components; _i++) { 4547 const uint16_t src0 = 4548 _src[0][_i].u16; 4549 const uint16_t src1 = 4550 _src[1][_i].u16; 4551 4552 uint16_t dst = (uint8_t)(src0 >> (src1 * 8)); 4553 4554 _dst_val[_i].u16 = dst; 4555 } 4556 4557 break; 4558 } 4559 case 32: { 4560 4561 4562 4563 4564 for (unsigned _i = 0; _i < num_components; _i++) { 4565 const uint32_t src0 = 4566 _src[0][_i].u32; 4567 const uint32_t src1 = 4568 _src[1][_i].u32; 4569 4570 uint32_t dst = (uint8_t)(src0 >> (src1 * 8)); 4571 4572 _dst_val[_i].u32 = dst; 4573 } 4574 4575 break; 4576 } 4577 case 64: { 4578 4579 4580 4581 4582 for (unsigned _i = 0; _i < num_components; _i++) { 4583 const uint64_t src0 = 4584 _src[0][_i].u64; 4585 const uint64_t src1 = 4586 _src[1][_i].u64; 4587 4588 uint64_t dst = (uint8_t)(src0 >> (src1 * 8)); 4589 4590 _dst_val[_i].u64 = dst; 4591 } 4592 4593 break; 4594 } 4595 4596 default: 4597 unreachable("unknown bit width"); 4598 } 4599} 4600static void 4601evaluate_f2b1(nir_const_value *_dst_val, 4602 MAYBE_UNUSED unsigned num_components, 4603 unsigned bit_size, 4604 MAYBE_UNUSED nir_const_value **_src) 4605{ 4606 switch (bit_size) { 4607 case 16: { 4608 4609 4610 4611 4612 for (unsigned _i = 0; _i < num_components; _i++) { 4613 const float src0 = 4614 _mesa_half_to_float(_src[0][_i].u16); 4615 4616 bool1_t dst = src0 != 0; 4617 4618 _dst_val[_i].b = -(int)dst; 4619 } 4620 4621 break; 4622 } 4623 case 32: { 4624 4625 4626 4627 4628 for (unsigned _i = 0; _i < num_components; _i++) { 4629 const float32_t src0 = 4630 _src[0][_i].f32; 4631 4632 bool1_t dst = src0 != 0; 4633 4634 _dst_val[_i].b = -(int)dst; 4635 } 4636 4637 break; 4638 } 4639 case 64: { 4640 4641 4642 4643 4644 for (unsigned _i = 0; _i < num_components; _i++) { 4645 const float64_t src0 = 4646 _src[0][_i].f64; 4647 4648 bool1_t dst = src0 != 0; 4649 4650 _dst_val[_i].b = -(int)dst; 4651 } 4652 4653 break; 4654 } 4655 4656 default: 4657 unreachable("unknown bit 
width"); 4658 } 4659} 4660static void 4661evaluate_f2b32(nir_const_value *_dst_val, 4662 MAYBE_UNUSED unsigned num_components, 4663 unsigned bit_size, 4664 MAYBE_UNUSED nir_const_value **_src) 4665{ 4666 switch (bit_size) { 4667 case 16: { 4668 4669 4670 4671 4672 for (unsigned _i = 0; _i < num_components; _i++) { 4673 const float src0 = 4674 _mesa_half_to_float(_src[0][_i].u16); 4675 4676 bool32_t dst = src0 != 0; 4677 4678 _dst_val[_i].i32 = -(int)dst; 4679 } 4680 4681 break; 4682 } 4683 case 32: { 4684 4685 4686 4687 4688 for (unsigned _i = 0; _i < num_components; _i++) { 4689 const float32_t src0 = 4690 _src[0][_i].f32; 4691 4692 bool32_t dst = src0 != 0; 4693 4694 _dst_val[_i].i32 = -(int)dst; 4695 } 4696 4697 break; 4698 } 4699 case 64: { 4700 4701 4702 4703 4704 for (unsigned _i = 0; _i < num_components; _i++) { 4705 const float64_t src0 = 4706 _src[0][_i].f64; 4707 4708 bool32_t dst = src0 != 0; 4709 4710 _dst_val[_i].i32 = -(int)dst; 4711 } 4712 4713 break; 4714 } 4715 4716 default: 4717 unreachable("unknown bit width"); 4718 } 4719} 4720static void 4721evaluate_f2f16(nir_const_value *_dst_val, 4722 MAYBE_UNUSED unsigned num_components, 4723 unsigned bit_size, 4724 MAYBE_UNUSED nir_const_value **_src) 4725{ 4726 switch (bit_size) { 4727 case 16: { 4728 4729 4730 4731 4732 for (unsigned _i = 0; _i < num_components; _i++) { 4733 const float src0 = 4734 _mesa_half_to_float(_src[0][_i].u16); 4735 4736 float16_t dst = src0; 4737 4738 _dst_val[_i].u16 = _mesa_float_to_half(dst); 4739 } 4740 4741 break; 4742 } 4743 case 32: { 4744 4745 4746 4747 4748 for (unsigned _i = 0; _i < num_components; _i++) { 4749 const float32_t src0 = 4750 _src[0][_i].f32; 4751 4752 float16_t dst = src0; 4753 4754 _dst_val[_i].u16 = _mesa_float_to_half(dst); 4755 } 4756 4757 break; 4758 } 4759 case 64: { 4760 4761 4762 4763 4764 for (unsigned _i = 0; _i < num_components; _i++) { 4765 const float64_t src0 = 4766 _src[0][_i].f64; 4767 4768 float16_t dst = src0; 4769 4770 _dst_val[_i].u16 
= _mesa_float_to_half(dst); 4771 } 4772 4773 break; 4774 } 4775 4776 default: 4777 unreachable("unknown bit width"); 4778 } 4779} 4780static void 4781evaluate_f2f16_rtne(nir_const_value *_dst_val, 4782 MAYBE_UNUSED unsigned num_components, 4783 unsigned bit_size, 4784 MAYBE_UNUSED nir_const_value **_src) 4785{ 4786 switch (bit_size) { 4787 case 16: { 4788 4789 4790 4791 4792 for (unsigned _i = 0; _i < num_components; _i++) { 4793 const float src0 = 4794 _mesa_half_to_float(_src[0][_i].u16); 4795 4796 float16_t dst = src0; 4797 4798 _dst_val[_i].u16 = _mesa_float_to_half(dst); 4799 } 4800 4801 break; 4802 } 4803 case 32: { 4804 4805 4806 4807 4808 for (unsigned _i = 0; _i < num_components; _i++) { 4809 const float32_t src0 = 4810 _src[0][_i].f32; 4811 4812 float16_t dst = src0; 4813 4814 _dst_val[_i].u16 = _mesa_float_to_half(dst); 4815 } 4816 4817 break; 4818 } 4819 case 64: { 4820 4821 4822 4823 4824 for (unsigned _i = 0; _i < num_components; _i++) { 4825 const float64_t src0 = 4826 _src[0][_i].f64; 4827 4828 float16_t dst = src0; 4829 4830 _dst_val[_i].u16 = _mesa_float_to_half(dst); 4831 } 4832 4833 break; 4834 } 4835 4836 default: 4837 unreachable("unknown bit width"); 4838 } 4839} 4840static void 4841evaluate_f2f16_rtz(nir_const_value *_dst_val, 4842 MAYBE_UNUSED unsigned num_components, 4843 unsigned bit_size, 4844 MAYBE_UNUSED nir_const_value **_src) 4845{ 4846 switch (bit_size) { 4847 case 16: { 4848 4849 4850 4851 4852 for (unsigned _i = 0; _i < num_components; _i++) { 4853 const float src0 = 4854 _mesa_half_to_float(_src[0][_i].u16); 4855 4856 float16_t dst = src0; 4857 4858 _dst_val[_i].u16 = _mesa_float_to_half(dst); 4859 } 4860 4861 break; 4862 } 4863 case 32: { 4864 4865 4866 4867 4868 for (unsigned _i = 0; _i < num_components; _i++) { 4869 const float32_t src0 = 4870 _src[0][_i].f32; 4871 4872 float16_t dst = src0; 4873 4874 _dst_val[_i].u16 = _mesa_float_to_half(dst); 4875 } 4876 4877 break; 4878 } 4879 case 64: { 4880 4881 4882 4883 4884 for 
(unsigned _i = 0; _i < num_components; _i++) { 4885 const float64_t src0 = 4886 _src[0][_i].f64; 4887 4888 float16_t dst = src0; 4889 4890 _dst_val[_i].u16 = _mesa_float_to_half(dst); 4891 } 4892 4893 break; 4894 } 4895 4896 default: 4897 unreachable("unknown bit width"); 4898 } 4899} 4900static void 4901evaluate_f2f32(nir_const_value *_dst_val, 4902 MAYBE_UNUSED unsigned num_components, 4903 unsigned bit_size, 4904 MAYBE_UNUSED nir_const_value **_src) 4905{ 4906 switch (bit_size) { 4907 case 16: { 4908 4909 4910 4911 4912 for (unsigned _i = 0; _i < num_components; _i++) { 4913 const float src0 = 4914 _mesa_half_to_float(_src[0][_i].u16); 4915 4916 float32_t dst = src0; 4917 4918 _dst_val[_i].f32 = dst; 4919 } 4920 4921 break; 4922 } 4923 case 32: { 4924 4925 4926 4927 4928 for (unsigned _i = 0; _i < num_components; _i++) { 4929 const float32_t src0 = 4930 _src[0][_i].f32; 4931 4932 float32_t dst = src0; 4933 4934 _dst_val[_i].f32 = dst; 4935 } 4936 4937 break; 4938 } 4939 case 64: { 4940 4941 4942 4943 4944 for (unsigned _i = 0; _i < num_components; _i++) { 4945 const float64_t src0 = 4946 _src[0][_i].f64; 4947 4948 float32_t dst = src0; 4949 4950 _dst_val[_i].f32 = dst; 4951 } 4952 4953 break; 4954 } 4955 4956 default: 4957 unreachable("unknown bit width"); 4958 } 4959} 4960static void 4961evaluate_f2f64(nir_const_value *_dst_val, 4962 MAYBE_UNUSED unsigned num_components, 4963 unsigned bit_size, 4964 MAYBE_UNUSED nir_const_value **_src) 4965{ 4966 switch (bit_size) { 4967 case 16: { 4968 4969 4970 4971 4972 for (unsigned _i = 0; _i < num_components; _i++) { 4973 const float src0 = 4974 _mesa_half_to_float(_src[0][_i].u16); 4975 4976 float64_t dst = src0; 4977 4978 _dst_val[_i].f64 = dst; 4979 } 4980 4981 break; 4982 } 4983 case 32: { 4984 4985 4986 4987 4988 for (unsigned _i = 0; _i < num_components; _i++) { 4989 const float32_t src0 = 4990 _src[0][_i].f32; 4991 4992 float64_t dst = src0; 4993 4994 _dst_val[_i].f64 = dst; 4995 } 4996 4997 break; 4998 } 4999 case 
64: { 5000 5001 5002 5003 5004 for (unsigned _i = 0; _i < num_components; _i++) { 5005 const float64_t src0 = 5006 _src[0][_i].f64; 5007 5008 float64_t dst = src0; 5009 5010 _dst_val[_i].f64 = dst; 5011 } 5012 5013 break; 5014 } 5015 5016 default: 5017 unreachable("unknown bit width"); 5018 } 5019} 5020static void 5021evaluate_f2i1(nir_const_value *_dst_val, 5022 MAYBE_UNUSED unsigned num_components, 5023 unsigned bit_size, 5024 MAYBE_UNUSED nir_const_value **_src) 5025{ 5026 switch (bit_size) { 5027 case 16: { 5028 5029 5030 5031 5032 for (unsigned _i = 0; _i < num_components; _i++) { 5033 const float src0 = 5034 _mesa_half_to_float(_src[0][_i].u16); 5035 5036 int1_t dst = src0; 5037 5038 /* 1-bit integers get truncated */ 5039 _dst_val[_i].b = dst & 1; 5040 } 5041 5042 break; 5043 } 5044 case 32: { 5045 5046 5047 5048 5049 for (unsigned _i = 0; _i < num_components; _i++) { 5050 const float32_t src0 = 5051 _src[0][_i].f32; 5052 5053 int1_t dst = src0; 5054 5055 /* 1-bit integers get truncated */ 5056 _dst_val[_i].b = dst & 1; 5057 } 5058 5059 break; 5060 } 5061 case 64: { 5062 5063 5064 5065 5066 for (unsigned _i = 0; _i < num_components; _i++) { 5067 const float64_t src0 = 5068 _src[0][_i].f64; 5069 5070 int1_t dst = src0; 5071 5072 /* 1-bit integers get truncated */ 5073 _dst_val[_i].b = dst & 1; 5074 } 5075 5076 break; 5077 } 5078 5079 default: 5080 unreachable("unknown bit width"); 5081 } 5082} 5083static void 5084evaluate_f2i16(nir_const_value *_dst_val, 5085 MAYBE_UNUSED unsigned num_components, 5086 unsigned bit_size, 5087 MAYBE_UNUSED nir_const_value **_src) 5088{ 5089 switch (bit_size) { 5090 case 16: { 5091 5092 5093 5094 5095 for (unsigned _i = 0; _i < num_components; _i++) { 5096 const float src0 = 5097 _mesa_half_to_float(_src[0][_i].u16); 5098 5099 int16_t dst = src0; 5100 5101 _dst_val[_i].i16 = dst; 5102 } 5103 5104 break; 5105 } 5106 case 32: { 5107 5108 5109 5110 5111 for (unsigned _i = 0; _i < num_components; _i++) { 5112 const float32_t src0 = 
5113 _src[0][_i].f32; 5114 5115 int16_t dst = src0; 5116 5117 _dst_val[_i].i16 = dst; 5118 } 5119 5120 break; 5121 } 5122 case 64: { 5123 5124 5125 5126 5127 for (unsigned _i = 0; _i < num_components; _i++) { 5128 const float64_t src0 = 5129 _src[0][_i].f64; 5130 5131 int16_t dst = src0; 5132 5133 _dst_val[_i].i16 = dst; 5134 } 5135 5136 break; 5137 } 5138 5139 default: 5140 unreachable("unknown bit width"); 5141 } 5142} 5143static void 5144evaluate_f2i32(nir_const_value *_dst_val, 5145 MAYBE_UNUSED unsigned num_components, 5146 unsigned bit_size, 5147 MAYBE_UNUSED nir_const_value **_src) 5148{ 5149 switch (bit_size) { 5150 case 16: { 5151 5152 5153 5154 5155 for (unsigned _i = 0; _i < num_components; _i++) { 5156 const float src0 = 5157 _mesa_half_to_float(_src[0][_i].u16); 5158 5159 int32_t dst = src0; 5160 5161 _dst_val[_i].i32 = dst; 5162 } 5163 5164 break; 5165 } 5166 case 32: { 5167 5168 5169 5170 5171 for (unsigned _i = 0; _i < num_components; _i++) { 5172 const float32_t src0 = 5173 _src[0][_i].f32; 5174 5175 int32_t dst = src0; 5176 5177 _dst_val[_i].i32 = dst; 5178 } 5179 5180 break; 5181 } 5182 case 64: { 5183 5184 5185 5186 5187 for (unsigned _i = 0; _i < num_components; _i++) { 5188 const float64_t src0 = 5189 _src[0][_i].f64; 5190 5191 int32_t dst = src0; 5192 5193 _dst_val[_i].i32 = dst; 5194 } 5195 5196 break; 5197 } 5198 5199 default: 5200 unreachable("unknown bit width"); 5201 } 5202} 5203static void 5204evaluate_f2i64(nir_const_value *_dst_val, 5205 MAYBE_UNUSED unsigned num_components, 5206 unsigned bit_size, 5207 MAYBE_UNUSED nir_const_value **_src) 5208{ 5209 switch (bit_size) { 5210 case 16: { 5211 5212 5213 5214 5215 for (unsigned _i = 0; _i < num_components; _i++) { 5216 const float src0 = 5217 _mesa_half_to_float(_src[0][_i].u16); 5218 5219 int64_t dst = src0; 5220 5221 _dst_val[_i].i64 = dst; 5222 } 5223 5224 break; 5225 } 5226 case 32: { 5227 5228 5229 5230 5231 for (unsigned _i = 0; _i < num_components; _i++) { 5232 const float32_t src0 
= 5233 _src[0][_i].f32; 5234 5235 int64_t dst = src0; 5236 5237 _dst_val[_i].i64 = dst; 5238 } 5239 5240 break; 5241 } 5242 case 64: { 5243 5244 5245 5246 5247 for (unsigned _i = 0; _i < num_components; _i++) { 5248 const float64_t src0 = 5249 _src[0][_i].f64; 5250 5251 int64_t dst = src0; 5252 5253 _dst_val[_i].i64 = dst; 5254 } 5255 5256 break; 5257 } 5258 5259 default: 5260 unreachable("unknown bit width"); 5261 } 5262} 5263static void 5264evaluate_f2i8(nir_const_value *_dst_val, 5265 MAYBE_UNUSED unsigned num_components, 5266 unsigned bit_size, 5267 MAYBE_UNUSED nir_const_value **_src) 5268{ 5269 switch (bit_size) { 5270 case 16: { 5271 5272 5273 5274 5275 for (unsigned _i = 0; _i < num_components; _i++) { 5276 const float src0 = 5277 _mesa_half_to_float(_src[0][_i].u16); 5278 5279 int8_t dst = src0; 5280 5281 _dst_val[_i].i8 = dst; 5282 } 5283 5284 break; 5285 } 5286 case 32: { 5287 5288 5289 5290 5291 for (unsigned _i = 0; _i < num_components; _i++) { 5292 const float32_t src0 = 5293 _src[0][_i].f32; 5294 5295 int8_t dst = src0; 5296 5297 _dst_val[_i].i8 = dst; 5298 } 5299 5300 break; 5301 } 5302 case 64: { 5303 5304 5305 5306 5307 for (unsigned _i = 0; _i < num_components; _i++) { 5308 const float64_t src0 = 5309 _src[0][_i].f64; 5310 5311 int8_t dst = src0; 5312 5313 _dst_val[_i].i8 = dst; 5314 } 5315 5316 break; 5317 } 5318 5319 default: 5320 unreachable("unknown bit width"); 5321 } 5322} 5323static void 5324evaluate_f2u1(nir_const_value *_dst_val, 5325 MAYBE_UNUSED unsigned num_components, 5326 unsigned bit_size, 5327 MAYBE_UNUSED nir_const_value **_src) 5328{ 5329 switch (bit_size) { 5330 case 16: { 5331 5332 5333 5334 5335 for (unsigned _i = 0; _i < num_components; _i++) { 5336 const float src0 = 5337 _mesa_half_to_float(_src[0][_i].u16); 5338 5339 uint1_t dst = src0; 5340 5341 /* 1-bit integers get truncated */ 5342 _dst_val[_i].b = dst & 1; 5343 } 5344 5345 break; 5346 } 5347 case 32: { 5348 5349 5350 5351 5352 for (unsigned _i = 0; _i < 
num_components; _i++) { 5353 const float32_t src0 = 5354 _src[0][_i].f32; 5355 5356 uint1_t dst = src0; 5357 5358 /* 1-bit integers get truncated */ 5359 _dst_val[_i].b = dst & 1; 5360 } 5361 5362 break; 5363 } 5364 case 64: { 5365 5366 5367 5368 5369 for (unsigned _i = 0; _i < num_components; _i++) { 5370 const float64_t src0 = 5371 _src[0][_i].f64; 5372 5373 uint1_t dst = src0; 5374 5375 /* 1-bit integers get truncated */ 5376 _dst_val[_i].b = dst & 1; 5377 } 5378 5379 break; 5380 } 5381 5382 default: 5383 unreachable("unknown bit width"); 5384 } 5385} 5386static void 5387evaluate_f2u16(nir_const_value *_dst_val, 5388 MAYBE_UNUSED unsigned num_components, 5389 unsigned bit_size, 5390 MAYBE_UNUSED nir_const_value **_src) 5391{ 5392 switch (bit_size) { 5393 case 16: { 5394 5395 5396 5397 5398 for (unsigned _i = 0; _i < num_components; _i++) { 5399 const float src0 = 5400 _mesa_half_to_float(_src[0][_i].u16); 5401 5402 uint16_t dst = src0; 5403 5404 _dst_val[_i].u16 = dst; 5405 } 5406 5407 break; 5408 } 5409 case 32: { 5410 5411 5412 5413 5414 for (unsigned _i = 0; _i < num_components; _i++) { 5415 const float32_t src0 = 5416 _src[0][_i].f32; 5417 5418 uint16_t dst = src0; 5419 5420 _dst_val[_i].u16 = dst; 5421 } 5422 5423 break; 5424 } 5425 case 64: { 5426 5427 5428 5429 5430 for (unsigned _i = 0; _i < num_components; _i++) { 5431 const float64_t src0 = 5432 _src[0][_i].f64; 5433 5434 uint16_t dst = src0; 5435 5436 _dst_val[_i].u16 = dst; 5437 } 5438 5439 break; 5440 } 5441 5442 default: 5443 unreachable("unknown bit width"); 5444 } 5445} 5446static void 5447evaluate_f2u32(nir_const_value *_dst_val, 5448 MAYBE_UNUSED unsigned num_components, 5449 unsigned bit_size, 5450 MAYBE_UNUSED nir_const_value **_src) 5451{ 5452 switch (bit_size) { 5453 case 16: { 5454 5455 5456 5457 5458 for (unsigned _i = 0; _i < num_components; _i++) { 5459 const float src0 = 5460 _mesa_half_to_float(_src[0][_i].u16); 5461 5462 uint32_t dst = src0; 5463 5464 _dst_val[_i].u32 = dst; 5465 } 
5466 5467 break; 5468 } 5469 case 32: { 5470 5471 5472 5473 5474 for (unsigned _i = 0; _i < num_components; _i++) { 5475 const float32_t src0 = 5476 _src[0][_i].f32; 5477 5478 uint32_t dst = src0; 5479 5480 _dst_val[_i].u32 = dst; 5481 } 5482 5483 break; 5484 } 5485 case 64: { 5486 5487 5488 5489 5490 for (unsigned _i = 0; _i < num_components; _i++) { 5491 const float64_t src0 = 5492 _src[0][_i].f64; 5493 5494 uint32_t dst = src0; 5495 5496 _dst_val[_i].u32 = dst; 5497 } 5498 5499 break; 5500 } 5501 5502 default: 5503 unreachable("unknown bit width"); 5504 } 5505} 5506static void 5507evaluate_f2u64(nir_const_value *_dst_val, 5508 MAYBE_UNUSED unsigned num_components, 5509 unsigned bit_size, 5510 MAYBE_UNUSED nir_const_value **_src) 5511{ 5512 switch (bit_size) { 5513 case 16: { 5514 5515 5516 5517 5518 for (unsigned _i = 0; _i < num_components; _i++) { 5519 const float src0 = 5520 _mesa_half_to_float(_src[0][_i].u16); 5521 5522 uint64_t dst = src0; 5523 5524 _dst_val[_i].u64 = dst; 5525 } 5526 5527 break; 5528 } 5529 case 32: { 5530 5531 5532 5533 5534 for (unsigned _i = 0; _i < num_components; _i++) { 5535 const float32_t src0 = 5536 _src[0][_i].f32; 5537 5538 uint64_t dst = src0; 5539 5540 _dst_val[_i].u64 = dst; 5541 } 5542 5543 break; 5544 } 5545 case 64: { 5546 5547 5548 5549 5550 for (unsigned _i = 0; _i < num_components; _i++) { 5551 const float64_t src0 = 5552 _src[0][_i].f64; 5553 5554 uint64_t dst = src0; 5555 5556 _dst_val[_i].u64 = dst; 5557 } 5558 5559 break; 5560 } 5561 5562 default: 5563 unreachable("unknown bit width"); 5564 } 5565} 5566static void 5567evaluate_f2u8(nir_const_value *_dst_val, 5568 MAYBE_UNUSED unsigned num_components, 5569 unsigned bit_size, 5570 MAYBE_UNUSED nir_const_value **_src) 5571{ 5572 switch (bit_size) { 5573 case 16: { 5574 5575 5576 5577 5578 for (unsigned _i = 0; _i < num_components; _i++) { 5579 const float src0 = 5580 _mesa_half_to_float(_src[0][_i].u16); 5581 5582 uint8_t dst = src0; 5583 5584 _dst_val[_i].u8 = dst; 
5585 } 5586 5587 break; 5588 } 5589 case 32: { 5590 5591 5592 5593 5594 for (unsigned _i = 0; _i < num_components; _i++) { 5595 const float32_t src0 = 5596 _src[0][_i].f32; 5597 5598 uint8_t dst = src0; 5599 5600 _dst_val[_i].u8 = dst; 5601 } 5602 5603 break; 5604 } 5605 case 64: { 5606 5607 5608 5609 5610 for (unsigned _i = 0; _i < num_components; _i++) { 5611 const float64_t src0 = 5612 _src[0][_i].f64; 5613 5614 uint8_t dst = src0; 5615 5616 _dst_val[_i].u8 = dst; 5617 } 5618 5619 break; 5620 } 5621 5622 default: 5623 unreachable("unknown bit width"); 5624 } 5625} 5626static void 5627evaluate_fabs(nir_const_value *_dst_val, 5628 MAYBE_UNUSED unsigned num_components, 5629 unsigned bit_size, 5630 MAYBE_UNUSED nir_const_value **_src) 5631{ 5632 switch (bit_size) { 5633 case 16: { 5634 5635 5636 5637 5638 for (unsigned _i = 0; _i < num_components; _i++) { 5639 const float src0 = 5640 _mesa_half_to_float(_src[0][_i].u16); 5641 5642 float16_t dst = fabs(src0); 5643 5644 _dst_val[_i].u16 = _mesa_float_to_half(dst); 5645 } 5646 5647 break; 5648 } 5649 case 32: { 5650 5651 5652 5653 5654 for (unsigned _i = 0; _i < num_components; _i++) { 5655 const float32_t src0 = 5656 _src[0][_i].f32; 5657 5658 float32_t dst = fabs(src0); 5659 5660 _dst_val[_i].f32 = dst; 5661 } 5662 5663 break; 5664 } 5665 case 64: { 5666 5667 5668 5669 5670 for (unsigned _i = 0; _i < num_components; _i++) { 5671 const float64_t src0 = 5672 _src[0][_i].f64; 5673 5674 float64_t dst = fabs(src0); 5675 5676 _dst_val[_i].f64 = dst; 5677 } 5678 5679 break; 5680 } 5681 5682 default: 5683 unreachable("unknown bit width"); 5684 } 5685} 5686static void 5687evaluate_fadd(nir_const_value *_dst_val, 5688 MAYBE_UNUSED unsigned num_components, 5689 unsigned bit_size, 5690 MAYBE_UNUSED nir_const_value **_src) 5691{ 5692 switch (bit_size) { 5693 case 16: { 5694 5695 5696 5697 5698 for (unsigned _i = 0; _i < num_components; _i++) { 5699 const float src0 = 5700 _mesa_half_to_float(_src[0][_i].u16); 5701 const float 
src1 = 5702 _mesa_half_to_float(_src[1][_i].u16); 5703 5704 float16_t dst = src0 + src1; 5705 5706 _dst_val[_i].u16 = _mesa_float_to_half(dst); 5707 } 5708 5709 break; 5710 } 5711 case 32: { 5712 5713 5714 5715 5716 for (unsigned _i = 0; _i < num_components; _i++) { 5717 const float32_t src0 = 5718 _src[0][_i].f32; 5719 const float32_t src1 = 5720 _src[1][_i].f32; 5721 5722 float32_t dst = src0 + src1; 5723 5724 _dst_val[_i].f32 = dst; 5725 } 5726 5727 break; 5728 } 5729 case 64: { 5730 5731 5732 5733 5734 for (unsigned _i = 0; _i < num_components; _i++) { 5735 const float64_t src0 = 5736 _src[0][_i].f64; 5737 const float64_t src1 = 5738 _src[1][_i].f64; 5739 5740 float64_t dst = src0 + src1; 5741 5742 _dst_val[_i].f64 = dst; 5743 } 5744 5745 break; 5746 } 5747 5748 default: 5749 unreachable("unknown bit width"); 5750 } 5751} 5752static void 5753evaluate_fall_equal2(nir_const_value *_dst_val, 5754 MAYBE_UNUSED unsigned num_components, 5755 UNUSED unsigned bit_size, 5756 MAYBE_UNUSED nir_const_value **_src) 5757{ 5758 5759 5760 5761 5762 const struct float32_vec src0 = { 5763 _src[0][0].f32, 5764 _src[0][1].f32, 5765 0, 5766 0, 5767 }; 5768 5769 const struct float32_vec src1 = { 5770 _src[1][0].f32, 5771 _src[1][1].f32, 5772 0, 5773 0, 5774 }; 5775 5776 struct float32_vec dst; 5777 5778 dst.x = dst.y = dst.z = dst.w = ((src0.x == src1.x) && (src0.y == src1.y)) ? 
1.0f : 0.0f; 5779 5780 _dst_val[0].f32 = dst.x; 5781 5782} 5783static void 5784evaluate_fall_equal3(nir_const_value *_dst_val, 5785 MAYBE_UNUSED unsigned num_components, 5786 UNUSED unsigned bit_size, 5787 MAYBE_UNUSED nir_const_value **_src) 5788{ 5789 5790 5791 5792 5793 const struct float32_vec src0 = { 5794 _src[0][0].f32, 5795 _src[0][1].f32, 5796 _src[0][2].f32, 5797 0, 5798 }; 5799 5800 const struct float32_vec src1 = { 5801 _src[1][0].f32, 5802 _src[1][1].f32, 5803 _src[1][2].f32, 5804 0, 5805 }; 5806 5807 struct float32_vec dst; 5808 5809 dst.x = dst.y = dst.z = dst.w = ((src0.x == src1.x) && (src0.y == src1.y) && (src0.z == src1.z)) ? 1.0f : 0.0f; 5810 5811 _dst_val[0].f32 = dst.x; 5812 5813} 5814static void 5815evaluate_fall_equal4(nir_const_value *_dst_val, 5816 MAYBE_UNUSED unsigned num_components, 5817 UNUSED unsigned bit_size, 5818 MAYBE_UNUSED nir_const_value **_src) 5819{ 5820 5821 5822 5823 5824 const struct float32_vec src0 = { 5825 _src[0][0].f32, 5826 _src[0][1].f32, 5827 _src[0][2].f32, 5828 _src[0][3].f32, 5829 }; 5830 5831 const struct float32_vec src1 = { 5832 _src[1][0].f32, 5833 _src[1][1].f32, 5834 _src[1][2].f32, 5835 _src[1][3].f32, 5836 }; 5837 5838 struct float32_vec dst; 5839 5840 dst.x = dst.y = dst.z = dst.w = ((src0.x == src1.x) && (src0.y == src1.y) && (src0.z == src1.z) && (src0.w == src1.w)) ? 1.0f : 0.0f; 5841 5842 _dst_val[0].f32 = dst.x; 5843 5844} 5845static void 5846evaluate_fand(nir_const_value *_dst_val, 5847 MAYBE_UNUSED unsigned num_components, 5848 UNUSED unsigned bit_size, 5849 MAYBE_UNUSED nir_const_value **_src) 5850{ 5851 5852 5853 5854 5855 for (unsigned _i = 0; _i < num_components; _i++) { 5856 const float32_t src0 = 5857 _src[0][_i].f32; 5858 const float32_t src1 = 5859 _src[1][_i].f32; 5860 5861 float32_t dst = ((src0 != 0.0f) && (src1 != 0.0f)) ? 
1.0f : 0.0f; 5862 5863 _dst_val[_i].f32 = dst; 5864 } 5865 5866} 5867static void 5868evaluate_fany_nequal2(nir_const_value *_dst_val, 5869 MAYBE_UNUSED unsigned num_components, 5870 UNUSED unsigned bit_size, 5871 MAYBE_UNUSED nir_const_value **_src) 5872{ 5873 5874 5875 5876 5877 const struct float32_vec src0 = { 5878 _src[0][0].f32, 5879 _src[0][1].f32, 5880 0, 5881 0, 5882 }; 5883 5884 const struct float32_vec src1 = { 5885 _src[1][0].f32, 5886 _src[1][1].f32, 5887 0, 5888 0, 5889 }; 5890 5891 struct float32_vec dst; 5892 5893 dst.x = dst.y = dst.z = dst.w = ((src0.x != src1.x) || (src0.y != src1.y)) ? 1.0f : 0.0f; 5894 5895 _dst_val[0].f32 = dst.x; 5896 5897} 5898static void 5899evaluate_fany_nequal3(nir_const_value *_dst_val, 5900 MAYBE_UNUSED unsigned num_components, 5901 UNUSED unsigned bit_size, 5902 MAYBE_UNUSED nir_const_value **_src) 5903{ 5904 5905 5906 5907 5908 const struct float32_vec src0 = { 5909 _src[0][0].f32, 5910 _src[0][1].f32, 5911 _src[0][2].f32, 5912 0, 5913 }; 5914 5915 const struct float32_vec src1 = { 5916 _src[1][0].f32, 5917 _src[1][1].f32, 5918 _src[1][2].f32, 5919 0, 5920 }; 5921 5922 struct float32_vec dst; 5923 5924 dst.x = dst.y = dst.z = dst.w = ((src0.x != src1.x) || (src0.y != src1.y) || (src0.z != src1.z)) ? 1.0f : 0.0f; 5925 5926 _dst_val[0].f32 = dst.x; 5927 5928} 5929static void 5930evaluate_fany_nequal4(nir_const_value *_dst_val, 5931 MAYBE_UNUSED unsigned num_components, 5932 UNUSED unsigned bit_size, 5933 MAYBE_UNUSED nir_const_value **_src) 5934{ 5935 5936 5937 5938 5939 const struct float32_vec src0 = { 5940 _src[0][0].f32, 5941 _src[0][1].f32, 5942 _src[0][2].f32, 5943 _src[0][3].f32, 5944 }; 5945 5946 const struct float32_vec src1 = { 5947 _src[1][0].f32, 5948 _src[1][1].f32, 5949 _src[1][2].f32, 5950 _src[1][3].f32, 5951 }; 5952 5953 struct float32_vec dst; 5954 5955 dst.x = dst.y = dst.z = dst.w = ((src0.x != src1.x) || (src0.y != src1.y) || (src0.z != src1.z) || (src0.w != src1.w)) ? 
1.0f : 0.0f; 5956 5957 _dst_val[0].f32 = dst.x; 5958 5959} 5960static void 5961evaluate_fceil(nir_const_value *_dst_val, 5962 MAYBE_UNUSED unsigned num_components, 5963 unsigned bit_size, 5964 MAYBE_UNUSED nir_const_value **_src) 5965{ 5966 switch (bit_size) { 5967 case 16: { 5968 5969 5970 5971 5972 for (unsigned _i = 0; _i < num_components; _i++) { 5973 const float src0 = 5974 _mesa_half_to_float(_src[0][_i].u16); 5975 5976 float16_t dst = bit_size == 64 ? ceil(src0) : ceilf(src0); 5977 5978 _dst_val[_i].u16 = _mesa_float_to_half(dst); 5979 } 5980 5981 break; 5982 } 5983 case 32: { 5984 5985 5986 5987 5988 for (unsigned _i = 0; _i < num_components; _i++) { 5989 const float32_t src0 = 5990 _src[0][_i].f32; 5991 5992 float32_t dst = bit_size == 64 ? ceil(src0) : ceilf(src0); 5993 5994 _dst_val[_i].f32 = dst; 5995 } 5996 5997 break; 5998 } 5999 case 64: { 6000 6001 6002 6003 6004 for (unsigned _i = 0; _i < num_components; _i++) { 6005 const float64_t src0 = 6006 _src[0][_i].f64; 6007 6008 float64_t dst = bit_size == 64 ? ceil(src0) : ceilf(src0); 6009 6010 _dst_val[_i].f64 = dst; 6011 } 6012 6013 break; 6014 } 6015 6016 default: 6017 unreachable("unknown bit width"); 6018 } 6019} 6020static void 6021evaluate_fcos(nir_const_value *_dst_val, 6022 MAYBE_UNUSED unsigned num_components, 6023 unsigned bit_size, 6024 MAYBE_UNUSED nir_const_value **_src) 6025{ 6026 switch (bit_size) { 6027 case 16: { 6028 6029 6030 6031 6032 for (unsigned _i = 0; _i < num_components; _i++) { 6033 const float src0 = 6034 _mesa_half_to_float(_src[0][_i].u16); 6035 6036 float16_t dst = bit_size == 64 ? cos(src0) : cosf(src0); 6037 6038 _dst_val[_i].u16 = _mesa_float_to_half(dst); 6039 } 6040 6041 break; 6042 } 6043 case 32: { 6044 6045 6046 6047 6048 for (unsigned _i = 0; _i < num_components; _i++) { 6049 const float32_t src0 = 6050 _src[0][_i].f32; 6051 6052 float32_t dst = bit_size == 64 ? 
cos(src0) : cosf(src0); 6053 6054 _dst_val[_i].f32 = dst; 6055 } 6056 6057 break; 6058 } 6059 case 64: { 6060 6061 6062 6063 6064 for (unsigned _i = 0; _i < num_components; _i++) { 6065 const float64_t src0 = 6066 _src[0][_i].f64; 6067 6068 float64_t dst = bit_size == 64 ? cos(src0) : cosf(src0); 6069 6070 _dst_val[_i].f64 = dst; 6071 } 6072 6073 break; 6074 } 6075 6076 default: 6077 unreachable("unknown bit width"); 6078 } 6079} 6080static void 6081evaluate_fcsel(nir_const_value *_dst_val, 6082 MAYBE_UNUSED unsigned num_components, 6083 UNUSED unsigned bit_size, 6084 MAYBE_UNUSED nir_const_value **_src) 6085{ 6086 6087 6088 6089 6090 for (unsigned _i = 0; _i < num_components; _i++) { 6091 const float32_t src0 = 6092 _src[0][_i].f32; 6093 const float32_t src1 = 6094 _src[1][_i].f32; 6095 const float32_t src2 = 6096 _src[2][_i].f32; 6097 6098 float32_t dst = (src0 != 0.0f) ? src1 : src2; 6099 6100 _dst_val[_i].f32 = dst; 6101 } 6102 6103} 6104static void 6105evaluate_fddx(nir_const_value *_dst_val, 6106 MAYBE_UNUSED unsigned num_components, 6107 unsigned bit_size, 6108 MAYBE_UNUSED nir_const_value **_src) 6109{ 6110 switch (bit_size) { 6111 case 16: { 6112 6113 6114 6115 6116 for (unsigned _i = 0; _i < num_components; _i++) { 6117 6118 float16_t dst = 0.0; 6119 6120 _dst_val[_i].u16 = _mesa_float_to_half(dst); 6121 } 6122 6123 break; 6124 } 6125 case 32: { 6126 6127 6128 6129 6130 for (unsigned _i = 0; _i < num_components; _i++) { 6131 6132 float32_t dst = 0.0; 6133 6134 _dst_val[_i].f32 = dst; 6135 } 6136 6137 break; 6138 } 6139 case 64: { 6140 6141 6142 6143 6144 for (unsigned _i = 0; _i < num_components; _i++) { 6145 6146 float64_t dst = 0.0; 6147 6148 _dst_val[_i].f64 = dst; 6149 } 6150 6151 break; 6152 } 6153 6154 default: 6155 unreachable("unknown bit width"); 6156 } 6157} 6158static void 6159evaluate_fddx_coarse(nir_const_value *_dst_val, 6160 MAYBE_UNUSED unsigned num_components, 6161 unsigned bit_size, 6162 MAYBE_UNUSED nir_const_value **_src) 6163{ 6164 
switch (bit_size) { 6165 case 16: { 6166 6167 6168 6169 6170 for (unsigned _i = 0; _i < num_components; _i++) { 6171 6172 float16_t dst = 0.0; 6173 6174 _dst_val[_i].u16 = _mesa_float_to_half(dst); 6175 } 6176 6177 break; 6178 } 6179 case 32: { 6180 6181 6182 6183 6184 for (unsigned _i = 0; _i < num_components; _i++) { 6185 6186 float32_t dst = 0.0; 6187 6188 _dst_val[_i].f32 = dst; 6189 } 6190 6191 break; 6192 } 6193 case 64: { 6194 6195 6196 6197 6198 for (unsigned _i = 0; _i < num_components; _i++) { 6199 6200 float64_t dst = 0.0; 6201 6202 _dst_val[_i].f64 = dst; 6203 } 6204 6205 break; 6206 } 6207 6208 default: 6209 unreachable("unknown bit width"); 6210 } 6211} 6212static void 6213evaluate_fddx_fine(nir_const_value *_dst_val, 6214 MAYBE_UNUSED unsigned num_components, 6215 unsigned bit_size, 6216 MAYBE_UNUSED nir_const_value **_src) 6217{ 6218 switch (bit_size) { 6219 case 16: { 6220 6221 6222 6223 6224 for (unsigned _i = 0; _i < num_components; _i++) { 6225 6226 float16_t dst = 0.0; 6227 6228 _dst_val[_i].u16 = _mesa_float_to_half(dst); 6229 } 6230 6231 break; 6232 } 6233 case 32: { 6234 6235 6236 6237 6238 for (unsigned _i = 0; _i < num_components; _i++) { 6239 6240 float32_t dst = 0.0; 6241 6242 _dst_val[_i].f32 = dst; 6243 } 6244 6245 break; 6246 } 6247 case 64: { 6248 6249 6250 6251 6252 for (unsigned _i = 0; _i < num_components; _i++) { 6253 6254 float64_t dst = 0.0; 6255 6256 _dst_val[_i].f64 = dst; 6257 } 6258 6259 break; 6260 } 6261 6262 default: 6263 unreachable("unknown bit width"); 6264 } 6265} 6266static void 6267evaluate_fddy(nir_const_value *_dst_val, 6268 MAYBE_UNUSED unsigned num_components, 6269 unsigned bit_size, 6270 MAYBE_UNUSED nir_const_value **_src) 6271{ 6272 switch (bit_size) { 6273 case 16: { 6274 6275 6276 6277 6278 for (unsigned _i = 0; _i < num_components; _i++) { 6279 6280 float16_t dst = 0.0; 6281 6282 _dst_val[_i].u16 = _mesa_float_to_half(dst); 6283 } 6284 6285 break; 6286 } 6287 case 32: { 6288 6289 6290 6291 6292 for 
(unsigned _i = 0; _i < num_components; _i++) { 6293 6294 float32_t dst = 0.0; 6295 6296 _dst_val[_i].f32 = dst; 6297 } 6298 6299 break; 6300 } 6301 case 64: { 6302 6303 6304 6305 6306 for (unsigned _i = 0; _i < num_components; _i++) { 6307 6308 float64_t dst = 0.0; 6309 6310 _dst_val[_i].f64 = dst; 6311 } 6312 6313 break; 6314 } 6315 6316 default: 6317 unreachable("unknown bit width"); 6318 } 6319} 6320static void 6321evaluate_fddy_coarse(nir_const_value *_dst_val, 6322 MAYBE_UNUSED unsigned num_components, 6323 unsigned bit_size, 6324 MAYBE_UNUSED nir_const_value **_src) 6325{ 6326 switch (bit_size) { 6327 case 16: { 6328 6329 6330 6331 6332 for (unsigned _i = 0; _i < num_components; _i++) { 6333 6334 float16_t dst = 0.0; 6335 6336 _dst_val[_i].u16 = _mesa_float_to_half(dst); 6337 } 6338 6339 break; 6340 } 6341 case 32: { 6342 6343 6344 6345 6346 for (unsigned _i = 0; _i < num_components; _i++) { 6347 6348 float32_t dst = 0.0; 6349 6350 _dst_val[_i].f32 = dst; 6351 } 6352 6353 break; 6354 } 6355 case 64: { 6356 6357 6358 6359 6360 for (unsigned _i = 0; _i < num_components; _i++) { 6361 6362 float64_t dst = 0.0; 6363 6364 _dst_val[_i].f64 = dst; 6365 } 6366 6367 break; 6368 } 6369 6370 default: 6371 unreachable("unknown bit width"); 6372 } 6373} 6374static void 6375evaluate_fddy_fine(nir_const_value *_dst_val, 6376 MAYBE_UNUSED unsigned num_components, 6377 unsigned bit_size, 6378 MAYBE_UNUSED nir_const_value **_src) 6379{ 6380 switch (bit_size) { 6381 case 16: { 6382 6383 6384 6385 6386 for (unsigned _i = 0; _i < num_components; _i++) { 6387 6388 float16_t dst = 0.0; 6389 6390 _dst_val[_i].u16 = _mesa_float_to_half(dst); 6391 } 6392 6393 break; 6394 } 6395 case 32: { 6396 6397 6398 6399 6400 for (unsigned _i = 0; _i < num_components; _i++) { 6401 6402 float32_t dst = 0.0; 6403 6404 _dst_val[_i].f32 = dst; 6405 } 6406 6407 break; 6408 } 6409 case 64: { 6410 6411 6412 6413 6414 for (unsigned _i = 0; _i < num_components; _i++) { 6415 6416 float64_t dst = 0.0; 6417 
6418 _dst_val[_i].f64 = dst; 6419 } 6420 6421 break; 6422 } 6423 6424 default: 6425 unreachable("unknown bit width"); 6426 } 6427} 6428static void 6429evaluate_fdiv(nir_const_value *_dst_val, 6430 MAYBE_UNUSED unsigned num_components, 6431 unsigned bit_size, 6432 MAYBE_UNUSED nir_const_value **_src) 6433{ 6434 switch (bit_size) { 6435 case 16: { 6436 6437 6438 6439 6440 for (unsigned _i = 0; _i < num_components; _i++) { 6441 const float src0 = 6442 _mesa_half_to_float(_src[0][_i].u16); 6443 const float src1 = 6444 _mesa_half_to_float(_src[1][_i].u16); 6445 6446 float16_t dst = src0 / src1; 6447 6448 _dst_val[_i].u16 = _mesa_float_to_half(dst); 6449 } 6450 6451 break; 6452 } 6453 case 32: { 6454 6455 6456 6457 6458 for (unsigned _i = 0; _i < num_components; _i++) { 6459 const float32_t src0 = 6460 _src[0][_i].f32; 6461 const float32_t src1 = 6462 _src[1][_i].f32; 6463 6464 float32_t dst = src0 / src1; 6465 6466 _dst_val[_i].f32 = dst; 6467 } 6468 6469 break; 6470 } 6471 case 64: { 6472 6473 6474 6475 6476 for (unsigned _i = 0; _i < num_components; _i++) { 6477 const float64_t src0 = 6478 _src[0][_i].f64; 6479 const float64_t src1 = 6480 _src[1][_i].f64; 6481 6482 float64_t dst = src0 / src1; 6483 6484 _dst_val[_i].f64 = dst; 6485 } 6486 6487 break; 6488 } 6489 6490 default: 6491 unreachable("unknown bit width"); 6492 } 6493} 6494static void 6495evaluate_fdot2(nir_const_value *_dst_val, 6496 MAYBE_UNUSED unsigned num_components, 6497 unsigned bit_size, 6498 MAYBE_UNUSED nir_const_value **_src) 6499{ 6500 switch (bit_size) { 6501 case 16: { 6502 6503 6504 6505 6506 const struct float16_vec src0 = { 6507 _mesa_half_to_float(_src[0][0].u16), 6508 _mesa_half_to_float(_src[0][1].u16), 6509 0, 6510 0, 6511 }; 6512 6513 const struct float16_vec src1 = { 6514 _mesa_half_to_float(_src[1][0].u16), 6515 _mesa_half_to_float(_src[1][1].u16), 6516 0, 6517 0, 6518 }; 6519 6520 struct float16_vec dst; 6521 6522 dst.x = dst.y = dst.z = dst.w = ((src0.x * src1.x) + (src0.y * src1.y)); 
6523 6524 _dst_val[0].u16 = _mesa_float_to_half(dst.x); 6525 6526 break; 6527 } 6528 case 32: { 6529 6530 6531 6532 6533 const struct float32_vec src0 = { 6534 _src[0][0].f32, 6535 _src[0][1].f32, 6536 0, 6537 0, 6538 }; 6539 6540 const struct float32_vec src1 = { 6541 _src[1][0].f32, 6542 _src[1][1].f32, 6543 0, 6544 0, 6545 }; 6546 6547 struct float32_vec dst; 6548 6549 dst.x = dst.y = dst.z = dst.w = ((src0.x * src1.x) + (src0.y * src1.y)); 6550 6551 _dst_val[0].f32 = dst.x; 6552 6553 break; 6554 } 6555 case 64: { 6556 6557 6558 6559 6560 const struct float64_vec src0 = { 6561 _src[0][0].f64, 6562 _src[0][1].f64, 6563 0, 6564 0, 6565 }; 6566 6567 const struct float64_vec src1 = { 6568 _src[1][0].f64, 6569 _src[1][1].f64, 6570 0, 6571 0, 6572 }; 6573 6574 struct float64_vec dst; 6575 6576 dst.x = dst.y = dst.z = dst.w = ((src0.x * src1.x) + (src0.y * src1.y)); 6577 6578 _dst_val[0].f64 = dst.x; 6579 6580 break; 6581 } 6582 6583 default: 6584 unreachable("unknown bit width"); 6585 } 6586} 6587static void 6588evaluate_fdot3(nir_const_value *_dst_val, 6589 MAYBE_UNUSED unsigned num_components, 6590 unsigned bit_size, 6591 MAYBE_UNUSED nir_const_value **_src) 6592{ 6593 switch (bit_size) { 6594 case 16: { 6595 6596 6597 6598 6599 const struct float16_vec src0 = { 6600 _mesa_half_to_float(_src[0][0].u16), 6601 _mesa_half_to_float(_src[0][1].u16), 6602 _mesa_half_to_float(_src[0][2].u16), 6603 0, 6604 }; 6605 6606 const struct float16_vec src1 = { 6607 _mesa_half_to_float(_src[1][0].u16), 6608 _mesa_half_to_float(_src[1][1].u16), 6609 _mesa_half_to_float(_src[1][2].u16), 6610 0, 6611 }; 6612 6613 struct float16_vec dst; 6614 6615 dst.x = dst.y = dst.z = dst.w = ((src0.x * src1.x) + (src0.y * src1.y) + (src0.z * src1.z)); 6616 6617 _dst_val[0].u16 = _mesa_float_to_half(dst.x); 6618 6619 break; 6620 } 6621 case 32: { 6622 6623 6624 6625 6626 const struct float32_vec src0 = { 6627 _src[0][0].f32, 6628 _src[0][1].f32, 6629 _src[0][2].f32, 6630 0, 6631 }; 6632 6633 const 
struct float32_vec src1 = { 6634 _src[1][0].f32, 6635 _src[1][1].f32, 6636 _src[1][2].f32, 6637 0, 6638 }; 6639 6640 struct float32_vec dst; 6641 6642 dst.x = dst.y = dst.z = dst.w = ((src0.x * src1.x) + (src0.y * src1.y) + (src0.z * src1.z)); 6643 6644 _dst_val[0].f32 = dst.x; 6645 6646 break; 6647 } 6648 case 64: { 6649 6650 6651 6652 6653 const struct float64_vec src0 = { 6654 _src[0][0].f64, 6655 _src[0][1].f64, 6656 _src[0][2].f64, 6657 0, 6658 }; 6659 6660 const struct float64_vec src1 = { 6661 _src[1][0].f64, 6662 _src[1][1].f64, 6663 _src[1][2].f64, 6664 0, 6665 }; 6666 6667 struct float64_vec dst; 6668 6669 dst.x = dst.y = dst.z = dst.w = ((src0.x * src1.x) + (src0.y * src1.y) + (src0.z * src1.z)); 6670 6671 _dst_val[0].f64 = dst.x; 6672 6673 break; 6674 } 6675 6676 default: 6677 unreachable("unknown bit width"); 6678 } 6679} 6680static void 6681evaluate_fdot4(nir_const_value *_dst_val, 6682 MAYBE_UNUSED unsigned num_components, 6683 unsigned bit_size, 6684 MAYBE_UNUSED nir_const_value **_src) 6685{ 6686 switch (bit_size) { 6687 case 16: { 6688 6689 6690 6691 6692 const struct float16_vec src0 = { 6693 _mesa_half_to_float(_src[0][0].u16), 6694 _mesa_half_to_float(_src[0][1].u16), 6695 _mesa_half_to_float(_src[0][2].u16), 6696 _mesa_half_to_float(_src[0][3].u16), 6697 }; 6698 6699 const struct float16_vec src1 = { 6700 _mesa_half_to_float(_src[1][0].u16), 6701 _mesa_half_to_float(_src[1][1].u16), 6702 _mesa_half_to_float(_src[1][2].u16), 6703 _mesa_half_to_float(_src[1][3].u16), 6704 }; 6705 6706 struct float16_vec dst; 6707 6708 dst.x = dst.y = dst.z = dst.w = ((src0.x * src1.x) + (src0.y * src1.y) + (src0.z * src1.z) + (src0.w * src1.w)); 6709 6710 _dst_val[0].u16 = _mesa_float_to_half(dst.x); 6711 6712 break; 6713 } 6714 case 32: { 6715 6716 6717 6718 6719 const struct float32_vec src0 = { 6720 _src[0][0].f32, 6721 _src[0][1].f32, 6722 _src[0][2].f32, 6723 _src[0][3].f32, 6724 }; 6725 6726 const struct float32_vec src1 = { 6727 _src[1][0].f32, 6728 
_src[1][1].f32, 6729 _src[1][2].f32, 6730 _src[1][3].f32, 6731 }; 6732 6733 struct float32_vec dst; 6734 6735 dst.x = dst.y = dst.z = dst.w = ((src0.x * src1.x) + (src0.y * src1.y) + (src0.z * src1.z) + (src0.w * src1.w)); 6736 6737 _dst_val[0].f32 = dst.x; 6738 6739 break; 6740 } 6741 case 64: { 6742 6743 6744 6745 6746 const struct float64_vec src0 = { 6747 _src[0][0].f64, 6748 _src[0][1].f64, 6749 _src[0][2].f64, 6750 _src[0][3].f64, 6751 }; 6752 6753 const struct float64_vec src1 = { 6754 _src[1][0].f64, 6755 _src[1][1].f64, 6756 _src[1][2].f64, 6757 _src[1][3].f64, 6758 }; 6759 6760 struct float64_vec dst; 6761 6762 dst.x = dst.y = dst.z = dst.w = ((src0.x * src1.x) + (src0.y * src1.y) + (src0.z * src1.z) + (src0.w * src1.w)); 6763 6764 _dst_val[0].f64 = dst.x; 6765 6766 break; 6767 } 6768 6769 default: 6770 unreachable("unknown bit width"); 6771 } 6772} 6773static void 6774evaluate_fdot_replicated2(nir_const_value *_dst_val, 6775 MAYBE_UNUSED unsigned num_components, 6776 unsigned bit_size, 6777 MAYBE_UNUSED nir_const_value **_src) 6778{ 6779 switch (bit_size) { 6780 case 16: { 6781 6782 6783 6784 6785 const struct float16_vec src0 = { 6786 _mesa_half_to_float(_src[0][0].u16), 6787 _mesa_half_to_float(_src[0][1].u16), 6788 0, 6789 0, 6790 }; 6791 6792 const struct float16_vec src1 = { 6793 _mesa_half_to_float(_src[1][0].u16), 6794 _mesa_half_to_float(_src[1][1].u16), 6795 0, 6796 0, 6797 }; 6798 6799 struct float16_vec dst; 6800 6801 dst.x = dst.y = dst.z = dst.w = ((src0.x * src1.x) + (src0.y * src1.y)); 6802 6803 _dst_val[0].u16 = _mesa_float_to_half(dst.x); 6804 _dst_val[1].u16 = _mesa_float_to_half(dst.y); 6805 _dst_val[2].u16 = _mesa_float_to_half(dst.z); 6806 _dst_val[3].u16 = _mesa_float_to_half(dst.w); 6807 6808 break; 6809 } 6810 case 32: { 6811 6812 6813 6814 6815 const struct float32_vec src0 = { 6816 _src[0][0].f32, 6817 _src[0][1].f32, 6818 0, 6819 0, 6820 }; 6821 6822 const struct float32_vec src1 = { 6823 _src[1][0].f32, 6824 _src[1][1].f32, 
6825 0, 6826 0, 6827 }; 6828 6829 struct float32_vec dst; 6830 6831 dst.x = dst.y = dst.z = dst.w = ((src0.x * src1.x) + (src0.y * src1.y)); 6832 6833 _dst_val[0].f32 = dst.x; 6834 _dst_val[1].f32 = dst.y; 6835 _dst_val[2].f32 = dst.z; 6836 _dst_val[3].f32 = dst.w; 6837 6838 break; 6839 } 6840 case 64: { 6841 6842 6843 6844 6845 const struct float64_vec src0 = { 6846 _src[0][0].f64, 6847 _src[0][1].f64, 6848 0, 6849 0, 6850 }; 6851 6852 const struct float64_vec src1 = { 6853 _src[1][0].f64, 6854 _src[1][1].f64, 6855 0, 6856 0, 6857 }; 6858 6859 struct float64_vec dst; 6860 6861 dst.x = dst.y = dst.z = dst.w = ((src0.x * src1.x) + (src0.y * src1.y)); 6862 6863 _dst_val[0].f64 = dst.x; 6864 _dst_val[1].f64 = dst.y; 6865 _dst_val[2].f64 = dst.z; 6866 _dst_val[3].f64 = dst.w; 6867 6868 break; 6869 } 6870 6871 default: 6872 unreachable("unknown bit width"); 6873 } 6874} 6875static void 6876evaluate_fdot_replicated3(nir_const_value *_dst_val, 6877 MAYBE_UNUSED unsigned num_components, 6878 unsigned bit_size, 6879 MAYBE_UNUSED nir_const_value **_src) 6880{ 6881 switch (bit_size) { 6882 case 16: { 6883 6884 6885 6886 6887 const struct float16_vec src0 = { 6888 _mesa_half_to_float(_src[0][0].u16), 6889 _mesa_half_to_float(_src[0][1].u16), 6890 _mesa_half_to_float(_src[0][2].u16), 6891 0, 6892 }; 6893 6894 const struct float16_vec src1 = { 6895 _mesa_half_to_float(_src[1][0].u16), 6896 _mesa_half_to_float(_src[1][1].u16), 6897 _mesa_half_to_float(_src[1][2].u16), 6898 0, 6899 }; 6900 6901 struct float16_vec dst; 6902 6903 dst.x = dst.y = dst.z = dst.w = ((src0.x * src1.x) + (src0.y * src1.y) + (src0.z * src1.z)); 6904 6905 _dst_val[0].u16 = _mesa_float_to_half(dst.x); 6906 _dst_val[1].u16 = _mesa_float_to_half(dst.y); 6907 _dst_val[2].u16 = _mesa_float_to_half(dst.z); 6908 _dst_val[3].u16 = _mesa_float_to_half(dst.w); 6909 6910 break; 6911 } 6912 case 32: { 6913 6914 6915 6916 6917 const struct float32_vec src0 = { 6918 _src[0][0].f32, 6919 _src[0][1].f32, 6920 
_src[0][2].f32, 6921 0, 6922 }; 6923 6924 const struct float32_vec src1 = { 6925 _src[1][0].f32, 6926 _src[1][1].f32, 6927 _src[1][2].f32, 6928 0, 6929 }; 6930 6931 struct float32_vec dst; 6932 6933 dst.x = dst.y = dst.z = dst.w = ((src0.x * src1.x) + (src0.y * src1.y) + (src0.z * src1.z)); 6934 6935 _dst_val[0].f32 = dst.x; 6936 _dst_val[1].f32 = dst.y; 6937 _dst_val[2].f32 = dst.z; 6938 _dst_val[3].f32 = dst.w; 6939 6940 break; 6941 } 6942 case 64: { 6943 6944 6945 6946 6947 const struct float64_vec src0 = { 6948 _src[0][0].f64, 6949 _src[0][1].f64, 6950 _src[0][2].f64, 6951 0, 6952 }; 6953 6954 const struct float64_vec src1 = { 6955 _src[1][0].f64, 6956 _src[1][1].f64, 6957 _src[1][2].f64, 6958 0, 6959 }; 6960 6961 struct float64_vec dst; 6962 6963 dst.x = dst.y = dst.z = dst.w = ((src0.x * src1.x) + (src0.y * src1.y) + (src0.z * src1.z)); 6964 6965 _dst_val[0].f64 = dst.x; 6966 _dst_val[1].f64 = dst.y; 6967 _dst_val[2].f64 = dst.z; 6968 _dst_val[3].f64 = dst.w; 6969 6970 break; 6971 } 6972 6973 default: 6974 unreachable("unknown bit width"); 6975 } 6976} 6977static void 6978evaluate_fdot_replicated4(nir_const_value *_dst_val, 6979 MAYBE_UNUSED unsigned num_components, 6980 unsigned bit_size, 6981 MAYBE_UNUSED nir_const_value **_src) 6982{ 6983 switch (bit_size) { 6984 case 16: { 6985 6986 6987 6988 6989 const struct float16_vec src0 = { 6990 _mesa_half_to_float(_src[0][0].u16), 6991 _mesa_half_to_float(_src[0][1].u16), 6992 _mesa_half_to_float(_src[0][2].u16), 6993 _mesa_half_to_float(_src[0][3].u16), 6994 }; 6995 6996 const struct float16_vec src1 = { 6997 _mesa_half_to_float(_src[1][0].u16), 6998 _mesa_half_to_float(_src[1][1].u16), 6999 _mesa_half_to_float(_src[1][2].u16), 7000 _mesa_half_to_float(_src[1][3].u16), 7001 }; 7002 7003 struct float16_vec dst; 7004 7005 dst.x = dst.y = dst.z = dst.w = ((src0.x * src1.x) + (src0.y * src1.y) + (src0.z * src1.z) + (src0.w * src1.w)); 7006 7007 _dst_val[0].u16 = _mesa_float_to_half(dst.x); 7008 _dst_val[1].u16 = 
_mesa_float_to_half(dst.y); 7009 _dst_val[2].u16 = _mesa_float_to_half(dst.z); 7010 _dst_val[3].u16 = _mesa_float_to_half(dst.w); 7011 7012 break; 7013 } 7014 case 32: { 7015 7016 7017 7018 7019 const struct float32_vec src0 = { 7020 _src[0][0].f32, 7021 _src[0][1].f32, 7022 _src[0][2].f32, 7023 _src[0][3].f32, 7024 }; 7025 7026 const struct float32_vec src1 = { 7027 _src[1][0].f32, 7028 _src[1][1].f32, 7029 _src[1][2].f32, 7030 _src[1][3].f32, 7031 }; 7032 7033 struct float32_vec dst; 7034 7035 dst.x = dst.y = dst.z = dst.w = ((src0.x * src1.x) + (src0.y * src1.y) + (src0.z * src1.z) + (src0.w * src1.w)); 7036 7037 _dst_val[0].f32 = dst.x; 7038 _dst_val[1].f32 = dst.y; 7039 _dst_val[2].f32 = dst.z; 7040 _dst_val[3].f32 = dst.w; 7041 7042 break; 7043 } 7044 case 64: { 7045 7046 7047 7048 7049 const struct float64_vec src0 = { 7050 _src[0][0].f64, 7051 _src[0][1].f64, 7052 _src[0][2].f64, 7053 _src[0][3].f64, 7054 }; 7055 7056 const struct float64_vec src1 = { 7057 _src[1][0].f64, 7058 _src[1][1].f64, 7059 _src[1][2].f64, 7060 _src[1][3].f64, 7061 }; 7062 7063 struct float64_vec dst; 7064 7065 dst.x = dst.y = dst.z = dst.w = ((src0.x * src1.x) + (src0.y * src1.y) + (src0.z * src1.z) + (src0.w * src1.w)); 7066 7067 _dst_val[0].f64 = dst.x; 7068 _dst_val[1].f64 = dst.y; 7069 _dst_val[2].f64 = dst.z; 7070 _dst_val[3].f64 = dst.w; 7071 7072 break; 7073 } 7074 7075 default: 7076 unreachable("unknown bit width"); 7077 } 7078} 7079static void 7080evaluate_fdph(nir_const_value *_dst_val, 7081 MAYBE_UNUSED unsigned num_components, 7082 unsigned bit_size, 7083 MAYBE_UNUSED nir_const_value **_src) 7084{ 7085 switch (bit_size) { 7086 case 16: { 7087 7088 7089 7090 7091 const struct float16_vec src0 = { 7092 _mesa_half_to_float(_src[0][0].u16), 7093 _mesa_half_to_float(_src[0][1].u16), 7094 _mesa_half_to_float(_src[0][2].u16), 7095 0, 7096 }; 7097 7098 const struct float16_vec src1 = { 7099 _mesa_half_to_float(_src[1][0].u16), 7100 _mesa_half_to_float(_src[1][1].u16), 7101 
_mesa_half_to_float(_src[1][2].u16), 7102 _mesa_half_to_float(_src[1][3].u16), 7103 }; 7104 7105 struct float16_vec dst; 7106 7107 dst.x = dst.y = dst.z = dst.w = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w; 7108 7109 _dst_val[0].u16 = _mesa_float_to_half(dst.x); 7110 7111 break; 7112 } 7113 case 32: { 7114 7115 7116 7117 7118 const struct float32_vec src0 = { 7119 _src[0][0].f32, 7120 _src[0][1].f32, 7121 _src[0][2].f32, 7122 0, 7123 }; 7124 7125 const struct float32_vec src1 = { 7126 _src[1][0].f32, 7127 _src[1][1].f32, 7128 _src[1][2].f32, 7129 _src[1][3].f32, 7130 }; 7131 7132 struct float32_vec dst; 7133 7134 dst.x = dst.y = dst.z = dst.w = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w; 7135 7136 _dst_val[0].f32 = dst.x; 7137 7138 break; 7139 } 7140 case 64: { 7141 7142 7143 7144 7145 const struct float64_vec src0 = { 7146 _src[0][0].f64, 7147 _src[0][1].f64, 7148 _src[0][2].f64, 7149 0, 7150 }; 7151 7152 const struct float64_vec src1 = { 7153 _src[1][0].f64, 7154 _src[1][1].f64, 7155 _src[1][2].f64, 7156 _src[1][3].f64, 7157 }; 7158 7159 struct float64_vec dst; 7160 7161 dst.x = dst.y = dst.z = dst.w = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w; 7162 7163 _dst_val[0].f64 = dst.x; 7164 7165 break; 7166 } 7167 7168 default: 7169 unreachable("unknown bit width"); 7170 } 7171} 7172static void 7173evaluate_fdph_replicated(nir_const_value *_dst_val, 7174 MAYBE_UNUSED unsigned num_components, 7175 unsigned bit_size, 7176 MAYBE_UNUSED nir_const_value **_src) 7177{ 7178 switch (bit_size) { 7179 case 16: { 7180 7181 7182 7183 7184 const struct float16_vec src0 = { 7185 _mesa_half_to_float(_src[0][0].u16), 7186 _mesa_half_to_float(_src[0][1].u16), 7187 _mesa_half_to_float(_src[0][2].u16), 7188 0, 7189 }; 7190 7191 const struct float16_vec src1 = { 7192 _mesa_half_to_float(_src[1][0].u16), 7193 _mesa_half_to_float(_src[1][1].u16), 7194 _mesa_half_to_float(_src[1][2].u16), 7195 _mesa_half_to_float(_src[1][3].u16), 7196 }; 
7197 7198 struct float16_vec dst; 7199 7200 dst.x = dst.y = dst.z = dst.w = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w; 7201 7202 _dst_val[0].u16 = _mesa_float_to_half(dst.x); 7203 _dst_val[1].u16 = _mesa_float_to_half(dst.y); 7204 _dst_val[2].u16 = _mesa_float_to_half(dst.z); 7205 _dst_val[3].u16 = _mesa_float_to_half(dst.w); 7206 7207 break; 7208 } 7209 case 32: { 7210 7211 7212 7213 7214 const struct float32_vec src0 = { 7215 _src[0][0].f32, 7216 _src[0][1].f32, 7217 _src[0][2].f32, 7218 0, 7219 }; 7220 7221 const struct float32_vec src1 = { 7222 _src[1][0].f32, 7223 _src[1][1].f32, 7224 _src[1][2].f32, 7225 _src[1][3].f32, 7226 }; 7227 7228 struct float32_vec dst; 7229 7230 dst.x = dst.y = dst.z = dst.w = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w; 7231 7232 _dst_val[0].f32 = dst.x; 7233 _dst_val[1].f32 = dst.y; 7234 _dst_val[2].f32 = dst.z; 7235 _dst_val[3].f32 = dst.w; 7236 7237 break; 7238 } 7239 case 64: { 7240 7241 7242 7243 7244 const struct float64_vec src0 = { 7245 _src[0][0].f64, 7246 _src[0][1].f64, 7247 _src[0][2].f64, 7248 0, 7249 }; 7250 7251 const struct float64_vec src1 = { 7252 _src[1][0].f64, 7253 _src[1][1].f64, 7254 _src[1][2].f64, 7255 _src[1][3].f64, 7256 }; 7257 7258 struct float64_vec dst; 7259 7260 dst.x = dst.y = dst.z = dst.w = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w; 7261 7262 _dst_val[0].f64 = dst.x; 7263 _dst_val[1].f64 = dst.y; 7264 _dst_val[2].f64 = dst.z; 7265 _dst_val[3].f64 = dst.w; 7266 7267 break; 7268 } 7269 7270 default: 7271 unreachable("unknown bit width"); 7272 } 7273} 7274static void 7275evaluate_feq(nir_const_value *_dst_val, 7276 MAYBE_UNUSED unsigned num_components, 7277 unsigned bit_size, 7278 MAYBE_UNUSED nir_const_value **_src) 7279{ 7280 switch (bit_size) { 7281 case 16: { 7282 7283 7284 7285 7286 for (unsigned _i = 0; _i < num_components; _i++) { 7287 const float src0 = 7288 _mesa_half_to_float(_src[0][_i].u16); 7289 const float src1 = 7290 
_mesa_half_to_float(_src[1][_i].u16); 7291 7292 bool1_t dst = src0 == src1; 7293 7294 _dst_val[_i].b = -(int)dst; 7295 } 7296 7297 break; 7298 } 7299 case 32: { 7300 7301 7302 7303 7304 for (unsigned _i = 0; _i < num_components; _i++) { 7305 const float32_t src0 = 7306 _src[0][_i].f32; 7307 const float32_t src1 = 7308 _src[1][_i].f32; 7309 7310 bool1_t dst = src0 == src1; 7311 7312 _dst_val[_i].b = -(int)dst; 7313 } 7314 7315 break; 7316 } 7317 case 64: { 7318 7319 7320 7321 7322 for (unsigned _i = 0; _i < num_components; _i++) { 7323 const float64_t src0 = 7324 _src[0][_i].f64; 7325 const float64_t src1 = 7326 _src[1][_i].f64; 7327 7328 bool1_t dst = src0 == src1; 7329 7330 _dst_val[_i].b = -(int)dst; 7331 } 7332 7333 break; 7334 } 7335 7336 default: 7337 unreachable("unknown bit width"); 7338 } 7339} 7340static void 7341evaluate_feq32(nir_const_value *_dst_val, 7342 MAYBE_UNUSED unsigned num_components, 7343 unsigned bit_size, 7344 MAYBE_UNUSED nir_const_value **_src) 7345{ 7346 switch (bit_size) { 7347 case 16: { 7348 7349 7350 7351 7352 for (unsigned _i = 0; _i < num_components; _i++) { 7353 const float src0 = 7354 _mesa_half_to_float(_src[0][_i].u16); 7355 const float src1 = 7356 _mesa_half_to_float(_src[1][_i].u16); 7357 7358 bool32_t dst = src0 == src1; 7359 7360 _dst_val[_i].i32 = -(int)dst; 7361 } 7362 7363 break; 7364 } 7365 case 32: { 7366 7367 7368 7369 7370 for (unsigned _i = 0; _i < num_components; _i++) { 7371 const float32_t src0 = 7372 _src[0][_i].f32; 7373 const float32_t src1 = 7374 _src[1][_i].f32; 7375 7376 bool32_t dst = src0 == src1; 7377 7378 _dst_val[_i].i32 = -(int)dst; 7379 } 7380 7381 break; 7382 } 7383 case 64: { 7384 7385 7386 7387 7388 for (unsigned _i = 0; _i < num_components; _i++) { 7389 const float64_t src0 = 7390 _src[0][_i].f64; 7391 const float64_t src1 = 7392 _src[1][_i].f64; 7393 7394 bool32_t dst = src0 == src1; 7395 7396 _dst_val[_i].i32 = -(int)dst; 7397 } 7398 7399 break; 7400 } 7401 7402 default: 7403 
unreachable("unknown bit width"); 7404 } 7405} 7406static void 7407evaluate_fexp2(nir_const_value *_dst_val, 7408 MAYBE_UNUSED unsigned num_components, 7409 unsigned bit_size, 7410 MAYBE_UNUSED nir_const_value **_src) 7411{ 7412 switch (bit_size) { 7413 case 16: { 7414 7415 7416 7417 7418 for (unsigned _i = 0; _i < num_components; _i++) { 7419 const float src0 = 7420 _mesa_half_to_float(_src[0][_i].u16); 7421 7422 float16_t dst = exp2f(src0); 7423 7424 _dst_val[_i].u16 = _mesa_float_to_half(dst); 7425 } 7426 7427 break; 7428 } 7429 case 32: { 7430 7431 7432 7433 7434 for (unsigned _i = 0; _i < num_components; _i++) { 7435 const float32_t src0 = 7436 _src[0][_i].f32; 7437 7438 float32_t dst = exp2f(src0); 7439 7440 _dst_val[_i].f32 = dst; 7441 } 7442 7443 break; 7444 } 7445 case 64: { 7446 7447 7448 7449 7450 for (unsigned _i = 0; _i < num_components; _i++) { 7451 const float64_t src0 = 7452 _src[0][_i].f64; 7453 7454 float64_t dst = exp2f(src0); 7455 7456 _dst_val[_i].f64 = dst; 7457 } 7458 7459 break; 7460 } 7461 7462 default: 7463 unreachable("unknown bit width"); 7464 } 7465} 7466static void 7467evaluate_ffloor(nir_const_value *_dst_val, 7468 MAYBE_UNUSED unsigned num_components, 7469 unsigned bit_size, 7470 MAYBE_UNUSED nir_const_value **_src) 7471{ 7472 switch (bit_size) { 7473 case 16: { 7474 7475 7476 7477 7478 for (unsigned _i = 0; _i < num_components; _i++) { 7479 const float src0 = 7480 _mesa_half_to_float(_src[0][_i].u16); 7481 7482 float16_t dst = bit_size == 64 ? floor(src0) : floorf(src0); 7483 7484 _dst_val[_i].u16 = _mesa_float_to_half(dst); 7485 } 7486 7487 break; 7488 } 7489 case 32: { 7490 7491 7492 7493 7494 for (unsigned _i = 0; _i < num_components; _i++) { 7495 const float32_t src0 = 7496 _src[0][_i].f32; 7497 7498 float32_t dst = bit_size == 64 ? 
floor(src0) : floorf(src0); 7499 7500 _dst_val[_i].f32 = dst; 7501 } 7502 7503 break; 7504 } 7505 case 64: { 7506 7507 7508 7509 7510 for (unsigned _i = 0; _i < num_components; _i++) { 7511 const float64_t src0 = 7512 _src[0][_i].f64; 7513 7514 float64_t dst = bit_size == 64 ? floor(src0) : floorf(src0); 7515 7516 _dst_val[_i].f64 = dst; 7517 } 7518 7519 break; 7520 } 7521 7522 default: 7523 unreachable("unknown bit width"); 7524 } 7525} 7526static void 7527evaluate_ffma(nir_const_value *_dst_val, 7528 MAYBE_UNUSED unsigned num_components, 7529 unsigned bit_size, 7530 MAYBE_UNUSED nir_const_value **_src) 7531{ 7532 switch (bit_size) { 7533 case 16: { 7534 7535 7536 7537 7538 for (unsigned _i = 0; _i < num_components; _i++) { 7539 const float src0 = 7540 _mesa_half_to_float(_src[0][_i].u16); 7541 const float src1 = 7542 _mesa_half_to_float(_src[1][_i].u16); 7543 const float src2 = 7544 _mesa_half_to_float(_src[2][_i].u16); 7545 7546 float16_t dst = src0 * src1 + src2; 7547 7548 _dst_val[_i].u16 = _mesa_float_to_half(dst); 7549 } 7550 7551 break; 7552 } 7553 case 32: { 7554 7555 7556 7557 7558 for (unsigned _i = 0; _i < num_components; _i++) { 7559 const float32_t src0 = 7560 _src[0][_i].f32; 7561 const float32_t src1 = 7562 _src[1][_i].f32; 7563 const float32_t src2 = 7564 _src[2][_i].f32; 7565 7566 float32_t dst = src0 * src1 + src2; 7567 7568 _dst_val[_i].f32 = dst; 7569 } 7570 7571 break; 7572 } 7573 case 64: { 7574 7575 7576 7577 7578 for (unsigned _i = 0; _i < num_components; _i++) { 7579 const float64_t src0 = 7580 _src[0][_i].f64; 7581 const float64_t src1 = 7582 _src[1][_i].f64; 7583 const float64_t src2 = 7584 _src[2][_i].f64; 7585 7586 float64_t dst = src0 * src1 + src2; 7587 7588 _dst_val[_i].f64 = dst; 7589 } 7590 7591 break; 7592 } 7593 7594 default: 7595 unreachable("unknown bit width"); 7596 } 7597} 7598static void 7599evaluate_ffract(nir_const_value *_dst_val, 7600 MAYBE_UNUSED unsigned num_components, 7601 unsigned bit_size, 7602 MAYBE_UNUSED 
nir_const_value **_src) 7603{ 7604 switch (bit_size) { 7605 case 16: { 7606 7607 7608 7609 7610 for (unsigned _i = 0; _i < num_components; _i++) { 7611 const float src0 = 7612 _mesa_half_to_float(_src[0][_i].u16); 7613 7614 float16_t dst = src0 - (bit_size == 64 ? floor(src0) : floorf(src0)); 7615 7616 _dst_val[_i].u16 = _mesa_float_to_half(dst); 7617 } 7618 7619 break; 7620 } 7621 case 32: { 7622 7623 7624 7625 7626 for (unsigned _i = 0; _i < num_components; _i++) { 7627 const float32_t src0 = 7628 _src[0][_i].f32; 7629 7630 float32_t dst = src0 - (bit_size == 64 ? floor(src0) : floorf(src0)); 7631 7632 _dst_val[_i].f32 = dst; 7633 } 7634 7635 break; 7636 } 7637 case 64: { 7638 7639 7640 7641 7642 for (unsigned _i = 0; _i < num_components; _i++) { 7643 const float64_t src0 = 7644 _src[0][_i].f64; 7645 7646 float64_t dst = src0 - (bit_size == 64 ? floor(src0) : floorf(src0)); 7647 7648 _dst_val[_i].f64 = dst; 7649 } 7650 7651 break; 7652 } 7653 7654 default: 7655 unreachable("unknown bit width"); 7656 } 7657} 7658static void 7659evaluate_fge(nir_const_value *_dst_val, 7660 MAYBE_UNUSED unsigned num_components, 7661 unsigned bit_size, 7662 MAYBE_UNUSED nir_const_value **_src) 7663{ 7664 switch (bit_size) { 7665 case 16: { 7666 7667 7668 7669 7670 for (unsigned _i = 0; _i < num_components; _i++) { 7671 const float src0 = 7672 _mesa_half_to_float(_src[0][_i].u16); 7673 const float src1 = 7674 _mesa_half_to_float(_src[1][_i].u16); 7675 7676 bool1_t dst = src0 >= src1; 7677 7678 _dst_val[_i].b = -(int)dst; 7679 } 7680 7681 break; 7682 } 7683 case 32: { 7684 7685 7686 7687 7688 for (unsigned _i = 0; _i < num_components; _i++) { 7689 const float32_t src0 = 7690 _src[0][_i].f32; 7691 const float32_t src1 = 7692 _src[1][_i].f32; 7693 7694 bool1_t dst = src0 >= src1; 7695 7696 _dst_val[_i].b = -(int)dst; 7697 } 7698 7699 break; 7700 } 7701 case 64: { 7702 7703 7704 7705 7706 for (unsigned _i = 0; _i < num_components; _i++) { 7707 const float64_t src0 = 7708 _src[0][_i].f64; 
7709 const float64_t src1 = 7710 _src[1][_i].f64; 7711 7712 bool1_t dst = src0 >= src1; 7713 7714 _dst_val[_i].b = -(int)dst; 7715 } 7716 7717 break; 7718 } 7719 7720 default: 7721 unreachable("unknown bit width"); 7722 } 7723} 7724static void 7725evaluate_fge32(nir_const_value *_dst_val, 7726 MAYBE_UNUSED unsigned num_components, 7727 unsigned bit_size, 7728 MAYBE_UNUSED nir_const_value **_src) 7729{ 7730 switch (bit_size) { 7731 case 16: { 7732 7733 7734 7735 7736 for (unsigned _i = 0; _i < num_components; _i++) { 7737 const float src0 = 7738 _mesa_half_to_float(_src[0][_i].u16); 7739 const float src1 = 7740 _mesa_half_to_float(_src[1][_i].u16); 7741 7742 bool32_t dst = src0 >= src1; 7743 7744 _dst_val[_i].i32 = -(int)dst; 7745 } 7746 7747 break; 7748 } 7749 case 32: { 7750 7751 7752 7753 7754 for (unsigned _i = 0; _i < num_components; _i++) { 7755 const float32_t src0 = 7756 _src[0][_i].f32; 7757 const float32_t src1 = 7758 _src[1][_i].f32; 7759 7760 bool32_t dst = src0 >= src1; 7761 7762 _dst_val[_i].i32 = -(int)dst; 7763 } 7764 7765 break; 7766 } 7767 case 64: { 7768 7769 7770 7771 7772 for (unsigned _i = 0; _i < num_components; _i++) { 7773 const float64_t src0 = 7774 _src[0][_i].f64; 7775 const float64_t src1 = 7776 _src[1][_i].f64; 7777 7778 bool32_t dst = src0 >= src1; 7779 7780 _dst_val[_i].i32 = -(int)dst; 7781 } 7782 7783 break; 7784 } 7785 7786 default: 7787 unreachable("unknown bit width"); 7788 } 7789} 7790static void 7791evaluate_find_lsb(nir_const_value *_dst_val, 7792 MAYBE_UNUSED unsigned num_components, 7793 unsigned bit_size, 7794 MAYBE_UNUSED nir_const_value **_src) 7795{ 7796 switch (bit_size) { 7797 case 1: { 7798 7799 7800 7801 7802 for (unsigned _i = 0; _i < num_components; _i++) { 7803 /* 1-bit integers use a 0/-1 convention */ 7804 const int1_t src0 = -(int1_t)_src[0][_i].b; 7805 7806 int32_t dst; 7807 7808 7809dst = -1; 7810for (unsigned bit = 0; bit < bit_size; bit++) { 7811 if ((src0 >> bit) & 1) { 7812 dst = bit; 7813 break; 7814 } 
7815} 7816 7817 7818 _dst_val[_i].i32 = dst; 7819 } 7820 7821 break; 7822 } 7823 case 8: { 7824 7825 7826 7827 7828 for (unsigned _i = 0; _i < num_components; _i++) { 7829 const int8_t src0 = 7830 _src[0][_i].i8; 7831 7832 int32_t dst; 7833 7834 7835dst = -1; 7836for (unsigned bit = 0; bit < bit_size; bit++) { 7837 if ((src0 >> bit) & 1) { 7838 dst = bit; 7839 break; 7840 } 7841} 7842 7843 7844 _dst_val[_i].i32 = dst; 7845 } 7846 7847 break; 7848 } 7849 case 16: { 7850 7851 7852 7853 7854 for (unsigned _i = 0; _i < num_components; _i++) { 7855 const int16_t src0 = 7856 _src[0][_i].i16; 7857 7858 int32_t dst; 7859 7860 7861dst = -1; 7862for (unsigned bit = 0; bit < bit_size; bit++) { 7863 if ((src0 >> bit) & 1) { 7864 dst = bit; 7865 break; 7866 } 7867} 7868 7869 7870 _dst_val[_i].i32 = dst; 7871 } 7872 7873 break; 7874 } 7875 case 32: { 7876 7877 7878 7879 7880 for (unsigned _i = 0; _i < num_components; _i++) { 7881 const int32_t src0 = 7882 _src[0][_i].i32; 7883 7884 int32_t dst; 7885 7886 7887dst = -1; 7888for (unsigned bit = 0; bit < bit_size; bit++) { 7889 if ((src0 >> bit) & 1) { 7890 dst = bit; 7891 break; 7892 } 7893} 7894 7895 7896 _dst_val[_i].i32 = dst; 7897 } 7898 7899 break; 7900 } 7901 case 64: { 7902 7903 7904 7905 7906 for (unsigned _i = 0; _i < num_components; _i++) { 7907 const int64_t src0 = 7908 _src[0][_i].i64; 7909 7910 int32_t dst; 7911 7912 7913dst = -1; 7914for (unsigned bit = 0; bit < bit_size; bit++) { 7915 if ((src0 >> bit) & 1) { 7916 dst = bit; 7917 break; 7918 } 7919} 7920 7921 7922 _dst_val[_i].i32 = dst; 7923 } 7924 7925 break; 7926 } 7927 7928 default: 7929 unreachable("unknown bit width"); 7930 } 7931} 7932static void 7933evaluate_flog2(nir_const_value *_dst_val, 7934 MAYBE_UNUSED unsigned num_components, 7935 unsigned bit_size, 7936 MAYBE_UNUSED nir_const_value **_src) 7937{ 7938 switch (bit_size) { 7939 case 16: { 7940 7941 7942 7943 7944 for (unsigned _i = 0; _i < num_components; _i++) { 7945 const float src0 = 7946 
_mesa_half_to_float(_src[0][_i].u16); 7947 7948 float16_t dst = log2f(src0); 7949 7950 _dst_val[_i].u16 = _mesa_float_to_half(dst); 7951 } 7952 7953 break; 7954 } 7955 case 32: { 7956 7957 7958 7959 7960 for (unsigned _i = 0; _i < num_components; _i++) { 7961 const float32_t src0 = 7962 _src[0][_i].f32; 7963 7964 float32_t dst = log2f(src0); 7965 7966 _dst_val[_i].f32 = dst; 7967 } 7968 7969 break; 7970 } 7971 case 64: { 7972 7973 7974 7975 7976 for (unsigned _i = 0; _i < num_components; _i++) { 7977 const float64_t src0 = 7978 _src[0][_i].f64; 7979 7980 float64_t dst = log2f(src0); 7981 7982 _dst_val[_i].f64 = dst; 7983 } 7984 7985 break; 7986 } 7987 7988 default: 7989 unreachable("unknown bit width"); 7990 } 7991} 7992static void 7993evaluate_flrp(nir_const_value *_dst_val, 7994 MAYBE_UNUSED unsigned num_components, 7995 unsigned bit_size, 7996 MAYBE_UNUSED nir_const_value **_src) 7997{ 7998 switch (bit_size) { 7999 case 16: { 8000 8001 8002 8003 8004 for (unsigned _i = 0; _i < num_components; _i++) { 8005 const float src0 = 8006 _mesa_half_to_float(_src[0][_i].u16); 8007 const float src1 = 8008 _mesa_half_to_float(_src[1][_i].u16); 8009 const float src2 = 8010 _mesa_half_to_float(_src[2][_i].u16); 8011 8012 float16_t dst = src0 * (1 - src2) + src1 * src2; 8013 8014 _dst_val[_i].u16 = _mesa_float_to_half(dst); 8015 } 8016 8017 break; 8018 } 8019 case 32: { 8020 8021 8022 8023 8024 for (unsigned _i = 0; _i < num_components; _i++) { 8025 const float32_t src0 = 8026 _src[0][_i].f32; 8027 const float32_t src1 = 8028 _src[1][_i].f32; 8029 const float32_t src2 = 8030 _src[2][_i].f32; 8031 8032 float32_t dst = src0 * (1 - src2) + src1 * src2; 8033 8034 _dst_val[_i].f32 = dst; 8035 } 8036 8037 break; 8038 } 8039 case 64: { 8040 8041 8042 8043 8044 for (unsigned _i = 0; _i < num_components; _i++) { 8045 const float64_t src0 = 8046 _src[0][_i].f64; 8047 const float64_t src1 = 8048 _src[1][_i].f64; 8049 const float64_t src2 = 8050 _src[2][_i].f64; 8051 8052 float64_t dst = 
src0 * (1 - src2) + src1 * src2; 8053 8054 _dst_val[_i].f64 = dst; 8055 } 8056 8057 break; 8058 } 8059 8060 default: 8061 unreachable("unknown bit width"); 8062 } 8063} 8064static void 8065evaluate_flt(nir_const_value *_dst_val, 8066 MAYBE_UNUSED unsigned num_components, 8067 unsigned bit_size, 8068 MAYBE_UNUSED nir_const_value **_src) 8069{ 8070 switch (bit_size) { 8071 case 16: { 8072 8073 8074 8075 8076 for (unsigned _i = 0; _i < num_components; _i++) { 8077 const float src0 = 8078 _mesa_half_to_float(_src[0][_i].u16); 8079 const float src1 = 8080 _mesa_half_to_float(_src[1][_i].u16); 8081 8082 bool1_t dst = src0 < src1; 8083 8084 _dst_val[_i].b = -(int)dst; 8085 } 8086 8087 break; 8088 } 8089 case 32: { 8090 8091 8092 8093 8094 for (unsigned _i = 0; _i < num_components; _i++) { 8095 const float32_t src0 = 8096 _src[0][_i].f32; 8097 const float32_t src1 = 8098 _src[1][_i].f32; 8099 8100 bool1_t dst = src0 < src1; 8101 8102 _dst_val[_i].b = -(int)dst; 8103 } 8104 8105 break; 8106 } 8107 case 64: { 8108 8109 8110 8111 8112 for (unsigned _i = 0; _i < num_components; _i++) { 8113 const float64_t src0 = 8114 _src[0][_i].f64; 8115 const float64_t src1 = 8116 _src[1][_i].f64; 8117 8118 bool1_t dst = src0 < src1; 8119 8120 _dst_val[_i].b = -(int)dst; 8121 } 8122 8123 break; 8124 } 8125 8126 default: 8127 unreachable("unknown bit width"); 8128 } 8129} 8130static void 8131evaluate_flt32(nir_const_value *_dst_val, 8132 MAYBE_UNUSED unsigned num_components, 8133 unsigned bit_size, 8134 MAYBE_UNUSED nir_const_value **_src) 8135{ 8136 switch (bit_size) { 8137 case 16: { 8138 8139 8140 8141 8142 for (unsigned _i = 0; _i < num_components; _i++) { 8143 const float src0 = 8144 _mesa_half_to_float(_src[0][_i].u16); 8145 const float src1 = 8146 _mesa_half_to_float(_src[1][_i].u16); 8147 8148 bool32_t dst = src0 < src1; 8149 8150 _dst_val[_i].i32 = -(int)dst; 8151 } 8152 8153 break; 8154 } 8155 case 32: { 8156 8157 8158 8159 8160 for (unsigned _i = 0; _i < num_components; _i++) { 
8161 const float32_t src0 = 8162 _src[0][_i].f32; 8163 const float32_t src1 = 8164 _src[1][_i].f32; 8165 8166 bool32_t dst = src0 < src1; 8167 8168 _dst_val[_i].i32 = -(int)dst; 8169 } 8170 8171 break; 8172 } 8173 case 64: { 8174 8175 8176 8177 8178 for (unsigned _i = 0; _i < num_components; _i++) { 8179 const float64_t src0 = 8180 _src[0][_i].f64; 8181 const float64_t src1 = 8182 _src[1][_i].f64; 8183 8184 bool32_t dst = src0 < src1; 8185 8186 _dst_val[_i].i32 = -(int)dst; 8187 } 8188 8189 break; 8190 } 8191 8192 default: 8193 unreachable("unknown bit width"); 8194 } 8195} 8196static void 8197evaluate_fmax(nir_const_value *_dst_val, 8198 MAYBE_UNUSED unsigned num_components, 8199 unsigned bit_size, 8200 MAYBE_UNUSED nir_const_value **_src) 8201{ 8202 switch (bit_size) { 8203 case 16: { 8204 8205 8206 8207 8208 for (unsigned _i = 0; _i < num_components; _i++) { 8209 const float src0 = 8210 _mesa_half_to_float(_src[0][_i].u16); 8211 const float src1 = 8212 _mesa_half_to_float(_src[1][_i].u16); 8213 8214 float16_t dst = fmaxf(src0, src1); 8215 8216 _dst_val[_i].u16 = _mesa_float_to_half(dst); 8217 } 8218 8219 break; 8220 } 8221 case 32: { 8222 8223 8224 8225 8226 for (unsigned _i = 0; _i < num_components; _i++) { 8227 const float32_t src0 = 8228 _src[0][_i].f32; 8229 const float32_t src1 = 8230 _src[1][_i].f32; 8231 8232 float32_t dst = fmaxf(src0, src1); 8233 8234 _dst_val[_i].f32 = dst; 8235 } 8236 8237 break; 8238 } 8239 case 64: { 8240 8241 8242 8243 8244 for (unsigned _i = 0; _i < num_components; _i++) { 8245 const float64_t src0 = 8246 _src[0][_i].f64; 8247 const float64_t src1 = 8248 _src[1][_i].f64; 8249 8250 float64_t dst = fmaxf(src0, src1); 8251 8252 _dst_val[_i].f64 = dst; 8253 } 8254 8255 break; 8256 } 8257 8258 default: 8259 unreachable("unknown bit width"); 8260 } 8261} 8262static void 8263evaluate_fmax3(nir_const_value *_dst_val, 8264 MAYBE_UNUSED unsigned num_components, 8265 unsigned bit_size, 8266 MAYBE_UNUSED nir_const_value **_src) 8267{ 8268 
switch (bit_size) { 8269 case 16: { 8270 8271 8272 8273 8274 for (unsigned _i = 0; _i < num_components; _i++) { 8275 const float src0 = 8276 _mesa_half_to_float(_src[0][_i].u16); 8277 const float src1 = 8278 _mesa_half_to_float(_src[1][_i].u16); 8279 const float src2 = 8280 _mesa_half_to_float(_src[2][_i].u16); 8281 8282 float16_t dst = fmaxf(src0, fmaxf(src1, src2)); 8283 8284 _dst_val[_i].u16 = _mesa_float_to_half(dst); 8285 } 8286 8287 break; 8288 } 8289 case 32: { 8290 8291 8292 8293 8294 for (unsigned _i = 0; _i < num_components; _i++) { 8295 const float32_t src0 = 8296 _src[0][_i].f32; 8297 const float32_t src1 = 8298 _src[1][_i].f32; 8299 const float32_t src2 = 8300 _src[2][_i].f32; 8301 8302 float32_t dst = fmaxf(src0, fmaxf(src1, src2)); 8303 8304 _dst_val[_i].f32 = dst; 8305 } 8306 8307 break; 8308 } 8309 case 64: { 8310 8311 8312 8313 8314 for (unsigned _i = 0; _i < num_components; _i++) { 8315 const float64_t src0 = 8316 _src[0][_i].f64; 8317 const float64_t src1 = 8318 _src[1][_i].f64; 8319 const float64_t src2 = 8320 _src[2][_i].f64; 8321 8322 float64_t dst = fmaxf(src0, fmaxf(src1, src2)); 8323 8324 _dst_val[_i].f64 = dst; 8325 } 8326 8327 break; 8328 } 8329 8330 default: 8331 unreachable("unknown bit width"); 8332 } 8333} 8334static void 8335evaluate_fmed3(nir_const_value *_dst_val, 8336 MAYBE_UNUSED unsigned num_components, 8337 unsigned bit_size, 8338 MAYBE_UNUSED nir_const_value **_src) 8339{ 8340 switch (bit_size) { 8341 case 16: { 8342 8343 8344 8345 8346 for (unsigned _i = 0; _i < num_components; _i++) { 8347 const float src0 = 8348 _mesa_half_to_float(_src[0][_i].u16); 8349 const float src1 = 8350 _mesa_half_to_float(_src[1][_i].u16); 8351 const float src2 = 8352 _mesa_half_to_float(_src[2][_i].u16); 8353 8354 float16_t dst = fmaxf(fminf(fmaxf(src0, src1), src2), fminf(src0, src1)); 8355 8356 _dst_val[_i].u16 = _mesa_float_to_half(dst); 8357 } 8358 8359 break; 8360 } 8361 case 32: { 8362 8363 8364 8365 8366 for (unsigned _i = 0; _i < 
num_components; _i++) { 8367 const float32_t src0 = 8368 _src[0][_i].f32; 8369 const float32_t src1 = 8370 _src[1][_i].f32; 8371 const float32_t src2 = 8372 _src[2][_i].f32; 8373 8374 float32_t dst = fmaxf(fminf(fmaxf(src0, src1), src2), fminf(src0, src1)); 8375 8376 _dst_val[_i].f32 = dst; 8377 } 8378 8379 break; 8380 } 8381 case 64: { 8382 8383 8384 8385 8386 for (unsigned _i = 0; _i < num_components; _i++) { 8387 const float64_t src0 = 8388 _src[0][_i].f64; 8389 const float64_t src1 = 8390 _src[1][_i].f64; 8391 const float64_t src2 = 8392 _src[2][_i].f64; 8393 8394 float64_t dst = fmaxf(fminf(fmaxf(src0, src1), src2), fminf(src0, src1)); 8395 8396 _dst_val[_i].f64 = dst; 8397 } 8398 8399 break; 8400 } 8401 8402 default: 8403 unreachable("unknown bit width"); 8404 } 8405} 8406static void 8407evaluate_fmin(nir_const_value *_dst_val, 8408 MAYBE_UNUSED unsigned num_components, 8409 unsigned bit_size, 8410 MAYBE_UNUSED nir_const_value **_src) 8411{ 8412 switch (bit_size) { 8413 case 16: { 8414 8415 8416 8417 8418 for (unsigned _i = 0; _i < num_components; _i++) { 8419 const float src0 = 8420 _mesa_half_to_float(_src[0][_i].u16); 8421 const float src1 = 8422 _mesa_half_to_float(_src[1][_i].u16); 8423 8424 float16_t dst = fminf(src0, src1); 8425 8426 _dst_val[_i].u16 = _mesa_float_to_half(dst); 8427 } 8428 8429 break; 8430 } 8431 case 32: { 8432 8433 8434 8435 8436 for (unsigned _i = 0; _i < num_components; _i++) { 8437 const float32_t src0 = 8438 _src[0][_i].f32; 8439 const float32_t src1 = 8440 _src[1][_i].f32; 8441 8442 float32_t dst = fminf(src0, src1); 8443 8444 _dst_val[_i].f32 = dst; 8445 } 8446 8447 break; 8448 } 8449 case 64: { 8450 8451 8452 8453 8454 for (unsigned _i = 0; _i < num_components; _i++) { 8455 const float64_t src0 = 8456 _src[0][_i].f64; 8457 const float64_t src1 = 8458 _src[1][_i].f64; 8459 8460 float64_t dst = fminf(src0, src1); 8461 8462 _dst_val[_i].f64 = dst; 8463 } 8464 8465 break; 8466 } 8467 8468 default: 8469 unreachable("unknown bit 
width"); 8470 } 8471} 8472static void 8473evaluate_fmin3(nir_const_value *_dst_val, 8474 MAYBE_UNUSED unsigned num_components, 8475 unsigned bit_size, 8476 MAYBE_UNUSED nir_const_value **_src) 8477{ 8478 switch (bit_size) { 8479 case 16: { 8480 8481 8482 8483 8484 for (unsigned _i = 0; _i < num_components; _i++) { 8485 const float src0 = 8486 _mesa_half_to_float(_src[0][_i].u16); 8487 const float src1 = 8488 _mesa_half_to_float(_src[1][_i].u16); 8489 const float src2 = 8490 _mesa_half_to_float(_src[2][_i].u16); 8491 8492 float16_t dst = fminf(src0, fminf(src1, src2)); 8493 8494 _dst_val[_i].u16 = _mesa_float_to_half(dst); 8495 } 8496 8497 break; 8498 } 8499 case 32: { 8500 8501 8502 8503 8504 for (unsigned _i = 0; _i < num_components; _i++) { 8505 const float32_t src0 = 8506 _src[0][_i].f32; 8507 const float32_t src1 = 8508 _src[1][_i].f32; 8509 const float32_t src2 = 8510 _src[2][_i].f32; 8511 8512 float32_t dst = fminf(src0, fminf(src1, src2)); 8513 8514 _dst_val[_i].f32 = dst; 8515 } 8516 8517 break; 8518 } 8519 case 64: { 8520 8521 8522 8523 8524 for (unsigned _i = 0; _i < num_components; _i++) { 8525 const float64_t src0 = 8526 _src[0][_i].f64; 8527 const float64_t src1 = 8528 _src[1][_i].f64; 8529 const float64_t src2 = 8530 _src[2][_i].f64; 8531 8532 float64_t dst = fminf(src0, fminf(src1, src2)); 8533 8534 _dst_val[_i].f64 = dst; 8535 } 8536 8537 break; 8538 } 8539 8540 default: 8541 unreachable("unknown bit width"); 8542 } 8543} 8544static void 8545evaluate_fmod(nir_const_value *_dst_val, 8546 MAYBE_UNUSED unsigned num_components, 8547 unsigned bit_size, 8548 MAYBE_UNUSED nir_const_value **_src) 8549{ 8550 switch (bit_size) { 8551 case 16: { 8552 8553 8554 8555 8556 for (unsigned _i = 0; _i < num_components; _i++) { 8557 const float src0 = 8558 _mesa_half_to_float(_src[0][_i].u16); 8559 const float src1 = 8560 _mesa_half_to_float(_src[1][_i].u16); 8561 8562 float16_t dst = src0 - src1 * floorf(src0 / src1); 8563 8564 _dst_val[_i].u16 = 
_mesa_float_to_half(dst); 8565 } 8566 8567 break; 8568 } 8569 case 32: { 8570 8571 8572 8573 8574 for (unsigned _i = 0; _i < num_components; _i++) { 8575 const float32_t src0 = 8576 _src[0][_i].f32; 8577 const float32_t src1 = 8578 _src[1][_i].f32; 8579 8580 float32_t dst = src0 - src1 * floorf(src0 / src1); 8581 8582 _dst_val[_i].f32 = dst; 8583 } 8584 8585 break; 8586 } 8587 case 64: { 8588 8589 8590 8591 8592 for (unsigned _i = 0; _i < num_components; _i++) { 8593 const float64_t src0 = 8594 _src[0][_i].f64; 8595 const float64_t src1 = 8596 _src[1][_i].f64; 8597 8598 float64_t dst = src0 - src1 * floorf(src0 / src1); 8599 8600 _dst_val[_i].f64 = dst; 8601 } 8602 8603 break; 8604 } 8605 8606 default: 8607 unreachable("unknown bit width"); 8608 } 8609} 8610static void 8611evaluate_fmov(nir_const_value *_dst_val, 8612 MAYBE_UNUSED unsigned num_components, 8613 unsigned bit_size, 8614 MAYBE_UNUSED nir_const_value **_src) 8615{ 8616 switch (bit_size) { 8617 case 16: { 8618 8619 8620 8621 8622 for (unsigned _i = 0; _i < num_components; _i++) { 8623 const float src0 = 8624 _mesa_half_to_float(_src[0][_i].u16); 8625 8626 float16_t dst = src0; 8627 8628 _dst_val[_i].u16 = _mesa_float_to_half(dst); 8629 } 8630 8631 break; 8632 } 8633 case 32: { 8634 8635 8636 8637 8638 for (unsigned _i = 0; _i < num_components; _i++) { 8639 const float32_t src0 = 8640 _src[0][_i].f32; 8641 8642 float32_t dst = src0; 8643 8644 _dst_val[_i].f32 = dst; 8645 } 8646 8647 break; 8648 } 8649 case 64: { 8650 8651 8652 8653 8654 for (unsigned _i = 0; _i < num_components; _i++) { 8655 const float64_t src0 = 8656 _src[0][_i].f64; 8657 8658 float64_t dst = src0; 8659 8660 _dst_val[_i].f64 = dst; 8661 } 8662 8663 break; 8664 } 8665 8666 default: 8667 unreachable("unknown bit width"); 8668 } 8669} 8670static void 8671evaluate_fmul(nir_const_value *_dst_val, 8672 MAYBE_UNUSED unsigned num_components, 8673 unsigned bit_size, 8674 MAYBE_UNUSED nir_const_value **_src) 8675{ 8676 switch (bit_size) { 8677 
case 16: { 8678 8679 8680 8681 8682 for (unsigned _i = 0; _i < num_components; _i++) { 8683 const float src0 = 8684 _mesa_half_to_float(_src[0][_i].u16); 8685 const float src1 = 8686 _mesa_half_to_float(_src[1][_i].u16); 8687 8688 float16_t dst = src0 * src1; 8689 8690 _dst_val[_i].u16 = _mesa_float_to_half(dst); 8691 } 8692 8693 break; 8694 } 8695 case 32: { 8696 8697 8698 8699 8700 for (unsigned _i = 0; _i < num_components; _i++) { 8701 const float32_t src0 = 8702 _src[0][_i].f32; 8703 const float32_t src1 = 8704 _src[1][_i].f32; 8705 8706 float32_t dst = src0 * src1; 8707 8708 _dst_val[_i].f32 = dst; 8709 } 8710 8711 break; 8712 } 8713 case 64: { 8714 8715 8716 8717 8718 for (unsigned _i = 0; _i < num_components; _i++) { 8719 const float64_t src0 = 8720 _src[0][_i].f64; 8721 const float64_t src1 = 8722 _src[1][_i].f64; 8723 8724 float64_t dst = src0 * src1; 8725 8726 _dst_val[_i].f64 = dst; 8727 } 8728 8729 break; 8730 } 8731 8732 default: 8733 unreachable("unknown bit width"); 8734 } 8735} 8736static void 8737evaluate_fne(nir_const_value *_dst_val, 8738 MAYBE_UNUSED unsigned num_components, 8739 unsigned bit_size, 8740 MAYBE_UNUSED nir_const_value **_src) 8741{ 8742 switch (bit_size) { 8743 case 16: { 8744 8745 8746 8747 8748 for (unsigned _i = 0; _i < num_components; _i++) { 8749 const float src0 = 8750 _mesa_half_to_float(_src[0][_i].u16); 8751 const float src1 = 8752 _mesa_half_to_float(_src[1][_i].u16); 8753 8754 bool1_t dst = src0 != src1; 8755 8756 _dst_val[_i].b = -(int)dst; 8757 } 8758 8759 break; 8760 } 8761 case 32: { 8762 8763 8764 8765 8766 for (unsigned _i = 0; _i < num_components; _i++) { 8767 const float32_t src0 = 8768 _src[0][_i].f32; 8769 const float32_t src1 = 8770 _src[1][_i].f32; 8771 8772 bool1_t dst = src0 != src1; 8773 8774 _dst_val[_i].b = -(int)dst; 8775 } 8776 8777 break; 8778 } 8779 case 64: { 8780 8781 8782 8783 8784 for (unsigned _i = 0; _i < num_components; _i++) { 8785 const float64_t src0 = 8786 _src[0][_i].f64; 8787 const 
float64_t src1 = 8788 _src[1][_i].f64; 8789 8790 bool1_t dst = src0 != src1; 8791 8792 _dst_val[_i].b = -(int)dst; 8793 } 8794 8795 break; 8796 } 8797 8798 default: 8799 unreachable("unknown bit width"); 8800 } 8801} 8802static void 8803evaluate_fne32(nir_const_value *_dst_val, 8804 MAYBE_UNUSED unsigned num_components, 8805 unsigned bit_size, 8806 MAYBE_UNUSED nir_const_value **_src) 8807{ 8808 switch (bit_size) { 8809 case 16: { 8810 8811 8812 8813 8814 for (unsigned _i = 0; _i < num_components; _i++) { 8815 const float src0 = 8816 _mesa_half_to_float(_src[0][_i].u16); 8817 const float src1 = 8818 _mesa_half_to_float(_src[1][_i].u16); 8819 8820 bool32_t dst = src0 != src1; 8821 8822 _dst_val[_i].i32 = -(int)dst; 8823 } 8824 8825 break; 8826 } 8827 case 32: { 8828 8829 8830 8831 8832 for (unsigned _i = 0; _i < num_components; _i++) { 8833 const float32_t src0 = 8834 _src[0][_i].f32; 8835 const float32_t src1 = 8836 _src[1][_i].f32; 8837 8838 bool32_t dst = src0 != src1; 8839 8840 _dst_val[_i].i32 = -(int)dst; 8841 } 8842 8843 break; 8844 } 8845 case 64: { 8846 8847 8848 8849 8850 for (unsigned _i = 0; _i < num_components; _i++) { 8851 const float64_t src0 = 8852 _src[0][_i].f64; 8853 const float64_t src1 = 8854 _src[1][_i].f64; 8855 8856 bool32_t dst = src0 != src1; 8857 8858 _dst_val[_i].i32 = -(int)dst; 8859 } 8860 8861 break; 8862 } 8863 8864 default: 8865 unreachable("unknown bit width"); 8866 } 8867} 8868static void 8869evaluate_fneg(nir_const_value *_dst_val, 8870 MAYBE_UNUSED unsigned num_components, 8871 unsigned bit_size, 8872 MAYBE_UNUSED nir_const_value **_src) 8873{ 8874 switch (bit_size) { 8875 case 16: { 8876 8877 8878 8879 8880 for (unsigned _i = 0; _i < num_components; _i++) { 8881 const float src0 = 8882 _mesa_half_to_float(_src[0][_i].u16); 8883 8884 float16_t dst = -src0; 8885 8886 _dst_val[_i].u16 = _mesa_float_to_half(dst); 8887 } 8888 8889 break; 8890 } 8891 case 32: { 8892 8893 8894 8895 8896 for (unsigned _i = 0; _i < num_components; _i++) 
{ 8897 const float32_t src0 = 8898 _src[0][_i].f32; 8899 8900 float32_t dst = -src0; 8901 8902 _dst_val[_i].f32 = dst; 8903 } 8904 8905 break; 8906 } 8907 case 64: { 8908 8909 8910 8911 8912 for (unsigned _i = 0; _i < num_components; _i++) { 8913 const float64_t src0 = 8914 _src[0][_i].f64; 8915 8916 float64_t dst = -src0; 8917 8918 _dst_val[_i].f64 = dst; 8919 } 8920 8921 break; 8922 } 8923 8924 default: 8925 unreachable("unknown bit width"); 8926 } 8927} 8928static void 8929evaluate_fnoise1_1(nir_const_value *_dst_val, 8930 MAYBE_UNUSED unsigned num_components, 8931 unsigned bit_size, 8932 MAYBE_UNUSED nir_const_value **_src) 8933{ 8934 switch (bit_size) { 8935 case 16: { 8936 8937 8938 8939 8940 struct float16_vec dst; 8941 8942 dst.x = dst.y = dst.z = dst.w = 0.0f; 8943 8944 _dst_val[0].u16 = _mesa_float_to_half(dst.x); 8945 8946 break; 8947 } 8948 case 32: { 8949 8950 8951 8952 8953 struct float32_vec dst; 8954 8955 dst.x = dst.y = dst.z = dst.w = 0.0f; 8956 8957 _dst_val[0].f32 = dst.x; 8958 8959 break; 8960 } 8961 case 64: { 8962 8963 8964 8965 8966 struct float64_vec dst; 8967 8968 dst.x = dst.y = dst.z = dst.w = 0.0f; 8969 8970 _dst_val[0].f64 = dst.x; 8971 8972 break; 8973 } 8974 8975 default: 8976 unreachable("unknown bit width"); 8977 } 8978} 8979static void 8980evaluate_fnoise1_2(nir_const_value *_dst_val, 8981 MAYBE_UNUSED unsigned num_components, 8982 unsigned bit_size, 8983 MAYBE_UNUSED nir_const_value **_src) 8984{ 8985 switch (bit_size) { 8986 case 16: { 8987 8988 8989 8990 8991 struct float16_vec dst; 8992 8993 dst.x = dst.y = dst.z = dst.w = 0.0f; 8994 8995 _dst_val[0].u16 = _mesa_float_to_half(dst.x); 8996 8997 break; 8998 } 8999 case 32: { 9000 9001 9002 9003 9004 struct float32_vec dst; 9005 9006 dst.x = dst.y = dst.z = dst.w = 0.0f; 9007 9008 _dst_val[0].f32 = dst.x; 9009 9010 break; 9011 } 9012 case 64: { 9013 9014 9015 9016 9017 struct float64_vec dst; 9018 9019 dst.x = dst.y = dst.z = dst.w = 0.0f; 9020 9021 _dst_val[0].f64 = dst.x; 9022 
9023 break; 9024 } 9025 9026 default: 9027 unreachable("unknown bit width"); 9028 } 9029} 9030static void 9031evaluate_fnoise1_3(nir_const_value *_dst_val, 9032 MAYBE_UNUSED unsigned num_components, 9033 unsigned bit_size, 9034 MAYBE_UNUSED nir_const_value **_src) 9035{ 9036 switch (bit_size) { 9037 case 16: { 9038 9039 9040 9041 9042 struct float16_vec dst; 9043 9044 dst.x = dst.y = dst.z = dst.w = 0.0f; 9045 9046 _dst_val[0].u16 = _mesa_float_to_half(dst.x); 9047 9048 break; 9049 } 9050 case 32: { 9051 9052 9053 9054 9055 struct float32_vec dst; 9056 9057 dst.x = dst.y = dst.z = dst.w = 0.0f; 9058 9059 _dst_val[0].f32 = dst.x; 9060 9061 break; 9062 } 9063 case 64: { 9064 9065 9066 9067 9068 struct float64_vec dst; 9069 9070 dst.x = dst.y = dst.z = dst.w = 0.0f; 9071 9072 _dst_val[0].f64 = dst.x; 9073 9074 break; 9075 } 9076 9077 default: 9078 unreachable("unknown bit width"); 9079 } 9080} 9081static void 9082evaluate_fnoise1_4(nir_const_value *_dst_val, 9083 MAYBE_UNUSED unsigned num_components, 9084 unsigned bit_size, 9085 MAYBE_UNUSED nir_const_value **_src) 9086{ 9087 switch (bit_size) { 9088 case 16: { 9089 9090 9091 9092 9093 struct float16_vec dst; 9094 9095 dst.x = dst.y = dst.z = dst.w = 0.0f; 9096 9097 _dst_val[0].u16 = _mesa_float_to_half(dst.x); 9098 9099 break; 9100 } 9101 case 32: { 9102 9103 9104 9105 9106 struct float32_vec dst; 9107 9108 dst.x = dst.y = dst.z = dst.w = 0.0f; 9109 9110 _dst_val[0].f32 = dst.x; 9111 9112 break; 9113 } 9114 case 64: { 9115 9116 9117 9118 9119 struct float64_vec dst; 9120 9121 dst.x = dst.y = dst.z = dst.w = 0.0f; 9122 9123 _dst_val[0].f64 = dst.x; 9124 9125 break; 9126 } 9127 9128 default: 9129 unreachable("unknown bit width"); 9130 } 9131} 9132static void 9133evaluate_fnoise2_1(nir_const_value *_dst_val, 9134 MAYBE_UNUSED unsigned num_components, 9135 unsigned bit_size, 9136 MAYBE_UNUSED nir_const_value **_src) 9137{ 9138 switch (bit_size) { 9139 case 16: { 9140 9141 9142 9143 9144 struct float16_vec dst; 9145 9146 
dst.x = dst.y = dst.z = dst.w = 0.0f; 9147 9148 _dst_val[0].u16 = _mesa_float_to_half(dst.x); 9149 _dst_val[1].u16 = _mesa_float_to_half(dst.y); 9150 9151 break; 9152 } 9153 case 32: { 9154 9155 9156 9157 9158 struct float32_vec dst; 9159 9160 dst.x = dst.y = dst.z = dst.w = 0.0f; 9161 9162 _dst_val[0].f32 = dst.x; 9163 _dst_val[1].f32 = dst.y; 9164 9165 break; 9166 } 9167 case 64: { 9168 9169 9170 9171 9172 struct float64_vec dst; 9173 9174 dst.x = dst.y = dst.z = dst.w = 0.0f; 9175 9176 _dst_val[0].f64 = dst.x; 9177 _dst_val[1].f64 = dst.y; 9178 9179 break; 9180 } 9181 9182 default: 9183 unreachable("unknown bit width"); 9184 } 9185} 9186static void 9187evaluate_fnoise2_2(nir_const_value *_dst_val, 9188 MAYBE_UNUSED unsigned num_components, 9189 unsigned bit_size, 9190 MAYBE_UNUSED nir_const_value **_src) 9191{ 9192 switch (bit_size) { 9193 case 16: { 9194 9195 9196 9197 9198 struct float16_vec dst; 9199 9200 dst.x = dst.y = dst.z = dst.w = 0.0f; 9201 9202 _dst_val[0].u16 = _mesa_float_to_half(dst.x); 9203 _dst_val[1].u16 = _mesa_float_to_half(dst.y); 9204 9205 break; 9206 } 9207 case 32: { 9208 9209 9210 9211 9212 struct float32_vec dst; 9213 9214 dst.x = dst.y = dst.z = dst.w = 0.0f; 9215 9216 _dst_val[0].f32 = dst.x; 9217 _dst_val[1].f32 = dst.y; 9218 9219 break; 9220 } 9221 case 64: { 9222 9223 9224 9225 9226 struct float64_vec dst; 9227 9228 dst.x = dst.y = dst.z = dst.w = 0.0f; 9229 9230 _dst_val[0].f64 = dst.x; 9231 _dst_val[1].f64 = dst.y; 9232 9233 break; 9234 } 9235 9236 default: 9237 unreachable("unknown bit width"); 9238 } 9239} 9240static void 9241evaluate_fnoise2_3(nir_const_value *_dst_val, 9242 MAYBE_UNUSED unsigned num_components, 9243 unsigned bit_size, 9244 MAYBE_UNUSED nir_const_value **_src) 9245{ 9246 switch (bit_size) { 9247 case 16: { 9248 9249 9250 9251 9252 struct float16_vec dst; 9253 9254 dst.x = dst.y = dst.z = dst.w = 0.0f; 9255 9256 _dst_val[0].u16 = _mesa_float_to_half(dst.x); 9257 _dst_val[1].u16 = _mesa_float_to_half(dst.y); 9258 
9259 break; 9260 } 9261 case 32: { 9262 9263 9264 9265 9266 struct float32_vec dst; 9267 9268 dst.x = dst.y = dst.z = dst.w = 0.0f; 9269 9270 _dst_val[0].f32 = dst.x; 9271 _dst_val[1].f32 = dst.y; 9272 9273 break; 9274 } 9275 case 64: { 9276 9277 9278 9279 9280 struct float64_vec dst; 9281 9282 dst.x = dst.y = dst.z = dst.w = 0.0f; 9283 9284 _dst_val[0].f64 = dst.x; 9285 _dst_val[1].f64 = dst.y; 9286 9287 break; 9288 } 9289 9290 default: 9291 unreachable("unknown bit width"); 9292 } 9293} 9294static void 9295evaluate_fnoise2_4(nir_const_value *_dst_val, 9296 MAYBE_UNUSED unsigned num_components, 9297 unsigned bit_size, 9298 MAYBE_UNUSED nir_const_value **_src) 9299{ 9300 switch (bit_size) { 9301 case 16: { 9302 9303 9304 9305 9306 struct float16_vec dst; 9307 9308 dst.x = dst.y = dst.z = dst.w = 0.0f; 9309 9310 _dst_val[0].u16 = _mesa_float_to_half(dst.x); 9311 _dst_val[1].u16 = _mesa_float_to_half(dst.y); 9312 9313 break; 9314 } 9315 case 32: { 9316 9317 9318 9319 9320 struct float32_vec dst; 9321 9322 dst.x = dst.y = dst.z = dst.w = 0.0f; 9323 9324 _dst_val[0].f32 = dst.x; 9325 _dst_val[1].f32 = dst.y; 9326 9327 break; 9328 } 9329 case 64: { 9330 9331 9332 9333 9334 struct float64_vec dst; 9335 9336 dst.x = dst.y = dst.z = dst.w = 0.0f; 9337 9338 _dst_val[0].f64 = dst.x; 9339 _dst_val[1].f64 = dst.y; 9340 9341 break; 9342 } 9343 9344 default: 9345 unreachable("unknown bit width"); 9346 } 9347} 9348static void 9349evaluate_fnoise3_1(nir_const_value *_dst_val, 9350 MAYBE_UNUSED unsigned num_components, 9351 unsigned bit_size, 9352 MAYBE_UNUSED nir_const_value **_src) 9353{ 9354 switch (bit_size) { 9355 case 16: { 9356 9357 9358 9359 9360 struct float16_vec dst; 9361 9362 dst.x = dst.y = dst.z = dst.w = 0.0f; 9363 9364 _dst_val[0].u16 = _mesa_float_to_half(dst.x); 9365 _dst_val[1].u16 = _mesa_float_to_half(dst.y); 9366 _dst_val[2].u16 = _mesa_float_to_half(dst.z); 9367 9368 break; 9369 } 9370 case 32: { 9371 9372 9373 9374 9375 struct float32_vec dst; 9376 9377 
dst.x = dst.y = dst.z = dst.w = 0.0f; 9378 9379 _dst_val[0].f32 = dst.x; 9380 _dst_val[1].f32 = dst.y; 9381 _dst_val[2].f32 = dst.z; 9382 9383 break; 9384 } 9385 case 64: { 9386 9387 9388 9389 9390 struct float64_vec dst; 9391 9392 dst.x = dst.y = dst.z = dst.w = 0.0f; 9393 9394 _dst_val[0].f64 = dst.x; 9395 _dst_val[1].f64 = dst.y; 9396 _dst_val[2].f64 = dst.z; 9397 9398 break; 9399 } 9400 9401 default: 9402 unreachable("unknown bit width"); 9403 } 9404} 9405static void 9406evaluate_fnoise3_2(nir_const_value *_dst_val, 9407 MAYBE_UNUSED unsigned num_components, 9408 unsigned bit_size, 9409 MAYBE_UNUSED nir_const_value **_src) 9410{ 9411 switch (bit_size) { 9412 case 16: { 9413 9414 9415 9416 9417 struct float16_vec dst; 9418 9419 dst.x = dst.y = dst.z = dst.w = 0.0f; 9420 9421 _dst_val[0].u16 = _mesa_float_to_half(dst.x); 9422 _dst_val[1].u16 = _mesa_float_to_half(dst.y); 9423 _dst_val[2].u16 = _mesa_float_to_half(dst.z); 9424 9425 break; 9426 } 9427 case 32: { 9428 9429 9430 9431 9432 struct float32_vec dst; 9433 9434 dst.x = dst.y = dst.z = dst.w = 0.0f; 9435 9436 _dst_val[0].f32 = dst.x; 9437 _dst_val[1].f32 = dst.y; 9438 _dst_val[2].f32 = dst.z; 9439 9440 break; 9441 } 9442 case 64: { 9443 9444 9445 9446 9447 struct float64_vec dst; 9448 9449 dst.x = dst.y = dst.z = dst.w = 0.0f; 9450 9451 _dst_val[0].f64 = dst.x; 9452 _dst_val[1].f64 = dst.y; 9453 _dst_val[2].f64 = dst.z; 9454 9455 break; 9456 } 9457 9458 default: 9459 unreachable("unknown bit width"); 9460 } 9461} 9462static void 9463evaluate_fnoise3_3(nir_const_value *_dst_val, 9464 MAYBE_UNUSED unsigned num_components, 9465 unsigned bit_size, 9466 MAYBE_UNUSED nir_const_value **_src) 9467{ 9468 switch (bit_size) { 9469 case 16: { 9470 9471 9472 9473 9474 struct float16_vec dst; 9475 9476 dst.x = dst.y = dst.z = dst.w = 0.0f; 9477 9478 _dst_val[0].u16 = _mesa_float_to_half(dst.x); 9479 _dst_val[1].u16 = _mesa_float_to_half(dst.y); 9480 _dst_val[2].u16 = _mesa_float_to_half(dst.z); 9481 9482 break; 9483 } 
9484 case 32: { 9485 9486 9487 9488 9489 struct float32_vec dst; 9490 9491 dst.x = dst.y = dst.z = dst.w = 0.0f; 9492 9493 _dst_val[0].f32 = dst.x; 9494 _dst_val[1].f32 = dst.y; 9495 _dst_val[2].f32 = dst.z; 9496 9497 break; 9498 } 9499 case 64: { 9500 9501 9502 9503 9504 struct float64_vec dst; 9505 9506 dst.x = dst.y = dst.z = dst.w = 0.0f; 9507 9508 _dst_val[0].f64 = dst.x; 9509 _dst_val[1].f64 = dst.y; 9510 _dst_val[2].f64 = dst.z; 9511 9512 break; 9513 } 9514 9515 default: 9516 unreachable("unknown bit width"); 9517 } 9518} 9519static void 9520evaluate_fnoise3_4(nir_const_value *_dst_val, 9521 MAYBE_UNUSED unsigned num_components, 9522 unsigned bit_size, 9523 MAYBE_UNUSED nir_const_value **_src) 9524{ 9525 switch (bit_size) { 9526 case 16: { 9527 9528 9529 9530 9531 struct float16_vec dst; 9532 9533 dst.x = dst.y = dst.z = dst.w = 0.0f; 9534 9535 _dst_val[0].u16 = _mesa_float_to_half(dst.x); 9536 _dst_val[1].u16 = _mesa_float_to_half(dst.y); 9537 _dst_val[2].u16 = _mesa_float_to_half(dst.z); 9538 9539 break; 9540 } 9541 case 32: { 9542 9543 9544 9545 9546 struct float32_vec dst; 9547 9548 dst.x = dst.y = dst.z = dst.w = 0.0f; 9549 9550 _dst_val[0].f32 = dst.x; 9551 _dst_val[1].f32 = dst.y; 9552 _dst_val[2].f32 = dst.z; 9553 9554 break; 9555 } 9556 case 64: { 9557 9558 9559 9560 9561 struct float64_vec dst; 9562 9563 dst.x = dst.y = dst.z = dst.w = 0.0f; 9564 9565 _dst_val[0].f64 = dst.x; 9566 _dst_val[1].f64 = dst.y; 9567 _dst_val[2].f64 = dst.z; 9568 9569 break; 9570 } 9571 9572 default: 9573 unreachable("unknown bit width"); 9574 } 9575} 9576static void 9577evaluate_fnoise4_1(nir_const_value *_dst_val, 9578 MAYBE_UNUSED unsigned num_components, 9579 unsigned bit_size, 9580 MAYBE_UNUSED nir_const_value **_src) 9581{ 9582 switch (bit_size) { 9583 case 16: { 9584 9585 9586 9587 9588 struct float16_vec dst; 9589 9590 dst.x = dst.y = dst.z = dst.w = 0.0f; 9591 9592 _dst_val[0].u16 = _mesa_float_to_half(dst.x); 9593 _dst_val[1].u16 = _mesa_float_to_half(dst.y); 
9594 _dst_val[2].u16 = _mesa_float_to_half(dst.z); 9595 _dst_val[3].u16 = _mesa_float_to_half(dst.w); 9596 9597 break; 9598 } 9599 case 32: { 9600 9601 9602 9603 9604 struct float32_vec dst; 9605 9606 dst.x = dst.y = dst.z = dst.w = 0.0f; 9607 9608 _dst_val[0].f32 = dst.x; 9609 _dst_val[1].f32 = dst.y; 9610 _dst_val[2].f32 = dst.z; 9611 _dst_val[3].f32 = dst.w; 9612 9613 break; 9614 } 9615 case 64: { 9616 9617 9618 9619 9620 struct float64_vec dst; 9621 9622 dst.x = dst.y = dst.z = dst.w = 0.0f; 9623 9624 _dst_val[0].f64 = dst.x; 9625 _dst_val[1].f64 = dst.y; 9626 _dst_val[2].f64 = dst.z; 9627 _dst_val[3].f64 = dst.w; 9628 9629 break; 9630 } 9631 9632 default: 9633 unreachable("unknown bit width"); 9634 } 9635} 9636static void 9637evaluate_fnoise4_2(nir_const_value *_dst_val, 9638 MAYBE_UNUSED unsigned num_components, 9639 unsigned bit_size, 9640 MAYBE_UNUSED nir_const_value **_src) 9641{ 9642 switch (bit_size) { 9643 case 16: { 9644 9645 9646 9647 9648 struct float16_vec dst; 9649 9650 dst.x = dst.y = dst.z = dst.w = 0.0f; 9651 9652 _dst_val[0].u16 = _mesa_float_to_half(dst.x); 9653 _dst_val[1].u16 = _mesa_float_to_half(dst.y); 9654 _dst_val[2].u16 = _mesa_float_to_half(dst.z); 9655 _dst_val[3].u16 = _mesa_float_to_half(dst.w); 9656 9657 break; 9658 } 9659 case 32: { 9660 9661 9662 9663 9664 struct float32_vec dst; 9665 9666 dst.x = dst.y = dst.z = dst.w = 0.0f; 9667 9668 _dst_val[0].f32 = dst.x; 9669 _dst_val[1].f32 = dst.y; 9670 _dst_val[2].f32 = dst.z; 9671 _dst_val[3].f32 = dst.w; 9672 9673 break; 9674 } 9675 case 64: { 9676 9677 9678 9679 9680 struct float64_vec dst; 9681 9682 dst.x = dst.y = dst.z = dst.w = 0.0f; 9683 9684 _dst_val[0].f64 = dst.x; 9685 _dst_val[1].f64 = dst.y; 9686 _dst_val[2].f64 = dst.z; 9687 _dst_val[3].f64 = dst.w; 9688 9689 break; 9690 } 9691 9692 default: 9693 unreachable("unknown bit width"); 9694 } 9695} 9696static void 9697evaluate_fnoise4_3(nir_const_value *_dst_val, 9698 MAYBE_UNUSED unsigned num_components, 9699 unsigned 
bit_size, 9700 MAYBE_UNUSED nir_const_value **_src) 9701{ 9702 switch (bit_size) { 9703 case 16: { 9704 9705 9706 9707 9708 struct float16_vec dst; 9709 9710 dst.x = dst.y = dst.z = dst.w = 0.0f; 9711 9712 _dst_val[0].u16 = _mesa_float_to_half(dst.x); 9713 _dst_val[1].u16 = _mesa_float_to_half(dst.y); 9714 _dst_val[2].u16 = _mesa_float_to_half(dst.z); 9715 _dst_val[3].u16 = _mesa_float_to_half(dst.w); 9716 9717 break; 9718 } 9719 case 32: { 9720 9721 9722 9723 9724 struct float32_vec dst; 9725 9726 dst.x = dst.y = dst.z = dst.w = 0.0f; 9727 9728 _dst_val[0].f32 = dst.x; 9729 _dst_val[1].f32 = dst.y; 9730 _dst_val[2].f32 = dst.z; 9731 _dst_val[3].f32 = dst.w; 9732 9733 break; 9734 } 9735 case 64: { 9736 9737 9738 9739 9740 struct float64_vec dst; 9741 9742 dst.x = dst.y = dst.z = dst.w = 0.0f; 9743 9744 _dst_val[0].f64 = dst.x; 9745 _dst_val[1].f64 = dst.y; 9746 _dst_val[2].f64 = dst.z; 9747 _dst_val[3].f64 = dst.w; 9748 9749 break; 9750 } 9751 9752 default: 9753 unreachable("unknown bit width"); 9754 } 9755} 9756static void 9757evaluate_fnoise4_4(nir_const_value *_dst_val, 9758 MAYBE_UNUSED unsigned num_components, 9759 unsigned bit_size, 9760 MAYBE_UNUSED nir_const_value **_src) 9761{ 9762 switch (bit_size) { 9763 case 16: { 9764 9765 9766 9767 9768 struct float16_vec dst; 9769 9770 dst.x = dst.y = dst.z = dst.w = 0.0f; 9771 9772 _dst_val[0].u16 = _mesa_float_to_half(dst.x); 9773 _dst_val[1].u16 = _mesa_float_to_half(dst.y); 9774 _dst_val[2].u16 = _mesa_float_to_half(dst.z); 9775 _dst_val[3].u16 = _mesa_float_to_half(dst.w); 9776 9777 break; 9778 } 9779 case 32: { 9780 9781 9782 9783 9784 struct float32_vec dst; 9785 9786 dst.x = dst.y = dst.z = dst.w = 0.0f; 9787 9788 _dst_val[0].f32 = dst.x; 9789 _dst_val[1].f32 = dst.y; 9790 _dst_val[2].f32 = dst.z; 9791 _dst_val[3].f32 = dst.w; 9792 9793 break; 9794 } 9795 case 64: { 9796 9797 9798 9799 9800 struct float64_vec dst; 9801 9802 dst.x = dst.y = dst.z = dst.w = 0.0f; 9803 9804 _dst_val[0].f64 = dst.x; 9805 
_dst_val[1].f64 = dst.y; 9806 _dst_val[2].f64 = dst.z; 9807 _dst_val[3].f64 = dst.w; 9808 9809 break; 9810 } 9811 9812 default: 9813 unreachable("unknown bit width"); 9814 } 9815} 9816static void 9817evaluate_fnot(nir_const_value *_dst_val, 9818 MAYBE_UNUSED unsigned num_components, 9819 unsigned bit_size, 9820 MAYBE_UNUSED nir_const_value **_src) 9821{ 9822 switch (bit_size) { 9823 case 16: { 9824 9825 9826 9827 9828 for (unsigned _i = 0; _i < num_components; _i++) { 9829 const float src0 = 9830 _mesa_half_to_float(_src[0][_i].u16); 9831 9832 float16_t dst = bit_size == 64 ? ((src0 == 0.0) ? 1.0 : 0.0f) : ((src0 == 0.0f) ? 1.0f : 0.0f); 9833 9834 _dst_val[_i].u16 = _mesa_float_to_half(dst); 9835 } 9836 9837 break; 9838 } 9839 case 32: { 9840 9841 9842 9843 9844 for (unsigned _i = 0; _i < num_components; _i++) { 9845 const float32_t src0 = 9846 _src[0][_i].f32; 9847 9848 float32_t dst = bit_size == 64 ? ((src0 == 0.0) ? 1.0 : 0.0f) : ((src0 == 0.0f) ? 1.0f : 0.0f); 9849 9850 _dst_val[_i].f32 = dst; 9851 } 9852 9853 break; 9854 } 9855 case 64: { 9856 9857 9858 9859 9860 for (unsigned _i = 0; _i < num_components; _i++) { 9861 const float64_t src0 = 9862 _src[0][_i].f64; 9863 9864 float64_t dst = bit_size == 64 ? ((src0 == 0.0) ? 1.0 : 0.0f) : ((src0 == 0.0f) ? 1.0f : 0.0f); 9865 9866 _dst_val[_i].f64 = dst; 9867 } 9868 9869 break; 9870 } 9871 9872 default: 9873 unreachable("unknown bit width"); 9874 } 9875} 9876static void 9877evaluate_for(nir_const_value *_dst_val, 9878 MAYBE_UNUSED unsigned num_components, 9879 UNUSED unsigned bit_size, 9880 MAYBE_UNUSED nir_const_value **_src) 9881{ 9882 9883 9884 9885 9886 for (unsigned _i = 0; _i < num_components; _i++) { 9887 const float32_t src0 = 9888 _src[0][_i].f32; 9889 const float32_t src1 = 9890 _src[1][_i].f32; 9891 9892 float32_t dst = ((src0 != 0.0f) || (src1 != 0.0f)) ? 
1.0f : 0.0f; 9893 9894 _dst_val[_i].f32 = dst; 9895 } 9896 9897} 9898static void 9899evaluate_fpow(nir_const_value *_dst_val, 9900 MAYBE_UNUSED unsigned num_components, 9901 unsigned bit_size, 9902 MAYBE_UNUSED nir_const_value **_src) 9903{ 9904 switch (bit_size) { 9905 case 16: { 9906 9907 9908 9909 9910 for (unsigned _i = 0; _i < num_components; _i++) { 9911 const float src0 = 9912 _mesa_half_to_float(_src[0][_i].u16); 9913 const float src1 = 9914 _mesa_half_to_float(_src[1][_i].u16); 9915 9916 float16_t dst = bit_size == 64 ? powf(src0, src1) : pow(src0, src1); 9917 9918 _dst_val[_i].u16 = _mesa_float_to_half(dst); 9919 } 9920 9921 break; 9922 } 9923 case 32: { 9924 9925 9926 9927 9928 for (unsigned _i = 0; _i < num_components; _i++) { 9929 const float32_t src0 = 9930 _src[0][_i].f32; 9931 const float32_t src1 = 9932 _src[1][_i].f32; 9933 9934 float32_t dst = bit_size == 64 ? powf(src0, src1) : pow(src0, src1); 9935 9936 _dst_val[_i].f32 = dst; 9937 } 9938 9939 break; 9940 } 9941 case 64: { 9942 9943 9944 9945 9946 for (unsigned _i = 0; _i < num_components; _i++) { 9947 const float64_t src0 = 9948 _src[0][_i].f64; 9949 const float64_t src1 = 9950 _src[1][_i].f64; 9951 9952 float64_t dst = bit_size == 64 ? powf(src0, src1) : pow(src0, src1); 9953 9954 _dst_val[_i].f64 = dst; 9955 } 9956 9957 break; 9958 } 9959 9960 default: 9961 unreachable("unknown bit width"); 9962 } 9963} 9964static void 9965evaluate_fquantize2f16(nir_const_value *_dst_val, 9966 MAYBE_UNUSED unsigned num_components, 9967 unsigned bit_size, 9968 MAYBE_UNUSED nir_const_value **_src) 9969{ 9970 switch (bit_size) { 9971 case 16: { 9972 9973 9974 9975 9976 for (unsigned _i = 0; _i < num_components; _i++) { 9977 const float src0 = 9978 _mesa_half_to_float(_src[0][_i].u16); 9979 9980 float16_t dst = (fabs(src0) < ldexpf(1.0, -14)) ? 
copysignf(0.0f, src0) : _mesa_half_to_float(_mesa_float_to_half(src0)); 9981 9982 _dst_val[_i].u16 = _mesa_float_to_half(dst); 9983 } 9984 9985 break; 9986 } 9987 case 32: { 9988 9989 9990 9991 9992 for (unsigned _i = 0; _i < num_components; _i++) { 9993 const float32_t src0 = 9994 _src[0][_i].f32; 9995 9996 float32_t dst = (fabs(src0) < ldexpf(1.0, -14)) ? copysignf(0.0f, src0) : _mesa_half_to_float(_mesa_float_to_half(src0)); 9997 9998 _dst_val[_i].f32 = dst; 9999 } 10000 10001 break; 10002 } 10003 case 64: { 10004 10005 10006 10007 10008 for (unsigned _i = 0; _i < num_components; _i++) { 10009 const float64_t src0 = 10010 _src[0][_i].f64; 10011 10012 float64_t dst = (fabs(src0) < ldexpf(1.0, -14)) ? copysignf(0.0f, src0) : _mesa_half_to_float(_mesa_float_to_half(src0)); 10013 10014 _dst_val[_i].f64 = dst; 10015 } 10016 10017 break; 10018 } 10019 10020 default: 10021 unreachable("unknown bit width"); 10022 } 10023} 10024static void 10025evaluate_frcp(nir_const_value *_dst_val, 10026 MAYBE_UNUSED unsigned num_components, 10027 unsigned bit_size, 10028 MAYBE_UNUSED nir_const_value **_src) 10029{ 10030 switch (bit_size) { 10031 case 16: { 10032 10033 10034 10035 10036 for (unsigned _i = 0; _i < num_components; _i++) { 10037 const float src0 = 10038 _mesa_half_to_float(_src[0][_i].u16); 10039 10040 float16_t dst = bit_size == 64 ? 1.0 / src0 : 1.0f / src0; 10041 10042 _dst_val[_i].u16 = _mesa_float_to_half(dst); 10043 } 10044 10045 break; 10046 } 10047 case 32: { 10048 10049 10050 10051 10052 for (unsigned _i = 0; _i < num_components; _i++) { 10053 const float32_t src0 = 10054 _src[0][_i].f32; 10055 10056 float32_t dst = bit_size == 64 ? 1.0 / src0 : 1.0f / src0; 10057 10058 _dst_val[_i].f32 = dst; 10059 } 10060 10061 break; 10062 } 10063 case 64: { 10064 10065 10066 10067 10068 for (unsigned _i = 0; _i < num_components; _i++) { 10069 const float64_t src0 = 10070 _src[0][_i].f64; 10071 10072 float64_t dst = bit_size == 64 ? 
1.0 / src0 : 1.0f / src0; 10073 10074 _dst_val[_i].f64 = dst; 10075 } 10076 10077 break; 10078 } 10079 10080 default: 10081 unreachable("unknown bit width"); 10082 } 10083} 10084static void 10085evaluate_frem(nir_const_value *_dst_val, 10086 MAYBE_UNUSED unsigned num_components, 10087 unsigned bit_size, 10088 MAYBE_UNUSED nir_const_value **_src) 10089{ 10090 switch (bit_size) { 10091 case 16: { 10092 10093 10094 10095 10096 for (unsigned _i = 0; _i < num_components; _i++) { 10097 const float src0 = 10098 _mesa_half_to_float(_src[0][_i].u16); 10099 const float src1 = 10100 _mesa_half_to_float(_src[1][_i].u16); 10101 10102 float16_t dst = src0 - src1 * truncf(src0 / src1); 10103 10104 _dst_val[_i].u16 = _mesa_float_to_half(dst); 10105 } 10106 10107 break; 10108 } 10109 case 32: { 10110 10111 10112 10113 10114 for (unsigned _i = 0; _i < num_components; _i++) { 10115 const float32_t src0 = 10116 _src[0][_i].f32; 10117 const float32_t src1 = 10118 _src[1][_i].f32; 10119 10120 float32_t dst = src0 - src1 * truncf(src0 / src1); 10121 10122 _dst_val[_i].f32 = dst; 10123 } 10124 10125 break; 10126 } 10127 case 64: { 10128 10129 10130 10131 10132 for (unsigned _i = 0; _i < num_components; _i++) { 10133 const float64_t src0 = 10134 _src[0][_i].f64; 10135 const float64_t src1 = 10136 _src[1][_i].f64; 10137 10138 float64_t dst = src0 - src1 * truncf(src0 / src1); 10139 10140 _dst_val[_i].f64 = dst; 10141 } 10142 10143 break; 10144 } 10145 10146 default: 10147 unreachable("unknown bit width"); 10148 } 10149} 10150static void 10151evaluate_frexp_exp(nir_const_value *_dst_val, 10152 MAYBE_UNUSED unsigned num_components, 10153 unsigned bit_size, 10154 MAYBE_UNUSED nir_const_value **_src) 10155{ 10156 switch (bit_size) { 10157 case 16: { 10158 10159 10160 10161 10162 for (unsigned _i = 0; _i < num_components; _i++) { 10163 const float src0 = 10164 _mesa_half_to_float(_src[0][_i].u16); 10165 10166 int32_t dst; 10167 10168 frexp(src0, &dst); 10169 10170 _dst_val[_i].i32 = dst; 10171 } 
10172 10173 break; 10174 } 10175 case 32: { 10176 10177 10178 10179 10180 for (unsigned _i = 0; _i < num_components; _i++) { 10181 const float32_t src0 = 10182 _src[0][_i].f32; 10183 10184 int32_t dst; 10185 10186 frexp(src0, &dst); 10187 10188 _dst_val[_i].i32 = dst; 10189 } 10190 10191 break; 10192 } 10193 case 64: { 10194 10195 10196 10197 10198 for (unsigned _i = 0; _i < num_components; _i++) { 10199 const float64_t src0 = 10200 _src[0][_i].f64; 10201 10202 int32_t dst; 10203 10204 frexp(src0, &dst); 10205 10206 _dst_val[_i].i32 = dst; 10207 } 10208 10209 break; 10210 } 10211 10212 default: 10213 unreachable("unknown bit width"); 10214 } 10215} 10216static void 10217evaluate_frexp_sig(nir_const_value *_dst_val, 10218 MAYBE_UNUSED unsigned num_components, 10219 unsigned bit_size, 10220 MAYBE_UNUSED nir_const_value **_src) 10221{ 10222 switch (bit_size) { 10223 case 16: { 10224 10225 10226 10227 10228 for (unsigned _i = 0; _i < num_components; _i++) { 10229 const float src0 = 10230 _mesa_half_to_float(_src[0][_i].u16); 10231 10232 float16_t dst; 10233 10234 int n; dst = frexp(src0, &n); 10235 10236 _dst_val[_i].u16 = _mesa_float_to_half(dst); 10237 } 10238 10239 break; 10240 } 10241 case 32: { 10242 10243 10244 10245 10246 for (unsigned _i = 0; _i < num_components; _i++) { 10247 const float32_t src0 = 10248 _src[0][_i].f32; 10249 10250 float32_t dst; 10251 10252 int n; dst = frexp(src0, &n); 10253 10254 _dst_val[_i].f32 = dst; 10255 } 10256 10257 break; 10258 } 10259 case 64: { 10260 10261 10262 10263 10264 for (unsigned _i = 0; _i < num_components; _i++) { 10265 const float64_t src0 = 10266 _src[0][_i].f64; 10267 10268 float64_t dst; 10269 10270 int n; dst = frexp(src0, &n); 10271 10272 _dst_val[_i].f64 = dst; 10273 } 10274 10275 break; 10276 } 10277 10278 default: 10279 unreachable("unknown bit width"); 10280 } 10281} 10282static void 10283evaluate_fround_even(nir_const_value *_dst_val, 10284 MAYBE_UNUSED unsigned num_components, 10285 unsigned bit_size, 10286 
MAYBE_UNUSED nir_const_value **_src) 10287{ 10288 switch (bit_size) { 10289 case 16: { 10290 10291 10292 10293 10294 for (unsigned _i = 0; _i < num_components; _i++) { 10295 const float src0 = 10296 _mesa_half_to_float(_src[0][_i].u16); 10297 10298 float16_t dst = bit_size == 64 ? _mesa_roundeven(src0) : _mesa_roundevenf(src0); 10299 10300 _dst_val[_i].u16 = _mesa_float_to_half(dst); 10301 } 10302 10303 break; 10304 } 10305 case 32: { 10306 10307 10308 10309 10310 for (unsigned _i = 0; _i < num_components; _i++) { 10311 const float32_t src0 = 10312 _src[0][_i].f32; 10313 10314 float32_t dst = bit_size == 64 ? _mesa_roundeven(src0) : _mesa_roundevenf(src0); 10315 10316 _dst_val[_i].f32 = dst; 10317 } 10318 10319 break; 10320 } 10321 case 64: { 10322 10323 10324 10325 10326 for (unsigned _i = 0; _i < num_components; _i++) { 10327 const float64_t src0 = 10328 _src[0][_i].f64; 10329 10330 float64_t dst = bit_size == 64 ? _mesa_roundeven(src0) : _mesa_roundevenf(src0); 10331 10332 _dst_val[_i].f64 = dst; 10333 } 10334 10335 break; 10336 } 10337 10338 default: 10339 unreachable("unknown bit width"); 10340 } 10341} 10342static void 10343evaluate_frsq(nir_const_value *_dst_val, 10344 MAYBE_UNUSED unsigned num_components, 10345 unsigned bit_size, 10346 MAYBE_UNUSED nir_const_value **_src) 10347{ 10348 switch (bit_size) { 10349 case 16: { 10350 10351 10352 10353 10354 for (unsigned _i = 0; _i < num_components; _i++) { 10355 const float src0 = 10356 _mesa_half_to_float(_src[0][_i].u16); 10357 10358 float16_t dst = bit_size == 64 ? 1.0 / sqrt(src0) : 1.0f / sqrtf(src0); 10359 10360 _dst_val[_i].u16 = _mesa_float_to_half(dst); 10361 } 10362 10363 break; 10364 } 10365 case 32: { 10366 10367 10368 10369 10370 for (unsigned _i = 0; _i < num_components; _i++) { 10371 const float32_t src0 = 10372 _src[0][_i].f32; 10373 10374 float32_t dst = bit_size == 64 ? 
1.0 / sqrt(src0) : 1.0f / sqrtf(src0); 10375 10376 _dst_val[_i].f32 = dst; 10377 } 10378 10379 break; 10380 } 10381 case 64: { 10382 10383 10384 10385 10386 for (unsigned _i = 0; _i < num_components; _i++) { 10387 const float64_t src0 = 10388 _src[0][_i].f64; 10389 10390 float64_t dst = bit_size == 64 ? 1.0 / sqrt(src0) : 1.0f / sqrtf(src0); 10391 10392 _dst_val[_i].f64 = dst; 10393 } 10394 10395 break; 10396 } 10397 10398 default: 10399 unreachable("unknown bit width"); 10400 } 10401} 10402static void 10403evaluate_fsat(nir_const_value *_dst_val, 10404 MAYBE_UNUSED unsigned num_components, 10405 unsigned bit_size, 10406 MAYBE_UNUSED nir_const_value **_src) 10407{ 10408 switch (bit_size) { 10409 case 16: { 10410 10411 10412 10413 10414 for (unsigned _i = 0; _i < num_components; _i++) { 10415 const float src0 = 10416 _mesa_half_to_float(_src[0][_i].u16); 10417 10418 float16_t dst = bit_size == 64 ? ((src0 > 1.0) ? 1.0 : ((src0 <= 0.0) ? 0.0 : src0)) : ((src0 > 1.0f) ? 1.0f : ((src0 <= 0.0f) ? 0.0f : src0)); 10419 10420 _dst_val[_i].u16 = _mesa_float_to_half(dst); 10421 } 10422 10423 break; 10424 } 10425 case 32: { 10426 10427 10428 10429 10430 for (unsigned _i = 0; _i < num_components; _i++) { 10431 const float32_t src0 = 10432 _src[0][_i].f32; 10433 10434 float32_t dst = bit_size == 64 ? ((src0 > 1.0) ? 1.0 : ((src0 <= 0.0) ? 0.0 : src0)) : ((src0 > 1.0f) ? 1.0f : ((src0 <= 0.0f) ? 0.0f : src0)); 10435 10436 _dst_val[_i].f32 = dst; 10437 } 10438 10439 break; 10440 } 10441 case 64: { 10442 10443 10444 10445 10446 for (unsigned _i = 0; _i < num_components; _i++) { 10447 const float64_t src0 = 10448 _src[0][_i].f64; 10449 10450 float64_t dst = bit_size == 64 ? ((src0 > 1.0) ? 1.0 : ((src0 <= 0.0) ? 0.0 : src0)) : ((src0 > 1.0f) ? 1.0f : ((src0 <= 0.0f) ? 
0.0f : src0)); 10451 10452 _dst_val[_i].f64 = dst; 10453 } 10454 10455 break; 10456 } 10457 10458 default: 10459 unreachable("unknown bit width"); 10460 } 10461} 10462static void 10463evaluate_fsign(nir_const_value *_dst_val, 10464 MAYBE_UNUSED unsigned num_components, 10465 unsigned bit_size, 10466 MAYBE_UNUSED nir_const_value **_src) 10467{ 10468 switch (bit_size) { 10469 case 16: { 10470 10471 10472 10473 10474 for (unsigned _i = 0; _i < num_components; _i++) { 10475 const float src0 = 10476 _mesa_half_to_float(_src[0][_i].u16); 10477 10478 float16_t dst = bit_size == 64 ? ((src0 == 0.0) ? 0.0 : ((src0 > 0.0) ? 1.0 : -1.0)) : ((src0 == 0.0f) ? 0.0f : ((src0 > 0.0f) ? 1.0f : -1.0f)); 10479 10480 _dst_val[_i].u16 = _mesa_float_to_half(dst); 10481 } 10482 10483 break; 10484 } 10485 case 32: { 10486 10487 10488 10489 10490 for (unsigned _i = 0; _i < num_components; _i++) { 10491 const float32_t src0 = 10492 _src[0][_i].f32; 10493 10494 float32_t dst = bit_size == 64 ? ((src0 == 0.0) ? 0.0 : ((src0 > 0.0) ? 1.0 : -1.0)) : ((src0 == 0.0f) ? 0.0f : ((src0 > 0.0f) ? 1.0f : -1.0f)); 10495 10496 _dst_val[_i].f32 = dst; 10497 } 10498 10499 break; 10500 } 10501 case 64: { 10502 10503 10504 10505 10506 for (unsigned _i = 0; _i < num_components; _i++) { 10507 const float64_t src0 = 10508 _src[0][_i].f64; 10509 10510 float64_t dst = bit_size == 64 ? ((src0 == 0.0) ? 0.0 : ((src0 > 0.0) ? 1.0 : -1.0)) : ((src0 == 0.0f) ? 0.0f : ((src0 > 0.0f) ? 
1.0f : -1.0f)); 10511 10512 _dst_val[_i].f64 = dst; 10513 } 10514 10515 break; 10516 } 10517 10518 default: 10519 unreachable("unknown bit width"); 10520 } 10521} 10522static void 10523evaluate_fsin(nir_const_value *_dst_val, 10524 MAYBE_UNUSED unsigned num_components, 10525 unsigned bit_size, 10526 MAYBE_UNUSED nir_const_value **_src) 10527{ 10528 switch (bit_size) { 10529 case 16: { 10530 10531 10532 10533 10534 for (unsigned _i = 0; _i < num_components; _i++) { 10535 const float src0 = 10536 _mesa_half_to_float(_src[0][_i].u16); 10537 10538 float16_t dst = bit_size == 64 ? sin(src0) : sinf(src0); 10539 10540 _dst_val[_i].u16 = _mesa_float_to_half(dst); 10541 } 10542 10543 break; 10544 } 10545 case 32: { 10546 10547 10548 10549 10550 for (unsigned _i = 0; _i < num_components; _i++) { 10551 const float32_t src0 = 10552 _src[0][_i].f32; 10553 10554 float32_t dst = bit_size == 64 ? sin(src0) : sinf(src0); 10555 10556 _dst_val[_i].f32 = dst; 10557 } 10558 10559 break; 10560 } 10561 case 64: { 10562 10563 10564 10565 10566 for (unsigned _i = 0; _i < num_components; _i++) { 10567 const float64_t src0 = 10568 _src[0][_i].f64; 10569 10570 float64_t dst = bit_size == 64 ? sin(src0) : sinf(src0); 10571 10572 _dst_val[_i].f64 = dst; 10573 } 10574 10575 break; 10576 } 10577 10578 default: 10579 unreachable("unknown bit width"); 10580 } 10581} 10582static void 10583evaluate_fsqrt(nir_const_value *_dst_val, 10584 MAYBE_UNUSED unsigned num_components, 10585 unsigned bit_size, 10586 MAYBE_UNUSED nir_const_value **_src) 10587{ 10588 switch (bit_size) { 10589 case 16: { 10590 10591 10592 10593 10594 for (unsigned _i = 0; _i < num_components; _i++) { 10595 const float src0 = 10596 _mesa_half_to_float(_src[0][_i].u16); 10597 10598 float16_t dst = bit_size == 64 ? 
sqrt(src0) : sqrtf(src0); 10599 10600 _dst_val[_i].u16 = _mesa_float_to_half(dst); 10601 } 10602 10603 break; 10604 } 10605 case 32: { 10606 10607 10608 10609 10610 for (unsigned _i = 0; _i < num_components; _i++) { 10611 const float32_t src0 = 10612 _src[0][_i].f32; 10613 10614 float32_t dst = bit_size == 64 ? sqrt(src0) : sqrtf(src0); 10615 10616 _dst_val[_i].f32 = dst; 10617 } 10618 10619 break; 10620 } 10621 case 64: { 10622 10623 10624 10625 10626 for (unsigned _i = 0; _i < num_components; _i++) { 10627 const float64_t src0 = 10628 _src[0][_i].f64; 10629 10630 float64_t dst = bit_size == 64 ? sqrt(src0) : sqrtf(src0); 10631 10632 _dst_val[_i].f64 = dst; 10633 } 10634 10635 break; 10636 } 10637 10638 default: 10639 unreachable("unknown bit width"); 10640 } 10641} 10642static void 10643evaluate_fsub(nir_const_value *_dst_val, 10644 MAYBE_UNUSED unsigned num_components, 10645 unsigned bit_size, 10646 MAYBE_UNUSED nir_const_value **_src) 10647{ 10648 switch (bit_size) { 10649 case 16: { 10650 10651 10652 10653 10654 for (unsigned _i = 0; _i < num_components; _i++) { 10655 const float src0 = 10656 _mesa_half_to_float(_src[0][_i].u16); 10657 const float src1 = 10658 _mesa_half_to_float(_src[1][_i].u16); 10659 10660 float16_t dst = src0 - src1; 10661 10662 _dst_val[_i].u16 = _mesa_float_to_half(dst); 10663 } 10664 10665 break; 10666 } 10667 case 32: { 10668 10669 10670 10671 10672 for (unsigned _i = 0; _i < num_components; _i++) { 10673 const float32_t src0 = 10674 _src[0][_i].f32; 10675 const float32_t src1 = 10676 _src[1][_i].f32; 10677 10678 float32_t dst = src0 - src1; 10679 10680 _dst_val[_i].f32 = dst; 10681 } 10682 10683 break; 10684 } 10685 case 64: { 10686 10687 10688 10689 10690 for (unsigned _i = 0; _i < num_components; _i++) { 10691 const float64_t src0 = 10692 _src[0][_i].f64; 10693 const float64_t src1 = 10694 _src[1][_i].f64; 10695 10696 float64_t dst = src0 - src1; 10697 10698 _dst_val[_i].f64 = dst; 10699 } 10700 10701 break; 10702 } 10703 10704 
default: 10705 unreachable("unknown bit width"); 10706 } 10707} 10708static void 10709evaluate_ftrunc(nir_const_value *_dst_val, 10710 MAYBE_UNUSED unsigned num_components, 10711 unsigned bit_size, 10712 MAYBE_UNUSED nir_const_value **_src) 10713{ 10714 switch (bit_size) { 10715 case 16: { 10716 10717 10718 10719 10720 for (unsigned _i = 0; _i < num_components; _i++) { 10721 const float src0 = 10722 _mesa_half_to_float(_src[0][_i].u16); 10723 10724 float16_t dst = bit_size == 64 ? trunc(src0) : truncf(src0); 10725 10726 _dst_val[_i].u16 = _mesa_float_to_half(dst); 10727 } 10728 10729 break; 10730 } 10731 case 32: { 10732 10733 10734 10735 10736 for (unsigned _i = 0; _i < num_components; _i++) { 10737 const float32_t src0 = 10738 _src[0][_i].f32; 10739 10740 float32_t dst = bit_size == 64 ? trunc(src0) : truncf(src0); 10741 10742 _dst_val[_i].f32 = dst; 10743 } 10744 10745 break; 10746 } 10747 case 64: { 10748 10749 10750 10751 10752 for (unsigned _i = 0; _i < num_components; _i++) { 10753 const float64_t src0 = 10754 _src[0][_i].f64; 10755 10756 float64_t dst = bit_size == 64 ? trunc(src0) : truncf(src0); 10757 10758 _dst_val[_i].f64 = dst; 10759 } 10760 10761 break; 10762 } 10763 10764 default: 10765 unreachable("unknown bit width"); 10766 } 10767} 10768static void 10769evaluate_fxor(nir_const_value *_dst_val, 10770 MAYBE_UNUSED unsigned num_components, 10771 UNUSED unsigned bit_size, 10772 MAYBE_UNUSED nir_const_value **_src) 10773{ 10774 10775 10776 10777 10778 for (unsigned _i = 0; _i < num_components; _i++) { 10779 const float32_t src0 = 10780 _src[0][_i].f32; 10781 const float32_t src1 = 10782 _src[1][_i].f32; 10783 10784 float32_t dst = (src0 != 0.0f && src1 == 0.0f) || (src0 == 0.0f && src1 != 0.0f) ? 
1.0f : 0.0f; 10785 10786 _dst_val[_i].f32 = dst; 10787 } 10788 10789} 10790static void 10791evaluate_i2b1(nir_const_value *_dst_val, 10792 MAYBE_UNUSED unsigned num_components, 10793 unsigned bit_size, 10794 MAYBE_UNUSED nir_const_value **_src) 10795{ 10796 switch (bit_size) { 10797 case 1: { 10798 10799 10800 10801 10802 for (unsigned _i = 0; _i < num_components; _i++) { 10803 /* 1-bit integers use a 0/-1 convention */ 10804 const int1_t src0 = -(int1_t)_src[0][_i].b; 10805 10806 bool1_t dst = src0 != 0; 10807 10808 _dst_val[_i].b = -(int)dst; 10809 } 10810 10811 break; 10812 } 10813 case 8: { 10814 10815 10816 10817 10818 for (unsigned _i = 0; _i < num_components; _i++) { 10819 const int8_t src0 = 10820 _src[0][_i].i8; 10821 10822 bool1_t dst = src0 != 0; 10823 10824 _dst_val[_i].b = -(int)dst; 10825 } 10826 10827 break; 10828 } 10829 case 16: { 10830 10831 10832 10833 10834 for (unsigned _i = 0; _i < num_components; _i++) { 10835 const int16_t src0 = 10836 _src[0][_i].i16; 10837 10838 bool1_t dst = src0 != 0; 10839 10840 _dst_val[_i].b = -(int)dst; 10841 } 10842 10843 break; 10844 } 10845 case 32: { 10846 10847 10848 10849 10850 for (unsigned _i = 0; _i < num_components; _i++) { 10851 const int32_t src0 = 10852 _src[0][_i].i32; 10853 10854 bool1_t dst = src0 != 0; 10855 10856 _dst_val[_i].b = -(int)dst; 10857 } 10858 10859 break; 10860 } 10861 case 64: { 10862 10863 10864 10865 10866 for (unsigned _i = 0; _i < num_components; _i++) { 10867 const int64_t src0 = 10868 _src[0][_i].i64; 10869 10870 bool1_t dst = src0 != 0; 10871 10872 _dst_val[_i].b = -(int)dst; 10873 } 10874 10875 break; 10876 } 10877 10878 default: 10879 unreachable("unknown bit width"); 10880 } 10881} 10882static void 10883evaluate_i2b32(nir_const_value *_dst_val, 10884 MAYBE_UNUSED unsigned num_components, 10885 unsigned bit_size, 10886 MAYBE_UNUSED nir_const_value **_src) 10887{ 10888 switch (bit_size) { 10889 case 1: { 10890 10891 10892 10893 10894 for (unsigned _i = 0; _i < num_components; 
_i++) { 10895 /* 1-bit integers use a 0/-1 convention */ 10896 const int1_t src0 = -(int1_t)_src[0][_i].b; 10897 10898 bool32_t dst = src0 != 0; 10899 10900 _dst_val[_i].i32 = -(int)dst; 10901 } 10902 10903 break; 10904 } 10905 case 8: { 10906 10907 10908 10909 10910 for (unsigned _i = 0; _i < num_components; _i++) { 10911 const int8_t src0 = 10912 _src[0][_i].i8; 10913 10914 bool32_t dst = src0 != 0; 10915 10916 _dst_val[_i].i32 = -(int)dst; 10917 } 10918 10919 break; 10920 } 10921 case 16: { 10922 10923 10924 10925 10926 for (unsigned _i = 0; _i < num_components; _i++) { 10927 const int16_t src0 = 10928 _src[0][_i].i16; 10929 10930 bool32_t dst = src0 != 0; 10931 10932 _dst_val[_i].i32 = -(int)dst; 10933 } 10934 10935 break; 10936 } 10937 case 32: { 10938 10939 10940 10941 10942 for (unsigned _i = 0; _i < num_components; _i++) { 10943 const int32_t src0 = 10944 _src[0][_i].i32; 10945 10946 bool32_t dst = src0 != 0; 10947 10948 _dst_val[_i].i32 = -(int)dst; 10949 } 10950 10951 break; 10952 } 10953 case 64: { 10954 10955 10956 10957 10958 for (unsigned _i = 0; _i < num_components; _i++) { 10959 const int64_t src0 = 10960 _src[0][_i].i64; 10961 10962 bool32_t dst = src0 != 0; 10963 10964 _dst_val[_i].i32 = -(int)dst; 10965 } 10966 10967 break; 10968 } 10969 10970 default: 10971 unreachable("unknown bit width"); 10972 } 10973} 10974static void 10975evaluate_i2f16(nir_const_value *_dst_val, 10976 MAYBE_UNUSED unsigned num_components, 10977 unsigned bit_size, 10978 MAYBE_UNUSED nir_const_value **_src) 10979{ 10980 switch (bit_size) { 10981 case 1: { 10982 10983 10984 10985 10986 for (unsigned _i = 0; _i < num_components; _i++) { 10987 /* 1-bit integers use a 0/-1 convention */ 10988 const int1_t src0 = -(int1_t)_src[0][_i].b; 10989 10990 float16_t dst = src0; 10991 10992 _dst_val[_i].u16 = _mesa_float_to_half(dst); 10993 } 10994 10995 break; 10996 } 10997 case 8: { 10998 10999 11000 11001 11002 for (unsigned _i = 0; _i < num_components; _i++) { 11003 const int8_t src0 
= 11004 _src[0][_i].i8; 11005 11006 float16_t dst = src0; 11007 11008 _dst_val[_i].u16 = _mesa_float_to_half(dst); 11009 } 11010 11011 break; 11012 } 11013 case 16: { 11014 11015 11016 11017 11018 for (unsigned _i = 0; _i < num_components; _i++) { 11019 const int16_t src0 = 11020 _src[0][_i].i16; 11021 11022 float16_t dst = src0; 11023 11024 _dst_val[_i].u16 = _mesa_float_to_half(dst); 11025 } 11026 11027 break; 11028 } 11029 case 32: { 11030 11031 11032 11033 11034 for (unsigned _i = 0; _i < num_components; _i++) { 11035 const int32_t src0 = 11036 _src[0][_i].i32; 11037 11038 float16_t dst = src0; 11039 11040 _dst_val[_i].u16 = _mesa_float_to_half(dst); 11041 } 11042 11043 break; 11044 } 11045 case 64: { 11046 11047 11048 11049 11050 for (unsigned _i = 0; _i < num_components; _i++) { 11051 const int64_t src0 = 11052 _src[0][_i].i64; 11053 11054 float16_t dst = src0; 11055 11056 _dst_val[_i].u16 = _mesa_float_to_half(dst); 11057 } 11058 11059 break; 11060 } 11061 11062 default: 11063 unreachable("unknown bit width"); 11064 } 11065} 11066static void 11067evaluate_i2f32(nir_const_value *_dst_val, 11068 MAYBE_UNUSED unsigned num_components, 11069 unsigned bit_size, 11070 MAYBE_UNUSED nir_const_value **_src) 11071{ 11072 switch (bit_size) { 11073 case 1: { 11074 11075 11076 11077 11078 for (unsigned _i = 0; _i < num_components; _i++) { 11079 /* 1-bit integers use a 0/-1 convention */ 11080 const int1_t src0 = -(int1_t)_src[0][_i].b; 11081 11082 float32_t dst = src0; 11083 11084 _dst_val[_i].f32 = dst; 11085 } 11086 11087 break; 11088 } 11089 case 8: { 11090 11091 11092 11093 11094 for (unsigned _i = 0; _i < num_components; _i++) { 11095 const int8_t src0 = 11096 _src[0][_i].i8; 11097 11098 float32_t dst = src0; 11099 11100 _dst_val[_i].f32 = dst; 11101 } 11102 11103 break; 11104 } 11105 case 16: { 11106 11107 11108 11109 11110 for (unsigned _i = 0; _i < num_components; _i++) { 11111 const int16_t src0 = 11112 _src[0][_i].i16; 11113 11114 float32_t dst = src0; 11115 
11116 _dst_val[_i].f32 = dst; 11117 } 11118 11119 break; 11120 } 11121 case 32: { 11122 11123 11124 11125 11126 for (unsigned _i = 0; _i < num_components; _i++) { 11127 const int32_t src0 = 11128 _src[0][_i].i32; 11129 11130 float32_t dst = src0; 11131 11132 _dst_val[_i].f32 = dst; 11133 } 11134 11135 break; 11136 } 11137 case 64: { 11138 11139 11140 11141 11142 for (unsigned _i = 0; _i < num_components; _i++) { 11143 const int64_t src0 = 11144 _src[0][_i].i64; 11145 11146 float32_t dst = src0; 11147 11148 _dst_val[_i].f32 = dst; 11149 } 11150 11151 break; 11152 } 11153 11154 default: 11155 unreachable("unknown bit width"); 11156 } 11157} 11158static void 11159evaluate_i2f64(nir_const_value *_dst_val, 11160 MAYBE_UNUSED unsigned num_components, 11161 unsigned bit_size, 11162 MAYBE_UNUSED nir_const_value **_src) 11163{ 11164 switch (bit_size) { 11165 case 1: { 11166 11167 11168 11169 11170 for (unsigned _i = 0; _i < num_components; _i++) { 11171 /* 1-bit integers use a 0/-1 convention */ 11172 const int1_t src0 = -(int1_t)_src[0][_i].b; 11173 11174 float64_t dst = src0; 11175 11176 _dst_val[_i].f64 = dst; 11177 } 11178 11179 break; 11180 } 11181 case 8: { 11182 11183 11184 11185 11186 for (unsigned _i = 0; _i < num_components; _i++) { 11187 const int8_t src0 = 11188 _src[0][_i].i8; 11189 11190 float64_t dst = src0; 11191 11192 _dst_val[_i].f64 = dst; 11193 } 11194 11195 break; 11196 } 11197 case 16: { 11198 11199 11200 11201 11202 for (unsigned _i = 0; _i < num_components; _i++) { 11203 const int16_t src0 = 11204 _src[0][_i].i16; 11205 11206 float64_t dst = src0; 11207 11208 _dst_val[_i].f64 = dst; 11209 } 11210 11211 break; 11212 } 11213 case 32: { 11214 11215 11216 11217 11218 for (unsigned _i = 0; _i < num_components; _i++) { 11219 const int32_t src0 = 11220 _src[0][_i].i32; 11221 11222 float64_t dst = src0; 11223 11224 _dst_val[_i].f64 = dst; 11225 } 11226 11227 break; 11228 } 11229 case 64: { 11230 11231 11232 11233 11234 for (unsigned _i = 0; _i < 
num_components; _i++) { 11235 const int64_t src0 = 11236 _src[0][_i].i64; 11237 11238 float64_t dst = src0; 11239 11240 _dst_val[_i].f64 = dst; 11241 } 11242 11243 break; 11244 } 11245 11246 default: 11247 unreachable("unknown bit width"); 11248 } 11249} 11250static void 11251evaluate_i2i1(nir_const_value *_dst_val, 11252 MAYBE_UNUSED unsigned num_components, 11253 unsigned bit_size, 11254 MAYBE_UNUSED nir_const_value **_src) 11255{ 11256 switch (bit_size) { 11257 case 1: { 11258 11259 11260 11261 11262 for (unsigned _i = 0; _i < num_components; _i++) { 11263 /* 1-bit integers use a 0/-1 convention */ 11264 const int1_t src0 = -(int1_t)_src[0][_i].b; 11265 11266 int1_t dst = src0; 11267 11268 /* 1-bit integers get truncated */ 11269 _dst_val[_i].b = dst & 1; 11270 } 11271 11272 break; 11273 } 11274 case 8: { 11275 11276 11277 11278 11279 for (unsigned _i = 0; _i < num_components; _i++) { 11280 const int8_t src0 = 11281 _src[0][_i].i8; 11282 11283 int1_t dst = src0; 11284 11285 /* 1-bit integers get truncated */ 11286 _dst_val[_i].b = dst & 1; 11287 } 11288 11289 break; 11290 } 11291 case 16: { 11292 11293 11294 11295 11296 for (unsigned _i = 0; _i < num_components; _i++) { 11297 const int16_t src0 = 11298 _src[0][_i].i16; 11299 11300 int1_t dst = src0; 11301 11302 /* 1-bit integers get truncated */ 11303 _dst_val[_i].b = dst & 1; 11304 } 11305 11306 break; 11307 } 11308 case 32: { 11309 11310 11311 11312 11313 for (unsigned _i = 0; _i < num_components; _i++) { 11314 const int32_t src0 = 11315 _src[0][_i].i32; 11316 11317 int1_t dst = src0; 11318 11319 /* 1-bit integers get truncated */ 11320 _dst_val[_i].b = dst & 1; 11321 } 11322 11323 break; 11324 } 11325 case 64: { 11326 11327 11328 11329 11330 for (unsigned _i = 0; _i < num_components; _i++) { 11331 const int64_t src0 = 11332 _src[0][_i].i64; 11333 11334 int1_t dst = src0; 11335 11336 /* 1-bit integers get truncated */ 11337 _dst_val[_i].b = dst & 1; 11338 } 11339 11340 break; 11341 } 11342 11343 default: 11344 
unreachable("unknown bit width"); 11345 } 11346} 11347static void 11348evaluate_i2i16(nir_const_value *_dst_val, 11349 MAYBE_UNUSED unsigned num_components, 11350 unsigned bit_size, 11351 MAYBE_UNUSED nir_const_value **_src) 11352{ 11353 switch (bit_size) { 11354 case 1: { 11355 11356 11357 11358 11359 for (unsigned _i = 0; _i < num_components; _i++) { 11360 /* 1-bit integers use a 0/-1 convention */ 11361 const int1_t src0 = -(int1_t)_src[0][_i].b; 11362 11363 int16_t dst = src0; 11364 11365 _dst_val[_i].i16 = dst; 11366 } 11367 11368 break; 11369 } 11370 case 8: { 11371 11372 11373 11374 11375 for (unsigned _i = 0; _i < num_components; _i++) { 11376 const int8_t src0 = 11377 _src[0][_i].i8; 11378 11379 int16_t dst = src0; 11380 11381 _dst_val[_i].i16 = dst; 11382 } 11383 11384 break; 11385 } 11386 case 16: { 11387 11388 11389 11390 11391 for (unsigned _i = 0; _i < num_components; _i++) { 11392 const int16_t src0 = 11393 _src[0][_i].i16; 11394 11395 int16_t dst = src0; 11396 11397 _dst_val[_i].i16 = dst; 11398 } 11399 11400 break; 11401 } 11402 case 32: { 11403 11404 11405 11406 11407 for (unsigned _i = 0; _i < num_components; _i++) { 11408 const int32_t src0 = 11409 _src[0][_i].i32; 11410 11411 int16_t dst = src0; 11412 11413 _dst_val[_i].i16 = dst; 11414 } 11415 11416 break; 11417 } 11418 case 64: { 11419 11420 11421 11422 11423 for (unsigned _i = 0; _i < num_components; _i++) { 11424 const int64_t src0 = 11425 _src[0][_i].i64; 11426 11427 int16_t dst = src0; 11428 11429 _dst_val[_i].i16 = dst; 11430 } 11431 11432 break; 11433 } 11434 11435 default: 11436 unreachable("unknown bit width"); 11437 } 11438} 11439static void 11440evaluate_i2i32(nir_const_value *_dst_val, 11441 MAYBE_UNUSED unsigned num_components, 11442 unsigned bit_size, 11443 MAYBE_UNUSED nir_const_value **_src) 11444{ 11445 switch (bit_size) { 11446 case 1: { 11447 11448 11449 11450 11451 for (unsigned _i = 0; _i < num_components; _i++) { 11452 /* 1-bit integers use a 0/-1 convention */ 11453 
const int1_t src0 = -(int1_t)_src[0][_i].b; 11454 11455 int32_t dst = src0; 11456 11457 _dst_val[_i].i32 = dst; 11458 } 11459 11460 break; 11461 } 11462 case 8: { 11463 11464 11465 11466 11467 for (unsigned _i = 0; _i < num_components; _i++) { 11468 const int8_t src0 = 11469 _src[0][_i].i8; 11470 11471 int32_t dst = src0; 11472 11473 _dst_val[_i].i32 = dst; 11474 } 11475 11476 break; 11477 } 11478 case 16: { 11479 11480 11481 11482 11483 for (unsigned _i = 0; _i < num_components; _i++) { 11484 const int16_t src0 = 11485 _src[0][_i].i16; 11486 11487 int32_t dst = src0; 11488 11489 _dst_val[_i].i32 = dst; 11490 } 11491 11492 break; 11493 } 11494 case 32: { 11495 11496 11497 11498 11499 for (unsigned _i = 0; _i < num_components; _i++) { 11500 const int32_t src0 = 11501 _src[0][_i].i32; 11502 11503 int32_t dst = src0; 11504 11505 _dst_val[_i].i32 = dst; 11506 } 11507 11508 break; 11509 } 11510 case 64: { 11511 11512 11513 11514 11515 for (unsigned _i = 0; _i < num_components; _i++) { 11516 const int64_t src0 = 11517 _src[0][_i].i64; 11518 11519 int32_t dst = src0; 11520 11521 _dst_val[_i].i32 = dst; 11522 } 11523 11524 break; 11525 } 11526 11527 default: 11528 unreachable("unknown bit width"); 11529 } 11530} 11531static void 11532evaluate_i2i64(nir_const_value *_dst_val, 11533 MAYBE_UNUSED unsigned num_components, 11534 unsigned bit_size, 11535 MAYBE_UNUSED nir_const_value **_src) 11536{ 11537 switch (bit_size) { 11538 case 1: { 11539 11540 11541 11542 11543 for (unsigned _i = 0; _i < num_components; _i++) { 11544 /* 1-bit integers use a 0/-1 convention */ 11545 const int1_t src0 = -(int1_t)_src[0][_i].b; 11546 11547 int64_t dst = src0; 11548 11549 _dst_val[_i].i64 = dst; 11550 } 11551 11552 break; 11553 } 11554 case 8: { 11555 11556 11557 11558 11559 for (unsigned _i = 0; _i < num_components; _i++) { 11560 const int8_t src0 = 11561 _src[0][_i].i8; 11562 11563 int64_t dst = src0; 11564 11565 _dst_val[_i].i64 = dst; 11566 } 11567 11568 break; 11569 } 11570 case 16: { 
11571 11572 11573 11574 11575 for (unsigned _i = 0; _i < num_components; _i++) { 11576 const int16_t src0 = 11577 _src[0][_i].i16; 11578 11579 int64_t dst = src0; 11580 11581 _dst_val[_i].i64 = dst; 11582 } 11583 11584 break; 11585 } 11586 case 32: { 11587 11588 11589 11590 11591 for (unsigned _i = 0; _i < num_components; _i++) { 11592 const int32_t src0 = 11593 _src[0][_i].i32; 11594 11595 int64_t dst = src0; 11596 11597 _dst_val[_i].i64 = dst; 11598 } 11599 11600 break; 11601 } 11602 case 64: { 11603 11604 11605 11606 11607 for (unsigned _i = 0; _i < num_components; _i++) { 11608 const int64_t src0 = 11609 _src[0][_i].i64; 11610 11611 int64_t dst = src0; 11612 11613 _dst_val[_i].i64 = dst; 11614 } 11615 11616 break; 11617 } 11618 11619 default: 11620 unreachable("unknown bit width"); 11621 } 11622} 11623static void 11624evaluate_i2i8(nir_const_value *_dst_val, 11625 MAYBE_UNUSED unsigned num_components, 11626 unsigned bit_size, 11627 MAYBE_UNUSED nir_const_value **_src) 11628{ 11629 switch (bit_size) { 11630 case 1: { 11631 11632 11633 11634 11635 for (unsigned _i = 0; _i < num_components; _i++) { 11636 /* 1-bit integers use a 0/-1 convention */ 11637 const int1_t src0 = -(int1_t)_src[0][_i].b; 11638 11639 int8_t dst = src0; 11640 11641 _dst_val[_i].i8 = dst; 11642 } 11643 11644 break; 11645 } 11646 case 8: { 11647 11648 11649 11650 11651 for (unsigned _i = 0; _i < num_components; _i++) { 11652 const int8_t src0 = 11653 _src[0][_i].i8; 11654 11655 int8_t dst = src0; 11656 11657 _dst_val[_i].i8 = dst; 11658 } 11659 11660 break; 11661 } 11662 case 16: { 11663 11664 11665 11666 11667 for (unsigned _i = 0; _i < num_components; _i++) { 11668 const int16_t src0 = 11669 _src[0][_i].i16; 11670 11671 int8_t dst = src0; 11672 11673 _dst_val[_i].i8 = dst; 11674 } 11675 11676 break; 11677 } 11678 case 32: { 11679 11680 11681 11682 11683 for (unsigned _i = 0; _i < num_components; _i++) { 11684 const int32_t src0 = 11685 _src[0][_i].i32; 11686 11687 int8_t dst = src0; 11688 
11689 _dst_val[_i].i8 = dst; 11690 } 11691 11692 break; 11693 } 11694 case 64: { 11695 11696 11697 11698 11699 for (unsigned _i = 0; _i < num_components; _i++) { 11700 const int64_t src0 = 11701 _src[0][_i].i64; 11702 11703 int8_t dst = src0; 11704 11705 _dst_val[_i].i8 = dst; 11706 } 11707 11708 break; 11709 } 11710 11711 default: 11712 unreachable("unknown bit width"); 11713 } 11714} 11715static void 11716evaluate_iabs(nir_const_value *_dst_val, 11717 MAYBE_UNUSED unsigned num_components, 11718 unsigned bit_size, 11719 MAYBE_UNUSED nir_const_value **_src) 11720{ 11721 switch (bit_size) { 11722 case 1: { 11723 11724 11725 11726 11727 for (unsigned _i = 0; _i < num_components; _i++) { 11728 /* 1-bit integers use a 0/-1 convention */ 11729 const int1_t src0 = -(int1_t)_src[0][_i].b; 11730 11731 int1_t dst = (src0 < 0) ? -src0 : src0; 11732 11733 /* 1-bit integers get truncated */ 11734 _dst_val[_i].b = dst & 1; 11735 } 11736 11737 break; 11738 } 11739 case 8: { 11740 11741 11742 11743 11744 for (unsigned _i = 0; _i < num_components; _i++) { 11745 const int8_t src0 = 11746 _src[0][_i].i8; 11747 11748 int8_t dst = (src0 < 0) ? -src0 : src0; 11749 11750 _dst_val[_i].i8 = dst; 11751 } 11752 11753 break; 11754 } 11755 case 16: { 11756 11757 11758 11759 11760 for (unsigned _i = 0; _i < num_components; _i++) { 11761 const int16_t src0 = 11762 _src[0][_i].i16; 11763 11764 int16_t dst = (src0 < 0) ? -src0 : src0; 11765 11766 _dst_val[_i].i16 = dst; 11767 } 11768 11769 break; 11770 } 11771 case 32: { 11772 11773 11774 11775 11776 for (unsigned _i = 0; _i < num_components; _i++) { 11777 const int32_t src0 = 11778 _src[0][_i].i32; 11779 11780 int32_t dst = (src0 < 0) ? -src0 : src0; 11781 11782 _dst_val[_i].i32 = dst; 11783 } 11784 11785 break; 11786 } 11787 case 64: { 11788 11789 11790 11791 11792 for (unsigned _i = 0; _i < num_components; _i++) { 11793 const int64_t src0 = 11794 _src[0][_i].i64; 11795 11796 int64_t dst = (src0 < 0) ? 
-src0 : src0; 11797 11798 _dst_val[_i].i64 = dst; 11799 } 11800 11801 break; 11802 } 11803 11804 default: 11805 unreachable("unknown bit width"); 11806 } 11807} 11808static void 11809evaluate_iadd(nir_const_value *_dst_val, 11810 MAYBE_UNUSED unsigned num_components, 11811 unsigned bit_size, 11812 MAYBE_UNUSED nir_const_value **_src) 11813{ 11814 switch (bit_size) { 11815 case 1: { 11816 11817 11818 11819 11820 for (unsigned _i = 0; _i < num_components; _i++) { 11821 /* 1-bit integers use a 0/-1 convention */ 11822 const int1_t src0 = -(int1_t)_src[0][_i].b; 11823 /* 1-bit integers use a 0/-1 convention */ 11824 const int1_t src1 = -(int1_t)_src[1][_i].b; 11825 11826 int1_t dst = src0 + src1; 11827 11828 /* 1-bit integers get truncated */ 11829 _dst_val[_i].b = dst & 1; 11830 } 11831 11832 break; 11833 } 11834 case 8: { 11835 11836 11837 11838 11839 for (unsigned _i = 0; _i < num_components; _i++) { 11840 const int8_t src0 = 11841 _src[0][_i].i8; 11842 const int8_t src1 = 11843 _src[1][_i].i8; 11844 11845 int8_t dst = src0 + src1; 11846 11847 _dst_val[_i].i8 = dst; 11848 } 11849 11850 break; 11851 } 11852 case 16: { 11853 11854 11855 11856 11857 for (unsigned _i = 0; _i < num_components; _i++) { 11858 const int16_t src0 = 11859 _src[0][_i].i16; 11860 const int16_t src1 = 11861 _src[1][_i].i16; 11862 11863 int16_t dst = src0 + src1; 11864 11865 _dst_val[_i].i16 = dst; 11866 } 11867 11868 break; 11869 } 11870 case 32: { 11871 11872 11873 11874 11875 for (unsigned _i = 0; _i < num_components; _i++) { 11876 const int32_t src0 = 11877 _src[0][_i].i32; 11878 const int32_t src1 = 11879 _src[1][_i].i32; 11880 11881 int32_t dst = src0 + src1; 11882 11883 _dst_val[_i].i32 = dst; 11884 } 11885 11886 break; 11887 } 11888 case 64: { 11889 11890 11891 11892 11893 for (unsigned _i = 0; _i < num_components; _i++) { 11894 const int64_t src0 = 11895 _src[0][_i].i64; 11896 const int64_t src1 = 11897 _src[1][_i].i64; 11898 11899 int64_t dst = src0 + src1; 11900 11901 _dst_val[_i].i64 
= dst; 11902 } 11903 11904 break; 11905 } 11906 11907 default: 11908 unreachable("unknown bit width"); 11909 } 11910} 11911static void 11912evaluate_iadd_sat(nir_const_value *_dst_val, 11913 MAYBE_UNUSED unsigned num_components, 11914 unsigned bit_size, 11915 MAYBE_UNUSED nir_const_value **_src) 11916{ 11917 switch (bit_size) { 11918 case 1: { 11919 11920 11921 11922 11923 for (unsigned _i = 0; _i < num_components; _i++) { 11924 /* 1-bit integers use a 0/-1 convention */ 11925 const int1_t src0 = -(int1_t)_src[0][_i].b; 11926 /* 1-bit integers use a 0/-1 convention */ 11927 const int1_t src1 = -(int1_t)_src[1][_i].b; 11928 11929 int1_t dst = 11930 src1 > 0 ? 11931 (src0 + src1 < src0 ? (1ull << (bit_size - 1)) - 1 : src0 + src1) : 11932 (src0 < src0 + src1 ? (1ull << (bit_size - 1)) : src0 + src1) 11933; 11934 11935 /* 1-bit integers get truncated */ 11936 _dst_val[_i].b = dst & 1; 11937 } 11938 11939 break; 11940 } 11941 case 8: { 11942 11943 11944 11945 11946 for (unsigned _i = 0; _i < num_components; _i++) { 11947 const int8_t src0 = 11948 _src[0][_i].i8; 11949 const int8_t src1 = 11950 _src[1][_i].i8; 11951 11952 int8_t dst = 11953 src1 > 0 ? 11954 (src0 + src1 < src0 ? (1ull << (bit_size - 1)) - 1 : src0 + src1) : 11955 (src0 < src0 + src1 ? (1ull << (bit_size - 1)) : src0 + src1) 11956; 11957 11958 _dst_val[_i].i8 = dst; 11959 } 11960 11961 break; 11962 } 11963 case 16: { 11964 11965 11966 11967 11968 for (unsigned _i = 0; _i < num_components; _i++) { 11969 const int16_t src0 = 11970 _src[0][_i].i16; 11971 const int16_t src1 = 11972 _src[1][_i].i16; 11973 11974 int16_t dst = 11975 src1 > 0 ? 11976 (src0 + src1 < src0 ? (1ull << (bit_size - 1)) - 1 : src0 + src1) : 11977 (src0 < src0 + src1 ? 
(1ull << (bit_size - 1)) : src0 + src1) 11978; 11979 11980 _dst_val[_i].i16 = dst; 11981 } 11982 11983 break; 11984 } 11985 case 32: { 11986 11987 11988 11989 11990 for (unsigned _i = 0; _i < num_components; _i++) { 11991 const int32_t src0 = 11992 _src[0][_i].i32; 11993 const int32_t src1 = 11994 _src[1][_i].i32; 11995 11996 int32_t dst = 11997 src1 > 0 ? 11998 (src0 + src1 < src0 ? (1ull << (bit_size - 1)) - 1 : src0 + src1) : 11999 (src0 < src0 + src1 ? (1ull << (bit_size - 1)) : src0 + src1) 12000; 12001 12002 _dst_val[_i].i32 = dst; 12003 } 12004 12005 break; 12006 } 12007 case 64: { 12008 12009 12010 12011 12012 for (unsigned _i = 0; _i < num_components; _i++) { 12013 const int64_t src0 = 12014 _src[0][_i].i64; 12015 const int64_t src1 = 12016 _src[1][_i].i64; 12017 12018 int64_t dst = 12019 src1 > 0 ? 12020 (src0 + src1 < src0 ? (1ull << (bit_size - 1)) - 1 : src0 + src1) : 12021 (src0 < src0 + src1 ? (1ull << (bit_size - 1)) : src0 + src1) 12022; 12023 12024 _dst_val[_i].i64 = dst; 12025 } 12026 12027 break; 12028 } 12029 12030 default: 12031 unreachable("unknown bit width"); 12032 } 12033} 12034static void 12035evaluate_iand(nir_const_value *_dst_val, 12036 MAYBE_UNUSED unsigned num_components, 12037 unsigned bit_size, 12038 MAYBE_UNUSED nir_const_value **_src) 12039{ 12040 switch (bit_size) { 12041 case 1: { 12042 12043 12044 12045 12046 for (unsigned _i = 0; _i < num_components; _i++) { 12047 const uint1_t src0 = 12048 _src[0][_i].b; 12049 const uint1_t src1 = 12050 _src[1][_i].b; 12051 12052 uint1_t dst = src0 & src1; 12053 12054 /* 1-bit integers get truncated */ 12055 _dst_val[_i].b = dst & 1; 12056 } 12057 12058 break; 12059 } 12060 case 8: { 12061 12062 12063 12064 12065 for (unsigned _i = 0; _i < num_components; _i++) { 12066 const uint8_t src0 = 12067 _src[0][_i].u8; 12068 const uint8_t src1 = 12069 _src[1][_i].u8; 12070 12071 uint8_t dst = src0 & src1; 12072 12073 _dst_val[_i].u8 = dst; 12074 } 12075 12076 break; 12077 } 12078 case 16: { 12079 
12080 12081 12082 12083 for (unsigned _i = 0; _i < num_components; _i++) { 12084 const uint16_t src0 = 12085 _src[0][_i].u16; 12086 const uint16_t src1 = 12087 _src[1][_i].u16; 12088 12089 uint16_t dst = src0 & src1; 12090 12091 _dst_val[_i].u16 = dst; 12092 } 12093 12094 break; 12095 } 12096 case 32: { 12097 12098 12099 12100 12101 for (unsigned _i = 0; _i < num_components; _i++) { 12102 const uint32_t src0 = 12103 _src[0][_i].u32; 12104 const uint32_t src1 = 12105 _src[1][_i].u32; 12106 12107 uint32_t dst = src0 & src1; 12108 12109 _dst_val[_i].u32 = dst; 12110 } 12111 12112 break; 12113 } 12114 case 64: { 12115 12116 12117 12118 12119 for (unsigned _i = 0; _i < num_components; _i++) { 12120 const uint64_t src0 = 12121 _src[0][_i].u64; 12122 const uint64_t src1 = 12123 _src[1][_i].u64; 12124 12125 uint64_t dst = src0 & src1; 12126 12127 _dst_val[_i].u64 = dst; 12128 } 12129 12130 break; 12131 } 12132 12133 default: 12134 unreachable("unknown bit width"); 12135 } 12136} 12137static void 12138evaluate_ibfe(nir_const_value *_dst_val, 12139 MAYBE_UNUSED unsigned num_components, 12140 UNUSED unsigned bit_size, 12141 MAYBE_UNUSED nir_const_value **_src) 12142{ 12143 12144 12145 12146 12147 for (unsigned _i = 0; _i < num_components; _i++) { 12148 const int32_t src0 = 12149 _src[0][_i].i32; 12150 const int32_t src1 = 12151 _src[1][_i].i32; 12152 const int32_t src2 = 12153 _src[2][_i].i32; 12154 12155 int32_t dst; 12156 12157 12158int base = src0; 12159int offset = src1, bits = src2; 12160if (bits == 0) { 12161 dst = 0; 12162} else if (bits < 0 || offset < 0) { 12163 dst = 0; /* undefined */ 12164} else if (offset + bits < 32) { 12165 dst = (base << (32 - bits - offset)) >> (32 - bits); 12166} else { 12167 dst = base >> offset; 12168} 12169 12170 12171 _dst_val[_i].i32 = dst; 12172 } 12173 12174} 12175static void 12176evaluate_ibitfield_extract(nir_const_value *_dst_val, 12177 MAYBE_UNUSED unsigned num_components, 12178 UNUSED unsigned bit_size, 12179 MAYBE_UNUSED 
nir_const_value **_src) 12180{ 12181 12182 12183 12184 12185 for (unsigned _i = 0; _i < num_components; _i++) { 12186 const int32_t src0 = 12187 _src[0][_i].i32; 12188 const int32_t src1 = 12189 _src[1][_i].i32; 12190 const int32_t src2 = 12191 _src[2][_i].i32; 12192 12193 int32_t dst; 12194 12195 12196int base = src0; 12197int offset = src1, bits = src2; 12198if (bits == 0) { 12199 dst = 0; 12200} else if (offset < 0 || bits < 0 || offset + bits > 32) { 12201 dst = 0; 12202} else { 12203 dst = (base << (32 - offset - bits)) >> offset; /* use sign-extending shift */ 12204} 12205 12206 12207 _dst_val[_i].i32 = dst; 12208 } 12209 12210} 12211static void 12212evaluate_idiv(nir_const_value *_dst_val, 12213 MAYBE_UNUSED unsigned num_components, 12214 unsigned bit_size, 12215 MAYBE_UNUSED nir_const_value **_src) 12216{ 12217 switch (bit_size) { 12218 case 1: { 12219 12220 12221 12222 12223 for (unsigned _i = 0; _i < num_components; _i++) { 12224 /* 1-bit integers use a 0/-1 convention */ 12225 const int1_t src0 = -(int1_t)_src[0][_i].b; 12226 /* 1-bit integers use a 0/-1 convention */ 12227 const int1_t src1 = -(int1_t)_src[1][_i].b; 12228 12229 int1_t dst = src1 == 0 ? 0 : (src0 / src1); 12230 12231 /* 1-bit integers get truncated */ 12232 _dst_val[_i].b = dst & 1; 12233 } 12234 12235 break; 12236 } 12237 case 8: { 12238 12239 12240 12241 12242 for (unsigned _i = 0; _i < num_components; _i++) { 12243 const int8_t src0 = 12244 _src[0][_i].i8; 12245 const int8_t src1 = 12246 _src[1][_i].i8; 12247 12248 int8_t dst = src1 == 0 ? 0 : (src0 / src1); 12249 12250 _dst_val[_i].i8 = dst; 12251 } 12252 12253 break; 12254 } 12255 case 16: { 12256 12257 12258 12259 12260 for (unsigned _i = 0; _i < num_components; _i++) { 12261 const int16_t src0 = 12262 _src[0][_i].i16; 12263 const int16_t src1 = 12264 _src[1][_i].i16; 12265 12266 int16_t dst = src1 == 0 ? 
0 : (src0 / src1); 12267 12268 _dst_val[_i].i16 = dst; 12269 } 12270 12271 break; 12272 } 12273 case 32: { 12274 12275 12276 12277 12278 for (unsigned _i = 0; _i < num_components; _i++) { 12279 const int32_t src0 = 12280 _src[0][_i].i32; 12281 const int32_t src1 = 12282 _src[1][_i].i32; 12283 12284 int32_t dst = src1 == 0 ? 0 : (src0 / src1); 12285 12286 _dst_val[_i].i32 = dst; 12287 } 12288 12289 break; 12290 } 12291 case 64: { 12292 12293 12294 12295 12296 for (unsigned _i = 0; _i < num_components; _i++) { 12297 const int64_t src0 = 12298 _src[0][_i].i64; 12299 const int64_t src1 = 12300 _src[1][_i].i64; 12301 12302 int64_t dst = src1 == 0 ? 0 : (src0 / src1); 12303 12304 _dst_val[_i].i64 = dst; 12305 } 12306 12307 break; 12308 } 12309 12310 default: 12311 unreachable("unknown bit width"); 12312 } 12313} 12314static void 12315evaluate_ieq(nir_const_value *_dst_val, 12316 MAYBE_UNUSED unsigned num_components, 12317 unsigned bit_size, 12318 MAYBE_UNUSED nir_const_value **_src) 12319{ 12320 switch (bit_size) { 12321 case 1: { 12322 12323 12324 12325 12326 for (unsigned _i = 0; _i < num_components; _i++) { 12327 /* 1-bit integers use a 0/-1 convention */ 12328 const int1_t src0 = -(int1_t)_src[0][_i].b; 12329 /* 1-bit integers use a 0/-1 convention */ 12330 const int1_t src1 = -(int1_t)_src[1][_i].b; 12331 12332 bool1_t dst = src0 == src1; 12333 12334 _dst_val[_i].b = -(int)dst; 12335 } 12336 12337 break; 12338 } 12339 case 8: { 12340 12341 12342 12343 12344 for (unsigned _i = 0; _i < num_components; _i++) { 12345 const int8_t src0 = 12346 _src[0][_i].i8; 12347 const int8_t src1 = 12348 _src[1][_i].i8; 12349 12350 bool1_t dst = src0 == src1; 12351 12352 _dst_val[_i].b = -(int)dst; 12353 } 12354 12355 break; 12356 } 12357 case 16: { 12358 12359 12360 12361 12362 for (unsigned _i = 0; _i < num_components; _i++) { 12363 const int16_t src0 = 12364 _src[0][_i].i16; 12365 const int16_t src1 = 12366 _src[1][_i].i16; 12367 12368 bool1_t dst = src0 == src1; 12369 12370 
_dst_val[_i].b = -(int)dst; 12371 } 12372 12373 break; 12374 } 12375 case 32: { 12376 12377 12378 12379 12380 for (unsigned _i = 0; _i < num_components; _i++) { 12381 const int32_t src0 = 12382 _src[0][_i].i32; 12383 const int32_t src1 = 12384 _src[1][_i].i32; 12385 12386 bool1_t dst = src0 == src1; 12387 12388 _dst_val[_i].b = -(int)dst; 12389 } 12390 12391 break; 12392 } 12393 case 64: { 12394 12395 12396 12397 12398 for (unsigned _i = 0; _i < num_components; _i++) { 12399 const int64_t src0 = 12400 _src[0][_i].i64; 12401 const int64_t src1 = 12402 _src[1][_i].i64; 12403 12404 bool1_t dst = src0 == src1; 12405 12406 _dst_val[_i].b = -(int)dst; 12407 } 12408 12409 break; 12410 } 12411 12412 default: 12413 unreachable("unknown bit width"); 12414 } 12415} 12416static void 12417evaluate_ieq32(nir_const_value *_dst_val, 12418 MAYBE_UNUSED unsigned num_components, 12419 unsigned bit_size, 12420 MAYBE_UNUSED nir_const_value **_src) 12421{ 12422 switch (bit_size) { 12423 case 1: { 12424 12425 12426 12427 12428 for (unsigned _i = 0; _i < num_components; _i++) { 12429 /* 1-bit integers use a 0/-1 convention */ 12430 const int1_t src0 = -(int1_t)_src[0][_i].b; 12431 /* 1-bit integers use a 0/-1 convention */ 12432 const int1_t src1 = -(int1_t)_src[1][_i].b; 12433 12434 bool32_t dst = src0 == src1; 12435 12436 _dst_val[_i].i32 = -(int)dst; 12437 } 12438 12439 break; 12440 } 12441 case 8: { 12442 12443 12444 12445 12446 for (unsigned _i = 0; _i < num_components; _i++) { 12447 const int8_t src0 = 12448 _src[0][_i].i8; 12449 const int8_t src1 = 12450 _src[1][_i].i8; 12451 12452 bool32_t dst = src0 == src1; 12453 12454 _dst_val[_i].i32 = -(int)dst; 12455 } 12456 12457 break; 12458 } 12459 case 16: { 12460 12461 12462 12463 12464 for (unsigned _i = 0; _i < num_components; _i++) { 12465 const int16_t src0 = 12466 _src[0][_i].i16; 12467 const int16_t src1 = 12468 _src[1][_i].i16; 12469 12470 bool32_t dst = src0 == src1; 12471 12472 _dst_val[_i].i32 = -(int)dst; 12473 } 12474 12475 
break; 12476 } 12477 case 32: { 12478 12479 12480 12481 12482 for (unsigned _i = 0; _i < num_components; _i++) { 12483 const int32_t src0 = 12484 _src[0][_i].i32; 12485 const int32_t src1 = 12486 _src[1][_i].i32; 12487 12488 bool32_t dst = src0 == src1; 12489 12490 _dst_val[_i].i32 = -(int)dst; 12491 } 12492 12493 break; 12494 } 12495 case 64: { 12496 12497 12498 12499 12500 for (unsigned _i = 0; _i < num_components; _i++) { 12501 const int64_t src0 = 12502 _src[0][_i].i64; 12503 const int64_t src1 = 12504 _src[1][_i].i64; 12505 12506 bool32_t dst = src0 == src1; 12507 12508 _dst_val[_i].i32 = -(int)dst; 12509 } 12510 12511 break; 12512 } 12513 12514 default: 12515 unreachable("unknown bit width"); 12516 } 12517} 12518static void 12519evaluate_ifind_msb(nir_const_value *_dst_val, 12520 MAYBE_UNUSED unsigned num_components, 12521 UNUSED unsigned bit_size, 12522 MAYBE_UNUSED nir_const_value **_src) 12523{ 12524 12525 12526 12527 12528 for (unsigned _i = 0; _i < num_components; _i++) { 12529 const int32_t src0 = 12530 _src[0][_i].i32; 12531 12532 int32_t dst; 12533 12534 12535dst = -1; 12536for (int bit = 31; bit >= 0; bit--) { 12537 /* If src0 < 0, we're looking for the first 0 bit. 12538 * if src0 >= 0, we're looking for the first 1 bit. 
12539 */ 12540 if ((((src0 >> bit) & 1) && (src0 >= 0)) || 12541 (!((src0 >> bit) & 1) && (src0 < 0))) { 12542 dst = bit; 12543 break; 12544 } 12545} 12546 12547 12548 _dst_val[_i].i32 = dst; 12549 } 12550 12551} 12552static void 12553evaluate_ige(nir_const_value *_dst_val, 12554 MAYBE_UNUSED unsigned num_components, 12555 unsigned bit_size, 12556 MAYBE_UNUSED nir_const_value **_src) 12557{ 12558 switch (bit_size) { 12559 case 1: { 12560 12561 12562 12563 12564 for (unsigned _i = 0; _i < num_components; _i++) { 12565 /* 1-bit integers use a 0/-1 convention */ 12566 const int1_t src0 = -(int1_t)_src[0][_i].b; 12567 /* 1-bit integers use a 0/-1 convention */ 12568 const int1_t src1 = -(int1_t)_src[1][_i].b; 12569 12570 bool1_t dst = src0 >= src1; 12571 12572 _dst_val[_i].b = -(int)dst; 12573 } 12574 12575 break; 12576 } 12577 case 8: { 12578 12579 12580 12581 12582 for (unsigned _i = 0; _i < num_components; _i++) { 12583 const int8_t src0 = 12584 _src[0][_i].i8; 12585 const int8_t src1 = 12586 _src[1][_i].i8; 12587 12588 bool1_t dst = src0 >= src1; 12589 12590 _dst_val[_i].b = -(int)dst; 12591 } 12592 12593 break; 12594 } 12595 case 16: { 12596 12597 12598 12599 12600 for (unsigned _i = 0; _i < num_components; _i++) { 12601 const int16_t src0 = 12602 _src[0][_i].i16; 12603 const int16_t src1 = 12604 _src[1][_i].i16; 12605 12606 bool1_t dst = src0 >= src1; 12607 12608 _dst_val[_i].b = -(int)dst; 12609 } 12610 12611 break; 12612 } 12613 case 32: { 12614 12615 12616 12617 12618 for (unsigned _i = 0; _i < num_components; _i++) { 12619 const int32_t src0 = 12620 _src[0][_i].i32; 12621 const int32_t src1 = 12622 _src[1][_i].i32; 12623 12624 bool1_t dst = src0 >= src1; 12625 12626 _dst_val[_i].b = -(int)dst; 12627 } 12628 12629 break; 12630 } 12631 case 64: { 12632 12633 12634 12635 12636 for (unsigned _i = 0; _i < num_components; _i++) { 12637 const int64_t src0 = 12638 _src[0][_i].i64; 12639 const int64_t src1 = 12640 _src[1][_i].i64; 12641 12642 bool1_t dst = src0 >= 
src1; 12643 12644 _dst_val[_i].b = -(int)dst; 12645 } 12646 12647 break; 12648 } 12649 12650 default: 12651 unreachable("unknown bit width"); 12652 } 12653} 12654static void 12655evaluate_ige32(nir_const_value *_dst_val, 12656 MAYBE_UNUSED unsigned num_components, 12657 unsigned bit_size, 12658 MAYBE_UNUSED nir_const_value **_src) 12659{ 12660 switch (bit_size) { 12661 case 1: { 12662 12663 12664 12665 12666 for (unsigned _i = 0; _i < num_components; _i++) { 12667 /* 1-bit integers use a 0/-1 convention */ 12668 const int1_t src0 = -(int1_t)_src[0][_i].b; 12669 /* 1-bit integers use a 0/-1 convention */ 12670 const int1_t src1 = -(int1_t)_src[1][_i].b; 12671 12672 bool32_t dst = src0 >= src1; 12673 12674 _dst_val[_i].i32 = -(int)dst; 12675 } 12676 12677 break; 12678 } 12679 case 8: { 12680 12681 12682 12683 12684 for (unsigned _i = 0; _i < num_components; _i++) { 12685 const int8_t src0 = 12686 _src[0][_i].i8; 12687 const int8_t src1 = 12688 _src[1][_i].i8; 12689 12690 bool32_t dst = src0 >= src1; 12691 12692 _dst_val[_i].i32 = -(int)dst; 12693 } 12694 12695 break; 12696 } 12697 case 16: { 12698 12699 12700 12701 12702 for (unsigned _i = 0; _i < num_components; _i++) { 12703 const int16_t src0 = 12704 _src[0][_i].i16; 12705 const int16_t src1 = 12706 _src[1][_i].i16; 12707 12708 bool32_t dst = src0 >= src1; 12709 12710 _dst_val[_i].i32 = -(int)dst; 12711 } 12712 12713 break; 12714 } 12715 case 32: { 12716 12717 12718 12719 12720 for (unsigned _i = 0; _i < num_components; _i++) { 12721 const int32_t src0 = 12722 _src[0][_i].i32; 12723 const int32_t src1 = 12724 _src[1][_i].i32; 12725 12726 bool32_t dst = src0 >= src1; 12727 12728 _dst_val[_i].i32 = -(int)dst; 12729 } 12730 12731 break; 12732 } 12733 case 64: { 12734 12735 12736 12737 12738 for (unsigned _i = 0; _i < num_components; _i++) { 12739 const int64_t src0 = 12740 _src[0][_i].i64; 12741 const int64_t src1 = 12742 _src[1][_i].i64; 12743 12744 bool32_t dst = src0 >= src1; 12745 12746 _dst_val[_i].i32 = 
-(int)dst; 12747 } 12748 12749 break; 12750 } 12751 12752 default: 12753 unreachable("unknown bit width"); 12754 } 12755} 12756static void 12757evaluate_ihadd(nir_const_value *_dst_val, 12758 MAYBE_UNUSED unsigned num_components, 12759 unsigned bit_size, 12760 MAYBE_UNUSED nir_const_value **_src) 12761{ 12762 switch (bit_size) { 12763 case 1: { 12764 12765 12766 12767 12768 for (unsigned _i = 0; _i < num_components; _i++) { 12769 /* 1-bit integers use a 0/-1 convention */ 12770 const int1_t src0 = -(int1_t)_src[0][_i].b; 12771 /* 1-bit integers use a 0/-1 convention */ 12772 const int1_t src1 = -(int1_t)_src[1][_i].b; 12773 12774 int1_t dst = (src0 & src1) + ((src0 ^ src1) >> 1); 12775 12776 /* 1-bit integers get truncated */ 12777 _dst_val[_i].b = dst & 1; 12778 } 12779 12780 break; 12781 } 12782 case 8: { 12783 12784 12785 12786 12787 for (unsigned _i = 0; _i < num_components; _i++) { 12788 const int8_t src0 = 12789 _src[0][_i].i8; 12790 const int8_t src1 = 12791 _src[1][_i].i8; 12792 12793 int8_t dst = (src0 & src1) + ((src0 ^ src1) >> 1); 12794 12795 _dst_val[_i].i8 = dst; 12796 } 12797 12798 break; 12799 } 12800 case 16: { 12801 12802 12803 12804 12805 for (unsigned _i = 0; _i < num_components; _i++) { 12806 const int16_t src0 = 12807 _src[0][_i].i16; 12808 const int16_t src1 = 12809 _src[1][_i].i16; 12810 12811 int16_t dst = (src0 & src1) + ((src0 ^ src1) >> 1); 12812 12813 _dst_val[_i].i16 = dst; 12814 } 12815 12816 break; 12817 } 12818 case 32: { 12819 12820 12821 12822 12823 for (unsigned _i = 0; _i < num_components; _i++) { 12824 const int32_t src0 = 12825 _src[0][_i].i32; 12826 const int32_t src1 = 12827 _src[1][_i].i32; 12828 12829 int32_t dst = (src0 & src1) + ((src0 ^ src1) >> 1); 12830 12831 _dst_val[_i].i32 = dst; 12832 } 12833 12834 break; 12835 } 12836 case 64: { 12837 12838 12839 12840 12841 for (unsigned _i = 0; _i < num_components; _i++) { 12842 const int64_t src0 = 12843 _src[0][_i].i64; 12844 const int64_t src1 = 12845 _src[1][_i].i64; 12846 
12847 int64_t dst = (src0 & src1) + ((src0 ^ src1) >> 1); 12848 12849 _dst_val[_i].i64 = dst; 12850 } 12851 12852 break; 12853 } 12854 12855 default: 12856 unreachable("unknown bit width"); 12857 } 12858} 12859static void 12860evaluate_ilt(nir_const_value *_dst_val, 12861 MAYBE_UNUSED unsigned num_components, 12862 unsigned bit_size, 12863 MAYBE_UNUSED nir_const_value **_src) 12864{ 12865 switch (bit_size) { 12866 case 1: { 12867 12868 12869 12870 12871 for (unsigned _i = 0; _i < num_components; _i++) { 12872 /* 1-bit integers use a 0/-1 convention */ 12873 const int1_t src0 = -(int1_t)_src[0][_i].b; 12874 /* 1-bit integers use a 0/-1 convention */ 12875 const int1_t src1 = -(int1_t)_src[1][_i].b; 12876 12877 bool1_t dst = src0 < src1; 12878 12879 _dst_val[_i].b = -(int)dst; 12880 } 12881 12882 break; 12883 } 12884 case 8: { 12885 12886 12887 12888 12889 for (unsigned _i = 0; _i < num_components; _i++) { 12890 const int8_t src0 = 12891 _src[0][_i].i8; 12892 const int8_t src1 = 12893 _src[1][_i].i8; 12894 12895 bool1_t dst = src0 < src1; 12896 12897 _dst_val[_i].b = -(int)dst; 12898 } 12899 12900 break; 12901 } 12902 case 16: { 12903 12904 12905 12906 12907 for (unsigned _i = 0; _i < num_components; _i++) { 12908 const int16_t src0 = 12909 _src[0][_i].i16; 12910 const int16_t src1 = 12911 _src[1][_i].i16; 12912 12913 bool1_t dst = src0 < src1; 12914 12915 _dst_val[_i].b = -(int)dst; 12916 } 12917 12918 break; 12919 } 12920 case 32: { 12921 12922 12923 12924 12925 for (unsigned _i = 0; _i < num_components; _i++) { 12926 const int32_t src0 = 12927 _src[0][_i].i32; 12928 const int32_t src1 = 12929 _src[1][_i].i32; 12930 12931 bool1_t dst = src0 < src1; 12932 12933 _dst_val[_i].b = -(int)dst; 12934 } 12935 12936 break; 12937 } 12938 case 64: { 12939 12940 12941 12942 12943 for (unsigned _i = 0; _i < num_components; _i++) { 12944 const int64_t src0 = 12945 _src[0][_i].i64; 12946 const int64_t src1 = 12947 _src[1][_i].i64; 12948 12949 bool1_t dst = src0 < src1; 12950 
12951 _dst_val[_i].b = -(int)dst; 12952 } 12953 12954 break; 12955 } 12956 12957 default: 12958 unreachable("unknown bit width"); 12959 } 12960} 12961static void 12962evaluate_ilt32(nir_const_value *_dst_val, 12963 MAYBE_UNUSED unsigned num_components, 12964 unsigned bit_size, 12965 MAYBE_UNUSED nir_const_value **_src) 12966{ 12967 switch (bit_size) { 12968 case 1: { 12969 12970 12971 12972 12973 for (unsigned _i = 0; _i < num_components; _i++) { 12974 /* 1-bit integers use a 0/-1 convention */ 12975 const int1_t src0 = -(int1_t)_src[0][_i].b; 12976 /* 1-bit integers use a 0/-1 convention */ 12977 const int1_t src1 = -(int1_t)_src[1][_i].b; 12978 12979 bool32_t dst = src0 < src1; 12980 12981 _dst_val[_i].i32 = -(int)dst; 12982 } 12983 12984 break; 12985 } 12986 case 8: { 12987 12988 12989 12990 12991 for (unsigned _i = 0; _i < num_components; _i++) { 12992 const int8_t src0 = 12993 _src[0][_i].i8; 12994 const int8_t src1 = 12995 _src[1][_i].i8; 12996 12997 bool32_t dst = src0 < src1; 12998 12999 _dst_val[_i].i32 = -(int)dst; 13000 } 13001 13002 break; 13003 } 13004 case 16: { 13005 13006 13007 13008 13009 for (unsigned _i = 0; _i < num_components; _i++) { 13010 const int16_t src0 = 13011 _src[0][_i].i16; 13012 const int16_t src1 = 13013 _src[1][_i].i16; 13014 13015 bool32_t dst = src0 < src1; 13016 13017 _dst_val[_i].i32 = -(int)dst; 13018 } 13019 13020 break; 13021 } 13022 case 32: { 13023 13024 13025 13026 13027 for (unsigned _i = 0; _i < num_components; _i++) { 13028 const int32_t src0 = 13029 _src[0][_i].i32; 13030 const int32_t src1 = 13031 _src[1][_i].i32; 13032 13033 bool32_t dst = src0 < src1; 13034 13035 _dst_val[_i].i32 = -(int)dst; 13036 } 13037 13038 break; 13039 } 13040 case 64: { 13041 13042 13043 13044 13045 for (unsigned _i = 0; _i < num_components; _i++) { 13046 const int64_t src0 = 13047 _src[0][_i].i64; 13048 const int64_t src1 = 13049 _src[1][_i].i64; 13050 13051 bool32_t dst = src0 < src1; 13052 13053 _dst_val[_i].i32 = -(int)dst; 13054 } 13055 
13056 break; 13057 } 13058 13059 default: 13060 unreachable("unknown bit width"); 13061 } 13062} 13063static void 13064evaluate_imax(nir_const_value *_dst_val, 13065 MAYBE_UNUSED unsigned num_components, 13066 unsigned bit_size, 13067 MAYBE_UNUSED nir_const_value **_src) 13068{ 13069 switch (bit_size) { 13070 case 1: { 13071 13072 13073 13074 13075 for (unsigned _i = 0; _i < num_components; _i++) { 13076 /* 1-bit integers use a 0/-1 convention */ 13077 const int1_t src0 = -(int1_t)_src[0][_i].b; 13078 /* 1-bit integers use a 0/-1 convention */ 13079 const int1_t src1 = -(int1_t)_src[1][_i].b; 13080 13081 int1_t dst = src1 > src0 ? src1 : src0; 13082 13083 /* 1-bit integers get truncated */ 13084 _dst_val[_i].b = dst & 1; 13085 } 13086 13087 break; 13088 } 13089 case 8: { 13090 13091 13092 13093 13094 for (unsigned _i = 0; _i < num_components; _i++) { 13095 const int8_t src0 = 13096 _src[0][_i].i8; 13097 const int8_t src1 = 13098 _src[1][_i].i8; 13099 13100 int8_t dst = src1 > src0 ? src1 : src0; 13101 13102 _dst_val[_i].i8 = dst; 13103 } 13104 13105 break; 13106 } 13107 case 16: { 13108 13109 13110 13111 13112 for (unsigned _i = 0; _i < num_components; _i++) { 13113 const int16_t src0 = 13114 _src[0][_i].i16; 13115 const int16_t src1 = 13116 _src[1][_i].i16; 13117 13118 int16_t dst = src1 > src0 ? src1 : src0; 13119 13120 _dst_val[_i].i16 = dst; 13121 } 13122 13123 break; 13124 } 13125 case 32: { 13126 13127 13128 13129 13130 for (unsigned _i = 0; _i < num_components; _i++) { 13131 const int32_t src0 = 13132 _src[0][_i].i32; 13133 const int32_t src1 = 13134 _src[1][_i].i32; 13135 13136 int32_t dst = src1 > src0 ? src1 : src0; 13137 13138 _dst_val[_i].i32 = dst; 13139 } 13140 13141 break; 13142 } 13143 case 64: { 13144 13145 13146 13147 13148 for (unsigned _i = 0; _i < num_components; _i++) { 13149 const int64_t src0 = 13150 _src[0][_i].i64; 13151 const int64_t src1 = 13152 _src[1][_i].i64; 13153 13154 int64_t dst = src1 > src0 ? 
src1 : src0; 13155 13156 _dst_val[_i].i64 = dst; 13157 } 13158 13159 break; 13160 } 13161 13162 default: 13163 unreachable("unknown bit width"); 13164 } 13165} 13166static void 13167evaluate_imax3(nir_const_value *_dst_val, 13168 MAYBE_UNUSED unsigned num_components, 13169 unsigned bit_size, 13170 MAYBE_UNUSED nir_const_value **_src) 13171{ 13172 switch (bit_size) { 13173 case 1: { 13174 13175 13176 13177 13178 for (unsigned _i = 0; _i < num_components; _i++) { 13179 /* 1-bit integers use a 0/-1 convention */ 13180 const int1_t src0 = -(int1_t)_src[0][_i].b; 13181 /* 1-bit integers use a 0/-1 convention */ 13182 const int1_t src1 = -(int1_t)_src[1][_i].b; 13183 /* 1-bit integers use a 0/-1 convention */ 13184 const int1_t src2 = -(int1_t)_src[2][_i].b; 13185 13186 int1_t dst = MAX2(src0, MAX2(src1, src2)); 13187 13188 /* 1-bit integers get truncated */ 13189 _dst_val[_i].b = dst & 1; 13190 } 13191 13192 break; 13193 } 13194 case 8: { 13195 13196 13197 13198 13199 for (unsigned _i = 0; _i < num_components; _i++) { 13200 const int8_t src0 = 13201 _src[0][_i].i8; 13202 const int8_t src1 = 13203 _src[1][_i].i8; 13204 const int8_t src2 = 13205 _src[2][_i].i8; 13206 13207 int8_t dst = MAX2(src0, MAX2(src1, src2)); 13208 13209 _dst_val[_i].i8 = dst; 13210 } 13211 13212 break; 13213 } 13214 case 16: { 13215 13216 13217 13218 13219 for (unsigned _i = 0; _i < num_components; _i++) { 13220 const int16_t src0 = 13221 _src[0][_i].i16; 13222 const int16_t src1 = 13223 _src[1][_i].i16; 13224 const int16_t src2 = 13225 _src[2][_i].i16; 13226 13227 int16_t dst = MAX2(src0, MAX2(src1, src2)); 13228 13229 _dst_val[_i].i16 = dst; 13230 } 13231 13232 break; 13233 } 13234 case 32: { 13235 13236 13237 13238 13239 for (unsigned _i = 0; _i < num_components; _i++) { 13240 const int32_t src0 = 13241 _src[0][_i].i32; 13242 const int32_t src1 = 13243 _src[1][_i].i32; 13244 const int32_t src2 = 13245 _src[2][_i].i32; 13246 13247 int32_t dst = MAX2(src0, MAX2(src1, src2)); 13248 13249 
_dst_val[_i].i32 = dst; 13250 } 13251 13252 break; 13253 } 13254 case 64: { 13255 13256 13257 13258 13259 for (unsigned _i = 0; _i < num_components; _i++) { 13260 const int64_t src0 = 13261 _src[0][_i].i64; 13262 const int64_t src1 = 13263 _src[1][_i].i64; 13264 const int64_t src2 = 13265 _src[2][_i].i64; 13266 13267 int64_t dst = MAX2(src0, MAX2(src1, src2)); 13268 13269 _dst_val[_i].i64 = dst; 13270 } 13271 13272 break; 13273 } 13274 13275 default: 13276 unreachable("unknown bit width"); 13277 } 13278} 13279static void 13280evaluate_imed3(nir_const_value *_dst_val, 13281 MAYBE_UNUSED unsigned num_components, 13282 unsigned bit_size, 13283 MAYBE_UNUSED nir_const_value **_src) 13284{ 13285 switch (bit_size) { 13286 case 1: { 13287 13288 13289 13290 13291 for (unsigned _i = 0; _i < num_components; _i++) { 13292 /* 1-bit integers use a 0/-1 convention */ 13293 const int1_t src0 = -(int1_t)_src[0][_i].b; 13294 /* 1-bit integers use a 0/-1 convention */ 13295 const int1_t src1 = -(int1_t)_src[1][_i].b; 13296 /* 1-bit integers use a 0/-1 convention */ 13297 const int1_t src2 = -(int1_t)_src[2][_i].b; 13298 13299 int1_t dst = MAX2(MIN2(MAX2(src0, src1), src2), MIN2(src0, src1)); 13300 13301 /* 1-bit integers get truncated */ 13302 _dst_val[_i].b = dst & 1; 13303 } 13304 13305 break; 13306 } 13307 case 8: { 13308 13309 13310 13311 13312 for (unsigned _i = 0; _i < num_components; _i++) { 13313 const int8_t src0 = 13314 _src[0][_i].i8; 13315 const int8_t src1 = 13316 _src[1][_i].i8; 13317 const int8_t src2 = 13318 _src[2][_i].i8; 13319 13320 int8_t dst = MAX2(MIN2(MAX2(src0, src1), src2), MIN2(src0, src1)); 13321 13322 _dst_val[_i].i8 = dst; 13323 } 13324 13325 break; 13326 } 13327 case 16: { 13328 13329 13330 13331 13332 for (unsigned _i = 0; _i < num_components; _i++) { 13333 const int16_t src0 = 13334 _src[0][_i].i16; 13335 const int16_t src1 = 13336 _src[1][_i].i16; 13337 const int16_t src2 = 13338 _src[2][_i].i16; 13339 13340 int16_t dst = MAX2(MIN2(MAX2(src0, src1), 
src2), MIN2(src0, src1)); 13341 13342 _dst_val[_i].i16 = dst; 13343 } 13344 13345 break; 13346 } 13347 case 32: { 13348 13349 13350 13351 13352 for (unsigned _i = 0; _i < num_components; _i++) { 13353 const int32_t src0 = 13354 _src[0][_i].i32; 13355 const int32_t src1 = 13356 _src[1][_i].i32; 13357 const int32_t src2 = 13358 _src[2][_i].i32; 13359 13360 int32_t dst = MAX2(MIN2(MAX2(src0, src1), src2), MIN2(src0, src1)); 13361 13362 _dst_val[_i].i32 = dst; 13363 } 13364 13365 break; 13366 } 13367 case 64: { 13368 13369 13370 13371 13372 for (unsigned _i = 0; _i < num_components; _i++) { 13373 const int64_t src0 = 13374 _src[0][_i].i64; 13375 const int64_t src1 = 13376 _src[1][_i].i64; 13377 const int64_t src2 = 13378 _src[2][_i].i64; 13379 13380 int64_t dst = MAX2(MIN2(MAX2(src0, src1), src2), MIN2(src0, src1)); 13381 13382 _dst_val[_i].i64 = dst; 13383 } 13384 13385 break; 13386 } 13387 13388 default: 13389 unreachable("unknown bit width"); 13390 } 13391} 13392static void 13393evaluate_imin(nir_const_value *_dst_val, 13394 MAYBE_UNUSED unsigned num_components, 13395 unsigned bit_size, 13396 MAYBE_UNUSED nir_const_value **_src) 13397{ 13398 switch (bit_size) { 13399 case 1: { 13400 13401 13402 13403 13404 for (unsigned _i = 0; _i < num_components; _i++) { 13405 /* 1-bit integers use a 0/-1 convention */ 13406 const int1_t src0 = -(int1_t)_src[0][_i].b; 13407 /* 1-bit integers use a 0/-1 convention */ 13408 const int1_t src1 = -(int1_t)_src[1][_i].b; 13409 13410 int1_t dst = src1 > src0 ? src0 : src1; 13411 13412 /* 1-bit integers get truncated */ 13413 _dst_val[_i].b = dst & 1; 13414 } 13415 13416 break; 13417 } 13418 case 8: { 13419 13420 13421 13422 13423 for (unsigned _i = 0; _i < num_components; _i++) { 13424 const int8_t src0 = 13425 _src[0][_i].i8; 13426 const int8_t src1 = 13427 _src[1][_i].i8; 13428 13429 int8_t dst = src1 > src0 ? 
src0 : src1; 13430 13431 _dst_val[_i].i8 = dst; 13432 } 13433 13434 break; 13435 } 13436 case 16: { 13437 13438 13439 13440 13441 for (unsigned _i = 0; _i < num_components; _i++) { 13442 const int16_t src0 = 13443 _src[0][_i].i16; 13444 const int16_t src1 = 13445 _src[1][_i].i16; 13446 13447 int16_t dst = src1 > src0 ? src0 : src1; 13448 13449 _dst_val[_i].i16 = dst; 13450 } 13451 13452 break; 13453 } 13454 case 32: { 13455 13456 13457 13458 13459 for (unsigned _i = 0; _i < num_components; _i++) { 13460 const int32_t src0 = 13461 _src[0][_i].i32; 13462 const int32_t src1 = 13463 _src[1][_i].i32; 13464 13465 int32_t dst = src1 > src0 ? src0 : src1; 13466 13467 _dst_val[_i].i32 = dst; 13468 } 13469 13470 break; 13471 } 13472 case 64: { 13473 13474 13475 13476 13477 for (unsigned _i = 0; _i < num_components; _i++) { 13478 const int64_t src0 = 13479 _src[0][_i].i64; 13480 const int64_t src1 = 13481 _src[1][_i].i64; 13482 13483 int64_t dst = src1 > src0 ? src0 : src1; 13484 13485 _dst_val[_i].i64 = dst; 13486 } 13487 13488 break; 13489 } 13490 13491 default: 13492 unreachable("unknown bit width"); 13493 } 13494} 13495static void 13496evaluate_imin3(nir_const_value *_dst_val, 13497 MAYBE_UNUSED unsigned num_components, 13498 unsigned bit_size, 13499 MAYBE_UNUSED nir_const_value **_src) 13500{ 13501 switch (bit_size) { 13502 case 1: { 13503 13504 13505 13506 13507 for (unsigned _i = 0; _i < num_components; _i++) { 13508 /* 1-bit integers use a 0/-1 convention */ 13509 const int1_t src0 = -(int1_t)_src[0][_i].b; 13510 /* 1-bit integers use a 0/-1 convention */ 13511 const int1_t src1 = -(int1_t)_src[1][_i].b; 13512 /* 1-bit integers use a 0/-1 convention */ 13513 const int1_t src2 = -(int1_t)_src[2][_i].b; 13514 13515 int1_t dst = MIN2(src0, MIN2(src1, src2)); 13516 13517 /* 1-bit integers get truncated */ 13518 _dst_val[_i].b = dst & 1; 13519 } 13520 13521 break; 13522 } 13523 case 8: { 13524 13525 13526 13527 13528 for (unsigned _i = 0; _i < num_components; _i++) { 13529 
const int8_t src0 = 13530 _src[0][_i].i8; 13531 const int8_t src1 = 13532 _src[1][_i].i8; 13533 const int8_t src2 = 13534 _src[2][_i].i8; 13535 13536 int8_t dst = MIN2(src0, MIN2(src1, src2)); 13537 13538 _dst_val[_i].i8 = dst; 13539 } 13540 13541 break; 13542 } 13543 case 16: { 13544 13545 13546 13547 13548 for (unsigned _i = 0; _i < num_components; _i++) { 13549 const int16_t src0 = 13550 _src[0][_i].i16; 13551 const int16_t src1 = 13552 _src[1][_i].i16; 13553 const int16_t src2 = 13554 _src[2][_i].i16; 13555 13556 int16_t dst = MIN2(src0, MIN2(src1, src2)); 13557 13558 _dst_val[_i].i16 = dst; 13559 } 13560 13561 break; 13562 } 13563 case 32: { 13564 13565 13566 13567 13568 for (unsigned _i = 0; _i < num_components; _i++) { 13569 const int32_t src0 = 13570 _src[0][_i].i32; 13571 const int32_t src1 = 13572 _src[1][_i].i32; 13573 const int32_t src2 = 13574 _src[2][_i].i32; 13575 13576 int32_t dst = MIN2(src0, MIN2(src1, src2)); 13577 13578 _dst_val[_i].i32 = dst; 13579 } 13580 13581 break; 13582 } 13583 case 64: { 13584 13585 13586 13587 13588 for (unsigned _i = 0; _i < num_components; _i++) { 13589 const int64_t src0 = 13590 _src[0][_i].i64; 13591 const int64_t src1 = 13592 _src[1][_i].i64; 13593 const int64_t src2 = 13594 _src[2][_i].i64; 13595 13596 int64_t dst = MIN2(src0, MIN2(src1, src2)); 13597 13598 _dst_val[_i].i64 = dst; 13599 } 13600 13601 break; 13602 } 13603 13604 default: 13605 unreachable("unknown bit width"); 13606 } 13607} 13608static void 13609evaluate_imod(nir_const_value *_dst_val, 13610 MAYBE_UNUSED unsigned num_components, 13611 unsigned bit_size, 13612 MAYBE_UNUSED nir_const_value **_src) 13613{ 13614 switch (bit_size) { 13615 case 1: { 13616 13617 13618 13619 13620 for (unsigned _i = 0; _i < num_components; _i++) { 13621 /* 1-bit integers use a 0/-1 convention */ 13622 const int1_t src0 = -(int1_t)_src[0][_i].b; 13623 /* 1-bit integers use a 0/-1 convention */ 13624 const int1_t src1 = -(int1_t)_src[1][_i].b; 13625 13626 int1_t dst = src1 == 
0 ? 0 : ((src0 % src1 == 0 || (src0 >= 0) == (src1 >= 0)) ? src0 % src1 : src0 % src1 + src1); 13627 13628 /* 1-bit integers get truncated */ 13629 _dst_val[_i].b = dst & 1; 13630 } 13631 13632 break; 13633 } 13634 case 8: { 13635 13636 13637 13638 13639 for (unsigned _i = 0; _i < num_components; _i++) { 13640 const int8_t src0 = 13641 _src[0][_i].i8; 13642 const int8_t src1 = 13643 _src[1][_i].i8; 13644 13645 int8_t dst = src1 == 0 ? 0 : ((src0 % src1 == 0 || (src0 >= 0) == (src1 >= 0)) ? src0 % src1 : src0 % src1 + src1); 13646 13647 _dst_val[_i].i8 = dst; 13648 } 13649 13650 break; 13651 } 13652 case 16: { 13653 13654 13655 13656 13657 for (unsigned _i = 0; _i < num_components; _i++) { 13658 const int16_t src0 = 13659 _src[0][_i].i16; 13660 const int16_t src1 = 13661 _src[1][_i].i16; 13662 13663 int16_t dst = src1 == 0 ? 0 : ((src0 % src1 == 0 || (src0 >= 0) == (src1 >= 0)) ? src0 % src1 : src0 % src1 + src1); 13664 13665 _dst_val[_i].i16 = dst; 13666 } 13667 13668 break; 13669 } 13670 case 32: { 13671 13672 13673 13674 13675 for (unsigned _i = 0; _i < num_components; _i++) { 13676 const int32_t src0 = 13677 _src[0][_i].i32; 13678 const int32_t src1 = 13679 _src[1][_i].i32; 13680 13681 int32_t dst = src1 == 0 ? 0 : ((src0 % src1 == 0 || (src0 >= 0) == (src1 >= 0)) ? src0 % src1 : src0 % src1 + src1); 13682 13683 _dst_val[_i].i32 = dst; 13684 } 13685 13686 break; 13687 } 13688 case 64: { 13689 13690 13691 13692 13693 for (unsigned _i = 0; _i < num_components; _i++) { 13694 const int64_t src0 = 13695 _src[0][_i].i64; 13696 const int64_t src1 = 13697 _src[1][_i].i64; 13698 13699 int64_t dst = src1 == 0 ? 0 : ((src0 % src1 == 0 || (src0 >= 0) == (src1 >= 0)) ? 
src0 % src1 : src0 % src1 + src1); 13700 13701 _dst_val[_i].i64 = dst; 13702 } 13703 13704 break; 13705 } 13706 13707 default: 13708 unreachable("unknown bit width"); 13709 } 13710} 13711static void 13712evaluate_imov(nir_const_value *_dst_val, 13713 MAYBE_UNUSED unsigned num_components, 13714 unsigned bit_size, 13715 MAYBE_UNUSED nir_const_value **_src) 13716{ 13717 switch (bit_size) { 13718 case 1: { 13719 13720 13721 13722 13723 for (unsigned _i = 0; _i < num_components; _i++) { 13724 /* 1-bit integers use a 0/-1 convention */ 13725 const int1_t src0 = -(int1_t)_src[0][_i].b; 13726 13727 int1_t dst = src0; 13728 13729 /* 1-bit integers get truncated */ 13730 _dst_val[_i].b = dst & 1; 13731 } 13732 13733 break; 13734 } 13735 case 8: { 13736 13737 13738 13739 13740 for (unsigned _i = 0; _i < num_components; _i++) { 13741 const int8_t src0 = 13742 _src[0][_i].i8; 13743 13744 int8_t dst = src0; 13745 13746 _dst_val[_i].i8 = dst; 13747 } 13748 13749 break; 13750 } 13751 case 16: { 13752 13753 13754 13755 13756 for (unsigned _i = 0; _i < num_components; _i++) { 13757 const int16_t src0 = 13758 _src[0][_i].i16; 13759 13760 int16_t dst = src0; 13761 13762 _dst_val[_i].i16 = dst; 13763 } 13764 13765 break; 13766 } 13767 case 32: { 13768 13769 13770 13771 13772 for (unsigned _i = 0; _i < num_components; _i++) { 13773 const int32_t src0 = 13774 _src[0][_i].i32; 13775 13776 int32_t dst = src0; 13777 13778 _dst_val[_i].i32 = dst; 13779 } 13780 13781 break; 13782 } 13783 case 64: { 13784 13785 13786 13787 13788 for (unsigned _i = 0; _i < num_components; _i++) { 13789 const int64_t src0 = 13790 _src[0][_i].i64; 13791 13792 int64_t dst = src0; 13793 13794 _dst_val[_i].i64 = dst; 13795 } 13796 13797 break; 13798 } 13799 13800 default: 13801 unreachable("unknown bit width"); 13802 } 13803} 13804static void 13805evaluate_imul(nir_const_value *_dst_val, 13806 MAYBE_UNUSED unsigned num_components, 13807 unsigned bit_size, 13808 MAYBE_UNUSED nir_const_value **_src) 13809{ 13810 
switch (bit_size) { 13811 case 1: { 13812 13813 13814 13815 13816 for (unsigned _i = 0; _i < num_components; _i++) { 13817 /* 1-bit integers use a 0/-1 convention */ 13818 const int1_t src0 = -(int1_t)_src[0][_i].b; 13819 /* 1-bit integers use a 0/-1 convention */ 13820 const int1_t src1 = -(int1_t)_src[1][_i].b; 13821 13822 int1_t dst = src0 * src1; 13823 13824 /* 1-bit integers get truncated */ 13825 _dst_val[_i].b = dst & 1; 13826 } 13827 13828 break; 13829 } 13830 case 8: { 13831 13832 13833 13834 13835 for (unsigned _i = 0; _i < num_components; _i++) { 13836 const int8_t src0 = 13837 _src[0][_i].i8; 13838 const int8_t src1 = 13839 _src[1][_i].i8; 13840 13841 int8_t dst = src0 * src1; 13842 13843 _dst_val[_i].i8 = dst; 13844 } 13845 13846 break; 13847 } 13848 case 16: { 13849 13850 13851 13852 13853 for (unsigned _i = 0; _i < num_components; _i++) { 13854 const int16_t src0 = 13855 _src[0][_i].i16; 13856 const int16_t src1 = 13857 _src[1][_i].i16; 13858 13859 int16_t dst = src0 * src1; 13860 13861 _dst_val[_i].i16 = dst; 13862 } 13863 13864 break; 13865 } 13866 case 32: { 13867 13868 13869 13870 13871 for (unsigned _i = 0; _i < num_components; _i++) { 13872 const int32_t src0 = 13873 _src[0][_i].i32; 13874 const int32_t src1 = 13875 _src[1][_i].i32; 13876 13877 int32_t dst = src0 * src1; 13878 13879 _dst_val[_i].i32 = dst; 13880 } 13881 13882 break; 13883 } 13884 case 64: { 13885 13886 13887 13888 13889 for (unsigned _i = 0; _i < num_components; _i++) { 13890 const int64_t src0 = 13891 _src[0][_i].i64; 13892 const int64_t src1 = 13893 _src[1][_i].i64; 13894 13895 int64_t dst = src0 * src1; 13896 13897 _dst_val[_i].i64 = dst; 13898 } 13899 13900 break; 13901 } 13902 13903 default: 13904 unreachable("unknown bit width"); 13905 } 13906} 13907static void 13908evaluate_imul_2x32_64(nir_const_value *_dst_val, 13909 MAYBE_UNUSED unsigned num_components, 13910 UNUSED unsigned bit_size, 13911 MAYBE_UNUSED nir_const_value **_src) 13912{ 13913 13914 13915 13916 13917 for 
(unsigned _i = 0; _i < num_components; _i++) { 13918 const int32_t src0 = 13919 _src[0][_i].i32; 13920 const int32_t src1 = 13921 _src[1][_i].i32; 13922 13923 int64_t dst = (int64_t)src0 * (int64_t)src1; 13924 13925 _dst_val[_i].i64 = dst; 13926 } 13927 13928} 13929static void 13930evaluate_imul_high(nir_const_value *_dst_val, 13931 MAYBE_UNUSED unsigned num_components, 13932 unsigned bit_size, 13933 MAYBE_UNUSED nir_const_value **_src) 13934{ 13935 switch (bit_size) { 13936 case 1: { 13937 13938 13939 13940 13941 for (unsigned _i = 0; _i < num_components; _i++) { 13942 /* 1-bit integers use a 0/-1 convention */ 13943 const int1_t src0 = -(int1_t)_src[0][_i].b; 13944 /* 1-bit integers use a 0/-1 convention */ 13945 const int1_t src1 = -(int1_t)_src[1][_i].b; 13946 13947 int1_t dst; 13948 13949 13950if (bit_size == 64) { 13951 /* We need to do a full 128-bit x 128-bit multiply in order for the sign 13952 * extension to work properly. The casts are kind-of annoying but needed 13953 * to prevent compiler warnings. 
13954 */ 13955 uint32_t src0_u32[4] = { 13956 src0, 13957 (int64_t)src0 >> 32, 13958 (int64_t)src0 >> 63, 13959 (int64_t)src0 >> 63, 13960 }; 13961 uint32_t src1_u32[4] = { 13962 src1, 13963 (int64_t)src1 >> 32, 13964 (int64_t)src1 >> 63, 13965 (int64_t)src1 >> 63, 13966 }; 13967 uint32_t prod_u32[4]; 13968 ubm_mul_u32arr(prod_u32, src0_u32, src1_u32); 13969 dst = (uint64_t)prod_u32[2] | ((uint64_t)prod_u32[3] << 32); 13970} else { 13971 dst = ((int64_t)src0 * (int64_t)src1) >> bit_size; 13972} 13973 13974 13975 /* 1-bit integers get truncated */ 13976 _dst_val[_i].b = dst & 1; 13977 } 13978 13979 break; 13980 } 13981 case 8: { 13982 13983 13984 13985 13986 for (unsigned _i = 0; _i < num_components; _i++) { 13987 const int8_t src0 = 13988 _src[0][_i].i8; 13989 const int8_t src1 = 13990 _src[1][_i].i8; 13991 13992 int8_t dst; 13993 13994 13995if (bit_size == 64) { 13996 /* We need to do a full 128-bit x 128-bit multiply in order for the sign 13997 * extension to work properly. The casts are kind-of annoying but needed 13998 * to prevent compiler warnings. 
13999 */ 14000 uint32_t src0_u32[4] = { 14001 src0, 14002 (int64_t)src0 >> 32, 14003 (int64_t)src0 >> 63, 14004 (int64_t)src0 >> 63, 14005 }; 14006 uint32_t src1_u32[4] = { 14007 src1, 14008 (int64_t)src1 >> 32, 14009 (int64_t)src1 >> 63, 14010 (int64_t)src1 >> 63, 14011 }; 14012 uint32_t prod_u32[4]; 14013 ubm_mul_u32arr(prod_u32, src0_u32, src1_u32); 14014 dst = (uint64_t)prod_u32[2] | ((uint64_t)prod_u32[3] << 32); 14015} else { 14016 dst = ((int64_t)src0 * (int64_t)src1) >> bit_size; 14017} 14018 14019 14020 _dst_val[_i].i8 = dst; 14021 } 14022 14023 break; 14024 } 14025 case 16: { 14026 14027 14028 14029 14030 for (unsigned _i = 0; _i < num_components; _i++) { 14031 const int16_t src0 = 14032 _src[0][_i].i16; 14033 const int16_t src1 = 14034 _src[1][_i].i16; 14035 14036 int16_t dst; 14037 14038 14039if (bit_size == 64) { 14040 /* We need to do a full 128-bit x 128-bit multiply in order for the sign 14041 * extension to work properly. The casts are kind-of annoying but needed 14042 * to prevent compiler warnings. 14043 */ 14044 uint32_t src0_u32[4] = { 14045 src0, 14046 (int64_t)src0 >> 32, 14047 (int64_t)src0 >> 63, 14048 (int64_t)src0 >> 63, 14049 }; 14050 uint32_t src1_u32[4] = { 14051 src1, 14052 (int64_t)src1 >> 32, 14053 (int64_t)src1 >> 63, 14054 (int64_t)src1 >> 63, 14055 }; 14056 uint32_t prod_u32[4]; 14057 ubm_mul_u32arr(prod_u32, src0_u32, src1_u32); 14058 dst = (uint64_t)prod_u32[2] | ((uint64_t)prod_u32[3] << 32); 14059} else { 14060 dst = ((int64_t)src0 * (int64_t)src1) >> bit_size; 14061} 14062 14063 14064 _dst_val[_i].i16 = dst; 14065 } 14066 14067 break; 14068 } 14069 case 32: { 14070 14071 14072 14073 14074 for (unsigned _i = 0; _i < num_components; _i++) { 14075 const int32_t src0 = 14076 _src[0][_i].i32; 14077 const int32_t src1 = 14078 _src[1][_i].i32; 14079 14080 int32_t dst; 14081 14082 14083if (bit_size == 64) { 14084 /* We need to do a full 128-bit x 128-bit multiply in order for the sign 14085 * extension to work properly. 
The casts are kind-of annoying but needed 14086 * to prevent compiler warnings. 14087 */ 14088 uint32_t src0_u32[4] = { 14089 src0, 14090 (int64_t)src0 >> 32, 14091 (int64_t)src0 >> 63, 14092 (int64_t)src0 >> 63, 14093 }; 14094 uint32_t src1_u32[4] = { 14095 src1, 14096 (int64_t)src1 >> 32, 14097 (int64_t)src1 >> 63, 14098 (int64_t)src1 >> 63, 14099 }; 14100 uint32_t prod_u32[4]; 14101 ubm_mul_u32arr(prod_u32, src0_u32, src1_u32); 14102 dst = (uint64_t)prod_u32[2] | ((uint64_t)prod_u32[3] << 32); 14103} else { 14104 dst = ((int64_t)src0 * (int64_t)src1) >> bit_size; 14105} 14106 14107 14108 _dst_val[_i].i32 = dst; 14109 } 14110 14111 break; 14112 } 14113 case 64: { 14114 14115 14116 14117 14118 for (unsigned _i = 0; _i < num_components; _i++) { 14119 const int64_t src0 = 14120 _src[0][_i].i64; 14121 const int64_t src1 = 14122 _src[1][_i].i64; 14123 14124 int64_t dst; 14125 14126 14127if (bit_size == 64) { 14128 /* We need to do a full 128-bit x 128-bit multiply in order for the sign 14129 * extension to work properly. The casts are kind-of annoying but needed 14130 * to prevent compiler warnings. 
14131 */ 14132 uint32_t src0_u32[4] = { 14133 src0, 14134 (int64_t)src0 >> 32, 14135 (int64_t)src0 >> 63, 14136 (int64_t)src0 >> 63, 14137 }; 14138 uint32_t src1_u32[4] = { 14139 src1, 14140 (int64_t)src1 >> 32, 14141 (int64_t)src1 >> 63, 14142 (int64_t)src1 >> 63, 14143 }; 14144 uint32_t prod_u32[4]; 14145 ubm_mul_u32arr(prod_u32, src0_u32, src1_u32); 14146 dst = (uint64_t)prod_u32[2] | ((uint64_t)prod_u32[3] << 32); 14147} else { 14148 dst = ((int64_t)src0 * (int64_t)src1) >> bit_size; 14149} 14150 14151 14152 _dst_val[_i].i64 = dst; 14153 } 14154 14155 break; 14156 } 14157 14158 default: 14159 unreachable("unknown bit width"); 14160 } 14161} 14162static void 14163evaluate_ine(nir_const_value *_dst_val, 14164 MAYBE_UNUSED unsigned num_components, 14165 unsigned bit_size, 14166 MAYBE_UNUSED nir_const_value **_src) 14167{ 14168 switch (bit_size) { 14169 case 1: { 14170 14171 14172 14173 14174 for (unsigned _i = 0; _i < num_components; _i++) { 14175 /* 1-bit integers use a 0/-1 convention */ 14176 const int1_t src0 = -(int1_t)_src[0][_i].b; 14177 /* 1-bit integers use a 0/-1 convention */ 14178 const int1_t src1 = -(int1_t)_src[1][_i].b; 14179 14180 bool1_t dst = src0 != src1; 14181 14182 _dst_val[_i].b = -(int)dst; 14183 } 14184 14185 break; 14186 } 14187 case 8: { 14188 14189 14190 14191 14192 for (unsigned _i = 0; _i < num_components; _i++) { 14193 const int8_t src0 = 14194 _src[0][_i].i8; 14195 const int8_t src1 = 14196 _src[1][_i].i8; 14197 14198 bool1_t dst = src0 != src1; 14199 14200 _dst_val[_i].b = -(int)dst; 14201 } 14202 14203 break; 14204 } 14205 case 16: { 14206 14207 14208 14209 14210 for (unsigned _i = 0; _i < num_components; _i++) { 14211 const int16_t src0 = 14212 _src[0][_i].i16; 14213 const int16_t src1 = 14214 _src[1][_i].i16; 14215 14216 bool1_t dst = src0 != src1; 14217 14218 _dst_val[_i].b = -(int)dst; 14219 } 14220 14221 break; 14222 } 14223 case 32: { 14224 14225 14226 14227 14228 for (unsigned _i = 0; _i < num_components; _i++) { 14229 
const int32_t src0 = 14230 _src[0][_i].i32; 14231 const int32_t src1 = 14232 _src[1][_i].i32; 14233 14234 bool1_t dst = src0 != src1; 14235 14236 _dst_val[_i].b = -(int)dst; 14237 } 14238 14239 break; 14240 } 14241 case 64: { 14242 14243 14244 14245 14246 for (unsigned _i = 0; _i < num_components; _i++) { 14247 const int64_t src0 = 14248 _src[0][_i].i64; 14249 const int64_t src1 = 14250 _src[1][_i].i64; 14251 14252 bool1_t dst = src0 != src1; 14253 14254 _dst_val[_i].b = -(int)dst; 14255 } 14256 14257 break; 14258 } 14259 14260 default: 14261 unreachable("unknown bit width"); 14262 } 14263} 14264static void 14265evaluate_ine32(nir_const_value *_dst_val, 14266 MAYBE_UNUSED unsigned num_components, 14267 unsigned bit_size, 14268 MAYBE_UNUSED nir_const_value **_src) 14269{ 14270 switch (bit_size) { 14271 case 1: { 14272 14273 14274 14275 14276 for (unsigned _i = 0; _i < num_components; _i++) { 14277 /* 1-bit integers use a 0/-1 convention */ 14278 const int1_t src0 = -(int1_t)_src[0][_i].b; 14279 /* 1-bit integers use a 0/-1 convention */ 14280 const int1_t src1 = -(int1_t)_src[1][_i].b; 14281 14282 bool32_t dst = src0 != src1; 14283 14284 _dst_val[_i].i32 = -(int)dst; 14285 } 14286 14287 break; 14288 } 14289 case 8: { 14290 14291 14292 14293 14294 for (unsigned _i = 0; _i < num_components; _i++) { 14295 const int8_t src0 = 14296 _src[0][_i].i8; 14297 const int8_t src1 = 14298 _src[1][_i].i8; 14299 14300 bool32_t dst = src0 != src1; 14301 14302 _dst_val[_i].i32 = -(int)dst; 14303 } 14304 14305 break; 14306 } 14307 case 16: { 14308 14309 14310 14311 14312 for (unsigned _i = 0; _i < num_components; _i++) { 14313 const int16_t src0 = 14314 _src[0][_i].i16; 14315 const int16_t src1 = 14316 _src[1][_i].i16; 14317 14318 bool32_t dst = src0 != src1; 14319 14320 _dst_val[_i].i32 = -(int)dst; 14321 } 14322 14323 break; 14324 } 14325 case 32: { 14326 14327 14328 14329 14330 for (unsigned _i = 0; _i < num_components; _i++) { 14331 const int32_t src0 = 14332 _src[0][_i].i32; 
14333 const int32_t src1 = 14334 _src[1][_i].i32; 14335 14336 bool32_t dst = src0 != src1; 14337 14338 _dst_val[_i].i32 = -(int)dst; 14339 } 14340 14341 break; 14342 } 14343 case 64: { 14344 14345 14346 14347 14348 for (unsigned _i = 0; _i < num_components; _i++) { 14349 const int64_t src0 = 14350 _src[0][_i].i64; 14351 const int64_t src1 = 14352 _src[1][_i].i64; 14353 14354 bool32_t dst = src0 != src1; 14355 14356 _dst_val[_i].i32 = -(int)dst; 14357 } 14358 14359 break; 14360 } 14361 14362 default: 14363 unreachable("unknown bit width"); 14364 } 14365} 14366static void 14367evaluate_ineg(nir_const_value *_dst_val, 14368 MAYBE_UNUSED unsigned num_components, 14369 unsigned bit_size, 14370 MAYBE_UNUSED nir_const_value **_src) 14371{ 14372 switch (bit_size) { 14373 case 1: { 14374 14375 14376 14377 14378 for (unsigned _i = 0; _i < num_components; _i++) { 14379 /* 1-bit integers use a 0/-1 convention */ 14380 const int1_t src0 = -(int1_t)_src[0][_i].b; 14381 14382 int1_t dst = -src0; 14383 14384 /* 1-bit integers get truncated */ 14385 _dst_val[_i].b = dst & 1; 14386 } 14387 14388 break; 14389 } 14390 case 8: { 14391 14392 14393 14394 14395 for (unsigned _i = 0; _i < num_components; _i++) { 14396 const int8_t src0 = 14397 _src[0][_i].i8; 14398 14399 int8_t dst = -src0; 14400 14401 _dst_val[_i].i8 = dst; 14402 } 14403 14404 break; 14405 } 14406 case 16: { 14407 14408 14409 14410 14411 for (unsigned _i = 0; _i < num_components; _i++) { 14412 const int16_t src0 = 14413 _src[0][_i].i16; 14414 14415 int16_t dst = -src0; 14416 14417 _dst_val[_i].i16 = dst; 14418 } 14419 14420 break; 14421 } 14422 case 32: { 14423 14424 14425 14426 14427 for (unsigned _i = 0; _i < num_components; _i++) { 14428 const int32_t src0 = 14429 _src[0][_i].i32; 14430 14431 int32_t dst = -src0; 14432 14433 _dst_val[_i].i32 = dst; 14434 } 14435 14436 break; 14437 } 14438 case 64: { 14439 14440 14441 14442 14443 for (unsigned _i = 0; _i < num_components; _i++) { 14444 const int64_t src0 = 14445 
_src[0][_i].i64; 14446 14447 int64_t dst = -src0; 14448 14449 _dst_val[_i].i64 = dst; 14450 } 14451 14452 break; 14453 } 14454 14455 default: 14456 unreachable("unknown bit width"); 14457 } 14458} 14459static void 14460evaluate_inot(nir_const_value *_dst_val, 14461 MAYBE_UNUSED unsigned num_components, 14462 unsigned bit_size, 14463 MAYBE_UNUSED nir_const_value **_src) 14464{ 14465 switch (bit_size) { 14466 case 1: { 14467 14468 14469 14470 14471 for (unsigned _i = 0; _i < num_components; _i++) { 14472 /* 1-bit integers use a 0/-1 convention */ 14473 const int1_t src0 = -(int1_t)_src[0][_i].b; 14474 14475 int1_t dst = ~src0; 14476 14477 /* 1-bit integers get truncated */ 14478 _dst_val[_i].b = dst & 1; 14479 } 14480 14481 break; 14482 } 14483 case 8: { 14484 14485 14486 14487 14488 for (unsigned _i = 0; _i < num_components; _i++) { 14489 const int8_t src0 = 14490 _src[0][_i].i8; 14491 14492 int8_t dst = ~src0; 14493 14494 _dst_val[_i].i8 = dst; 14495 } 14496 14497 break; 14498 } 14499 case 16: { 14500 14501 14502 14503 14504 for (unsigned _i = 0; _i < num_components; _i++) { 14505 const int16_t src0 = 14506 _src[0][_i].i16; 14507 14508 int16_t dst = ~src0; 14509 14510 _dst_val[_i].i16 = dst; 14511 } 14512 14513 break; 14514 } 14515 case 32: { 14516 14517 14518 14519 14520 for (unsigned _i = 0; _i < num_components; _i++) { 14521 const int32_t src0 = 14522 _src[0][_i].i32; 14523 14524 int32_t dst = ~src0; 14525 14526 _dst_val[_i].i32 = dst; 14527 } 14528 14529 break; 14530 } 14531 case 64: { 14532 14533 14534 14535 14536 for (unsigned _i = 0; _i < num_components; _i++) { 14537 const int64_t src0 = 14538 _src[0][_i].i64; 14539 14540 int64_t dst = ~src0; 14541 14542 _dst_val[_i].i64 = dst; 14543 } 14544 14545 break; 14546 } 14547 14548 default: 14549 unreachable("unknown bit width"); 14550 } 14551} 14552static void 14553evaluate_ior(nir_const_value *_dst_val, 14554 MAYBE_UNUSED unsigned num_components, 14555 unsigned bit_size, 14556 MAYBE_UNUSED nir_const_value **_src) 
14557{ 14558 switch (bit_size) { 14559 case 1: { 14560 14561 14562 14563 14564 for (unsigned _i = 0; _i < num_components; _i++) { 14565 const uint1_t src0 = 14566 _src[0][_i].b; 14567 const uint1_t src1 = 14568 _src[1][_i].b; 14569 14570 uint1_t dst = src0 | src1; 14571 14572 /* 1-bit integers get truncated */ 14573 _dst_val[_i].b = dst & 1; 14574 } 14575 14576 break; 14577 } 14578 case 8: { 14579 14580 14581 14582 14583 for (unsigned _i = 0; _i < num_components; _i++) { 14584 const uint8_t src0 = 14585 _src[0][_i].u8; 14586 const uint8_t src1 = 14587 _src[1][_i].u8; 14588 14589 uint8_t dst = src0 | src1; 14590 14591 _dst_val[_i].u8 = dst; 14592 } 14593 14594 break; 14595 } 14596 case 16: { 14597 14598 14599 14600 14601 for (unsigned _i = 0; _i < num_components; _i++) { 14602 const uint16_t src0 = 14603 _src[0][_i].u16; 14604 const uint16_t src1 = 14605 _src[1][_i].u16; 14606 14607 uint16_t dst = src0 | src1; 14608 14609 _dst_val[_i].u16 = dst; 14610 } 14611 14612 break; 14613 } 14614 case 32: { 14615 14616 14617 14618 14619 for (unsigned _i = 0; _i < num_components; _i++) { 14620 const uint32_t src0 = 14621 _src[0][_i].u32; 14622 const uint32_t src1 = 14623 _src[1][_i].u32; 14624 14625 uint32_t dst = src0 | src1; 14626 14627 _dst_val[_i].u32 = dst; 14628 } 14629 14630 break; 14631 } 14632 case 64: { 14633 14634 14635 14636 14637 for (unsigned _i = 0; _i < num_components; _i++) { 14638 const uint64_t src0 = 14639 _src[0][_i].u64; 14640 const uint64_t src1 = 14641 _src[1][_i].u64; 14642 14643 uint64_t dst = src0 | src1; 14644 14645 _dst_val[_i].u64 = dst; 14646 } 14647 14648 break; 14649 } 14650 14651 default: 14652 unreachable("unknown bit width"); 14653 } 14654} 14655static void 14656evaluate_irem(nir_const_value *_dst_val, 14657 MAYBE_UNUSED unsigned num_components, 14658 unsigned bit_size, 14659 MAYBE_UNUSED nir_const_value **_src) 14660{ 14661 switch (bit_size) { 14662 case 1: { 14663 14664 14665 14666 14667 for (unsigned _i = 0; _i < num_components; _i++) { 
14668 /* 1-bit integers use a 0/-1 convention */ 14669 const int1_t src0 = -(int1_t)_src[0][_i].b; 14670 /* 1-bit integers use a 0/-1 convention */ 14671 const int1_t src1 = -(int1_t)_src[1][_i].b; 14672 14673 int1_t dst = src1 == 0 ? 0 : src0 % src1; 14674 14675 /* 1-bit integers get truncated */ 14676 _dst_val[_i].b = dst & 1; 14677 } 14678 14679 break; 14680 } 14681 case 8: { 14682 14683 14684 14685 14686 for (unsigned _i = 0; _i < num_components; _i++) { 14687 const int8_t src0 = 14688 _src[0][_i].i8; 14689 const int8_t src1 = 14690 _src[1][_i].i8; 14691 14692 int8_t dst = src1 == 0 ? 0 : src0 % src1; 14693 14694 _dst_val[_i].i8 = dst; 14695 } 14696 14697 break; 14698 } 14699 case 16: { 14700 14701 14702 14703 14704 for (unsigned _i = 0; _i < num_components; _i++) { 14705 const int16_t src0 = 14706 _src[0][_i].i16; 14707 const int16_t src1 = 14708 _src[1][_i].i16; 14709 14710 int16_t dst = src1 == 0 ? 0 : src0 % src1; 14711 14712 _dst_val[_i].i16 = dst; 14713 } 14714 14715 break; 14716 } 14717 case 32: { 14718 14719 14720 14721 14722 for (unsigned _i = 0; _i < num_components; _i++) { 14723 const int32_t src0 = 14724 _src[0][_i].i32; 14725 const int32_t src1 = 14726 _src[1][_i].i32; 14727 14728 int32_t dst = src1 == 0 ? 0 : src0 % src1; 14729 14730 _dst_val[_i].i32 = dst; 14731 } 14732 14733 break; 14734 } 14735 case 64: { 14736 14737 14738 14739 14740 for (unsigned _i = 0; _i < num_components; _i++) { 14741 const int64_t src0 = 14742 _src[0][_i].i64; 14743 const int64_t src1 = 14744 _src[1][_i].i64; 14745 14746 int64_t dst = src1 == 0 ? 
0 : src0 % src1; 14747 14748 _dst_val[_i].i64 = dst; 14749 } 14750 14751 break; 14752 } 14753 14754 default: 14755 unreachable("unknown bit width"); 14756 } 14757} 14758static void 14759evaluate_irhadd(nir_const_value *_dst_val, 14760 MAYBE_UNUSED unsigned num_components, 14761 unsigned bit_size, 14762 MAYBE_UNUSED nir_const_value **_src) 14763{ 14764 switch (bit_size) { 14765 case 1: { 14766 14767 14768 14769 14770 for (unsigned _i = 0; _i < num_components; _i++) { 14771 /* 1-bit integers use a 0/-1 convention */ 14772 const int1_t src0 = -(int1_t)_src[0][_i].b; 14773 /* 1-bit integers use a 0/-1 convention */ 14774 const int1_t src1 = -(int1_t)_src[1][_i].b; 14775 14776 int1_t dst = (src0 | src1) + ((src0 ^ src1) >> 1); 14777 14778 /* 1-bit integers get truncated */ 14779 _dst_val[_i].b = dst & 1; 14780 } 14781 14782 break; 14783 } 14784 case 8: { 14785 14786 14787 14788 14789 for (unsigned _i = 0; _i < num_components; _i++) { 14790 const int8_t src0 = 14791 _src[0][_i].i8; 14792 const int8_t src1 = 14793 _src[1][_i].i8; 14794 14795 int8_t dst = (src0 | src1) + ((src0 ^ src1) >> 1); 14796 14797 _dst_val[_i].i8 = dst; 14798 } 14799 14800 break; 14801 } 14802 case 16: { 14803 14804 14805 14806 14807 for (unsigned _i = 0; _i < num_components; _i++) { 14808 const int16_t src0 = 14809 _src[0][_i].i16; 14810 const int16_t src1 = 14811 _src[1][_i].i16; 14812 14813 int16_t dst = (src0 | src1) + ((src0 ^ src1) >> 1); 14814 14815 _dst_val[_i].i16 = dst; 14816 } 14817 14818 break; 14819 } 14820 case 32: { 14821 14822 14823 14824 14825 for (unsigned _i = 0; _i < num_components; _i++) { 14826 const int32_t src0 = 14827 _src[0][_i].i32; 14828 const int32_t src1 = 14829 _src[1][_i].i32; 14830 14831 int32_t dst = (src0 | src1) + ((src0 ^ src1) >> 1); 14832 14833 _dst_val[_i].i32 = dst; 14834 } 14835 14836 break; 14837 } 14838 case 64: { 14839 14840 14841 14842 14843 for (unsigned _i = 0; _i < num_components; _i++) { 14844 const int64_t src0 = 14845 _src[0][_i].i64; 14846 const 
int64_t src1 = 14847 _src[1][_i].i64; 14848 14849 int64_t dst = (src0 | src1) + ((src0 ^ src1) >> 1); 14850 14851 _dst_val[_i].i64 = dst; 14852 } 14853 14854 break; 14855 } 14856 14857 default: 14858 unreachable("unknown bit width"); 14859 } 14860} 14861static void 14862evaluate_ishl(nir_const_value *_dst_val, 14863 MAYBE_UNUSED unsigned num_components, 14864 unsigned bit_size, 14865 MAYBE_UNUSED nir_const_value **_src) 14866{ 14867 switch (bit_size) { 14868 case 1: { 14869 14870 14871 14872 14873 for (unsigned _i = 0; _i < num_components; _i++) { 14874 /* 1-bit integers use a 0/-1 convention */ 14875 const int1_t src0 = -(int1_t)_src[0][_i].b; 14876 const uint32_t src1 = 14877 _src[1][_i].u32; 14878 14879 int1_t dst = src0 << (src1 & (sizeof(src0) * 8 - 1)); 14880 14881 /* 1-bit integers get truncated */ 14882 _dst_val[_i].b = dst & 1; 14883 } 14884 14885 break; 14886 } 14887 case 8: { 14888 14889 14890 14891 14892 for (unsigned _i = 0; _i < num_components; _i++) { 14893 const int8_t src0 = 14894 _src[0][_i].i8; 14895 const uint32_t src1 = 14896 _src[1][_i].u32; 14897 14898 int8_t dst = src0 << (src1 & (sizeof(src0) * 8 - 1)); 14899 14900 _dst_val[_i].i8 = dst; 14901 } 14902 14903 break; 14904 } 14905 case 16: { 14906 14907 14908 14909 14910 for (unsigned _i = 0; _i < num_components; _i++) { 14911 const int16_t src0 = 14912 _src[0][_i].i16; 14913 const uint32_t src1 = 14914 _src[1][_i].u32; 14915 14916 int16_t dst = src0 << (src1 & (sizeof(src0) * 8 - 1)); 14917 14918 _dst_val[_i].i16 = dst; 14919 } 14920 14921 break; 14922 } 14923 case 32: { 14924 14925 14926 14927 14928 for (unsigned _i = 0; _i < num_components; _i++) { 14929 const int32_t src0 = 14930 _src[0][_i].i32; 14931 const uint32_t src1 = 14932 _src[1][_i].u32; 14933 14934 int32_t dst = src0 << (src1 & (sizeof(src0) * 8 - 1)); 14935 14936 _dst_val[_i].i32 = dst; 14937 } 14938 14939 break; 14940 } 14941 case 64: { 14942 14943 14944 14945 14946 for (unsigned _i = 0; _i < num_components; _i++) { 14947 const 
int64_t src0 = 14948 _src[0][_i].i64; 14949 const uint32_t src1 = 14950 _src[1][_i].u32; 14951 14952 int64_t dst = src0 << (src1 & (sizeof(src0) * 8 - 1)); 14953 14954 _dst_val[_i].i64 = dst; 14955 } 14956 14957 break; 14958 } 14959 14960 default: 14961 unreachable("unknown bit width"); 14962 } 14963} 14964static void 14965evaluate_ishr(nir_const_value *_dst_val, 14966 MAYBE_UNUSED unsigned num_components, 14967 unsigned bit_size, 14968 MAYBE_UNUSED nir_const_value **_src) 14969{ 14970 switch (bit_size) { 14971 case 1: { 14972 14973 14974 14975 14976 for (unsigned _i = 0; _i < num_components; _i++) { 14977 /* 1-bit integers use a 0/-1 convention */ 14978 const int1_t src0 = -(int1_t)_src[0][_i].b; 14979 const uint32_t src1 = 14980 _src[1][_i].u32; 14981 14982 int1_t dst = src0 >> (src1 & (sizeof(src0) * 8 - 1)); 14983 14984 /* 1-bit integers get truncated */ 14985 _dst_val[_i].b = dst & 1; 14986 } 14987 14988 break; 14989 } 14990 case 8: { 14991 14992 14993 14994 14995 for (unsigned _i = 0; _i < num_components; _i++) { 14996 const int8_t src0 = 14997 _src[0][_i].i8; 14998 const uint32_t src1 = 14999 _src[1][_i].u32; 15000 15001 int8_t dst = src0 >> (src1 & (sizeof(src0) * 8 - 1)); 15002 15003 _dst_val[_i].i8 = dst; 15004 } 15005 15006 break; 15007 } 15008 case 16: { 15009 15010 15011 15012 15013 for (unsigned _i = 0; _i < num_components; _i++) { 15014 const int16_t src0 = 15015 _src[0][_i].i16; 15016 const uint32_t src1 = 15017 _src[1][_i].u32; 15018 15019 int16_t dst = src0 >> (src1 & (sizeof(src0) * 8 - 1)); 15020 15021 _dst_val[_i].i16 = dst; 15022 } 15023 15024 break; 15025 } 15026 case 32: { 15027 15028 15029 15030 15031 for (unsigned _i = 0; _i < num_components; _i++) { 15032 const int32_t src0 = 15033 _src[0][_i].i32; 15034 const uint32_t src1 = 15035 _src[1][_i].u32; 15036 15037 int32_t dst = src0 >> (src1 & (sizeof(src0) * 8 - 1)); 15038 15039 _dst_val[_i].i32 = dst; 15040 } 15041 15042 break; 15043 } 15044 case 64: { 15045 15046 15047 15048 15049 for 
(unsigned _i = 0; _i < num_components; _i++) { 15050 const int64_t src0 = 15051 _src[0][_i].i64; 15052 const uint32_t src1 = 15053 _src[1][_i].u32; 15054 15055 int64_t dst = src0 >> (src1 & (sizeof(src0) * 8 - 1)); 15056 15057 _dst_val[_i].i64 = dst; 15058 } 15059 15060 break; 15061 } 15062 15063 default: 15064 unreachable("unknown bit width"); 15065 } 15066} 15067static void 15068evaluate_isign(nir_const_value *_dst_val, 15069 MAYBE_UNUSED unsigned num_components, 15070 unsigned bit_size, 15071 MAYBE_UNUSED nir_const_value **_src) 15072{ 15073 switch (bit_size) { 15074 case 1: { 15075 15076 15077 15078 15079 for (unsigned _i = 0; _i < num_components; _i++) { 15080 /* 1-bit integers use a 0/-1 convention */ 15081 const int1_t src0 = -(int1_t)_src[0][_i].b; 15082 15083 int1_t dst = (src0 == 0) ? 0 : ((src0 > 0) ? 1 : -1); 15084 15085 /* 1-bit integers get truncated */ 15086 _dst_val[_i].b = dst & 1; 15087 } 15088 15089 break; 15090 } 15091 case 8: { 15092 15093 15094 15095 15096 for (unsigned _i = 0; _i < num_components; _i++) { 15097 const int8_t src0 = 15098 _src[0][_i].i8; 15099 15100 int8_t dst = (src0 == 0) ? 0 : ((src0 > 0) ? 1 : -1); 15101 15102 _dst_val[_i].i8 = dst; 15103 } 15104 15105 break; 15106 } 15107 case 16: { 15108 15109 15110 15111 15112 for (unsigned _i = 0; _i < num_components; _i++) { 15113 const int16_t src0 = 15114 _src[0][_i].i16; 15115 15116 int16_t dst = (src0 == 0) ? 0 : ((src0 > 0) ? 1 : -1); 15117 15118 _dst_val[_i].i16 = dst; 15119 } 15120 15121 break; 15122 } 15123 case 32: { 15124 15125 15126 15127 15128 for (unsigned _i = 0; _i < num_components; _i++) { 15129 const int32_t src0 = 15130 _src[0][_i].i32; 15131 15132 int32_t dst = (src0 == 0) ? 0 : ((src0 > 0) ? 1 : -1); 15133 15134 _dst_val[_i].i32 = dst; 15135 } 15136 15137 break; 15138 } 15139 case 64: { 15140 15141 15142 15143 15144 for (unsigned _i = 0; _i < num_components; _i++) { 15145 const int64_t src0 = 15146 _src[0][_i].i64; 15147 15148 int64_t dst = (src0 == 0) ? 
0 : ((src0 > 0) ? 1 : -1); 15149 15150 _dst_val[_i].i64 = dst; 15151 } 15152 15153 break; 15154 } 15155 15156 default: 15157 unreachable("unknown bit width"); 15158 } 15159} 15160static void 15161evaluate_isub(nir_const_value *_dst_val, 15162 MAYBE_UNUSED unsigned num_components, 15163 unsigned bit_size, 15164 MAYBE_UNUSED nir_const_value **_src) 15165{ 15166 switch (bit_size) { 15167 case 1: { 15168 15169 15170 15171 15172 for (unsigned _i = 0; _i < num_components; _i++) { 15173 /* 1-bit integers use a 0/-1 convention */ 15174 const int1_t src0 = -(int1_t)_src[0][_i].b; 15175 /* 1-bit integers use a 0/-1 convention */ 15176 const int1_t src1 = -(int1_t)_src[1][_i].b; 15177 15178 int1_t dst = src0 - src1; 15179 15180 /* 1-bit integers get truncated */ 15181 _dst_val[_i].b = dst & 1; 15182 } 15183 15184 break; 15185 } 15186 case 8: { 15187 15188 15189 15190 15191 for (unsigned _i = 0; _i < num_components; _i++) { 15192 const int8_t src0 = 15193 _src[0][_i].i8; 15194 const int8_t src1 = 15195 _src[1][_i].i8; 15196 15197 int8_t dst = src0 - src1; 15198 15199 _dst_val[_i].i8 = dst; 15200 } 15201 15202 break; 15203 } 15204 case 16: { 15205 15206 15207 15208 15209 for (unsigned _i = 0; _i < num_components; _i++) { 15210 const int16_t src0 = 15211 _src[0][_i].i16; 15212 const int16_t src1 = 15213 _src[1][_i].i16; 15214 15215 int16_t dst = src0 - src1; 15216 15217 _dst_val[_i].i16 = dst; 15218 } 15219 15220 break; 15221 } 15222 case 32: { 15223 15224 15225 15226 15227 for (unsigned _i = 0; _i < num_components; _i++) { 15228 const int32_t src0 = 15229 _src[0][_i].i32; 15230 const int32_t src1 = 15231 _src[1][_i].i32; 15232 15233 int32_t dst = src0 - src1; 15234 15235 _dst_val[_i].i32 = dst; 15236 } 15237 15238 break; 15239 } 15240 case 64: { 15241 15242 15243 15244 15245 for (unsigned _i = 0; _i < num_components; _i++) { 15246 const int64_t src0 = 15247 _src[0][_i].i64; 15248 const int64_t src1 = 15249 _src[1][_i].i64; 15250 15251 int64_t dst = src0 - src1; 15252 15253 
_dst_val[_i].i64 = dst; 15254 } 15255 15256 break; 15257 } 15258 15259 default: 15260 unreachable("unknown bit width"); 15261 } 15262} 15263static void 15264evaluate_isub_sat(nir_const_value *_dst_val, 15265 MAYBE_UNUSED unsigned num_components, 15266 unsigned bit_size, 15267 MAYBE_UNUSED nir_const_value **_src) 15268{ 15269 switch (bit_size) { 15270 case 1: { 15271 15272 15273 15274 15275 for (unsigned _i = 0; _i < num_components; _i++) { 15276 /* 1-bit integers use a 0/-1 convention */ 15277 const int1_t src0 = -(int1_t)_src[0][_i].b; 15278 /* 1-bit integers use a 0/-1 convention */ 15279 const int1_t src1 = -(int1_t)_src[1][_i].b; 15280 15281 int1_t dst = 15282 src1 < 0 ? 15283 (src0 - src1 < src0 ? (1ull << (bit_size - 1)) - 1 : src0 - src1) : 15284 (src0 < src0 - src1 ? (1ull << (bit_size - 1)) : src0 - src1) 15285; 15286 15287 /* 1-bit integers get truncated */ 15288 _dst_val[_i].b = dst & 1; 15289 } 15290 15291 break; 15292 } 15293 case 8: { 15294 15295 15296 15297 15298 for (unsigned _i = 0; _i < num_components; _i++) { 15299 const int8_t src0 = 15300 _src[0][_i].i8; 15301 const int8_t src1 = 15302 _src[1][_i].i8; 15303 15304 int8_t dst = 15305 src1 < 0 ? 15306 (src0 - src1 < src0 ? (1ull << (bit_size - 1)) - 1 : src0 - src1) : 15307 (src0 < src0 - src1 ? (1ull << (bit_size - 1)) : src0 - src1) 15308; 15309 15310 _dst_val[_i].i8 = dst; 15311 } 15312 15313 break; 15314 } 15315 case 16: { 15316 15317 15318 15319 15320 for (unsigned _i = 0; _i < num_components; _i++) { 15321 const int16_t src0 = 15322 _src[0][_i].i16; 15323 const int16_t src1 = 15324 _src[1][_i].i16; 15325 15326 int16_t dst = 15327 src1 < 0 ? 15328 (src0 - src1 < src0 ? (1ull << (bit_size - 1)) - 1 : src0 - src1) : 15329 (src0 < src0 - src1 ? 
(1ull << (bit_size - 1)) : src0 - src1) 15330; 15331 15332 _dst_val[_i].i16 = dst; 15333 } 15334 15335 break; 15336 } 15337 case 32: { 15338 15339 15340 15341 15342 for (unsigned _i = 0; _i < num_components; _i++) { 15343 const int32_t src0 = 15344 _src[0][_i].i32; 15345 const int32_t src1 = 15346 _src[1][_i].i32; 15347 15348 int32_t dst = 15349 src1 < 0 ? 15350 (src0 - src1 < src0 ? (1ull << (bit_size - 1)) - 1 : src0 - src1) : 15351 (src0 < src0 - src1 ? (1ull << (bit_size - 1)) : src0 - src1) 15352; 15353 15354 _dst_val[_i].i32 = dst; 15355 } 15356 15357 break; 15358 } 15359 case 64: { 15360 15361 15362 15363 15364 for (unsigned _i = 0; _i < num_components; _i++) { 15365 const int64_t src0 = 15366 _src[0][_i].i64; 15367 const int64_t src1 = 15368 _src[1][_i].i64; 15369 15370 int64_t dst = 15371 src1 < 0 ? 15372 (src0 - src1 < src0 ? (1ull << (bit_size - 1)) - 1 : src0 - src1) : 15373 (src0 < src0 - src1 ? (1ull << (bit_size - 1)) : src0 - src1) 15374; 15375 15376 _dst_val[_i].i64 = dst; 15377 } 15378 15379 break; 15380 } 15381 15382 default: 15383 unreachable("unknown bit width"); 15384 } 15385} 15386static void 15387evaluate_ixor(nir_const_value *_dst_val, 15388 MAYBE_UNUSED unsigned num_components, 15389 unsigned bit_size, 15390 MAYBE_UNUSED nir_const_value **_src) 15391{ 15392 switch (bit_size) { 15393 case 1: { 15394 15395 15396 15397 15398 for (unsigned _i = 0; _i < num_components; _i++) { 15399 const uint1_t src0 = 15400 _src[0][_i].b; 15401 const uint1_t src1 = 15402 _src[1][_i].b; 15403 15404 uint1_t dst = src0 ^ src1; 15405 15406 /* 1-bit integers get truncated */ 15407 _dst_val[_i].b = dst & 1; 15408 } 15409 15410 break; 15411 } 15412 case 8: { 15413 15414 15415 15416 15417 for (unsigned _i = 0; _i < num_components; _i++) { 15418 const uint8_t src0 = 15419 _src[0][_i].u8; 15420 const uint8_t src1 = 15421 _src[1][_i].u8; 15422 15423 uint8_t dst = src0 ^ src1; 15424 15425 _dst_val[_i].u8 = dst; 15426 } 15427 15428 break; 15429 } 15430 case 16: { 15431 
15432 15433 15434 15435 for (unsigned _i = 0; _i < num_components; _i++) { 15436 const uint16_t src0 = 15437 _src[0][_i].u16; 15438 const uint16_t src1 = 15439 _src[1][_i].u16; 15440 15441 uint16_t dst = src0 ^ src1; 15442 15443 _dst_val[_i].u16 = dst; 15444 } 15445 15446 break; 15447 } 15448 case 32: { 15449 15450 15451 15452 15453 for (unsigned _i = 0; _i < num_components; _i++) { 15454 const uint32_t src0 = 15455 _src[0][_i].u32; 15456 const uint32_t src1 = 15457 _src[1][_i].u32; 15458 15459 uint32_t dst = src0 ^ src1; 15460 15461 _dst_val[_i].u32 = dst; 15462 } 15463 15464 break; 15465 } 15466 case 64: { 15467 15468 15469 15470 15471 for (unsigned _i = 0; _i < num_components; _i++) { 15472 const uint64_t src0 = 15473 _src[0][_i].u64; 15474 const uint64_t src1 = 15475 _src[1][_i].u64; 15476 15477 uint64_t dst = src0 ^ src1; 15478 15479 _dst_val[_i].u64 = dst; 15480 } 15481 15482 break; 15483 } 15484 15485 default: 15486 unreachable("unknown bit width"); 15487 } 15488} 15489static void 15490evaluate_ldexp(nir_const_value *_dst_val, 15491 MAYBE_UNUSED unsigned num_components, 15492 unsigned bit_size, 15493 MAYBE_UNUSED nir_const_value **_src) 15494{ 15495 switch (bit_size) { 15496 case 16: { 15497 15498 15499 15500 15501 for (unsigned _i = 0; _i < num_components; _i++) { 15502 const float src0 = 15503 _mesa_half_to_float(_src[0][_i].u16); 15504 const int32_t src1 = 15505 _src[1][_i].i32; 15506 15507 float16_t dst; 15508 15509 15510dst = (bit_size == 64) ? ldexp(src0, src1) : ldexpf(src0, src1); 15511/* flush denormals to zero. */ 15512if (!isnormal(dst)) 15513 dst = copysignf(0.0f, src0); 15514 15515 15516 _dst_val[_i].u16 = _mesa_float_to_half(dst); 15517 } 15518 15519 break; 15520 } 15521 case 32: { 15522 15523 15524 15525 15526 for (unsigned _i = 0; _i < num_components; _i++) { 15527 const float32_t src0 = 15528 _src[0][_i].f32; 15529 const int32_t src1 = 15530 _src[1][_i].i32; 15531 15532 float32_t dst; 15533 15534 15535dst = (bit_size == 64) ? 
ldexp(src0, src1) : ldexpf(src0, src1); 15536/* flush denormals to zero. */ 15537if (!isnormal(dst)) 15538 dst = copysignf(0.0f, src0); 15539 15540 15541 _dst_val[_i].f32 = dst; 15542 } 15543 15544 break; 15545 } 15546 case 64: { 15547 15548 15549 15550 15551 for (unsigned _i = 0; _i < num_components; _i++) { 15552 const float64_t src0 = 15553 _src[0][_i].f64; 15554 const int32_t src1 = 15555 _src[1][_i].i32; 15556 15557 float64_t dst; 15558 15559 15560dst = (bit_size == 64) ? ldexp(src0, src1) : ldexpf(src0, src1); 15561/* flush denormals to zero. */ 15562if (!isnormal(dst)) 15563 dst = copysignf(0.0f, src0); 15564 15565 15566 _dst_val[_i].f64 = dst; 15567 } 15568 15569 break; 15570 } 15571 15572 default: 15573 unreachable("unknown bit width"); 15574 } 15575} 15576static void 15577evaluate_pack_32_2x16(nir_const_value *_dst_val, 15578 MAYBE_UNUSED unsigned num_components, 15579 UNUSED unsigned bit_size, 15580 MAYBE_UNUSED nir_const_value **_src) 15581{ 15582 15583 15584 15585 15586 const struct uint16_vec src0 = { 15587 _src[0][0].u16, 15588 _src[0][1].u16, 15589 0, 15590 0, 15591 }; 15592 15593 struct uint32_vec dst; 15594 15595 dst.x = src0.x | ((uint32_t)src0.y << 16); 15596 15597 _dst_val[0].u32 = dst.x; 15598 15599} 15600static void 15601evaluate_pack_32_2x16_split(nir_const_value *_dst_val, 15602 MAYBE_UNUSED unsigned num_components, 15603 UNUSED unsigned bit_size, 15604 MAYBE_UNUSED nir_const_value **_src) 15605{ 15606 15607 15608 15609 15610 for (unsigned _i = 0; _i < num_components; _i++) { 15611 const uint16_t src0 = 15612 _src[0][_i].u16; 15613 const uint16_t src1 = 15614 _src[1][_i].u16; 15615 15616 uint32_t dst = src0 | ((uint32_t)src1 << 16); 15617 15618 _dst_val[_i].u32 = dst; 15619 } 15620 15621} 15622static void 15623evaluate_pack_64_2x32(nir_const_value *_dst_val, 15624 MAYBE_UNUSED unsigned num_components, 15625 UNUSED unsigned bit_size, 15626 MAYBE_UNUSED nir_const_value **_src) 15627{ 15628 15629 15630 15631 15632 const struct uint32_vec src0 
= { 15633 _src[0][0].u32, 15634 _src[0][1].u32, 15635 0, 15636 0, 15637 }; 15638 15639 struct uint64_vec dst; 15640 15641 dst.x = src0.x | ((uint64_t)src0.y << 32); 15642 15643 _dst_val[0].u64 = dst.x; 15644 15645} 15646static void 15647evaluate_pack_64_2x32_split(nir_const_value *_dst_val, 15648 MAYBE_UNUSED unsigned num_components, 15649 UNUSED unsigned bit_size, 15650 MAYBE_UNUSED nir_const_value **_src) 15651{ 15652 15653 15654 15655 15656 for (unsigned _i = 0; _i < num_components; _i++) { 15657 const uint32_t src0 = 15658 _src[0][_i].u32; 15659 const uint32_t src1 = 15660 _src[1][_i].u32; 15661 15662 uint64_t dst = src0 | ((uint64_t)src1 << 32); 15663 15664 _dst_val[_i].u64 = dst; 15665 } 15666 15667} 15668static void 15669evaluate_pack_64_4x16(nir_const_value *_dst_val, 15670 MAYBE_UNUSED unsigned num_components, 15671 UNUSED unsigned bit_size, 15672 MAYBE_UNUSED nir_const_value **_src) 15673{ 15674 15675 15676 15677 15678 const struct uint16_vec src0 = { 15679 _src[0][0].u16, 15680 _src[0][1].u16, 15681 _src[0][2].u16, 15682 _src[0][3].u16, 15683 }; 15684 15685 struct uint64_vec dst; 15686 15687 dst.x = src0.x | ((uint64_t)src0.y << 16) | ((uint64_t)src0.z << 32) | ((uint64_t)src0.w << 48); 15688 15689 _dst_val[0].u64 = dst.x; 15690 15691} 15692static void 15693evaluate_pack_half_2x16(nir_const_value *_dst_val, 15694 MAYBE_UNUSED unsigned num_components, 15695 UNUSED unsigned bit_size, 15696 MAYBE_UNUSED nir_const_value **_src) 15697{ 15698 15699 15700 15701 15702 const struct float32_vec src0 = { 15703 _src[0][0].f32, 15704 _src[0][1].f32, 15705 0, 15706 0, 15707 }; 15708 15709 struct uint32_vec dst; 15710 15711 15712dst.x = (uint32_t) pack_half_1x16(src0.x); 15713dst.x |= ((uint32_t) pack_half_1x16(src0.y)) << 16; 15714 15715 15716 _dst_val[0].u32 = dst.x; 15717 15718} 15719static void 15720evaluate_pack_half_2x16_split(nir_const_value *_dst_val, 15721 MAYBE_UNUSED unsigned num_components, 15722 UNUSED unsigned bit_size, 15723 MAYBE_UNUSED nir_const_value 
**_src) 15724{ 15725 15726 15727 15728 15729 const struct float32_vec src0 = { 15730 _src[0][0].f32, 15731 0, 15732 0, 15733 0, 15734 }; 15735 15736 const struct float32_vec src1 = { 15737 _src[1][0].f32, 15738 0, 15739 0, 15740 0, 15741 }; 15742 15743 struct uint32_vec dst; 15744 15745 dst.x = dst.y = dst.z = dst.w = pack_half_1x16(src0.x) | (pack_half_1x16(src1.x) << 16); 15746 15747 _dst_val[0].u32 = dst.x; 15748 15749} 15750static void 15751evaluate_pack_snorm_2x16(nir_const_value *_dst_val, 15752 MAYBE_UNUSED unsigned num_components, 15753 UNUSED unsigned bit_size, 15754 MAYBE_UNUSED nir_const_value **_src) 15755{ 15756 15757 15758 15759 15760 const struct float32_vec src0 = { 15761 _src[0][0].f32, 15762 _src[0][1].f32, 15763 0, 15764 0, 15765 }; 15766 15767 struct uint32_vec dst; 15768 15769 15770dst.x = (uint32_t) pack_snorm_1x16(src0.x); 15771dst.x |= ((uint32_t) pack_snorm_1x16(src0.y)) << 16; 15772 15773 15774 _dst_val[0].u32 = dst.x; 15775 15776} 15777static void 15778evaluate_pack_snorm_4x8(nir_const_value *_dst_val, 15779 MAYBE_UNUSED unsigned num_components, 15780 UNUSED unsigned bit_size, 15781 MAYBE_UNUSED nir_const_value **_src) 15782{ 15783 15784 15785 15786 15787 const struct float32_vec src0 = { 15788 _src[0][0].f32, 15789 _src[0][1].f32, 15790 _src[0][2].f32, 15791 _src[0][3].f32, 15792 }; 15793 15794 struct uint32_vec dst; 15795 15796 15797dst.x = (uint32_t) pack_snorm_1x8(src0.x); 15798dst.x |= ((uint32_t) pack_snorm_1x8(src0.y)) << 8; 15799dst.x |= ((uint32_t) pack_snorm_1x8(src0.z)) << 16; 15800dst.x |= ((uint32_t) pack_snorm_1x8(src0.w)) << 24; 15801 15802 15803 _dst_val[0].u32 = dst.x; 15804 15805} 15806static void 15807evaluate_pack_unorm_2x16(nir_const_value *_dst_val, 15808 MAYBE_UNUSED unsigned num_components, 15809 UNUSED unsigned bit_size, 15810 MAYBE_UNUSED nir_const_value **_src) 15811{ 15812 15813 15814 15815 15816 const struct float32_vec src0 = { 15817 _src[0][0].f32, 15818 _src[0][1].f32, 15819 0, 15820 0, 15821 }; 15822 15823 
struct uint32_vec dst; 15824 15825 15826dst.x = (uint32_t) pack_unorm_1x16(src0.x); 15827dst.x |= ((uint32_t) pack_unorm_1x16(src0.y)) << 16; 15828 15829 15830 _dst_val[0].u32 = dst.x; 15831 15832} 15833static void 15834evaluate_pack_unorm_4x8(nir_const_value *_dst_val, 15835 MAYBE_UNUSED unsigned num_components, 15836 UNUSED unsigned bit_size, 15837 MAYBE_UNUSED nir_const_value **_src) 15838{ 15839 15840 15841 15842 15843 const struct float32_vec src0 = { 15844 _src[0][0].f32, 15845 _src[0][1].f32, 15846 _src[0][2].f32, 15847 _src[0][3].f32, 15848 }; 15849 15850 struct uint32_vec dst; 15851 15852 15853dst.x = (uint32_t) pack_unorm_1x8(src0.x); 15854dst.x |= ((uint32_t) pack_unorm_1x8(src0.y)) << 8; 15855dst.x |= ((uint32_t) pack_unorm_1x8(src0.z)) << 16; 15856dst.x |= ((uint32_t) pack_unorm_1x8(src0.w)) << 24; 15857 15858 15859 _dst_val[0].u32 = dst.x; 15860 15861} 15862static void 15863evaluate_pack_uvec2_to_uint(nir_const_value *_dst_val, 15864 MAYBE_UNUSED unsigned num_components, 15865 UNUSED unsigned bit_size, 15866 MAYBE_UNUSED nir_const_value **_src) 15867{ 15868 15869 15870 15871 15872 const struct uint32_vec src0 = { 15873 _src[0][0].u32, 15874 _src[0][1].u32, 15875 0, 15876 0, 15877 }; 15878 15879 struct uint32_vec dst; 15880 15881 15882dst.x = (src0.x & 0xffff) | (src0.y << 16); 15883 15884 15885 _dst_val[0].u32 = dst.x; 15886 15887} 15888static void 15889evaluate_pack_uvec4_to_uint(nir_const_value *_dst_val, 15890 MAYBE_UNUSED unsigned num_components, 15891 UNUSED unsigned bit_size, 15892 MAYBE_UNUSED nir_const_value **_src) 15893{ 15894 15895 15896 15897 15898 const struct uint32_vec src0 = { 15899 _src[0][0].u32, 15900 _src[0][1].u32, 15901 _src[0][2].u32, 15902 _src[0][3].u32, 15903 }; 15904 15905 struct uint32_vec dst; 15906 15907 15908dst.x = (src0.x << 0) | 15909 (src0.y << 8) | 15910 (src0.z << 16) | 15911 (src0.w << 24); 15912 15913 15914 _dst_val[0].u32 = dst.x; 15915 15916} 15917static void 15918evaluate_seq(nir_const_value *_dst_val, 15919 
MAYBE_UNUSED unsigned num_components, 15920 UNUSED unsigned bit_size, 15921 MAYBE_UNUSED nir_const_value **_src) 15922{ 15923 15924 15925 15926 15927 for (unsigned _i = 0; _i < num_components; _i++) { 15928 const float32_t src0 = 15929 _src[0][_i].f32; 15930 const float32_t src1 = 15931 _src[1][_i].f32; 15932 15933 float32_t dst = (src0 == src1) ? 1.0f : 0.0f; 15934 15935 _dst_val[_i].f32 = dst; 15936 } 15937 15938} 15939static void 15940evaluate_sge(nir_const_value *_dst_val, 15941 MAYBE_UNUSED unsigned num_components, 15942 unsigned bit_size, 15943 MAYBE_UNUSED nir_const_value **_src) 15944{ 15945 switch (bit_size) { 15946 case 16: { 15947 15948 15949 15950 15951 for (unsigned _i = 0; _i < num_components; _i++) { 15952 const float src0 = 15953 _mesa_half_to_float(_src[0][_i].u16); 15954 const float src1 = 15955 _mesa_half_to_float(_src[1][_i].u16); 15956 15957 float16_t dst = (src0 >= src1) ? 1.0f : 0.0f; 15958 15959 _dst_val[_i].u16 = _mesa_float_to_half(dst); 15960 } 15961 15962 break; 15963 } 15964 case 32: { 15965 15966 15967 15968 15969 for (unsigned _i = 0; _i < num_components; _i++) { 15970 const float32_t src0 = 15971 _src[0][_i].f32; 15972 const float32_t src1 = 15973 _src[1][_i].f32; 15974 15975 float32_t dst = (src0 >= src1) ? 1.0f : 0.0f; 15976 15977 _dst_val[_i].f32 = dst; 15978 } 15979 15980 break; 15981 } 15982 case 64: { 15983 15984 15985 15986 15987 for (unsigned _i = 0; _i < num_components; _i++) { 15988 const float64_t src0 = 15989 _src[0][_i].f64; 15990 const float64_t src1 = 15991 _src[1][_i].f64; 15992 15993 float64_t dst = (src0 >= src1) ? 
1.0f : 0.0f; 15994 15995 _dst_val[_i].f64 = dst; 15996 } 15997 15998 break; 15999 } 16000 16001 default: 16002 unreachable("unknown bit width"); 16003 } 16004} 16005static void 16006evaluate_slt(nir_const_value *_dst_val, 16007 MAYBE_UNUSED unsigned num_components, 16008 UNUSED unsigned bit_size, 16009 MAYBE_UNUSED nir_const_value **_src) 16010{ 16011 16012 16013 16014 16015 for (unsigned _i = 0; _i < num_components; _i++) { 16016 const float32_t src0 = 16017 _src[0][_i].f32; 16018 const float32_t src1 = 16019 _src[1][_i].f32; 16020 16021 float32_t dst = (src0 < src1) ? 1.0f : 0.0f; 16022 16023 _dst_val[_i].f32 = dst; 16024 } 16025 16026} 16027static void 16028evaluate_sne(nir_const_value *_dst_val, 16029 MAYBE_UNUSED unsigned num_components, 16030 UNUSED unsigned bit_size, 16031 MAYBE_UNUSED nir_const_value **_src) 16032{ 16033 16034 16035 16036 16037 for (unsigned _i = 0; _i < num_components; _i++) { 16038 const float32_t src0 = 16039 _src[0][_i].f32; 16040 const float32_t src1 = 16041 _src[1][_i].f32; 16042 16043 float32_t dst = (src0 != src1) ? 
1.0f : 0.0f; 16044 16045 _dst_val[_i].f32 = dst; 16046 } 16047 16048} 16049static void 16050evaluate_u2f16(nir_const_value *_dst_val, 16051 MAYBE_UNUSED unsigned num_components, 16052 unsigned bit_size, 16053 MAYBE_UNUSED nir_const_value **_src) 16054{ 16055 switch (bit_size) { 16056 case 1: { 16057 16058 16059 16060 16061 for (unsigned _i = 0; _i < num_components; _i++) { 16062 const uint1_t src0 = 16063 _src[0][_i].b; 16064 16065 float16_t dst = src0; 16066 16067 _dst_val[_i].u16 = _mesa_float_to_half(dst); 16068 } 16069 16070 break; 16071 } 16072 case 8: { 16073 16074 16075 16076 16077 for (unsigned _i = 0; _i < num_components; _i++) { 16078 const uint8_t src0 = 16079 _src[0][_i].u8; 16080 16081 float16_t dst = src0; 16082 16083 _dst_val[_i].u16 = _mesa_float_to_half(dst); 16084 } 16085 16086 break; 16087 } 16088 case 16: { 16089 16090 16091 16092 16093 for (unsigned _i = 0; _i < num_components; _i++) { 16094 const uint16_t src0 = 16095 _src[0][_i].u16; 16096 16097 float16_t dst = src0; 16098 16099 _dst_val[_i].u16 = _mesa_float_to_half(dst); 16100 } 16101 16102 break; 16103 } 16104 case 32: { 16105 16106 16107 16108 16109 for (unsigned _i = 0; _i < num_components; _i++) { 16110 const uint32_t src0 = 16111 _src[0][_i].u32; 16112 16113 float16_t dst = src0; 16114 16115 _dst_val[_i].u16 = _mesa_float_to_half(dst); 16116 } 16117 16118 break; 16119 } 16120 case 64: { 16121 16122 16123 16124 16125 for (unsigned _i = 0; _i < num_components; _i++) { 16126 const uint64_t src0 = 16127 _src[0][_i].u64; 16128 16129 float16_t dst = src0; 16130 16131 _dst_val[_i].u16 = _mesa_float_to_half(dst); 16132 } 16133 16134 break; 16135 } 16136 16137 default: 16138 unreachable("unknown bit width"); 16139 } 16140} 16141static void 16142evaluate_u2f32(nir_const_value *_dst_val, 16143 MAYBE_UNUSED unsigned num_components, 16144 unsigned bit_size, 16145 MAYBE_UNUSED nir_const_value **_src) 16146{ 16147 switch (bit_size) { 16148 case 1: { 16149 16150 16151 16152 16153 for (unsigned _i = 0; 
_i < num_components; _i++) { 16154 const uint1_t src0 = 16155 _src[0][_i].b; 16156 16157 float32_t dst = src0; 16158 16159 _dst_val[_i].f32 = dst; 16160 } 16161 16162 break; 16163 } 16164 case 8: { 16165 16166 16167 16168 16169 for (unsigned _i = 0; _i < num_components; _i++) { 16170 const uint8_t src0 = 16171 _src[0][_i].u8; 16172 16173 float32_t dst = src0; 16174 16175 _dst_val[_i].f32 = dst; 16176 } 16177 16178 break; 16179 } 16180 case 16: { 16181 16182 16183 16184 16185 for (unsigned _i = 0; _i < num_components; _i++) { 16186 const uint16_t src0 = 16187 _src[0][_i].u16; 16188 16189 float32_t dst = src0; 16190 16191 _dst_val[_i].f32 = dst; 16192 } 16193 16194 break; 16195 } 16196 case 32: { 16197 16198 16199 16200 16201 for (unsigned _i = 0; _i < num_components; _i++) { 16202 const uint32_t src0 = 16203 _src[0][_i].u32; 16204 16205 float32_t dst = src0; 16206 16207 _dst_val[_i].f32 = dst; 16208 } 16209 16210 break; 16211 } 16212 case 64: { 16213 16214 16215 16216 16217 for (unsigned _i = 0; _i < num_components; _i++) { 16218 const uint64_t src0 = 16219 _src[0][_i].u64; 16220 16221 float32_t dst = src0; 16222 16223 _dst_val[_i].f32 = dst; 16224 } 16225 16226 break; 16227 } 16228 16229 default: 16230 unreachable("unknown bit width"); 16231 } 16232} 16233static void 16234evaluate_u2f64(nir_const_value *_dst_val, 16235 MAYBE_UNUSED unsigned num_components, 16236 unsigned bit_size, 16237 MAYBE_UNUSED nir_const_value **_src) 16238{ 16239 switch (bit_size) { 16240 case 1: { 16241 16242 16243 16244 16245 for (unsigned _i = 0; _i < num_components; _i++) { 16246 const uint1_t src0 = 16247 _src[0][_i].b; 16248 16249 float64_t dst = src0; 16250 16251 _dst_val[_i].f64 = dst; 16252 } 16253 16254 break; 16255 } 16256 case 8: { 16257 16258 16259 16260 16261 for (unsigned _i = 0; _i < num_components; _i++) { 16262 const uint8_t src0 = 16263 _src[0][_i].u8; 16264 16265 float64_t dst = src0; 16266 16267 _dst_val[_i].f64 = dst; 16268 } 16269 16270 break; 16271 } 16272 case 16: { 
16273 16274 16275 16276 16277 for (unsigned _i = 0; _i < num_components; _i++) { 16278 const uint16_t src0 = 16279 _src[0][_i].u16; 16280 16281 float64_t dst = src0; 16282 16283 _dst_val[_i].f64 = dst; 16284 } 16285 16286 break; 16287 } 16288 case 32: { 16289 16290 16291 16292 16293 for (unsigned _i = 0; _i < num_components; _i++) { 16294 const uint32_t src0 = 16295 _src[0][_i].u32; 16296 16297 float64_t dst = src0; 16298 16299 _dst_val[_i].f64 = dst; 16300 } 16301 16302 break; 16303 } 16304 case 64: { 16305 16306 16307 16308 16309 for (unsigned _i = 0; _i < num_components; _i++) { 16310 const uint64_t src0 = 16311 _src[0][_i].u64; 16312 16313 float64_t dst = src0; 16314 16315 _dst_val[_i].f64 = dst; 16316 } 16317 16318 break; 16319 } 16320 16321 default: 16322 unreachable("unknown bit width"); 16323 } 16324} 16325static void 16326evaluate_u2u1(nir_const_value *_dst_val, 16327 MAYBE_UNUSED unsigned num_components, 16328 unsigned bit_size, 16329 MAYBE_UNUSED nir_const_value **_src) 16330{ 16331 switch (bit_size) { 16332 case 1: { 16333 16334 16335 16336 16337 for (unsigned _i = 0; _i < num_components; _i++) { 16338 const uint1_t src0 = 16339 _src[0][_i].b; 16340 16341 uint1_t dst = src0; 16342 16343 /* 1-bit integers get truncated */ 16344 _dst_val[_i].b = dst & 1; 16345 } 16346 16347 break; 16348 } 16349 case 8: { 16350 16351 16352 16353 16354 for (unsigned _i = 0; _i < num_components; _i++) { 16355 const uint8_t src0 = 16356 _src[0][_i].u8; 16357 16358 uint1_t dst = src0; 16359 16360 /* 1-bit integers get truncated */ 16361 _dst_val[_i].b = dst & 1; 16362 } 16363 16364 break; 16365 } 16366 case 16: { 16367 16368 16369 16370 16371 for (unsigned _i = 0; _i < num_components; _i++) { 16372 const uint16_t src0 = 16373 _src[0][_i].u16; 16374 16375 uint1_t dst = src0; 16376 16377 /* 1-bit integers get truncated */ 16378 _dst_val[_i].b = dst & 1; 16379 } 16380 16381 break; 16382 } 16383 case 32: { 16384 16385 16386 16387 16388 for (unsigned _i = 0; _i < num_components; 
_i++) { 16389 const uint32_t src0 = 16390 _src[0][_i].u32; 16391 16392 uint1_t dst = src0; 16393 16394 /* 1-bit integers get truncated */ 16395 _dst_val[_i].b = dst & 1; 16396 } 16397 16398 break; 16399 } 16400 case 64: { 16401 16402 16403 16404 16405 for (unsigned _i = 0; _i < num_components; _i++) { 16406 const uint64_t src0 = 16407 _src[0][_i].u64; 16408 16409 uint1_t dst = src0; 16410 16411 /* 1-bit integers get truncated */ 16412 _dst_val[_i].b = dst & 1; 16413 } 16414 16415 break; 16416 } 16417 16418 default: 16419 unreachable("unknown bit width"); 16420 } 16421} 16422static void 16423evaluate_u2u16(nir_const_value *_dst_val, 16424 MAYBE_UNUSED unsigned num_components, 16425 unsigned bit_size, 16426 MAYBE_UNUSED nir_const_value **_src) 16427{ 16428 switch (bit_size) { 16429 case 1: { 16430 16431 16432 16433 16434 for (unsigned _i = 0; _i < num_components; _i++) { 16435 const uint1_t src0 = 16436 _src[0][_i].b; 16437 16438 uint16_t dst = src0; 16439 16440 _dst_val[_i].u16 = dst; 16441 } 16442 16443 break; 16444 } 16445 case 8: { 16446 16447 16448 16449 16450 for (unsigned _i = 0; _i < num_components; _i++) { 16451 const uint8_t src0 = 16452 _src[0][_i].u8; 16453 16454 uint16_t dst = src0; 16455 16456 _dst_val[_i].u16 = dst; 16457 } 16458 16459 break; 16460 } 16461 case 16: { 16462 16463 16464 16465 16466 for (unsigned _i = 0; _i < num_components; _i++) { 16467 const uint16_t src0 = 16468 _src[0][_i].u16; 16469 16470 uint16_t dst = src0; 16471 16472 _dst_val[_i].u16 = dst; 16473 } 16474 16475 break; 16476 } 16477 case 32: { 16478 16479 16480 16481 16482 for (unsigned _i = 0; _i < num_components; _i++) { 16483 const uint32_t src0 = 16484 _src[0][_i].u32; 16485 16486 uint16_t dst = src0; 16487 16488 _dst_val[_i].u16 = dst; 16489 } 16490 16491 break; 16492 } 16493 case 64: { 16494 16495 16496 16497 16498 for (unsigned _i = 0; _i < num_components; _i++) { 16499 const uint64_t src0 = 16500 _src[0][_i].u64; 16501 16502 uint16_t dst = src0; 16503 16504 
_dst_val[_i].u16 = dst; 16505 } 16506 16507 break; 16508 } 16509 16510 default: 16511 unreachable("unknown bit width"); 16512 } 16513} 16514static void 16515evaluate_u2u32(nir_const_value *_dst_val, 16516 MAYBE_UNUSED unsigned num_components, 16517 unsigned bit_size, 16518 MAYBE_UNUSED nir_const_value **_src) 16519{ 16520 switch (bit_size) { 16521 case 1: { 16522 16523 16524 16525 16526 for (unsigned _i = 0; _i < num_components; _i++) { 16527 const uint1_t src0 = 16528 _src[0][_i].b; 16529 16530 uint32_t dst = src0; 16531 16532 _dst_val[_i].u32 = dst; 16533 } 16534 16535 break; 16536 } 16537 case 8: { 16538 16539 16540 16541 16542 for (unsigned _i = 0; _i < num_components; _i++) { 16543 const uint8_t src0 = 16544 _src[0][_i].u8; 16545 16546 uint32_t dst = src0; 16547 16548 _dst_val[_i].u32 = dst; 16549 } 16550 16551 break; 16552 } 16553 case 16: { 16554 16555 16556 16557 16558 for (unsigned _i = 0; _i < num_components; _i++) { 16559 const uint16_t src0 = 16560 _src[0][_i].u16; 16561 16562 uint32_t dst = src0; 16563 16564 _dst_val[_i].u32 = dst; 16565 } 16566 16567 break; 16568 } 16569 case 32: { 16570 16571 16572 16573 16574 for (unsigned _i = 0; _i < num_components; _i++) { 16575 const uint32_t src0 = 16576 _src[0][_i].u32; 16577 16578 uint32_t dst = src0; 16579 16580 _dst_val[_i].u32 = dst; 16581 } 16582 16583 break; 16584 } 16585 case 64: { 16586 16587 16588 16589 16590 for (unsigned _i = 0; _i < num_components; _i++) { 16591 const uint64_t src0 = 16592 _src[0][_i].u64; 16593 16594 uint32_t dst = src0; 16595 16596 _dst_val[_i].u32 = dst; 16597 } 16598 16599 break; 16600 } 16601 16602 default: 16603 unreachable("unknown bit width"); 16604 } 16605} 16606static void 16607evaluate_u2u64(nir_const_value *_dst_val, 16608 MAYBE_UNUSED unsigned num_components, 16609 unsigned bit_size, 16610 MAYBE_UNUSED nir_const_value **_src) 16611{ 16612 switch (bit_size) { 16613 case 1: { 16614 16615 16616 16617 16618 for (unsigned _i = 0; _i < num_components; _i++) { 16619 const 
uint1_t src0 = 16620 _src[0][_i].b; 16621 16622 uint64_t dst = src0; 16623 16624 _dst_val[_i].u64 = dst; 16625 } 16626 16627 break; 16628 } 16629 case 8: { 16630 16631 16632 16633 16634 for (unsigned _i = 0; _i < num_components; _i++) { 16635 const uint8_t src0 = 16636 _src[0][_i].u8; 16637 16638 uint64_t dst = src0; 16639 16640 _dst_val[_i].u64 = dst; 16641 } 16642 16643 break; 16644 } 16645 case 16: { 16646 16647 16648 16649 16650 for (unsigned _i = 0; _i < num_components; _i++) { 16651 const uint16_t src0 = 16652 _src[0][_i].u16; 16653 16654 uint64_t dst = src0; 16655 16656 _dst_val[_i].u64 = dst; 16657 } 16658 16659 break; 16660 } 16661 case 32: { 16662 16663 16664 16665 16666 for (unsigned _i = 0; _i < num_components; _i++) { 16667 const uint32_t src0 = 16668 _src[0][_i].u32; 16669 16670 uint64_t dst = src0; 16671 16672 _dst_val[_i].u64 = dst; 16673 } 16674 16675 break; 16676 } 16677 case 64: { 16678 16679 16680 16681 16682 for (unsigned _i = 0; _i < num_components; _i++) { 16683 const uint64_t src0 = 16684 _src[0][_i].u64; 16685 16686 uint64_t dst = src0; 16687 16688 _dst_val[_i].u64 = dst; 16689 } 16690 16691 break; 16692 } 16693 16694 default: 16695 unreachable("unknown bit width"); 16696 } 16697} 16698static void 16699evaluate_u2u8(nir_const_value *_dst_val, 16700 MAYBE_UNUSED unsigned num_components, 16701 unsigned bit_size, 16702 MAYBE_UNUSED nir_const_value **_src) 16703{ 16704 switch (bit_size) { 16705 case 1: { 16706 16707 16708 16709 16710 for (unsigned _i = 0; _i < num_components; _i++) { 16711 const uint1_t src0 = 16712 _src[0][_i].b; 16713 16714 uint8_t dst = src0; 16715 16716 _dst_val[_i].u8 = dst; 16717 } 16718 16719 break; 16720 } 16721 case 8: { 16722 16723 16724 16725 16726 for (unsigned _i = 0; _i < num_components; _i++) { 16727 const uint8_t src0 = 16728 _src[0][_i].u8; 16729 16730 uint8_t dst = src0; 16731 16732 _dst_val[_i].u8 = dst; 16733 } 16734 16735 break; 16736 } 16737 case 16: { 16738 16739 16740 16741 16742 for (unsigned _i = 0; _i 
< num_components; _i++) { 16743 const uint16_t src0 = 16744 _src[0][_i].u16; 16745 16746 uint8_t dst = src0; 16747 16748 _dst_val[_i].u8 = dst; 16749 } 16750 16751 break; 16752 } 16753 case 32: { 16754 16755 16756 16757 16758 for (unsigned _i = 0; _i < num_components; _i++) { 16759 const uint32_t src0 = 16760 _src[0][_i].u32; 16761 16762 uint8_t dst = src0; 16763 16764 _dst_val[_i].u8 = dst; 16765 } 16766 16767 break; 16768 } 16769 case 64: { 16770 16771 16772 16773 16774 for (unsigned _i = 0; _i < num_components; _i++) { 16775 const uint64_t src0 = 16776 _src[0][_i].u64; 16777 16778 uint8_t dst = src0; 16779 16780 _dst_val[_i].u8 = dst; 16781 } 16782 16783 break; 16784 } 16785 16786 default: 16787 unreachable("unknown bit width"); 16788 } 16789} 16790static void 16791evaluate_uadd_carry(nir_const_value *_dst_val, 16792 MAYBE_UNUSED unsigned num_components, 16793 unsigned bit_size, 16794 MAYBE_UNUSED nir_const_value **_src) 16795{ 16796 switch (bit_size) { 16797 case 1: { 16798 16799 16800 16801 16802 for (unsigned _i = 0; _i < num_components; _i++) { 16803 const uint1_t src0 = 16804 _src[0][_i].b; 16805 const uint1_t src1 = 16806 _src[1][_i].b; 16807 16808 uint1_t dst = src0 + src1 < src0; 16809 16810 /* 1-bit integers get truncated */ 16811 _dst_val[_i].b = dst & 1; 16812 } 16813 16814 break; 16815 } 16816 case 8: { 16817 16818 16819 16820 16821 for (unsigned _i = 0; _i < num_components; _i++) { 16822 const uint8_t src0 = 16823 _src[0][_i].u8; 16824 const uint8_t src1 = 16825 _src[1][_i].u8; 16826 16827 uint8_t dst = src0 + src1 < src0; 16828 16829 _dst_val[_i].u8 = dst; 16830 } 16831 16832 break; 16833 } 16834 case 16: { 16835 16836 16837 16838 16839 for (unsigned _i = 0; _i < num_components; _i++) { 16840 const uint16_t src0 = 16841 _src[0][_i].u16; 16842 const uint16_t src1 = 16843 _src[1][_i].u16; 16844 16845 uint16_t dst = src0 + src1 < src0; 16846 16847 _dst_val[_i].u16 = dst; 16848 } 16849 16850 break; 16851 } 16852 case 32: { 16853 16854 16855 16856 16857 
for (unsigned _i = 0; _i < num_components; _i++) { 16858 const uint32_t src0 = 16859 _src[0][_i].u32; 16860 const uint32_t src1 = 16861 _src[1][_i].u32; 16862 16863 uint32_t dst = src0 + src1 < src0; 16864 16865 _dst_val[_i].u32 = dst; 16866 } 16867 16868 break; 16869 } 16870 case 64: { 16871 16872 16873 16874 16875 for (unsigned _i = 0; _i < num_components; _i++) { 16876 const uint64_t src0 = 16877 _src[0][_i].u64; 16878 const uint64_t src1 = 16879 _src[1][_i].u64; 16880 16881 uint64_t dst = src0 + src1 < src0; 16882 16883 _dst_val[_i].u64 = dst; 16884 } 16885 16886 break; 16887 } 16888 16889 default: 16890 unreachable("unknown bit width"); 16891 } 16892} 16893static void 16894evaluate_uadd_sat(nir_const_value *_dst_val, 16895 MAYBE_UNUSED unsigned num_components, 16896 unsigned bit_size, 16897 MAYBE_UNUSED nir_const_value **_src) 16898{ 16899 switch (bit_size) { 16900 case 1: { 16901 16902 16903 16904 16905 for (unsigned _i = 0; _i < num_components; _i++) { 16906 const uint1_t src0 = 16907 _src[0][_i].b; 16908 const uint1_t src1 = 16909 _src[1][_i].b; 16910 16911 uint1_t dst = (src0 + src1) < src0 ? MAX_UINT_FOR_SIZE(sizeof(src0) * 8) : (src0 + src1); 16912 16913 /* 1-bit integers get truncated */ 16914 _dst_val[_i].b = dst & 1; 16915 } 16916 16917 break; 16918 } 16919 case 8: { 16920 16921 16922 16923 16924 for (unsigned _i = 0; _i < num_components; _i++) { 16925 const uint8_t src0 = 16926 _src[0][_i].u8; 16927 const uint8_t src1 = 16928 _src[1][_i].u8; 16929 16930 uint8_t dst = (src0 + src1) < src0 ? MAX_UINT_FOR_SIZE(sizeof(src0) * 8) : (src0 + src1); 16931 16932 _dst_val[_i].u8 = dst; 16933 } 16934 16935 break; 16936 } 16937 case 16: { 16938 16939 16940 16941 16942 for (unsigned _i = 0; _i < num_components; _i++) { 16943 const uint16_t src0 = 16944 _src[0][_i].u16; 16945 const uint16_t src1 = 16946 _src[1][_i].u16; 16947 16948 uint16_t dst = (src0 + src1) < src0 ? 
MAX_UINT_FOR_SIZE(sizeof(src0) * 8) : (src0 + src1); 16949 16950 _dst_val[_i].u16 = dst; 16951 } 16952 16953 break; 16954 } 16955 case 32: { 16956 16957 16958 16959 16960 for (unsigned _i = 0; _i < num_components; _i++) { 16961 const uint32_t src0 = 16962 _src[0][_i].u32; 16963 const uint32_t src1 = 16964 _src[1][_i].u32; 16965 16966 uint32_t dst = (src0 + src1) < src0 ? MAX_UINT_FOR_SIZE(sizeof(src0) * 8) : (src0 + src1); 16967 16968 _dst_val[_i].u32 = dst; 16969 } 16970 16971 break; 16972 } 16973 case 64: { 16974 16975 16976 16977 16978 for (unsigned _i = 0; _i < num_components; _i++) { 16979 const uint64_t src0 = 16980 _src[0][_i].u64; 16981 const uint64_t src1 = 16982 _src[1][_i].u64; 16983 16984 uint64_t dst = (src0 + src1) < src0 ? MAX_UINT_FOR_SIZE(sizeof(src0) * 8) : (src0 + src1); 16985 16986 _dst_val[_i].u64 = dst; 16987 } 16988 16989 break; 16990 } 16991 16992 default: 16993 unreachable("unknown bit width"); 16994 } 16995} 16996static void 16997evaluate_ubfe(nir_const_value *_dst_val, 16998 MAYBE_UNUSED unsigned num_components, 16999 UNUSED unsigned bit_size, 17000 MAYBE_UNUSED nir_const_value **_src) 17001{ 17002 17003 17004 17005 17006 for (unsigned _i = 0; _i < num_components; _i++) { 17007 const uint32_t src0 = 17008 _src[0][_i].u32; 17009 const int32_t src1 = 17010 _src[1][_i].i32; 17011 const int32_t src2 = 17012 _src[2][_i].i32; 17013 17014 uint32_t dst; 17015 17016 17017unsigned base = src0; 17018int offset = src1, bits = src2; 17019if (bits == 0) { 17020 dst = 0; 17021} else if (bits < 0 || offset < 0) { 17022 dst = 0; /* undefined */ 17023} else if (offset + bits < 32) { 17024 dst = (base << (32 - bits - offset)) >> (32 - bits); 17025} else { 17026 dst = base >> offset; 17027} 17028 17029 17030 _dst_val[_i].u32 = dst; 17031 } 17032 17033} 17034static void 17035evaluate_ubitfield_extract(nir_const_value *_dst_val, 17036 MAYBE_UNUSED unsigned num_components, 17037 UNUSED unsigned bit_size, 17038 MAYBE_UNUSED nir_const_value **_src) 17039{ 17040 
17041 17042 17043 17044 for (unsigned _i = 0; _i < num_components; _i++) { 17045 const uint32_t src0 = 17046 _src[0][_i].u32; 17047 const int32_t src1 = 17048 _src[1][_i].i32; 17049 const int32_t src2 = 17050 _src[2][_i].i32; 17051 17052 uint32_t dst; 17053 17054 17055unsigned base = src0; 17056int offset = src1, bits = src2; 17057if (bits == 0) { 17058 dst = 0; 17059} else if (bits < 0 || offset < 0 || offset + bits > 32) { 17060 dst = 0; /* undefined per the spec */ 17061} else { 17062 dst = (base >> offset) & ((1ull << bits) - 1); 17063} 17064 17065 17066 _dst_val[_i].u32 = dst; 17067 } 17068 17069} 17070static void 17071evaluate_udiv(nir_const_value *_dst_val, 17072 MAYBE_UNUSED unsigned num_components, 17073 unsigned bit_size, 17074 MAYBE_UNUSED nir_const_value **_src) 17075{ 17076 switch (bit_size) { 17077 case 1: { 17078 17079 17080 17081 17082 for (unsigned _i = 0; _i < num_components; _i++) { 17083 const uint1_t src0 = 17084 _src[0][_i].b; 17085 const uint1_t src1 = 17086 _src[1][_i].b; 17087 17088 uint1_t dst = src1 == 0 ? 0 : (src0 / src1); 17089 17090 /* 1-bit integers get truncated */ 17091 _dst_val[_i].b = dst & 1; 17092 } 17093 17094 break; 17095 } 17096 case 8: { 17097 17098 17099 17100 17101 for (unsigned _i = 0; _i < num_components; _i++) { 17102 const uint8_t src0 = 17103 _src[0][_i].u8; 17104 const uint8_t src1 = 17105 _src[1][_i].u8; 17106 17107 uint8_t dst = src1 == 0 ? 0 : (src0 / src1); 17108 17109 _dst_val[_i].u8 = dst; 17110 } 17111 17112 break; 17113 } 17114 case 16: { 17115 17116 17117 17118 17119 for (unsigned _i = 0; _i < num_components; _i++) { 17120 const uint16_t src0 = 17121 _src[0][_i].u16; 17122 const uint16_t src1 = 17123 _src[1][_i].u16; 17124 17125 uint16_t dst = src1 == 0 ? 
0 : (src0 / src1); 17126 17127 _dst_val[_i].u16 = dst; 17128 } 17129 17130 break; 17131 } 17132 case 32: { 17133 17134 17135 17136 17137 for (unsigned _i = 0; _i < num_components; _i++) { 17138 const uint32_t src0 = 17139 _src[0][_i].u32; 17140 const uint32_t src1 = 17141 _src[1][_i].u32; 17142 17143 uint32_t dst = src1 == 0 ? 0 : (src0 / src1); 17144 17145 _dst_val[_i].u32 = dst; 17146 } 17147 17148 break; 17149 } 17150 case 64: { 17151 17152 17153 17154 17155 for (unsigned _i = 0; _i < num_components; _i++) { 17156 const uint64_t src0 = 17157 _src[0][_i].u64; 17158 const uint64_t src1 = 17159 _src[1][_i].u64; 17160 17161 uint64_t dst = src1 == 0 ? 0 : (src0 / src1); 17162 17163 _dst_val[_i].u64 = dst; 17164 } 17165 17166 break; 17167 } 17168 17169 default: 17170 unreachable("unknown bit width"); 17171 } 17172} 17173static void 17174evaluate_ufind_msb(nir_const_value *_dst_val, 17175 MAYBE_UNUSED unsigned num_components, 17176 unsigned bit_size, 17177 MAYBE_UNUSED nir_const_value **_src) 17178{ 17179 switch (bit_size) { 17180 case 1: { 17181 17182 17183 17184 17185 for (unsigned _i = 0; _i < num_components; _i++) { 17186 const uint1_t src0 = 17187 _src[0][_i].b; 17188 17189 int32_t dst; 17190 17191 17192dst = -1; 17193for (int bit = bit_size - 1; bit >= 0; bit--) { 17194 if ((src0 >> bit) & 1) { 17195 dst = bit; 17196 break; 17197 } 17198} 17199 17200 17201 _dst_val[_i].i32 = dst; 17202 } 17203 17204 break; 17205 } 17206 case 8: { 17207 17208 17209 17210 17211 for (unsigned _i = 0; _i < num_components; _i++) { 17212 const uint8_t src0 = 17213 _src[0][_i].u8; 17214 17215 int32_t dst; 17216 17217 17218dst = -1; 17219for (int bit = bit_size - 1; bit >= 0; bit--) { 17220 if ((src0 >> bit) & 1) { 17221 dst = bit; 17222 break; 17223 } 17224} 17225 17226 17227 _dst_val[_i].i32 = dst; 17228 } 17229 17230 break; 17231 } 17232 case 16: { 17233 17234 17235 17236 17237 for (unsigned _i = 0; _i < num_components; _i++) { 17238 const uint16_t src0 = 17239 _src[0][_i].u16; 17240 
17241 int32_t dst; 17242 17243 17244dst = -1; 17245for (int bit = bit_size - 1; bit >= 0; bit--) { 17246 if ((src0 >> bit) & 1) { 17247 dst = bit; 17248 break; 17249 } 17250} 17251 17252 17253 _dst_val[_i].i32 = dst; 17254 } 17255 17256 break; 17257 } 17258 case 32: { 17259 17260 17261 17262 17263 for (unsigned _i = 0; _i < num_components; _i++) { 17264 const uint32_t src0 = 17265 _src[0][_i].u32; 17266 17267 int32_t dst; 17268 17269 17270dst = -1; 17271for (int bit = bit_size - 1; bit >= 0; bit--) { 17272 if ((src0 >> bit) & 1) { 17273 dst = bit; 17274 break; 17275 } 17276} 17277 17278 17279 _dst_val[_i].i32 = dst; 17280 } 17281 17282 break; 17283 } 17284 case 64: { 17285 17286 17287 17288 17289 for (unsigned _i = 0; _i < num_components; _i++) { 17290 const uint64_t src0 = 17291 _src[0][_i].u64; 17292 17293 int32_t dst; 17294 17295 17296dst = -1; 17297for (int bit = bit_size - 1; bit >= 0; bit--) { 17298 if ((src0 >> bit) & 1) { 17299 dst = bit; 17300 break; 17301 } 17302} 17303 17304 17305 _dst_val[_i].i32 = dst; 17306 } 17307 17308 break; 17309 } 17310 17311 default: 17312 unreachable("unknown bit width"); 17313 } 17314} 17315static void 17316evaluate_uge(nir_const_value *_dst_val, 17317 MAYBE_UNUSED unsigned num_components, 17318 unsigned bit_size, 17319 MAYBE_UNUSED nir_const_value **_src) 17320{ 17321 switch (bit_size) { 17322 case 1: { 17323 17324 17325 17326 17327 for (unsigned _i = 0; _i < num_components; _i++) { 17328 const uint1_t src0 = 17329 _src[0][_i].b; 17330 const uint1_t src1 = 17331 _src[1][_i].b; 17332 17333 bool1_t dst = src0 >= src1; 17334 17335 _dst_val[_i].b = -(int)dst; 17336 } 17337 17338 break; 17339 } 17340 case 8: { 17341 17342 17343 17344 17345 for (unsigned _i = 0; _i < num_components; _i++) { 17346 const uint8_t src0 = 17347 _src[0][_i].u8; 17348 const uint8_t src1 = 17349 _src[1][_i].u8; 17350 17351 bool1_t dst = src0 >= src1; 17352 17353 _dst_val[_i].b = -(int)dst; 17354 } 17355 17356 break; 17357 } 17358 case 16: { 17359 17360 
17361 17362 17363 for (unsigned _i = 0; _i < num_components; _i++) { 17364 const uint16_t src0 = 17365 _src[0][_i].u16; 17366 const uint16_t src1 = 17367 _src[1][_i].u16; 17368 17369 bool1_t dst = src0 >= src1; 17370 17371 _dst_val[_i].b = -(int)dst; 17372 } 17373 17374 break; 17375 } 17376 case 32: { 17377 17378 17379 17380 17381 for (unsigned _i = 0; _i < num_components; _i++) { 17382 const uint32_t src0 = 17383 _src[0][_i].u32; 17384 const uint32_t src1 = 17385 _src[1][_i].u32; 17386 17387 bool1_t dst = src0 >= src1; 17388 17389 _dst_val[_i].b = -(int)dst; 17390 } 17391 17392 break; 17393 } 17394 case 64: { 17395 17396 17397 17398 17399 for (unsigned _i = 0; _i < num_components; _i++) { 17400 const uint64_t src0 = 17401 _src[0][_i].u64; 17402 const uint64_t src1 = 17403 _src[1][_i].u64; 17404 17405 bool1_t dst = src0 >= src1; 17406 17407 _dst_val[_i].b = -(int)dst; 17408 } 17409 17410 break; 17411 } 17412 17413 default: 17414 unreachable("unknown bit width"); 17415 } 17416} 17417static void 17418evaluate_uge32(nir_const_value *_dst_val, 17419 MAYBE_UNUSED unsigned num_components, 17420 unsigned bit_size, 17421 MAYBE_UNUSED nir_const_value **_src) 17422{ 17423 switch (bit_size) { 17424 case 1: { 17425 17426 17427 17428 17429 for (unsigned _i = 0; _i < num_components; _i++) { 17430 const uint1_t src0 = 17431 _src[0][_i].b; 17432 const uint1_t src1 = 17433 _src[1][_i].b; 17434 17435 bool32_t dst = src0 >= src1; 17436 17437 _dst_val[_i].i32 = -(int)dst; 17438 } 17439 17440 break; 17441 } 17442 case 8: { 17443 17444 17445 17446 17447 for (unsigned _i = 0; _i < num_components; _i++) { 17448 const uint8_t src0 = 17449 _src[0][_i].u8; 17450 const uint8_t src1 = 17451 _src[1][_i].u8; 17452 17453 bool32_t dst = src0 >= src1; 17454 17455 _dst_val[_i].i32 = -(int)dst; 17456 } 17457 17458 break; 17459 } 17460 case 16: { 17461 17462 17463 17464 17465 for (unsigned _i = 0; _i < num_components; _i++) { 17466 const uint16_t src0 = 17467 _src[0][_i].u16; 17468 const uint16_t src1 
= 17469 _src[1][_i].u16; 17470 17471 bool32_t dst = src0 >= src1; 17472 17473 _dst_val[_i].i32 = -(int)dst; 17474 } 17475 17476 break; 17477 } 17478 case 32: { 17479 17480 17481 17482 17483 for (unsigned _i = 0; _i < num_components; _i++) { 17484 const uint32_t src0 = 17485 _src[0][_i].u32; 17486 const uint32_t src1 = 17487 _src[1][_i].u32; 17488 17489 bool32_t dst = src0 >= src1; 17490 17491 _dst_val[_i].i32 = -(int)dst; 17492 } 17493 17494 break; 17495 } 17496 case 64: { 17497 17498 17499 17500 17501 for (unsigned _i = 0; _i < num_components; _i++) { 17502 const uint64_t src0 = 17503 _src[0][_i].u64; 17504 const uint64_t src1 = 17505 _src[1][_i].u64; 17506 17507 bool32_t dst = src0 >= src1; 17508 17509 _dst_val[_i].i32 = -(int)dst; 17510 } 17511 17512 break; 17513 } 17514 17515 default: 17516 unreachable("unknown bit width"); 17517 } 17518} 17519static void 17520evaluate_uhadd(nir_const_value *_dst_val, 17521 MAYBE_UNUSED unsigned num_components, 17522 unsigned bit_size, 17523 MAYBE_UNUSED nir_const_value **_src) 17524{ 17525 switch (bit_size) { 17526 case 1: { 17527 17528 17529 17530 17531 for (unsigned _i = 0; _i < num_components; _i++) { 17532 const uint1_t src0 = 17533 _src[0][_i].b; 17534 const uint1_t src1 = 17535 _src[1][_i].b; 17536 17537 uint1_t dst = (src0 & src1) + ((src0 ^ src1) >> 1); 17538 17539 /* 1-bit integers get truncated */ 17540 _dst_val[_i].b = dst & 1; 17541 } 17542 17543 break; 17544 } 17545 case 8: { 17546 17547 17548 17549 17550 for (unsigned _i = 0; _i < num_components; _i++) { 17551 const uint8_t src0 = 17552 _src[0][_i].u8; 17553 const uint8_t src1 = 17554 _src[1][_i].u8; 17555 17556 uint8_t dst = (src0 & src1) + ((src0 ^ src1) >> 1); 17557 17558 _dst_val[_i].u8 = dst; 17559 } 17560 17561 break; 17562 } 17563 case 16: { 17564 17565 17566 17567 17568 for (unsigned _i = 0; _i < num_components; _i++) { 17569 const uint16_t src0 = 17570 _src[0][_i].u16; 17571 const uint16_t src1 = 17572 _src[1][_i].u16; 17573 17574 uint16_t dst = (src0 & 
src1) + ((src0 ^ src1) >> 1); 17575 17576 _dst_val[_i].u16 = dst; 17577 } 17578 17579 break; 17580 } 17581 case 32: { 17582 17583 17584 17585 17586 for (unsigned _i = 0; _i < num_components; _i++) { 17587 const uint32_t src0 = 17588 _src[0][_i].u32; 17589 const uint32_t src1 = 17590 _src[1][_i].u32; 17591 17592 uint32_t dst = (src0 & src1) + ((src0 ^ src1) >> 1); 17593 17594 _dst_val[_i].u32 = dst; 17595 } 17596 17597 break; 17598 } 17599 case 64: { 17600 17601 17602 17603 17604 for (unsigned _i = 0; _i < num_components; _i++) { 17605 const uint64_t src0 = 17606 _src[0][_i].u64; 17607 const uint64_t src1 = 17608 _src[1][_i].u64; 17609 17610 uint64_t dst = (src0 & src1) + ((src0 ^ src1) >> 1); 17611 17612 _dst_val[_i].u64 = dst; 17613 } 17614 17615 break; 17616 } 17617 17618 default: 17619 unreachable("unknown bit width"); 17620 } 17621} 17622static void 17623evaluate_ult(nir_const_value *_dst_val, 17624 MAYBE_UNUSED unsigned num_components, 17625 unsigned bit_size, 17626 MAYBE_UNUSED nir_const_value **_src) 17627{ 17628 switch (bit_size) { 17629 case 1: { 17630 17631 17632 17633 17634 for (unsigned _i = 0; _i < num_components; _i++) { 17635 const uint1_t src0 = 17636 _src[0][_i].b; 17637 const uint1_t src1 = 17638 _src[1][_i].b; 17639 17640 bool1_t dst = src0 < src1; 17641 17642 _dst_val[_i].b = -(int)dst; 17643 } 17644 17645 break; 17646 } 17647 case 8: { 17648 17649 17650 17651 17652 for (unsigned _i = 0; _i < num_components; _i++) { 17653 const uint8_t src0 = 17654 _src[0][_i].u8; 17655 const uint8_t src1 = 17656 _src[1][_i].u8; 17657 17658 bool1_t dst = src0 < src1; 17659 17660 _dst_val[_i].b = -(int)dst; 17661 } 17662 17663 break; 17664 } 17665 case 16: { 17666 17667 17668 17669 17670 for (unsigned _i = 0; _i < num_components; _i++) { 17671 const uint16_t src0 = 17672 _src[0][_i].u16; 17673 const uint16_t src1 = 17674 _src[1][_i].u16; 17675 17676 bool1_t dst = src0 < src1; 17677 17678 _dst_val[_i].b = -(int)dst; 17679 } 17680 17681 break; 17682 } 17683 case 
32: { 17684 17685 17686 17687 17688 for (unsigned _i = 0; _i < num_components; _i++) { 17689 const uint32_t src0 = 17690 _src[0][_i].u32; 17691 const uint32_t src1 = 17692 _src[1][_i].u32; 17693 17694 bool1_t dst = src0 < src1; 17695 17696 _dst_val[_i].b = -(int)dst; 17697 } 17698 17699 break; 17700 } 17701 case 64: { 17702 17703 17704 17705 17706 for (unsigned _i = 0; _i < num_components; _i++) { 17707 const uint64_t src0 = 17708 _src[0][_i].u64; 17709 const uint64_t src1 = 17710 _src[1][_i].u64; 17711 17712 bool1_t dst = src0 < src1; 17713 17714 _dst_val[_i].b = -(int)dst; 17715 } 17716 17717 break; 17718 } 17719 17720 default: 17721 unreachable("unknown bit width"); 17722 } 17723} 17724static void 17725evaluate_ult32(nir_const_value *_dst_val, 17726 MAYBE_UNUSED unsigned num_components, 17727 unsigned bit_size, 17728 MAYBE_UNUSED nir_const_value **_src) 17729{ 17730 switch (bit_size) { 17731 case 1: { 17732 17733 17734 17735 17736 for (unsigned _i = 0; _i < num_components; _i++) { 17737 const uint1_t src0 = 17738 _src[0][_i].b; 17739 const uint1_t src1 = 17740 _src[1][_i].b; 17741 17742 bool32_t dst = src0 < src1; 17743 17744 _dst_val[_i].i32 = -(int)dst; 17745 } 17746 17747 break; 17748 } 17749 case 8: { 17750 17751 17752 17753 17754 for (unsigned _i = 0; _i < num_components; _i++) { 17755 const uint8_t src0 = 17756 _src[0][_i].u8; 17757 const uint8_t src1 = 17758 _src[1][_i].u8; 17759 17760 bool32_t dst = src0 < src1; 17761 17762 _dst_val[_i].i32 = -(int)dst; 17763 } 17764 17765 break; 17766 } 17767 case 16: { 17768 17769 17770 17771 17772 for (unsigned _i = 0; _i < num_components; _i++) { 17773 const uint16_t src0 = 17774 _src[0][_i].u16; 17775 const uint16_t src1 = 17776 _src[1][_i].u16; 17777 17778 bool32_t dst = src0 < src1; 17779 17780 _dst_val[_i].i32 = -(int)dst; 17781 } 17782 17783 break; 17784 } 17785 case 32: { 17786 17787 17788 17789 17790 for (unsigned _i = 0; _i < num_components; _i++) { 17791 const uint32_t src0 = 17792 _src[0][_i].u32; 17793 
const uint32_t src1 = 17794 _src[1][_i].u32; 17795 17796 bool32_t dst = src0 < src1; 17797 17798 _dst_val[_i].i32 = -(int)dst; 17799 } 17800 17801 break; 17802 } 17803 case 64: { 17804 17805 17806 17807 17808 for (unsigned _i = 0; _i < num_components; _i++) { 17809 const uint64_t src0 = 17810 _src[0][_i].u64; 17811 const uint64_t src1 = 17812 _src[1][_i].u64; 17813 17814 bool32_t dst = src0 < src1; 17815 17816 _dst_val[_i].i32 = -(int)dst; 17817 } 17818 17819 break; 17820 } 17821 17822 default: 17823 unreachable("unknown bit width"); 17824 } 17825} 17826static void 17827evaluate_umax(nir_const_value *_dst_val, 17828 MAYBE_UNUSED unsigned num_components, 17829 unsigned bit_size, 17830 MAYBE_UNUSED nir_const_value **_src) 17831{ 17832 switch (bit_size) { 17833 case 1: { 17834 17835 17836 17837 17838 for (unsigned _i = 0; _i < num_components; _i++) { 17839 const uint1_t src0 = 17840 _src[0][_i].b; 17841 const uint1_t src1 = 17842 _src[1][_i].b; 17843 17844 uint1_t dst = src1 > src0 ? src1 : src0; 17845 17846 /* 1-bit integers get truncated */ 17847 _dst_val[_i].b = dst & 1; 17848 } 17849 17850 break; 17851 } 17852 case 8: { 17853 17854 17855 17856 17857 for (unsigned _i = 0; _i < num_components; _i++) { 17858 const uint8_t src0 = 17859 _src[0][_i].u8; 17860 const uint8_t src1 = 17861 _src[1][_i].u8; 17862 17863 uint8_t dst = src1 > src0 ? src1 : src0; 17864 17865 _dst_val[_i].u8 = dst; 17866 } 17867 17868 break; 17869 } 17870 case 16: { 17871 17872 17873 17874 17875 for (unsigned _i = 0; _i < num_components; _i++) { 17876 const uint16_t src0 = 17877 _src[0][_i].u16; 17878 const uint16_t src1 = 17879 _src[1][_i].u16; 17880 17881 uint16_t dst = src1 > src0 ? 
src1 : src0; 17882 17883 _dst_val[_i].u16 = dst; 17884 } 17885 17886 break; 17887 } 17888 case 32: { 17889 17890 17891 17892 17893 for (unsigned _i = 0; _i < num_components; _i++) { 17894 const uint32_t src0 = 17895 _src[0][_i].u32; 17896 const uint32_t src1 = 17897 _src[1][_i].u32; 17898 17899 uint32_t dst = src1 > src0 ? src1 : src0; 17900 17901 _dst_val[_i].u32 = dst; 17902 } 17903 17904 break; 17905 } 17906 case 64: { 17907 17908 17909 17910 17911 for (unsigned _i = 0; _i < num_components; _i++) { 17912 const uint64_t src0 = 17913 _src[0][_i].u64; 17914 const uint64_t src1 = 17915 _src[1][_i].u64; 17916 17917 uint64_t dst = src1 > src0 ? src1 : src0; 17918 17919 _dst_val[_i].u64 = dst; 17920 } 17921 17922 break; 17923 } 17924 17925 default: 17926 unreachable("unknown bit width"); 17927 } 17928} 17929static void 17930evaluate_umax3(nir_const_value *_dst_val, 17931 MAYBE_UNUSED unsigned num_components, 17932 unsigned bit_size, 17933 MAYBE_UNUSED nir_const_value **_src) 17934{ 17935 switch (bit_size) { 17936 case 1: { 17937 17938 17939 17940 17941 for (unsigned _i = 0; _i < num_components; _i++) { 17942 const uint1_t src0 = 17943 _src[0][_i].b; 17944 const uint1_t src1 = 17945 _src[1][_i].b; 17946 const uint1_t src2 = 17947 _src[2][_i].b; 17948 17949 uint1_t dst = MAX2(src0, MAX2(src1, src2)); 17950 17951 /* 1-bit integers get truncated */ 17952 _dst_val[_i].b = dst & 1; 17953 } 17954 17955 break; 17956 } 17957 case 8: { 17958 17959 17960 17961 17962 for (unsigned _i = 0; _i < num_components; _i++) { 17963 const uint8_t src0 = 17964 _src[0][_i].u8; 17965 const uint8_t src1 = 17966 _src[1][_i].u8; 17967 const uint8_t src2 = 17968 _src[2][_i].u8; 17969 17970 uint8_t dst = MAX2(src0, MAX2(src1, src2)); 17971 17972 _dst_val[_i].u8 = dst; 17973 } 17974 17975 break; 17976 } 17977 case 16: { 17978 17979 17980 17981 17982 for (unsigned _i = 0; _i < num_components; _i++) { 17983 const uint16_t src0 = 17984 _src[0][_i].u16; 17985 const uint16_t src1 = 17986 _src[1][_i].u16; 
17987 const uint16_t src2 = 17988 _src[2][_i].u16; 17989 17990 uint16_t dst = MAX2(src0, MAX2(src1, src2)); 17991 17992 _dst_val[_i].u16 = dst; 17993 } 17994 17995 break; 17996 } 17997 case 32: { 17998 17999 18000 18001 18002 for (unsigned _i = 0; _i < num_components; _i++) { 18003 const uint32_t src0 = 18004 _src[0][_i].u32; 18005 const uint32_t src1 = 18006 _src[1][_i].u32; 18007 const uint32_t src2 = 18008 _src[2][_i].u32; 18009 18010 uint32_t dst = MAX2(src0, MAX2(src1, src2)); 18011 18012 _dst_val[_i].u32 = dst; 18013 } 18014 18015 break; 18016 } 18017 case 64: { 18018 18019 18020 18021 18022 for (unsigned _i = 0; _i < num_components; _i++) { 18023 const uint64_t src0 = 18024 _src[0][_i].u64; 18025 const uint64_t src1 = 18026 _src[1][_i].u64; 18027 const uint64_t src2 = 18028 _src[2][_i].u64; 18029 18030 uint64_t dst = MAX2(src0, MAX2(src1, src2)); 18031 18032 _dst_val[_i].u64 = dst; 18033 } 18034 18035 break; 18036 } 18037 18038 default: 18039 unreachable("unknown bit width"); 18040 } 18041} 18042static void 18043evaluate_umax_4x8(nir_const_value *_dst_val, 18044 MAYBE_UNUSED unsigned num_components, 18045 UNUSED unsigned bit_size, 18046 MAYBE_UNUSED nir_const_value **_src) 18047{ 18048 18049 18050 18051 18052 for (unsigned _i = 0; _i < num_components; _i++) { 18053 const int32_t src0 = 18054 _src[0][_i].i32; 18055 const int32_t src1 = 18056 _src[1][_i].i32; 18057 18058 int32_t dst; 18059 18060 18061dst = 0; 18062for (int i = 0; i < 32; i += 8) { 18063 dst |= MAX2((src0 >> i) & 0xff, (src1 >> i) & 0xff) << i; 18064} 18065 18066 18067 _dst_val[_i].i32 = dst; 18068 } 18069 18070} 18071static void 18072evaluate_umed3(nir_const_value *_dst_val, 18073 MAYBE_UNUSED unsigned num_components, 18074 unsigned bit_size, 18075 MAYBE_UNUSED nir_const_value **_src) 18076{ 18077 switch (bit_size) { 18078 case 1: { 18079 18080 18081 18082 18083 for (unsigned _i = 0; _i < num_components; _i++) { 18084 const uint1_t src0 = 18085 _src[0][_i].b; 18086 const uint1_t src1 = 18087 
_src[1][_i].b; 18088 const uint1_t src2 = 18089 _src[2][_i].b; 18090 18091 uint1_t dst = MAX2(MIN2(MAX2(src0, src1), src2), MIN2(src0, src1)); 18092 18093 /* 1-bit integers get truncated */ 18094 _dst_val[_i].b = dst & 1; 18095 } 18096 18097 break; 18098 } 18099 case 8: { 18100 18101 18102 18103 18104 for (unsigned _i = 0; _i < num_components; _i++) { 18105 const uint8_t src0 = 18106 _src[0][_i].u8; 18107 const uint8_t src1 = 18108 _src[1][_i].u8; 18109 const uint8_t src2 = 18110 _src[2][_i].u8; 18111 18112 uint8_t dst = MAX2(MIN2(MAX2(src0, src1), src2), MIN2(src0, src1)); 18113 18114 _dst_val[_i].u8 = dst; 18115 } 18116 18117 break; 18118 } 18119 case 16: { 18120 18121 18122 18123 18124 for (unsigned _i = 0; _i < num_components; _i++) { 18125 const uint16_t src0 = 18126 _src[0][_i].u16; 18127 const uint16_t src1 = 18128 _src[1][_i].u16; 18129 const uint16_t src2 = 18130 _src[2][_i].u16; 18131 18132 uint16_t dst = MAX2(MIN2(MAX2(src0, src1), src2), MIN2(src0, src1)); 18133 18134 _dst_val[_i].u16 = dst; 18135 } 18136 18137 break; 18138 } 18139 case 32: { 18140 18141 18142 18143 18144 for (unsigned _i = 0; _i < num_components; _i++) { 18145 const uint32_t src0 = 18146 _src[0][_i].u32; 18147 const uint32_t src1 = 18148 _src[1][_i].u32; 18149 const uint32_t src2 = 18150 _src[2][_i].u32; 18151 18152 uint32_t dst = MAX2(MIN2(MAX2(src0, src1), src2), MIN2(src0, src1)); 18153 18154 _dst_val[_i].u32 = dst; 18155 } 18156 18157 break; 18158 } 18159 case 64: { 18160 18161 18162 18163 18164 for (unsigned _i = 0; _i < num_components; _i++) { 18165 const uint64_t src0 = 18166 _src[0][_i].u64; 18167 const uint64_t src1 = 18168 _src[1][_i].u64; 18169 const uint64_t src2 = 18170 _src[2][_i].u64; 18171 18172 uint64_t dst = MAX2(MIN2(MAX2(src0, src1), src2), MIN2(src0, src1)); 18173 18174 _dst_val[_i].u64 = dst; 18175 } 18176 18177 break; 18178 } 18179 18180 default: 18181 unreachable("unknown bit width"); 18182 } 18183} 18184static void 18185evaluate_umin(nir_const_value *_dst_val, 
18186 MAYBE_UNUSED unsigned num_components, 18187 unsigned bit_size, 18188 MAYBE_UNUSED nir_const_value **_src) 18189{ 18190 switch (bit_size) { 18191 case 1: { 18192 18193 18194 18195 18196 for (unsigned _i = 0; _i < num_components; _i++) { 18197 const uint1_t src0 = 18198 _src[0][_i].b; 18199 const uint1_t src1 = 18200 _src[1][_i].b; 18201 18202 uint1_t dst = src1 > src0 ? src0 : src1; 18203 18204 /* 1-bit integers get truncated */ 18205 _dst_val[_i].b = dst & 1; 18206 } 18207 18208 break; 18209 } 18210 case 8: { 18211 18212 18213 18214 18215 for (unsigned _i = 0; _i < num_components; _i++) { 18216 const uint8_t src0 = 18217 _src[0][_i].u8; 18218 const uint8_t src1 = 18219 _src[1][_i].u8; 18220 18221 uint8_t dst = src1 > src0 ? src0 : src1; 18222 18223 _dst_val[_i].u8 = dst; 18224 } 18225 18226 break; 18227 } 18228 case 16: { 18229 18230 18231 18232 18233 for (unsigned _i = 0; _i < num_components; _i++) { 18234 const uint16_t src0 = 18235 _src[0][_i].u16; 18236 const uint16_t src1 = 18237 _src[1][_i].u16; 18238 18239 uint16_t dst = src1 > src0 ? src0 : src1; 18240 18241 _dst_val[_i].u16 = dst; 18242 } 18243 18244 break; 18245 } 18246 case 32: { 18247 18248 18249 18250 18251 for (unsigned _i = 0; _i < num_components; _i++) { 18252 const uint32_t src0 = 18253 _src[0][_i].u32; 18254 const uint32_t src1 = 18255 _src[1][_i].u32; 18256 18257 uint32_t dst = src1 > src0 ? src0 : src1; 18258 18259 _dst_val[_i].u32 = dst; 18260 } 18261 18262 break; 18263 } 18264 case 64: { 18265 18266 18267 18268 18269 for (unsigned _i = 0; _i < num_components; _i++) { 18270 const uint64_t src0 = 18271 _src[0][_i].u64; 18272 const uint64_t src1 = 18273 _src[1][_i].u64; 18274 18275 uint64_t dst = src1 > src0 ? 
src0 : src1; 18276 18277 _dst_val[_i].u64 = dst; 18278 } 18279 18280 break; 18281 } 18282 18283 default: 18284 unreachable("unknown bit width"); 18285 } 18286} 18287static void 18288evaluate_umin3(nir_const_value *_dst_val, 18289 MAYBE_UNUSED unsigned num_components, 18290 unsigned bit_size, 18291 MAYBE_UNUSED nir_const_value **_src) 18292{ 18293 switch (bit_size) { 18294 case 1: { 18295 18296 18297 18298 18299 for (unsigned _i = 0; _i < num_components; _i++) { 18300 const uint1_t src0 = 18301 _src[0][_i].b; 18302 const uint1_t src1 = 18303 _src[1][_i].b; 18304 const uint1_t src2 = 18305 _src[2][_i].b; 18306 18307 uint1_t dst = MIN2(src0, MIN2(src1, src2)); 18308 18309 /* 1-bit integers get truncated */ 18310 _dst_val[_i].b = dst & 1; 18311 } 18312 18313 break; 18314 } 18315 case 8: { 18316 18317 18318 18319 18320 for (unsigned _i = 0; _i < num_components; _i++) { 18321 const uint8_t src0 = 18322 _src[0][_i].u8; 18323 const uint8_t src1 = 18324 _src[1][_i].u8; 18325 const uint8_t src2 = 18326 _src[2][_i].u8; 18327 18328 uint8_t dst = MIN2(src0, MIN2(src1, src2)); 18329 18330 _dst_val[_i].u8 = dst; 18331 } 18332 18333 break; 18334 } 18335 case 16: { 18336 18337 18338 18339 18340 for (unsigned _i = 0; _i < num_components; _i++) { 18341 const uint16_t src0 = 18342 _src[0][_i].u16; 18343 const uint16_t src1 = 18344 _src[1][_i].u16; 18345 const uint16_t src2 = 18346 _src[2][_i].u16; 18347 18348 uint16_t dst = MIN2(src0, MIN2(src1, src2)); 18349 18350 _dst_val[_i].u16 = dst; 18351 } 18352 18353 break; 18354 } 18355 case 32: { 18356 18357 18358 18359 18360 for (unsigned _i = 0; _i < num_components; _i++) { 18361 const uint32_t src0 = 18362 _src[0][_i].u32; 18363 const uint32_t src1 = 18364 _src[1][_i].u32; 18365 const uint32_t src2 = 18366 _src[2][_i].u32; 18367 18368 uint32_t dst = MIN2(src0, MIN2(src1, src2)); 18369 18370 _dst_val[_i].u32 = dst; 18371 } 18372 18373 break; 18374 } 18375 case 64: { 18376 18377 18378 18379 18380 for (unsigned _i = 0; _i < num_components; 
_i++) { 18381 const uint64_t src0 = 18382 _src[0][_i].u64; 18383 const uint64_t src1 = 18384 _src[1][_i].u64; 18385 const uint64_t src2 = 18386 _src[2][_i].u64; 18387 18388 uint64_t dst = MIN2(src0, MIN2(src1, src2)); 18389 18390 _dst_val[_i].u64 = dst; 18391 } 18392 18393 break; 18394 } 18395 18396 default: 18397 unreachable("unknown bit width"); 18398 } 18399} 18400static void 18401evaluate_umin_4x8(nir_const_value *_dst_val, 18402 MAYBE_UNUSED unsigned num_components, 18403 UNUSED unsigned bit_size, 18404 MAYBE_UNUSED nir_const_value **_src) 18405{ 18406 18407 18408 18409 18410 for (unsigned _i = 0; _i < num_components; _i++) { 18411 const int32_t src0 = 18412 _src[0][_i].i32; 18413 const int32_t src1 = 18414 _src[1][_i].i32; 18415 18416 int32_t dst; 18417 18418 18419dst = 0; 18420for (int i = 0; i < 32; i += 8) { 18421 dst |= MIN2((src0 >> i) & 0xff, (src1 >> i) & 0xff) << i; 18422} 18423 18424 18425 _dst_val[_i].i32 = dst; 18426 } 18427 18428} 18429static void 18430evaluate_umod(nir_const_value *_dst_val, 18431 MAYBE_UNUSED unsigned num_components, 18432 unsigned bit_size, 18433 MAYBE_UNUSED nir_const_value **_src) 18434{ 18435 switch (bit_size) { 18436 case 1: { 18437 18438 18439 18440 18441 for (unsigned _i = 0; _i < num_components; _i++) { 18442 const uint1_t src0 = 18443 _src[0][_i].b; 18444 const uint1_t src1 = 18445 _src[1][_i].b; 18446 18447 uint1_t dst = src1 == 0 ? 0 : src0 % src1; 18448 18449 /* 1-bit integers get truncated */ 18450 _dst_val[_i].b = dst & 1; 18451 } 18452 18453 break; 18454 } 18455 case 8: { 18456 18457 18458 18459 18460 for (unsigned _i = 0; _i < num_components; _i++) { 18461 const uint8_t src0 = 18462 _src[0][_i].u8; 18463 const uint8_t src1 = 18464 _src[1][_i].u8; 18465 18466 uint8_t dst = src1 == 0 ? 
0 : src0 % src1; 18467 18468 _dst_val[_i].u8 = dst; 18469 } 18470 18471 break; 18472 } 18473 case 16: { 18474 18475 18476 18477 18478 for (unsigned _i = 0; _i < num_components; _i++) { 18479 const uint16_t src0 = 18480 _src[0][_i].u16; 18481 const uint16_t src1 = 18482 _src[1][_i].u16; 18483 18484 uint16_t dst = src1 == 0 ? 0 : src0 % src1; 18485 18486 _dst_val[_i].u16 = dst; 18487 } 18488 18489 break; 18490 } 18491 case 32: { 18492 18493 18494 18495 18496 for (unsigned _i = 0; _i < num_components; _i++) { 18497 const uint32_t src0 = 18498 _src[0][_i].u32; 18499 const uint32_t src1 = 18500 _src[1][_i].u32; 18501 18502 uint32_t dst = src1 == 0 ? 0 : src0 % src1; 18503 18504 _dst_val[_i].u32 = dst; 18505 } 18506 18507 break; 18508 } 18509 case 64: { 18510 18511 18512 18513 18514 for (unsigned _i = 0; _i < num_components; _i++) { 18515 const uint64_t src0 = 18516 _src[0][_i].u64; 18517 const uint64_t src1 = 18518 _src[1][_i].u64; 18519 18520 uint64_t dst = src1 == 0 ? 0 : src0 % src1; 18521 18522 _dst_val[_i].u64 = dst; 18523 } 18524 18525 break; 18526 } 18527 18528 default: 18529 unreachable("unknown bit width"); 18530 } 18531} 18532static void 18533evaluate_umul_2x32_64(nir_const_value *_dst_val, 18534 MAYBE_UNUSED unsigned num_components, 18535 UNUSED unsigned bit_size, 18536 MAYBE_UNUSED nir_const_value **_src) 18537{ 18538 18539 18540 18541 18542 for (unsigned _i = 0; _i < num_components; _i++) { 18543 const uint32_t src0 = 18544 _src[0][_i].u32; 18545 const uint32_t src1 = 18546 _src[1][_i].u32; 18547 18548 uint64_t dst = (uint64_t)src0 * (uint64_t)src1; 18549 18550 _dst_val[_i].u64 = dst; 18551 } 18552 18553} 18554static void 18555evaluate_umul_high(nir_const_value *_dst_val, 18556 MAYBE_UNUSED unsigned num_components, 18557 unsigned bit_size, 18558 MAYBE_UNUSED nir_const_value **_src) 18559{ 18560 switch (bit_size) { 18561 case 1: { 18562 18563 18564 18565 18566 for (unsigned _i = 0; _i < num_components; _i++) { 18567 const uint1_t src0 = 18568 _src[0][_i].b; 
18569 const uint1_t src1 = 18570 _src[1][_i].b; 18571 18572 uint1_t dst; 18573 18574 18575if (bit_size == 64) { 18576 /* The casts are kind-of annoying but needed to prevent compiler warnings. */ 18577 uint32_t src0_u32[2] = { src0, (uint64_t)src0 >> 32 }; 18578 uint32_t src1_u32[2] = { src1, (uint64_t)src1 >> 32 }; 18579 uint32_t prod_u32[4]; 18580 ubm_mul_u32arr(prod_u32, src0_u32, src1_u32); 18581 dst = (uint64_t)prod_u32[2] | ((uint64_t)prod_u32[3] << 32); 18582} else { 18583 dst = ((uint64_t)src0 * (uint64_t)src1) >> bit_size; 18584} 18585 18586 18587 /* 1-bit integers get truncated */ 18588 _dst_val[_i].b = dst & 1; 18589 } 18590 18591 break; 18592 } 18593 case 8: { 18594 18595 18596 18597 18598 for (unsigned _i = 0; _i < num_components; _i++) { 18599 const uint8_t src0 = 18600 _src[0][_i].u8; 18601 const uint8_t src1 = 18602 _src[1][_i].u8; 18603 18604 uint8_t dst; 18605 18606 18607if (bit_size == 64) { 18608 /* The casts are kind-of annoying but needed to prevent compiler warnings. */ 18609 uint32_t src0_u32[2] = { src0, (uint64_t)src0 >> 32 }; 18610 uint32_t src1_u32[2] = { src1, (uint64_t)src1 >> 32 }; 18611 uint32_t prod_u32[4]; 18612 ubm_mul_u32arr(prod_u32, src0_u32, src1_u32); 18613 dst = (uint64_t)prod_u32[2] | ((uint64_t)prod_u32[3] << 32); 18614} else { 18615 dst = ((uint64_t)src0 * (uint64_t)src1) >> bit_size; 18616} 18617 18618 18619 _dst_val[_i].u8 = dst; 18620 } 18621 18622 break; 18623 } 18624 case 16: { 18625 18626 18627 18628 18629 for (unsigned _i = 0; _i < num_components; _i++) { 18630 const uint16_t src0 = 18631 _src[0][_i].u16; 18632 const uint16_t src1 = 18633 _src[1][_i].u16; 18634 18635 uint16_t dst; 18636 18637 18638if (bit_size == 64) { 18639 /* The casts are kind-of annoying but needed to prevent compiler warnings. 
*/ 18640 uint32_t src0_u32[2] = { src0, (uint64_t)src0 >> 32 }; 18641 uint32_t src1_u32[2] = { src1, (uint64_t)src1 >> 32 }; 18642 uint32_t prod_u32[4]; 18643 ubm_mul_u32arr(prod_u32, src0_u32, src1_u32); 18644 dst = (uint64_t)prod_u32[2] | ((uint64_t)prod_u32[3] << 32); 18645} else { 18646 dst = ((uint64_t)src0 * (uint64_t)src1) >> bit_size; 18647} 18648 18649 18650 _dst_val[_i].u16 = dst; 18651 } 18652 18653 break; 18654 } 18655 case 32: { 18656 18657 18658 18659 18660 for (unsigned _i = 0; _i < num_components; _i++) { 18661 const uint32_t src0 = 18662 _src[0][_i].u32; 18663 const uint32_t src1 = 18664 _src[1][_i].u32; 18665 18666 uint32_t dst; 18667 18668 18669if (bit_size == 64) { 18670 /* The casts are kind-of annoying but needed to prevent compiler warnings. */ 18671 uint32_t src0_u32[2] = { src0, (uint64_t)src0 >> 32 }; 18672 uint32_t src1_u32[2] = { src1, (uint64_t)src1 >> 32 }; 18673 uint32_t prod_u32[4]; 18674 ubm_mul_u32arr(prod_u32, src0_u32, src1_u32); 18675 dst = (uint64_t)prod_u32[2] | ((uint64_t)prod_u32[3] << 32); 18676} else { 18677 dst = ((uint64_t)src0 * (uint64_t)src1) >> bit_size; 18678} 18679 18680 18681 _dst_val[_i].u32 = dst; 18682 } 18683 18684 break; 18685 } 18686 case 64: { 18687 18688 18689 18690 18691 for (unsigned _i = 0; _i < num_components; _i++) { 18692 const uint64_t src0 = 18693 _src[0][_i].u64; 18694 const uint64_t src1 = 18695 _src[1][_i].u64; 18696 18697 uint64_t dst; 18698 18699 18700if (bit_size == 64) { 18701 /* The casts are kind-of annoying but needed to prevent compiler warnings. 
*/ 18702 uint32_t src0_u32[2] = { src0, (uint64_t)src0 >> 32 }; 18703 uint32_t src1_u32[2] = { src1, (uint64_t)src1 >> 32 }; 18704 uint32_t prod_u32[4]; 18705 ubm_mul_u32arr(prod_u32, src0_u32, src1_u32); 18706 dst = (uint64_t)prod_u32[2] | ((uint64_t)prod_u32[3] << 32); 18707} else { 18708 dst = ((uint64_t)src0 * (uint64_t)src1) >> bit_size; 18709} 18710 18711 18712 _dst_val[_i].u64 = dst; 18713 } 18714 18715 break; 18716 } 18717 18718 default: 18719 unreachable("unknown bit width"); 18720 } 18721} 18722static void 18723evaluate_umul_unorm_4x8(nir_const_value *_dst_val, 18724 MAYBE_UNUSED unsigned num_components, 18725 UNUSED unsigned bit_size, 18726 MAYBE_UNUSED nir_const_value **_src) 18727{ 18728 18729 18730 18731 18732 for (unsigned _i = 0; _i < num_components; _i++) { 18733 const int32_t src0 = 18734 _src[0][_i].i32; 18735 const int32_t src1 = 18736 _src[1][_i].i32; 18737 18738 int32_t dst; 18739 18740 18741dst = 0; 18742for (int i = 0; i < 32; i += 8) { 18743 int src0_chan = (src0 >> i) & 0xff; 18744 int src1_chan = (src1 >> i) & 0xff; 18745 dst |= ((src0_chan * src1_chan) / 255) << i; 18746} 18747 18748 18749 _dst_val[_i].i32 = dst; 18750 } 18751 18752} 18753static void 18754evaluate_unpack_32_2x16(nir_const_value *_dst_val, 18755 MAYBE_UNUSED unsigned num_components, 18756 UNUSED unsigned bit_size, 18757 MAYBE_UNUSED nir_const_value **_src) 18758{ 18759 18760 18761 18762 18763 const struct uint32_vec src0 = { 18764 _src[0][0].u32, 18765 0, 18766 0, 18767 0, 18768 }; 18769 18770 struct uint16_vec dst; 18771 18772 dst.x = src0.x; dst.y = src0.x >> 16; 18773 18774 _dst_val[0].u16 = dst.x; 18775 _dst_val[1].u16 = dst.y; 18776 18777} 18778static void 18779evaluate_unpack_32_2x16_split_x(nir_const_value *_dst_val, 18780 MAYBE_UNUSED unsigned num_components, 18781 UNUSED unsigned bit_size, 18782 MAYBE_UNUSED nir_const_value **_src) 18783{ 18784 18785 18786 18787 18788 for (unsigned _i = 0; _i < num_components; _i++) { 18789 const uint32_t src0 = 18790 
_src[0][_i].u32; 18791 18792 uint16_t dst = src0; 18793 18794 _dst_val[_i].u16 = dst; 18795 } 18796 18797} 18798static void 18799evaluate_unpack_32_2x16_split_y(nir_const_value *_dst_val, 18800 MAYBE_UNUSED unsigned num_components, 18801 UNUSED unsigned bit_size, 18802 MAYBE_UNUSED nir_const_value **_src) 18803{ 18804 18805 18806 18807 18808 for (unsigned _i = 0; _i < num_components; _i++) { 18809 const uint32_t src0 = 18810 _src[0][_i].u32; 18811 18812 uint16_t dst = src0 >> 16; 18813 18814 _dst_val[_i].u16 = dst; 18815 } 18816 18817} 18818static void 18819evaluate_unpack_64_2x32(nir_const_value *_dst_val, 18820 MAYBE_UNUSED unsigned num_components, 18821 UNUSED unsigned bit_size, 18822 MAYBE_UNUSED nir_const_value **_src) 18823{ 18824 18825 18826 18827 18828 const struct uint64_vec src0 = { 18829 _src[0][0].u64, 18830 0, 18831 0, 18832 0, 18833 }; 18834 18835 struct uint32_vec dst; 18836 18837 dst.x = src0.x; dst.y = src0.x >> 32; 18838 18839 _dst_val[0].u32 = dst.x; 18840 _dst_val[1].u32 = dst.y; 18841 18842} 18843static void 18844evaluate_unpack_64_2x32_split_x(nir_const_value *_dst_val, 18845 MAYBE_UNUSED unsigned num_components, 18846 UNUSED unsigned bit_size, 18847 MAYBE_UNUSED nir_const_value **_src) 18848{ 18849 18850 18851 18852 18853 for (unsigned _i = 0; _i < num_components; _i++) { 18854 const uint64_t src0 = 18855 _src[0][_i].u64; 18856 18857 uint32_t dst = src0; 18858 18859 _dst_val[_i].u32 = dst; 18860 } 18861 18862} 18863static void 18864evaluate_unpack_64_2x32_split_y(nir_const_value *_dst_val, 18865 MAYBE_UNUSED unsigned num_components, 18866 UNUSED unsigned bit_size, 18867 MAYBE_UNUSED nir_const_value **_src) 18868{ 18869 18870 18871 18872 18873 for (unsigned _i = 0; _i < num_components; _i++) { 18874 const uint64_t src0 = 18875 _src[0][_i].u64; 18876 18877 uint32_t dst = src0 >> 32; 18878 18879 _dst_val[_i].u32 = dst; 18880 } 18881 18882} 18883static void 18884evaluate_unpack_64_4x16(nir_const_value *_dst_val, 18885 MAYBE_UNUSED unsigned 
num_components, 18886 UNUSED unsigned bit_size, 18887 MAYBE_UNUSED nir_const_value **_src) 18888{ 18889 18890 18891 18892 18893 const struct uint64_vec src0 = { 18894 _src[0][0].u64, 18895 0, 18896 0, 18897 0, 18898 }; 18899 18900 struct uint16_vec dst; 18901 18902 dst.x = src0.x; dst.y = src0.x >> 16; dst.z = src0.x >> 32; dst.w = src0.w >> 48; 18903 18904 _dst_val[0].u16 = dst.x; 18905 _dst_val[1].u16 = dst.y; 18906 _dst_val[2].u16 = dst.z; 18907 _dst_val[3].u16 = dst.w; 18908 18909} 18910static void 18911evaluate_unpack_half_2x16(nir_const_value *_dst_val, 18912 MAYBE_UNUSED unsigned num_components, 18913 UNUSED unsigned bit_size, 18914 MAYBE_UNUSED nir_const_value **_src) 18915{ 18916 18917 18918 18919 18920 const struct uint32_vec src0 = { 18921 _src[0][0].u32, 18922 0, 18923 0, 18924 0, 18925 }; 18926 18927 struct float32_vec dst; 18928 18929 18930dst.x = unpack_half_1x16((uint16_t)(src0.x & 0xffff)); 18931dst.y = unpack_half_1x16((uint16_t)(src0.x << 16)); 18932 18933 18934 _dst_val[0].f32 = dst.x; 18935 _dst_val[1].f32 = dst.y; 18936 18937} 18938static void 18939evaluate_unpack_half_2x16_split_x(nir_const_value *_dst_val, 18940 MAYBE_UNUSED unsigned num_components, 18941 UNUSED unsigned bit_size, 18942 MAYBE_UNUSED nir_const_value **_src) 18943{ 18944 18945 18946 18947 18948 for (unsigned _i = 0; _i < num_components; _i++) { 18949 const uint32_t src0 = 18950 _src[0][_i].u32; 18951 18952 float32_t dst = unpack_half_1x16((uint16_t)(src0 & 0xffff)); 18953 18954 _dst_val[_i].f32 = dst; 18955 } 18956 18957} 18958static void 18959evaluate_unpack_half_2x16_split_y(nir_const_value *_dst_val, 18960 MAYBE_UNUSED unsigned num_components, 18961 UNUSED unsigned bit_size, 18962 MAYBE_UNUSED nir_const_value **_src) 18963{ 18964 18965 18966 18967 18968 for (unsigned _i = 0; _i < num_components; _i++) { 18969 const uint32_t src0 = 18970 _src[0][_i].u32; 18971 18972 float32_t dst = unpack_half_1x16((uint16_t)(src0 >> 16)); 18973 18974 _dst_val[_i].f32 = dst; 18975 } 18976 
18977} 18978static void 18979evaluate_unpack_snorm_2x16(nir_const_value *_dst_val, 18980 MAYBE_UNUSED unsigned num_components, 18981 UNUSED unsigned bit_size, 18982 MAYBE_UNUSED nir_const_value **_src) 18983{ 18984 18985 18986 18987 18988 const struct uint32_vec src0 = { 18989 _src[0][0].u32, 18990 0, 18991 0, 18992 0, 18993 }; 18994 18995 struct float32_vec dst; 18996 18997 18998dst.x = unpack_snorm_1x16((uint16_t)(src0.x & 0xffff)); 18999dst.y = unpack_snorm_1x16((uint16_t)(src0.x << 16)); 19000 19001 19002 _dst_val[0].f32 = dst.x; 19003 _dst_val[1].f32 = dst.y; 19004 19005} 19006static void 19007evaluate_unpack_snorm_4x8(nir_const_value *_dst_val, 19008 MAYBE_UNUSED unsigned num_components, 19009 UNUSED unsigned bit_size, 19010 MAYBE_UNUSED nir_const_value **_src) 19011{ 19012 19013 19014 19015 19016 const struct uint32_vec src0 = { 19017 _src[0][0].u32, 19018 0, 19019 0, 19020 0, 19021 }; 19022 19023 struct float32_vec dst; 19024 19025 19026dst.x = unpack_snorm_1x8((uint8_t)(src0.x & 0xff)); 19027dst.y = unpack_snorm_1x8((uint8_t)((src0.x >> 8) & 0xff)); 19028dst.z = unpack_snorm_1x8((uint8_t)((src0.x >> 16) & 0xff)); 19029dst.w = unpack_snorm_1x8((uint8_t)(src0.x >> 24)); 19030 19031 19032 _dst_val[0].f32 = dst.x; 19033 _dst_val[1].f32 = dst.y; 19034 _dst_val[2].f32 = dst.z; 19035 _dst_val[3].f32 = dst.w; 19036 19037} 19038static void 19039evaluate_unpack_unorm_2x16(nir_const_value *_dst_val, 19040 MAYBE_UNUSED unsigned num_components, 19041 UNUSED unsigned bit_size, 19042 MAYBE_UNUSED nir_const_value **_src) 19043{ 19044 19045 19046 19047 19048 const struct uint32_vec src0 = { 19049 _src[0][0].u32, 19050 0, 19051 0, 19052 0, 19053 }; 19054 19055 struct float32_vec dst; 19056 19057 19058dst.x = unpack_unorm_1x16((uint16_t)(src0.x & 0xffff)); 19059dst.y = unpack_unorm_1x16((uint16_t)(src0.x << 16)); 19060 19061 19062 _dst_val[0].f32 = dst.x; 19063 _dst_val[1].f32 = dst.y; 19064 19065} 19066static void 19067evaluate_unpack_unorm_4x8(nir_const_value *_dst_val, 
19068 MAYBE_UNUSED unsigned num_components, 19069 UNUSED unsigned bit_size, 19070 MAYBE_UNUSED nir_const_value **_src) 19071{ 19072 19073 19074 19075 19076 const struct uint32_vec src0 = { 19077 _src[0][0].u32, 19078 0, 19079 0, 19080 0, 19081 }; 19082 19083 struct float32_vec dst; 19084 19085 19086dst.x = unpack_unorm_1x8((uint8_t)(src0.x & 0xff)); 19087dst.y = unpack_unorm_1x8((uint8_t)((src0.x >> 8) & 0xff)); 19088dst.z = unpack_unorm_1x8((uint8_t)((src0.x >> 16) & 0xff)); 19089dst.w = unpack_unorm_1x8((uint8_t)(src0.x >> 24)); 19090 19091 19092 _dst_val[0].f32 = dst.x; 19093 _dst_val[1].f32 = dst.y; 19094 _dst_val[2].f32 = dst.z; 19095 _dst_val[3].f32 = dst.w; 19096 19097} 19098static void 19099evaluate_urhadd(nir_const_value *_dst_val, 19100 MAYBE_UNUSED unsigned num_components, 19101 unsigned bit_size, 19102 MAYBE_UNUSED nir_const_value **_src) 19103{ 19104 switch (bit_size) { 19105 case 1: { 19106 19107 19108 19109 19110 for (unsigned _i = 0; _i < num_components; _i++) { 19111 const uint1_t src0 = 19112 _src[0][_i].b; 19113 const uint1_t src1 = 19114 _src[1][_i].b; 19115 19116 uint1_t dst = (src0 | src1) + ((src0 ^ src1) >> 1); 19117 19118 /* 1-bit integers get truncated */ 19119 _dst_val[_i].b = dst & 1; 19120 } 19121 19122 break; 19123 } 19124 case 8: { 19125 19126 19127 19128 19129 for (unsigned _i = 0; _i < num_components; _i++) { 19130 const uint8_t src0 = 19131 _src[0][_i].u8; 19132 const uint8_t src1 = 19133 _src[1][_i].u8; 19134 19135 uint8_t dst = (src0 | src1) + ((src0 ^ src1) >> 1); 19136 19137 _dst_val[_i].u8 = dst; 19138 } 19139 19140 break; 19141 } 19142 case 16: { 19143 19144 19145 19146 19147 for (unsigned _i = 0; _i < num_components; _i++) { 19148 const uint16_t src0 = 19149 _src[0][_i].u16; 19150 const uint16_t src1 = 19151 _src[1][_i].u16; 19152 19153 uint16_t dst = (src0 | src1) + ((src0 ^ src1) >> 1); 19154 19155 _dst_val[_i].u16 = dst; 19156 } 19157 19158 break; 19159 } 19160 case 32: { 19161 19162 19163 19164 19165 for (unsigned _i = 
0; _i < num_components; _i++) { 19166 const uint32_t src0 = 19167 _src[0][_i].u32; 19168 const uint32_t src1 = 19169 _src[1][_i].u32; 19170 19171 uint32_t dst = (src0 | src1) + ((src0 ^ src1) >> 1); 19172 19173 _dst_val[_i].u32 = dst; 19174 } 19175 19176 break; 19177 } 19178 case 64: { 19179 19180 19181 19182 19183 for (unsigned _i = 0; _i < num_components; _i++) { 19184 const uint64_t src0 = 19185 _src[0][_i].u64; 19186 const uint64_t src1 = 19187 _src[1][_i].u64; 19188 19189 uint64_t dst = (src0 | src1) + ((src0 ^ src1) >> 1); 19190 19191 _dst_val[_i].u64 = dst; 19192 } 19193 19194 break; 19195 } 19196 19197 default: 19198 unreachable("unknown bit width"); 19199 } 19200} 19201static void 19202evaluate_usadd_4x8(nir_const_value *_dst_val, 19203 MAYBE_UNUSED unsigned num_components, 19204 UNUSED unsigned bit_size, 19205 MAYBE_UNUSED nir_const_value **_src) 19206{ 19207 19208 19209 19210 19211 for (unsigned _i = 0; _i < num_components; _i++) { 19212 const int32_t src0 = 19213 _src[0][_i].i32; 19214 const int32_t src1 = 19215 _src[1][_i].i32; 19216 19217 int32_t dst; 19218 19219 19220dst = 0; 19221for (int i = 0; i < 32; i += 8) { 19222 dst |= MIN2(((src0 >> i) & 0xff) + ((src1 >> i) & 0xff), 0xff) << i; 19223} 19224 19225 19226 _dst_val[_i].i32 = dst; 19227 } 19228 19229} 19230static void 19231evaluate_ushr(nir_const_value *_dst_val, 19232 MAYBE_UNUSED unsigned num_components, 19233 unsigned bit_size, 19234 MAYBE_UNUSED nir_const_value **_src) 19235{ 19236 switch (bit_size) { 19237 case 1: { 19238 19239 19240 19241 19242 for (unsigned _i = 0; _i < num_components; _i++) { 19243 const uint1_t src0 = 19244 _src[0][_i].b; 19245 const uint32_t src1 = 19246 _src[1][_i].u32; 19247 19248 uint1_t dst = src0 >> (src1 & (sizeof(src0) * 8 - 1)); 19249 19250 /* 1-bit integers get truncated */ 19251 _dst_val[_i].b = dst & 1; 19252 } 19253 19254 break; 19255 } 19256 case 8: { 19257 19258 19259 19260 19261 for (unsigned _i = 0; _i < num_components; _i++) { 19262 const uint8_t src0 
= 19263 _src[0][_i].u8; 19264 const uint32_t src1 = 19265 _src[1][_i].u32; 19266 19267 uint8_t dst = src0 >> (src1 & (sizeof(src0) * 8 - 1)); 19268 19269 _dst_val[_i].u8 = dst; 19270 } 19271 19272 break; 19273 } 19274 case 16: { 19275 19276 19277 19278 19279 for (unsigned _i = 0; _i < num_components; _i++) { 19280 const uint16_t src0 = 19281 _src[0][_i].u16; 19282 const uint32_t src1 = 19283 _src[1][_i].u32; 19284 19285 uint16_t dst = src0 >> (src1 & (sizeof(src0) * 8 - 1)); 19286 19287 _dst_val[_i].u16 = dst; 19288 } 19289 19290 break; 19291 } 19292 case 32: { 19293 19294 19295 19296 19297 for (unsigned _i = 0; _i < num_components; _i++) { 19298 const uint32_t src0 = 19299 _src[0][_i].u32; 19300 const uint32_t src1 = 19301 _src[1][_i].u32; 19302 19303 uint32_t dst = src0 >> (src1 & (sizeof(src0) * 8 - 1)); 19304 19305 _dst_val[_i].u32 = dst; 19306 } 19307 19308 break; 19309 } 19310 case 64: { 19311 19312 19313 19314 19315 for (unsigned _i = 0; _i < num_components; _i++) { 19316 const uint64_t src0 = 19317 _src[0][_i].u64; 19318 const uint32_t src1 = 19319 _src[1][_i].u32; 19320 19321 uint64_t dst = src0 >> (src1 & (sizeof(src0) * 8 - 1)); 19322 19323 _dst_val[_i].u64 = dst; 19324 } 19325 19326 break; 19327 } 19328 19329 default: 19330 unreachable("unknown bit width"); 19331 } 19332} 19333static void 19334evaluate_ussub_4x8(nir_const_value *_dst_val, 19335 MAYBE_UNUSED unsigned num_components, 19336 UNUSED unsigned bit_size, 19337 MAYBE_UNUSED nir_const_value **_src) 19338{ 19339 19340 19341 19342 19343 for (unsigned _i = 0; _i < num_components; _i++) { 19344 const int32_t src0 = 19345 _src[0][_i].i32; 19346 const int32_t src1 = 19347 _src[1][_i].i32; 19348 19349 int32_t dst; 19350 19351 19352dst = 0; 19353for (int i = 0; i < 32; i += 8) { 19354 int src0_chan = (src0 >> i) & 0xff; 19355 int src1_chan = (src1 >> i) & 0xff; 19356 if (src0_chan > src1_chan) 19357 dst |= (src0_chan - src1_chan) << i; 19358} 19359 19360 19361 _dst_val[_i].i32 = dst; 19362 } 19363 19364} 
19365static void 19366evaluate_usub_borrow(nir_const_value *_dst_val, 19367 MAYBE_UNUSED unsigned num_components, 19368 unsigned bit_size, 19369 MAYBE_UNUSED nir_const_value **_src) 19370{ 19371 switch (bit_size) { 19372 case 1: { 19373 19374 19375 19376 19377 for (unsigned _i = 0; _i < num_components; _i++) { 19378 const uint1_t src0 = 19379 _src[0][_i].b; 19380 const uint1_t src1 = 19381 _src[1][_i].b; 19382 19383 uint1_t dst = src0 < src1; 19384 19385 /* 1-bit integers get truncated */ 19386 _dst_val[_i].b = dst & 1; 19387 } 19388 19389 break; 19390 } 19391 case 8: { 19392 19393 19394 19395 19396 for (unsigned _i = 0; _i < num_components; _i++) { 19397 const uint8_t src0 = 19398 _src[0][_i].u8; 19399 const uint8_t src1 = 19400 _src[1][_i].u8; 19401 19402 uint8_t dst = src0 < src1; 19403 19404 _dst_val[_i].u8 = dst; 19405 } 19406 19407 break; 19408 } 19409 case 16: { 19410 19411 19412 19413 19414 for (unsigned _i = 0; _i < num_components; _i++) { 19415 const uint16_t src0 = 19416 _src[0][_i].u16; 19417 const uint16_t src1 = 19418 _src[1][_i].u16; 19419 19420 uint16_t dst = src0 < src1; 19421 19422 _dst_val[_i].u16 = dst; 19423 } 19424 19425 break; 19426 } 19427 case 32: { 19428 19429 19430 19431 19432 for (unsigned _i = 0; _i < num_components; _i++) { 19433 const uint32_t src0 = 19434 _src[0][_i].u32; 19435 const uint32_t src1 = 19436 _src[1][_i].u32; 19437 19438 uint32_t dst = src0 < src1; 19439 19440 _dst_val[_i].u32 = dst; 19441 } 19442 19443 break; 19444 } 19445 case 64: { 19446 19447 19448 19449 19450 for (unsigned _i = 0; _i < num_components; _i++) { 19451 const uint64_t src0 = 19452 _src[0][_i].u64; 19453 const uint64_t src1 = 19454 _src[1][_i].u64; 19455 19456 uint64_t dst = src0 < src1; 19457 19458 _dst_val[_i].u64 = dst; 19459 } 19460 19461 break; 19462 } 19463 19464 default: 19465 unreachable("unknown bit width"); 19466 } 19467} 19468static void 19469evaluate_usub_sat(nir_const_value *_dst_val, 19470 MAYBE_UNUSED unsigned num_components, 19471 unsigned 
bit_size, 19472 MAYBE_UNUSED nir_const_value **_src) 19473{ 19474 switch (bit_size) { 19475 case 1: { 19476 19477 19478 19479 19480 for (unsigned _i = 0; _i < num_components; _i++) { 19481 const uint1_t src0 = 19482 _src[0][_i].b; 19483 const uint1_t src1 = 19484 _src[1][_i].b; 19485 19486 uint1_t dst = src0 < src1 ? 0 : src0 - src1; 19487 19488 /* 1-bit integers get truncated */ 19489 _dst_val[_i].b = dst & 1; 19490 } 19491 19492 break; 19493 } 19494 case 8: { 19495 19496 19497 19498 19499 for (unsigned _i = 0; _i < num_components; _i++) { 19500 const uint8_t src0 = 19501 _src[0][_i].u8; 19502 const uint8_t src1 = 19503 _src[1][_i].u8; 19504 19505 uint8_t dst = src0 < src1 ? 0 : src0 - src1; 19506 19507 _dst_val[_i].u8 = dst; 19508 } 19509 19510 break; 19511 } 19512 case 16: { 19513 19514 19515 19516 19517 for (unsigned _i = 0; _i < num_components; _i++) { 19518 const uint16_t src0 = 19519 _src[0][_i].u16; 19520 const uint16_t src1 = 19521 _src[1][_i].u16; 19522 19523 uint16_t dst = src0 < src1 ? 0 : src0 - src1; 19524 19525 _dst_val[_i].u16 = dst; 19526 } 19527 19528 break; 19529 } 19530 case 32: { 19531 19532 19533 19534 19535 for (unsigned _i = 0; _i < num_components; _i++) { 19536 const uint32_t src0 = 19537 _src[0][_i].u32; 19538 const uint32_t src1 = 19539 _src[1][_i].u32; 19540 19541 uint32_t dst = src0 < src1 ? 0 : src0 - src1; 19542 19543 _dst_val[_i].u32 = dst; 19544 } 19545 19546 break; 19547 } 19548 case 64: { 19549 19550 19551 19552 19553 for (unsigned _i = 0; _i < num_components; _i++) { 19554 const uint64_t src0 = 19555 _src[0][_i].u64; 19556 const uint64_t src1 = 19557 _src[1][_i].u64; 19558 19559 uint64_t dst = src0 < src1 ? 
0 : src0 - src1; 19560 19561 _dst_val[_i].u64 = dst; 19562 } 19563 19564 break; 19565 } 19566 19567 default: 19568 unreachable("unknown bit width"); 19569 } 19570} 19571static void 19572evaluate_vec2(nir_const_value *_dst_val, 19573 MAYBE_UNUSED unsigned num_components, 19574 unsigned bit_size, 19575 MAYBE_UNUSED nir_const_value **_src) 19576{ 19577 switch (bit_size) { 19578 case 1: { 19579 19580 19581 19582 19583 const struct uint1_vec src0 = { 19584 _src[0][0].b, 19585 0, 19586 0, 19587 0, 19588 }; 19589 19590 const struct uint1_vec src1 = { 19591 _src[1][0].b, 19592 0, 19593 0, 19594 0, 19595 }; 19596 19597 struct uint1_vec dst; 19598 19599 19600dst.x = src0.x; 19601dst.y = src1.x; 19602 19603 19604 /* 1-bit integers get truncated */ 19605 _dst_val[0].b = dst.x & 1; 19606 /* 1-bit integers get truncated */ 19607 _dst_val[1].b = dst.y & 1; 19608 19609 break; 19610 } 19611 case 8: { 19612 19613 19614 19615 19616 const struct uint8_vec src0 = { 19617 _src[0][0].u8, 19618 0, 19619 0, 19620 0, 19621 }; 19622 19623 const struct uint8_vec src1 = { 19624 _src[1][0].u8, 19625 0, 19626 0, 19627 0, 19628 }; 19629 19630 struct uint8_vec dst; 19631 19632 19633dst.x = src0.x; 19634dst.y = src1.x; 19635 19636 19637 _dst_val[0].u8 = dst.x; 19638 _dst_val[1].u8 = dst.y; 19639 19640 break; 19641 } 19642 case 16: { 19643 19644 19645 19646 19647 const struct uint16_vec src0 = { 19648 _src[0][0].u16, 19649 0, 19650 0, 19651 0, 19652 }; 19653 19654 const struct uint16_vec src1 = { 19655 _src[1][0].u16, 19656 0, 19657 0, 19658 0, 19659 }; 19660 19661 struct uint16_vec dst; 19662 19663 19664dst.x = src0.x; 19665dst.y = src1.x; 19666 19667 19668 _dst_val[0].u16 = dst.x; 19669 _dst_val[1].u16 = dst.y; 19670 19671 break; 19672 } 19673 case 32: { 19674 19675 19676 19677 19678 const struct uint32_vec src0 = { 19679 _src[0][0].u32, 19680 0, 19681 0, 19682 0, 19683 }; 19684 19685 const struct uint32_vec src1 = { 19686 _src[1][0].u32, 19687 0, 19688 0, 19689 0, 19690 }; 19691 19692 struct 
uint32_vec dst; 19693 19694 19695dst.x = src0.x; 19696dst.y = src1.x; 19697 19698 19699 _dst_val[0].u32 = dst.x; 19700 _dst_val[1].u32 = dst.y; 19701 19702 break; 19703 } 19704 case 64: { 19705 19706 19707 19708 19709 const struct uint64_vec src0 = { 19710 _src[0][0].u64, 19711 0, 19712 0, 19713 0, 19714 }; 19715 19716 const struct uint64_vec src1 = { 19717 _src[1][0].u64, 19718 0, 19719 0, 19720 0, 19721 }; 19722 19723 struct uint64_vec dst; 19724 19725 19726dst.x = src0.x; 19727dst.y = src1.x; 19728 19729 19730 _dst_val[0].u64 = dst.x; 19731 _dst_val[1].u64 = dst.y; 19732 19733 break; 19734 } 19735 19736 default: 19737 unreachable("unknown bit width"); 19738 } 19739} 19740static void 19741evaluate_vec3(nir_const_value *_dst_val, 19742 MAYBE_UNUSED unsigned num_components, 19743 unsigned bit_size, 19744 MAYBE_UNUSED nir_const_value **_src) 19745{ 19746 switch (bit_size) { 19747 case 1: { 19748 19749 19750 19751 19752 const struct uint1_vec src0 = { 19753 _src[0][0].b, 19754 0, 19755 0, 19756 0, 19757 }; 19758 19759 const struct uint1_vec src1 = { 19760 _src[1][0].b, 19761 0, 19762 0, 19763 0, 19764 }; 19765 19766 const struct uint1_vec src2 = { 19767 _src[2][0].b, 19768 0, 19769 0, 19770 0, 19771 }; 19772 19773 struct uint1_vec dst; 19774 19775 19776dst.x = src0.x; 19777dst.y = src1.x; 19778dst.z = src2.x; 19779 19780 19781 /* 1-bit integers get truncated */ 19782 _dst_val[0].b = dst.x & 1; 19783 /* 1-bit integers get truncated */ 19784 _dst_val[1].b = dst.y & 1; 19785 /* 1-bit integers get truncated */ 19786 _dst_val[2].b = dst.z & 1; 19787 19788 break; 19789 } 19790 case 8: { 19791 19792 19793 19794 19795 const struct uint8_vec src0 = { 19796 _src[0][0].u8, 19797 0, 19798 0, 19799 0, 19800 }; 19801 19802 const struct uint8_vec src1 = { 19803 _src[1][0].u8, 19804 0, 19805 0, 19806 0, 19807 }; 19808 19809 const struct uint8_vec src2 = { 19810 _src[2][0].u8, 19811 0, 19812 0, 19813 0, 19814 }; 19815 19816 struct uint8_vec dst; 19817 19818 19819dst.x = src0.x; 
19820dst.y = src1.x; 19821dst.z = src2.x; 19822 19823 19824 _dst_val[0].u8 = dst.x; 19825 _dst_val[1].u8 = dst.y; 19826 _dst_val[2].u8 = dst.z; 19827 19828 break; 19829 } 19830 case 16: { 19831 19832 19833 19834 19835 const struct uint16_vec src0 = { 19836 _src[0][0].u16, 19837 0, 19838 0, 19839 0, 19840 }; 19841 19842 const struct uint16_vec src1 = { 19843 _src[1][0].u16, 19844 0, 19845 0, 19846 0, 19847 }; 19848 19849 const struct uint16_vec src2 = { 19850 _src[2][0].u16, 19851 0, 19852 0, 19853 0, 19854 }; 19855 19856 struct uint16_vec dst; 19857 19858 19859dst.x = src0.x; 19860dst.y = src1.x; 19861dst.z = src2.x; 19862 19863 19864 _dst_val[0].u16 = dst.x; 19865 _dst_val[1].u16 = dst.y; 19866 _dst_val[2].u16 = dst.z; 19867 19868 break; 19869 } 19870 case 32: { 19871 19872 19873 19874 19875 const struct uint32_vec src0 = { 19876 _src[0][0].u32, 19877 0, 19878 0, 19879 0, 19880 }; 19881 19882 const struct uint32_vec src1 = { 19883 _src[1][0].u32, 19884 0, 19885 0, 19886 0, 19887 }; 19888 19889 const struct uint32_vec src2 = { 19890 _src[2][0].u32, 19891 0, 19892 0, 19893 0, 19894 }; 19895 19896 struct uint32_vec dst; 19897 19898 19899dst.x = src0.x; 19900dst.y = src1.x; 19901dst.z = src2.x; 19902 19903 19904 _dst_val[0].u32 = dst.x; 19905 _dst_val[1].u32 = dst.y; 19906 _dst_val[2].u32 = dst.z; 19907 19908 break; 19909 } 19910 case 64: { 19911 19912 19913 19914 19915 const struct uint64_vec src0 = { 19916 _src[0][0].u64, 19917 0, 19918 0, 19919 0, 19920 }; 19921 19922 const struct uint64_vec src1 = { 19923 _src[1][0].u64, 19924 0, 19925 0, 19926 0, 19927 }; 19928 19929 const struct uint64_vec src2 = { 19930 _src[2][0].u64, 19931 0, 19932 0, 19933 0, 19934 }; 19935 19936 struct uint64_vec dst; 19937 19938 19939dst.x = src0.x; 19940dst.y = src1.x; 19941dst.z = src2.x; 19942 19943 19944 _dst_val[0].u64 = dst.x; 19945 _dst_val[1].u64 = dst.y; 19946 _dst_val[2].u64 = dst.z; 19947 19948 break; 19949 } 19950 19951 default: 19952 unreachable("unknown bit width"); 19953 } 
19954} 19955static void 19956evaluate_vec4(nir_const_value *_dst_val, 19957 MAYBE_UNUSED unsigned num_components, 19958 unsigned bit_size, 19959 MAYBE_UNUSED nir_const_value **_src) 19960{ 19961 switch (bit_size) { 19962 case 1: { 19963 19964 19965 19966 19967 const struct uint1_vec src0 = { 19968 _src[0][0].b, 19969 0, 19970 0, 19971 0, 19972 }; 19973 19974 const struct uint1_vec src1 = { 19975 _src[1][0].b, 19976 0, 19977 0, 19978 0, 19979 }; 19980 19981 const struct uint1_vec src2 = { 19982 _src[2][0].b, 19983 0, 19984 0, 19985 0, 19986 }; 19987 19988 const struct uint1_vec src3 = { 19989 _src[3][0].b, 19990 0, 19991 0, 19992 0, 19993 }; 19994 19995 struct uint1_vec dst; 19996 19997 19998dst.x = src0.x; 19999dst.y = src1.x; 20000dst.z = src2.x; 20001dst.w = src3.x; 20002 20003 20004 /* 1-bit integers get truncated */ 20005 _dst_val[0].b = dst.x & 1; 20006 /* 1-bit integers get truncated */ 20007 _dst_val[1].b = dst.y & 1; 20008 /* 1-bit integers get truncated */ 20009 _dst_val[2].b = dst.z & 1; 20010 /* 1-bit integers get truncated */ 20011 _dst_val[3].b = dst.w & 1; 20012 20013 break; 20014 } 20015 case 8: { 20016 20017 20018 20019 20020 const struct uint8_vec src0 = { 20021 _src[0][0].u8, 20022 0, 20023 0, 20024 0, 20025 }; 20026 20027 const struct uint8_vec src1 = { 20028 _src[1][0].u8, 20029 0, 20030 0, 20031 0, 20032 }; 20033 20034 const struct uint8_vec src2 = { 20035 _src[2][0].u8, 20036 0, 20037 0, 20038 0, 20039 }; 20040 20041 const struct uint8_vec src3 = { 20042 _src[3][0].u8, 20043 0, 20044 0, 20045 0, 20046 }; 20047 20048 struct uint8_vec dst; 20049 20050 20051dst.x = src0.x; 20052dst.y = src1.x; 20053dst.z = src2.x; 20054dst.w = src3.x; 20055 20056 20057 _dst_val[0].u8 = dst.x; 20058 _dst_val[1].u8 = dst.y; 20059 _dst_val[2].u8 = dst.z; 20060 _dst_val[3].u8 = dst.w; 20061 20062 break; 20063 } 20064 case 16: { 20065 20066 20067 20068 20069 const struct uint16_vec src0 = { 20070 _src[0][0].u16, 20071 0, 20072 0, 20073 0, 20074 }; 20075 20076 const 
struct uint16_vec src1 = { 20077 _src[1][0].u16, 20078 0, 20079 0, 20080 0, 20081 }; 20082 20083 const struct uint16_vec src2 = { 20084 _src[2][0].u16, 20085 0, 20086 0, 20087 0, 20088 }; 20089 20090 const struct uint16_vec src3 = { 20091 _src[3][0].u16, 20092 0, 20093 0, 20094 0, 20095 }; 20096 20097 struct uint16_vec dst; 20098 20099 20100dst.x = src0.x; 20101dst.y = src1.x; 20102dst.z = src2.x; 20103dst.w = src3.x; 20104 20105 20106 _dst_val[0].u16 = dst.x; 20107 _dst_val[1].u16 = dst.y; 20108 _dst_val[2].u16 = dst.z; 20109 _dst_val[3].u16 = dst.w; 20110 20111 break; 20112 } 20113 case 32: { 20114 20115 20116 20117 20118 const struct uint32_vec src0 = { 20119 _src[0][0].u32, 20120 0, 20121 0, 20122 0, 20123 }; 20124 20125 const struct uint32_vec src1 = { 20126 _src[1][0].u32, 20127 0, 20128 0, 20129 0, 20130 }; 20131 20132 const struct uint32_vec src2 = { 20133 _src[2][0].u32, 20134 0, 20135 0, 20136 0, 20137 }; 20138 20139 const struct uint32_vec src3 = { 20140 _src[3][0].u32, 20141 0, 20142 0, 20143 0, 20144 }; 20145 20146 struct uint32_vec dst; 20147 20148 20149dst.x = src0.x; 20150dst.y = src1.x; 20151dst.z = src2.x; 20152dst.w = src3.x; 20153 20154 20155 _dst_val[0].u32 = dst.x; 20156 _dst_val[1].u32 = dst.y; 20157 _dst_val[2].u32 = dst.z; 20158 _dst_val[3].u32 = dst.w; 20159 20160 break; 20161 } 20162 case 64: { 20163 20164 20165 20166 20167 const struct uint64_vec src0 = { 20168 _src[0][0].u64, 20169 0, 20170 0, 20171 0, 20172 }; 20173 20174 const struct uint64_vec src1 = { 20175 _src[1][0].u64, 20176 0, 20177 0, 20178 0, 20179 }; 20180 20181 const struct uint64_vec src2 = { 20182 _src[2][0].u64, 20183 0, 20184 0, 20185 0, 20186 }; 20187 20188 const struct uint64_vec src3 = { 20189 _src[3][0].u64, 20190 0, 20191 0, 20192 0, 20193 }; 20194 20195 struct uint64_vec dst; 20196 20197 20198dst.x = src0.x; 20199dst.y = src1.x; 20200dst.z = src2.x; 20201dst.w = src3.x; 20202 20203 20204 _dst_val[0].u64 = dst.x; 20205 _dst_val[1].u64 = dst.y; 20206 _dst_val[2].u64 
= dst.z; 20207 _dst_val[3].u64 = dst.w; 20208 20209 break; 20210 } 20211 20212 default: 20213 unreachable("unknown bit width"); 20214 } 20215} 20216 20217void 20218nir_eval_const_opcode(nir_op op, nir_const_value *dest, 20219 unsigned num_components, unsigned bit_width, 20220 nir_const_value **src) 20221{ 20222 switch (op) { 20223 case nir_op_b2f16: 20224 evaluate_b2f16(dest, num_components, bit_width, src); 20225 return; 20226 case nir_op_b2f32: 20227 evaluate_b2f32(dest, num_components, bit_width, src); 20228 return; 20229 case nir_op_b2f64: 20230 evaluate_b2f64(dest, num_components, bit_width, src); 20231 return; 20232 case nir_op_b2i1: 20233 evaluate_b2i1(dest, num_components, bit_width, src); 20234 return; 20235 case nir_op_b2i16: 20236 evaluate_b2i16(dest, num_components, bit_width, src); 20237 return; 20238 case nir_op_b2i32: 20239 evaluate_b2i32(dest, num_components, bit_width, src); 20240 return; 20241 case nir_op_b2i64: 20242 evaluate_b2i64(dest, num_components, bit_width, src); 20243 return; 20244 case nir_op_b2i8: 20245 evaluate_b2i8(dest, num_components, bit_width, src); 20246 return; 20247 case nir_op_b32all_fequal2: 20248 evaluate_b32all_fequal2(dest, num_components, bit_width, src); 20249 return; 20250 case nir_op_b32all_fequal3: 20251 evaluate_b32all_fequal3(dest, num_components, bit_width, src); 20252 return; 20253 case nir_op_b32all_fequal4: 20254 evaluate_b32all_fequal4(dest, num_components, bit_width, src); 20255 return; 20256 case nir_op_b32all_iequal2: 20257 evaluate_b32all_iequal2(dest, num_components, bit_width, src); 20258 return; 20259 case nir_op_b32all_iequal3: 20260 evaluate_b32all_iequal3(dest, num_components, bit_width, src); 20261 return; 20262 case nir_op_b32all_iequal4: 20263 evaluate_b32all_iequal4(dest, num_components, bit_width, src); 20264 return; 20265 case nir_op_b32any_fnequal2: 20266 evaluate_b32any_fnequal2(dest, num_components, bit_width, src); 20267 return; 20268 case nir_op_b32any_fnequal3: 20269 
evaluate_b32any_fnequal3(dest, num_components, bit_width, src); 20270 return; 20271 case nir_op_b32any_fnequal4: 20272 evaluate_b32any_fnequal4(dest, num_components, bit_width, src); 20273 return; 20274 case nir_op_b32any_inequal2: 20275 evaluate_b32any_inequal2(dest, num_components, bit_width, src); 20276 return; 20277 case nir_op_b32any_inequal3: 20278 evaluate_b32any_inequal3(dest, num_components, bit_width, src); 20279 return; 20280 case nir_op_b32any_inequal4: 20281 evaluate_b32any_inequal4(dest, num_components, bit_width, src); 20282 return; 20283 case nir_op_b32csel: 20284 evaluate_b32csel(dest, num_components, bit_width, src); 20285 return; 20286 case nir_op_ball_fequal2: 20287 evaluate_ball_fequal2(dest, num_components, bit_width, src); 20288 return; 20289 case nir_op_ball_fequal3: 20290 evaluate_ball_fequal3(dest, num_components, bit_width, src); 20291 return; 20292 case nir_op_ball_fequal4: 20293 evaluate_ball_fequal4(dest, num_components, bit_width, src); 20294 return; 20295 case nir_op_ball_iequal2: 20296 evaluate_ball_iequal2(dest, num_components, bit_width, src); 20297 return; 20298 case nir_op_ball_iequal3: 20299 evaluate_ball_iequal3(dest, num_components, bit_width, src); 20300 return; 20301 case nir_op_ball_iequal4: 20302 evaluate_ball_iequal4(dest, num_components, bit_width, src); 20303 return; 20304 case nir_op_bany_fnequal2: 20305 evaluate_bany_fnequal2(dest, num_components, bit_width, src); 20306 return; 20307 case nir_op_bany_fnequal3: 20308 evaluate_bany_fnequal3(dest, num_components, bit_width, src); 20309 return; 20310 case nir_op_bany_fnequal4: 20311 evaluate_bany_fnequal4(dest, num_components, bit_width, src); 20312 return; 20313 case nir_op_bany_inequal2: 20314 evaluate_bany_inequal2(dest, num_components, bit_width, src); 20315 return; 20316 case nir_op_bany_inequal3: 20317 evaluate_bany_inequal3(dest, num_components, bit_width, src); 20318 return; 20319 case nir_op_bany_inequal4: 20320 evaluate_bany_inequal4(dest, num_components, 
bit_width, src); 20321 return; 20322 case nir_op_bcsel: 20323 evaluate_bcsel(dest, num_components, bit_width, src); 20324 return; 20325 case nir_op_bfi: 20326 evaluate_bfi(dest, num_components, bit_width, src); 20327 return; 20328 case nir_op_bfm: 20329 evaluate_bfm(dest, num_components, bit_width, src); 20330 return; 20331 case nir_op_bit_count: 20332 evaluate_bit_count(dest, num_components, bit_width, src); 20333 return; 20334 case nir_op_bitfield_insert: 20335 evaluate_bitfield_insert(dest, num_components, bit_width, src); 20336 return; 20337 case nir_op_bitfield_reverse: 20338 evaluate_bitfield_reverse(dest, num_components, bit_width, src); 20339 return; 20340 case nir_op_cube_face_coord: 20341 evaluate_cube_face_coord(dest, num_components, bit_width, src); 20342 return; 20343 case nir_op_cube_face_index: 20344 evaluate_cube_face_index(dest, num_components, bit_width, src); 20345 return; 20346 case nir_op_extract_i16: 20347 evaluate_extract_i16(dest, num_components, bit_width, src); 20348 return; 20349 case nir_op_extract_i8: 20350 evaluate_extract_i8(dest, num_components, bit_width, src); 20351 return; 20352 case nir_op_extract_u16: 20353 evaluate_extract_u16(dest, num_components, bit_width, src); 20354 return; 20355 case nir_op_extract_u8: 20356 evaluate_extract_u8(dest, num_components, bit_width, src); 20357 return; 20358 case nir_op_f2b1: 20359 evaluate_f2b1(dest, num_components, bit_width, src); 20360 return; 20361 case nir_op_f2b32: 20362 evaluate_f2b32(dest, num_components, bit_width, src); 20363 return; 20364 case nir_op_f2f16: 20365 evaluate_f2f16(dest, num_components, bit_width, src); 20366 return; 20367 case nir_op_f2f16_rtne: 20368 evaluate_f2f16_rtne(dest, num_components, bit_width, src); 20369 return; 20370 case nir_op_f2f16_rtz: 20371 evaluate_f2f16_rtz(dest, num_components, bit_width, src); 20372 return; 20373 case nir_op_f2f32: 20374 evaluate_f2f32(dest, num_components, bit_width, src); 20375 return; 20376 case nir_op_f2f64: 20377 
evaluate_f2f64(dest, num_components, bit_width, src); 20378 return; 20379 case nir_op_f2i1: 20380 evaluate_f2i1(dest, num_components, bit_width, src); 20381 return; 20382 case nir_op_f2i16: 20383 evaluate_f2i16(dest, num_components, bit_width, src); 20384 return; 20385 case nir_op_f2i32: 20386 evaluate_f2i32(dest, num_components, bit_width, src); 20387 return; 20388 case nir_op_f2i64: 20389 evaluate_f2i64(dest, num_components, bit_width, src); 20390 return; 20391 case nir_op_f2i8: 20392 evaluate_f2i8(dest, num_components, bit_width, src); 20393 return; 20394 case nir_op_f2u1: 20395 evaluate_f2u1(dest, num_components, bit_width, src); 20396 return; 20397 case nir_op_f2u16: 20398 evaluate_f2u16(dest, num_components, bit_width, src); 20399 return; 20400 case nir_op_f2u32: 20401 evaluate_f2u32(dest, num_components, bit_width, src); 20402 return; 20403 case nir_op_f2u64: 20404 evaluate_f2u64(dest, num_components, bit_width, src); 20405 return; 20406 case nir_op_f2u8: 20407 evaluate_f2u8(dest, num_components, bit_width, src); 20408 return; 20409 case nir_op_fabs: 20410 evaluate_fabs(dest, num_components, bit_width, src); 20411 return; 20412 case nir_op_fadd: 20413 evaluate_fadd(dest, num_components, bit_width, src); 20414 return; 20415 case nir_op_fall_equal2: 20416 evaluate_fall_equal2(dest, num_components, bit_width, src); 20417 return; 20418 case nir_op_fall_equal3: 20419 evaluate_fall_equal3(dest, num_components, bit_width, src); 20420 return; 20421 case nir_op_fall_equal4: 20422 evaluate_fall_equal4(dest, num_components, bit_width, src); 20423 return; 20424 case nir_op_fand: 20425 evaluate_fand(dest, num_components, bit_width, src); 20426 return; 20427 case nir_op_fany_nequal2: 20428 evaluate_fany_nequal2(dest, num_components, bit_width, src); 20429 return; 20430 case nir_op_fany_nequal3: 20431 evaluate_fany_nequal3(dest, num_components, bit_width, src); 20432 return; 20433 case nir_op_fany_nequal4: 20434 evaluate_fany_nequal4(dest, num_components, bit_width, src); 
20435 return; 20436 case nir_op_fceil: 20437 evaluate_fceil(dest, num_components, bit_width, src); 20438 return; 20439 case nir_op_fcos: 20440 evaluate_fcos(dest, num_components, bit_width, src); 20441 return; 20442 case nir_op_fcsel: 20443 evaluate_fcsel(dest, num_components, bit_width, src); 20444 return; 20445 case nir_op_fddx: 20446 evaluate_fddx(dest, num_components, bit_width, src); 20447 return; 20448 case nir_op_fddx_coarse: 20449 evaluate_fddx_coarse(dest, num_components, bit_width, src); 20450 return; 20451 case nir_op_fddx_fine: 20452 evaluate_fddx_fine(dest, num_components, bit_width, src); 20453 return; 20454 case nir_op_fddy: 20455 evaluate_fddy(dest, num_components, bit_width, src); 20456 return; 20457 case nir_op_fddy_coarse: 20458 evaluate_fddy_coarse(dest, num_components, bit_width, src); 20459 return; 20460 case nir_op_fddy_fine: 20461 evaluate_fddy_fine(dest, num_components, bit_width, src); 20462 return; 20463 case nir_op_fdiv: 20464 evaluate_fdiv(dest, num_components, bit_width, src); 20465 return; 20466 case nir_op_fdot2: 20467 evaluate_fdot2(dest, num_components, bit_width, src); 20468 return; 20469 case nir_op_fdot3: 20470 evaluate_fdot3(dest, num_components, bit_width, src); 20471 return; 20472 case nir_op_fdot4: 20473 evaluate_fdot4(dest, num_components, bit_width, src); 20474 return; 20475 case nir_op_fdot_replicated2: 20476 evaluate_fdot_replicated2(dest, num_components, bit_width, src); 20477 return; 20478 case nir_op_fdot_replicated3: 20479 evaluate_fdot_replicated3(dest, num_components, bit_width, src); 20480 return; 20481 case nir_op_fdot_replicated4: 20482 evaluate_fdot_replicated4(dest, num_components, bit_width, src); 20483 return; 20484 case nir_op_fdph: 20485 evaluate_fdph(dest, num_components, bit_width, src); 20486 return; 20487 case nir_op_fdph_replicated: 20488 evaluate_fdph_replicated(dest, num_components, bit_width, src); 20489 return; 20490 case nir_op_feq: 20491 evaluate_feq(dest, num_components, bit_width, src); 20492 
return; 20493 case nir_op_feq32: 20494 evaluate_feq32(dest, num_components, bit_width, src); 20495 return; 20496 case nir_op_fexp2: 20497 evaluate_fexp2(dest, num_components, bit_width, src); 20498 return; 20499 case nir_op_ffloor: 20500 evaluate_ffloor(dest, num_components, bit_width, src); 20501 return; 20502 case nir_op_ffma: 20503 evaluate_ffma(dest, num_components, bit_width, src); 20504 return; 20505 case nir_op_ffract: 20506 evaluate_ffract(dest, num_components, bit_width, src); 20507 return; 20508 case nir_op_fge: 20509 evaluate_fge(dest, num_components, bit_width, src); 20510 return; 20511 case nir_op_fge32: 20512 evaluate_fge32(dest, num_components, bit_width, src); 20513 return; 20514 case nir_op_find_lsb: 20515 evaluate_find_lsb(dest, num_components, bit_width, src); 20516 return; 20517 case nir_op_flog2: 20518 evaluate_flog2(dest, num_components, bit_width, src); 20519 return; 20520 case nir_op_flrp: 20521 evaluate_flrp(dest, num_components, bit_width, src); 20522 return; 20523 case nir_op_flt: 20524 evaluate_flt(dest, num_components, bit_width, src); 20525 return; 20526 case nir_op_flt32: 20527 evaluate_flt32(dest, num_components, bit_width, src); 20528 return; 20529 case nir_op_fmax: 20530 evaluate_fmax(dest, num_components, bit_width, src); 20531 return; 20532 case nir_op_fmax3: 20533 evaluate_fmax3(dest, num_components, bit_width, src); 20534 return; 20535 case nir_op_fmed3: 20536 evaluate_fmed3(dest, num_components, bit_width, src); 20537 return; 20538 case nir_op_fmin: 20539 evaluate_fmin(dest, num_components, bit_width, src); 20540 return; 20541 case nir_op_fmin3: 20542 evaluate_fmin3(dest, num_components, bit_width, src); 20543 return; 20544 case nir_op_fmod: 20545 evaluate_fmod(dest, num_components, bit_width, src); 20546 return; 20547 case nir_op_fmov: 20548 evaluate_fmov(dest, num_components, bit_width, src); 20549 return; 20550 case nir_op_fmul: 20551 evaluate_fmul(dest, num_components, bit_width, src); 20552 return; 20553 case nir_op_fne: 
20554 evaluate_fne(dest, num_components, bit_width, src); 20555 return; 20556 case nir_op_fne32: 20557 evaluate_fne32(dest, num_components, bit_width, src); 20558 return; 20559 case nir_op_fneg: 20560 evaluate_fneg(dest, num_components, bit_width, src); 20561 return; 20562 case nir_op_fnoise1_1: 20563 evaluate_fnoise1_1(dest, num_components, bit_width, src); 20564 return; 20565 case nir_op_fnoise1_2: 20566 evaluate_fnoise1_2(dest, num_components, bit_width, src); 20567 return; 20568 case nir_op_fnoise1_3: 20569 evaluate_fnoise1_3(dest, num_components, bit_width, src); 20570 return; 20571 case nir_op_fnoise1_4: 20572 evaluate_fnoise1_4(dest, num_components, bit_width, src); 20573 return; 20574 case nir_op_fnoise2_1: 20575 evaluate_fnoise2_1(dest, num_components, bit_width, src); 20576 return; 20577 case nir_op_fnoise2_2: 20578 evaluate_fnoise2_2(dest, num_components, bit_width, src); 20579 return; 20580 case nir_op_fnoise2_3: 20581 evaluate_fnoise2_3(dest, num_components, bit_width, src); 20582 return; 20583 case nir_op_fnoise2_4: 20584 evaluate_fnoise2_4(dest, num_components, bit_width, src); 20585 return; 20586 case nir_op_fnoise3_1: 20587 evaluate_fnoise3_1(dest, num_components, bit_width, src); 20588 return; 20589 case nir_op_fnoise3_2: 20590 evaluate_fnoise3_2(dest, num_components, bit_width, src); 20591 return; 20592 case nir_op_fnoise3_3: 20593 evaluate_fnoise3_3(dest, num_components, bit_width, src); 20594 return; 20595 case nir_op_fnoise3_4: 20596 evaluate_fnoise3_4(dest, num_components, bit_width, src); 20597 return; 20598 case nir_op_fnoise4_1: 20599 evaluate_fnoise4_1(dest, num_components, bit_width, src); 20600 return; 20601 case nir_op_fnoise4_2: 20602 evaluate_fnoise4_2(dest, num_components, bit_width, src); 20603 return; 20604 case nir_op_fnoise4_3: 20605 evaluate_fnoise4_3(dest, num_components, bit_width, src); 20606 return; 20607 case nir_op_fnoise4_4: 20608 evaluate_fnoise4_4(dest, num_components, bit_width, src); 20609 return; 20610 case 
nir_op_fnot: 20611 evaluate_fnot(dest, num_components, bit_width, src); 20612 return; 20613 case nir_op_for: 20614 evaluate_for(dest, num_components, bit_width, src); 20615 return; 20616 case nir_op_fpow: 20617 evaluate_fpow(dest, num_components, bit_width, src); 20618 return; 20619 case nir_op_fquantize2f16: 20620 evaluate_fquantize2f16(dest, num_components, bit_width, src); 20621 return; 20622 case nir_op_frcp: 20623 evaluate_frcp(dest, num_components, bit_width, src); 20624 return; 20625 case nir_op_frem: 20626 evaluate_frem(dest, num_components, bit_width, src); 20627 return; 20628 case nir_op_frexp_exp: 20629 evaluate_frexp_exp(dest, num_components, bit_width, src); 20630 return; 20631 case nir_op_frexp_sig: 20632 evaluate_frexp_sig(dest, num_components, bit_width, src); 20633 return; 20634 case nir_op_fround_even: 20635 evaluate_fround_even(dest, num_components, bit_width, src); 20636 return; 20637 case nir_op_frsq: 20638 evaluate_frsq(dest, num_components, bit_width, src); 20639 return; 20640 case nir_op_fsat: 20641 evaluate_fsat(dest, num_components, bit_width, src); 20642 return; 20643 case nir_op_fsign: 20644 evaluate_fsign(dest, num_components, bit_width, src); 20645 return; 20646 case nir_op_fsin: 20647 evaluate_fsin(dest, num_components, bit_width, src); 20648 return; 20649 case nir_op_fsqrt: 20650 evaluate_fsqrt(dest, num_components, bit_width, src); 20651 return; 20652 case nir_op_fsub: 20653 evaluate_fsub(dest, num_components, bit_width, src); 20654 return; 20655 case nir_op_ftrunc: 20656 evaluate_ftrunc(dest, num_components, bit_width, src); 20657 return; 20658 case nir_op_fxor: 20659 evaluate_fxor(dest, num_components, bit_width, src); 20660 return; 20661 case nir_op_i2b1: 20662 evaluate_i2b1(dest, num_components, bit_width, src); 20663 return; 20664 case nir_op_i2b32: 20665 evaluate_i2b32(dest, num_components, bit_width, src); 20666 return; 20667 case nir_op_i2f16: 20668 evaluate_i2f16(dest, num_components, bit_width, src); 20669 return; 20670 
case nir_op_i2f32: 20671 evaluate_i2f32(dest, num_components, bit_width, src); 20672 return; 20673 case nir_op_i2f64: 20674 evaluate_i2f64(dest, num_components, bit_width, src); 20675 return; 20676 case nir_op_i2i1: 20677 evaluate_i2i1(dest, num_components, bit_width, src); 20678 return; 20679 case nir_op_i2i16: 20680 evaluate_i2i16(dest, num_components, bit_width, src); 20681 return; 20682 case nir_op_i2i32: 20683 evaluate_i2i32(dest, num_components, bit_width, src); 20684 return; 20685 case nir_op_i2i64: 20686 evaluate_i2i64(dest, num_components, bit_width, src); 20687 return; 20688 case nir_op_i2i8: 20689 evaluate_i2i8(dest, num_components, bit_width, src); 20690 return; 20691 case nir_op_iabs: 20692 evaluate_iabs(dest, num_components, bit_width, src); 20693 return; 20694 case nir_op_iadd: 20695 evaluate_iadd(dest, num_components, bit_width, src); 20696 return; 20697 case nir_op_iadd_sat: 20698 evaluate_iadd_sat(dest, num_components, bit_width, src); 20699 return; 20700 case nir_op_iand: 20701 evaluate_iand(dest, num_components, bit_width, src); 20702 return; 20703 case nir_op_ibfe: 20704 evaluate_ibfe(dest, num_components, bit_width, src); 20705 return; 20706 case nir_op_ibitfield_extract: 20707 evaluate_ibitfield_extract(dest, num_components, bit_width, src); 20708 return; 20709 case nir_op_idiv: 20710 evaluate_idiv(dest, num_components, bit_width, src); 20711 return; 20712 case nir_op_ieq: 20713 evaluate_ieq(dest, num_components, bit_width, src); 20714 return; 20715 case nir_op_ieq32: 20716 evaluate_ieq32(dest, num_components, bit_width, src); 20717 return; 20718 case nir_op_ifind_msb: 20719 evaluate_ifind_msb(dest, num_components, bit_width, src); 20720 return; 20721 case nir_op_ige: 20722 evaluate_ige(dest, num_components, bit_width, src); 20723 return; 20724 case nir_op_ige32: 20725 evaluate_ige32(dest, num_components, bit_width, src); 20726 return; 20727 case nir_op_ihadd: 20728 evaluate_ihadd(dest, num_components, bit_width, src); 20729 return; 20730 
case nir_op_ilt: 20731 evaluate_ilt(dest, num_components, bit_width, src); 20732 return; 20733 case nir_op_ilt32: 20734 evaluate_ilt32(dest, num_components, bit_width, src); 20735 return; 20736 case nir_op_imax: 20737 evaluate_imax(dest, num_components, bit_width, src); 20738 return; 20739 case nir_op_imax3: 20740 evaluate_imax3(dest, num_components, bit_width, src); 20741 return; 20742 case nir_op_imed3: 20743 evaluate_imed3(dest, num_components, bit_width, src); 20744 return; 20745 case nir_op_imin: 20746 evaluate_imin(dest, num_components, bit_width, src); 20747 return; 20748 case nir_op_imin3: 20749 evaluate_imin3(dest, num_components, bit_width, src); 20750 return; 20751 case nir_op_imod: 20752 evaluate_imod(dest, num_components, bit_width, src); 20753 return; 20754 case nir_op_imov: 20755 evaluate_imov(dest, num_components, bit_width, src); 20756 return; 20757 case nir_op_imul: 20758 evaluate_imul(dest, num_components, bit_width, src); 20759 return; 20760 case nir_op_imul_2x32_64: 20761 evaluate_imul_2x32_64(dest, num_components, bit_width, src); 20762 return; 20763 case nir_op_imul_high: 20764 evaluate_imul_high(dest, num_components, bit_width, src); 20765 return; 20766 case nir_op_ine: 20767 evaluate_ine(dest, num_components, bit_width, src); 20768 return; 20769 case nir_op_ine32: 20770 evaluate_ine32(dest, num_components, bit_width, src); 20771 return; 20772 case nir_op_ineg: 20773 evaluate_ineg(dest, num_components, bit_width, src); 20774 return; 20775 case nir_op_inot: 20776 evaluate_inot(dest, num_components, bit_width, src); 20777 return; 20778 case nir_op_ior: 20779 evaluate_ior(dest, num_components, bit_width, src); 20780 return; 20781 case nir_op_irem: 20782 evaluate_irem(dest, num_components, bit_width, src); 20783 return; 20784 case nir_op_irhadd: 20785 evaluate_irhadd(dest, num_components, bit_width, src); 20786 return; 20787 case nir_op_ishl: 20788 evaluate_ishl(dest, num_components, bit_width, src); 20789 return; 20790 case nir_op_ishr: 20791 
evaluate_ishr(dest, num_components, bit_width, src); 20792 return; 20793 case nir_op_isign: 20794 evaluate_isign(dest, num_components, bit_width, src); 20795 return; 20796 case nir_op_isub: 20797 evaluate_isub(dest, num_components, bit_width, src); 20798 return; 20799 case nir_op_isub_sat: 20800 evaluate_isub_sat(dest, num_components, bit_width, src); 20801 return; 20802 case nir_op_ixor: 20803 evaluate_ixor(dest, num_components, bit_width, src); 20804 return; 20805 case nir_op_ldexp: 20806 evaluate_ldexp(dest, num_components, bit_width, src); 20807 return; 20808 case nir_op_pack_32_2x16: 20809 evaluate_pack_32_2x16(dest, num_components, bit_width, src); 20810 return; 20811 case nir_op_pack_32_2x16_split: 20812 evaluate_pack_32_2x16_split(dest, num_components, bit_width, src); 20813 return; 20814 case nir_op_pack_64_2x32: 20815 evaluate_pack_64_2x32(dest, num_components, bit_width, src); 20816 return; 20817 case nir_op_pack_64_2x32_split: 20818 evaluate_pack_64_2x32_split(dest, num_components, bit_width, src); 20819 return; 20820 case nir_op_pack_64_4x16: 20821 evaluate_pack_64_4x16(dest, num_components, bit_width, src); 20822 return; 20823 case nir_op_pack_half_2x16: 20824 evaluate_pack_half_2x16(dest, num_components, bit_width, src); 20825 return; 20826 case nir_op_pack_half_2x16_split: 20827 evaluate_pack_half_2x16_split(dest, num_components, bit_width, src); 20828 return; 20829 case nir_op_pack_snorm_2x16: 20830 evaluate_pack_snorm_2x16(dest, num_components, bit_width, src); 20831 return; 20832 case nir_op_pack_snorm_4x8: 20833 evaluate_pack_snorm_4x8(dest, num_components, bit_width, src); 20834 return; 20835 case nir_op_pack_unorm_2x16: 20836 evaluate_pack_unorm_2x16(dest, num_components, bit_width, src); 20837 return; 20838 case nir_op_pack_unorm_4x8: 20839 evaluate_pack_unorm_4x8(dest, num_components, bit_width, src); 20840 return; 20841 case nir_op_pack_uvec2_to_uint: 20842 evaluate_pack_uvec2_to_uint(dest, num_components, bit_width, src); 20843 return; 
20844 case nir_op_pack_uvec4_to_uint: 20845 evaluate_pack_uvec4_to_uint(dest, num_components, bit_width, src); 20846 return; 20847 case nir_op_seq: 20848 evaluate_seq(dest, num_components, bit_width, src); 20849 return; 20850 case nir_op_sge: 20851 evaluate_sge(dest, num_components, bit_width, src); 20852 return; 20853 case nir_op_slt: 20854 evaluate_slt(dest, num_components, bit_width, src); 20855 return; 20856 case nir_op_sne: 20857 evaluate_sne(dest, num_components, bit_width, src); 20858 return; 20859 case nir_op_u2f16: 20860 evaluate_u2f16(dest, num_components, bit_width, src); 20861 return; 20862 case nir_op_u2f32: 20863 evaluate_u2f32(dest, num_components, bit_width, src); 20864 return; 20865 case nir_op_u2f64: 20866 evaluate_u2f64(dest, num_components, bit_width, src); 20867 return; 20868 case nir_op_u2u1: 20869 evaluate_u2u1(dest, num_components, bit_width, src); 20870 return; 20871 case nir_op_u2u16: 20872 evaluate_u2u16(dest, num_components, bit_width, src); 20873 return; 20874 case nir_op_u2u32: 20875 evaluate_u2u32(dest, num_components, bit_width, src); 20876 return; 20877 case nir_op_u2u64: 20878 evaluate_u2u64(dest, num_components, bit_width, src); 20879 return; 20880 case nir_op_u2u8: 20881 evaluate_u2u8(dest, num_components, bit_width, src); 20882 return; 20883 case nir_op_uadd_carry: 20884 evaluate_uadd_carry(dest, num_components, bit_width, src); 20885 return; 20886 case nir_op_uadd_sat: 20887 evaluate_uadd_sat(dest, num_components, bit_width, src); 20888 return; 20889 case nir_op_ubfe: 20890 evaluate_ubfe(dest, num_components, bit_width, src); 20891 return; 20892 case nir_op_ubitfield_extract: 20893 evaluate_ubitfield_extract(dest, num_components, bit_width, src); 20894 return; 20895 case nir_op_udiv: 20896 evaluate_udiv(dest, num_components, bit_width, src); 20897 return; 20898 case nir_op_ufind_msb: 20899 evaluate_ufind_msb(dest, num_components, bit_width, src); 20900 return; 20901 case nir_op_uge: 20902 evaluate_uge(dest, num_components, 
bit_width, src); 20903 return; 20904 case nir_op_uge32: 20905 evaluate_uge32(dest, num_components, bit_width, src); 20906 return; 20907 case nir_op_uhadd: 20908 evaluate_uhadd(dest, num_components, bit_width, src); 20909 return; 20910 case nir_op_ult: 20911 evaluate_ult(dest, num_components, bit_width, src); 20912 return; 20913 case nir_op_ult32: 20914 evaluate_ult32(dest, num_components, bit_width, src); 20915 return; 20916 case nir_op_umax: 20917 evaluate_umax(dest, num_components, bit_width, src); 20918 return; 20919 case nir_op_umax3: 20920 evaluate_umax3(dest, num_components, bit_width, src); 20921 return; 20922 case nir_op_umax_4x8: 20923 evaluate_umax_4x8(dest, num_components, bit_width, src); 20924 return; 20925 case nir_op_umed3: 20926 evaluate_umed3(dest, num_components, bit_width, src); 20927 return; 20928 case nir_op_umin: 20929 evaluate_umin(dest, num_components, bit_width, src); 20930 return; 20931 case nir_op_umin3: 20932 evaluate_umin3(dest, num_components, bit_width, src); 20933 return; 20934 case nir_op_umin_4x8: 20935 evaluate_umin_4x8(dest, num_components, bit_width, src); 20936 return; 20937 case nir_op_umod: 20938 evaluate_umod(dest, num_components, bit_width, src); 20939 return; 20940 case nir_op_umul_2x32_64: 20941 evaluate_umul_2x32_64(dest, num_components, bit_width, src); 20942 return; 20943 case nir_op_umul_high: 20944 evaluate_umul_high(dest, num_components, bit_width, src); 20945 return; 20946 case nir_op_umul_unorm_4x8: 20947 evaluate_umul_unorm_4x8(dest, num_components, bit_width, src); 20948 return; 20949 case nir_op_unpack_32_2x16: 20950 evaluate_unpack_32_2x16(dest, num_components, bit_width, src); 20951 return; 20952 case nir_op_unpack_32_2x16_split_x: 20953 evaluate_unpack_32_2x16_split_x(dest, num_components, bit_width, src); 20954 return; 20955 case nir_op_unpack_32_2x16_split_y: 20956 evaluate_unpack_32_2x16_split_y(dest, num_components, bit_width, src); 20957 return; 20958 case nir_op_unpack_64_2x32: 20959 
evaluate_unpack_64_2x32(dest, num_components, bit_width, src); 20960 return; 20961 case nir_op_unpack_64_2x32_split_x: 20962 evaluate_unpack_64_2x32_split_x(dest, num_components, bit_width, src); 20963 return; 20964 case nir_op_unpack_64_2x32_split_y: 20965 evaluate_unpack_64_2x32_split_y(dest, num_components, bit_width, src); 20966 return; 20967 case nir_op_unpack_64_4x16: 20968 evaluate_unpack_64_4x16(dest, num_components, bit_width, src); 20969 return; 20970 case nir_op_unpack_half_2x16: 20971 evaluate_unpack_half_2x16(dest, num_components, bit_width, src); 20972 return; 20973 case nir_op_unpack_half_2x16_split_x: 20974 evaluate_unpack_half_2x16_split_x(dest, num_components, bit_width, src); 20975 return; 20976 case nir_op_unpack_half_2x16_split_y: 20977 evaluate_unpack_half_2x16_split_y(dest, num_components, bit_width, src); 20978 return; 20979 case nir_op_unpack_snorm_2x16: 20980 evaluate_unpack_snorm_2x16(dest, num_components, bit_width, src); 20981 return; 20982 case nir_op_unpack_snorm_4x8: 20983 evaluate_unpack_snorm_4x8(dest, num_components, bit_width, src); 20984 return; 20985 case nir_op_unpack_unorm_2x16: 20986 evaluate_unpack_unorm_2x16(dest, num_components, bit_width, src); 20987 return; 20988 case nir_op_unpack_unorm_4x8: 20989 evaluate_unpack_unorm_4x8(dest, num_components, bit_width, src); 20990 return; 20991 case nir_op_urhadd: 20992 evaluate_urhadd(dest, num_components, bit_width, src); 20993 return; 20994 case nir_op_usadd_4x8: 20995 evaluate_usadd_4x8(dest, num_components, bit_width, src); 20996 return; 20997 case nir_op_ushr: 20998 evaluate_ushr(dest, num_components, bit_width, src); 20999 return; 21000 case nir_op_ussub_4x8: 21001 evaluate_ussub_4x8(dest, num_components, bit_width, src); 21002 return; 21003 case nir_op_usub_borrow: 21004 evaluate_usub_borrow(dest, num_components, bit_width, src); 21005 return; 21006 case nir_op_usub_sat: 21007 evaluate_usub_sat(dest, num_components, bit_width, src); 21008 return; 21009 case nir_op_vec2: 
21010 evaluate_vec2(dest, num_components, bit_width, src); 21011 return; 21012 case nir_op_vec3: 21013 evaluate_vec3(dest, num_components, bit_width, src); 21014 return; 21015 case nir_op_vec4: 21016 evaluate_vec4(dest, num_components, bit_width, src); 21017 return; 21018 default: 21019 unreachable("shouldn't get here"); 21020 } 21021} 21022