1/*- 2 * Copyright (c) 2025 The NetBSD Foundation, Inc. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to The NetBSD Foundation 6 * by Nia Alarie. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 18 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 19 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 20 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 21 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27 * POSSIBILITY OF SUCH DAMAGE. 28 */ 29 30/* 31 * This should be compatible with what was shipped with SunPro. 32 * 33 * VIS Instruction Set User's Manual 34 * Sun Microsystems 35 * Part Number: 805-1394-03 36 * May 2001 37 * 38 * Version of available VIS instructions can be detected through 39 * the `machdep.vis` sysctl. A value of "0" means that such 40 * instructions are unavailable. All SPARCv9 hardware should support 41 * at least VIS 1, while VIS 2 requires UltraSPARC-III or newer. 42 * 43 * GCC needs -mvis for VIS, and -mvis2 for VIS 2. However, its 44 * builtins are incomplete and some cause problematic typing issues 45 * with Sun's API, so they're mostly avoided. 46 */ 47 48#ifndef _VIS_PROTO_H 49#define _VIS_PROTO_H 50 51#ifdef __cplusplus 52extern "C" { 53#endif 54 55#include "vis_types.h" 56 57#define _VISATTR \ 58 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 59 60/* 4.6.1 Arithmetic - addition and subtraction */ 61 62_VISATTR 63static __inline vis_d64 64vis_fpadd16(vis_d64 r1, vis_d64 r2) 65{ 66 vis_d64 out; 67 68 __asm("fpadd16 %1,%2,%0" 69 : "=f"(out) 70 : "f"(r1), "f"(r2)); 71 return out; 72} 73 74_VISATTR 75static __inline vis_d64 76vis_fpsub16(vis_d64 r1, vis_d64 r2) 77{ 78 vis_d64 out; 79 80 __asm("fpsub16 %1,%2,%0" 81 : "=f"(out) 82 : "f"(r1), "f"(r2)); 83 return out; 84} 85 86_VISATTR 87static __inline vis_d64 88vis_fpadd32(vis_d64 r1, vis_d64 r2) 89{ 90 vis_d64 out; 91 92 __asm("fpadd32 %1,%2,%0" 93 : "=f"(out) 94 : "f"(r1), "f"(r2)); 95 return out; 96} 97 98_VISATTR 99static __inline vis_d64 100vis_fpsub32(vis_d64 r1, vis_d64 r2) 101{ 102 vis_d64 out; 103 104 __asm("fpsub32 %1,%2,%0" 105 : "=f"(out) 106 : "f"(r1), "f"(r2)); 107 return out; 108} 109 110_VISATTR 111static __inline vis_f32 112vis_fpadd16s(vis_f32 r1, vis_f32 r2) 113{ 114 vis_f32 out; 115 116 __asm("fpadd16s %1,%2,%0" 117 : "=f"(out) 118 : "f"(r1), "f"(r2)); 119 return out; 120} 121 122_VISATTR 123static __inline vis_f32 124vis_fpsub16s(vis_f32 r1, vis_f32 r2) 125{ 126 vis_f32 out; 127 128 __asm("fpsub16s %1,%2,%0" 129 : "=f"(out) 130 : "f"(r1), "f"(r2)); 131 return out; 132} 133 134_VISATTR 135static __inline vis_f32 136vis_fpadd32s(vis_f32 r1, vis_f32 r2) 137{ 138 vis_f32 out; 139 140 __asm("fpadd32s %1,%2,%0" 141 : "=f"(out) 142 : "f"(r1), "f"(r2)); 143 return out; 144} 145 146_VISATTR 147static __inline vis_f32 148vis_fpsub32s(vis_f32 r1, vis_f32 r2) 149{ 150 vis_f32 out; 151 152 __asm("fpsub32s %1,%2,%0" 153 : "=f"(out) 154 : "f"(r1), "f"(r2)); 155 return out; 156} 157 158/* 4.7 Pixel formatting - packing */ 159 160_VISATTR 161static __inline vis_f32 162vis_fpack16(vis_d64 r1) 163{ 164 vis_f32 out; 165 166 __asm("fpack16 %1,%0" 167 : "=f"(out) 168 : "f"(r1)); 169 return out; 170} 171 172_VISATTR 173static __inline vis_d64 174vis_fpack32(vis_d64 r1, vis_d64 r2) 175{ 176 vis_d64 out; 177 178 __asm("fpack32 %1,%2,%0" 179 : "=f"(out) 180 : "f"(r1), "f"(r2)); 181 return out; 182} 183 184_VISATTR 185static __inline vis_f32 186vis_fpackfix(vis_d64 r1) 187{ 188 vis_f32 out; 189 190 __asm("fpackfix %1,%0" 191 : "=f"(out) 192 : "f"(r1)); 193 return out; 194} 195 196_VISATTR 197static __inline vis_d64 198vis_fexpand(vis_f32 r1) 199{ 200 vis_d64 out; 201 202 __asm("fexpand %1,%0" 203 : "=f"(out) 204 : "f"(r1)); 205 return out; 206} 207 208_VISATTR 209static __inline vis_d64 210vis_fpmerge(vis_f32 r1, vis_f32 r2) 211{ 212 vis_d64 out; 213 214 __asm("fpmerge %1,%2,%0" 215 : "=f"(out) 216 : "f"(r1), "f"(r2)); 217 return out; 218} 219 220/* 4.7.6 Aligned address calculation */ 221 222_VISATTR 223static __inline void * 224vis_alignaddr(void *addr, int offset) 225{ 226#if defined(__VIS__) && defined(__GNUC__) 227 return __builtin_vis_alignaddr(addr, offset); 228#else 229 void *out; 230 231 __asm("alginaddr %1,%2,%0" 232 : "=r"(out) 233 : "r"(addr), "r"(offset)); 234 return out; 235#endif 236} 237 238_VISATTR 239static __inline vis_d64 240vis_faligndata(vis_d64 hi, vis_d64 lo) 241{ 242 vis_d64 out; 243 244 __asm("faligndata %1,%2,%0" 245 : "=f"(out) 246 : "f"(hi), "f"(lo)); 247 return out; 248} 249 250/* 4.7.7 Edge handling */ 251 252_VISATTR 253static __inline vis_s32 254vis_edge8(void *a1, void *a2) 255{ 256#if defined(__VIS__) && defined(__GNUC__) 257 return __builtin_vis_edge8(a1, a2); 258#else 259 vis_s32 out; 260 261 __asm("edge8 %1,%2,%0" 262 : "=r"(out) 263 : "r"(a1), "f"(a2)); 264 return out; 265#endif 266} 267 268_VISATTR 269static __inline vis_s32 270vis_edge16(void *a1, void *a2) 271{ 272#if defined(__VIS__) && defined(__GNUC__) 273 return __builtin_vis_edge16(a1, a2); 274#else 275 vis_s32 out; 276 277 __asm("edge16 %1,%2,%0" 278 : "=r"(out) 279 : "r"(a1), "f"(a2)); 280 return out; 281#endif 282} 283 284_VISATTR 285static __inline vis_s32 286vis_edge32(void *a1, void *a2) 287{ 288#if defined(__VIS__) && defined(__GNUC__) 289 return __builtin_vis_edge32(a1, a2); 290#else 291 vis_s32 out; 292 293 __asm("edge32 %1,%2,%0" 294 : "=r"(out) 295 : "r"(a1), "f"(a2)); 296 return out; 297#endif 298} 299 300_VISATTR 301static __inline vis_s32 302vis_edge8l(void *a1, void *a2) 303{ 304#if defined(__VIS__) && defined(__GNUC__) 305 return __builtin_vis_edge8l(a1, a2); 306#else 307 vis_s32 out; 308 309 __asm("edge8l %1,%2,%0" 310 : "=r"(out) 311 : "r"(a1), "f"(a2)); 312 return out; 313#endif 314} 315 316_VISATTR 317static __inline vis_s32 318vis_edge16l(void *a1, void *a2) 319{ 320#if defined(__VIS__) && defined(__GNUC__) 321 return __builtin_vis_edge16l(a1, a2); 322#else 323 vis_s32 out; 324 325 __asm("edge16l %1,%2,%0" 326 : "=r"(out) 327 : "r"(a1), "f"(a2)); 328 return out; 329#endif 330} 331 332_VISATTR 333static __inline vis_s32 334vis_edge32l(void *a1, void *a2) 335{ 336#if defined(__VIS__) && defined(__GNUC__) 337 return __builtin_vis_edge32l(a1, a2); 338#else 339 vis_s32 out; 340 341 __asm("edge32l %1,%2,%0" 342 : "=r"(out) 343 : "r"(a1), "f"(a2)); 344 return out; 345#endif 346} 347 348/* 4.9 Array coordinate translation */ 349 350_VISATTR 351static __inline vis_addr 352_VISATTR 353vis_array8(vis_u64 d1, vis_s32 d2) 354{ 355#if defined(__VIS__) && defined(__GNUC__) 356 return __builtin_vis_array8(d1, d2); 357#else 358 vis_addr out; 359 360 __asm("array8 %1,%2,%0" 361 : "=r"(out) 362 : "r"(d1), "f"(d2)); 363 return out; 364#endif 365} 366 367_VISATTR 368static __inline vis_addr 369vis_array16(vis_u64 d1, vis_s32 d2) 370{ 371#if defined(__VIS__) && defined(__GNUC__) 372 return __builtin_vis_array16(d1, d2); 373#else 374 vis_addr out; 375 376 __asm("array16 %1,%2,%0" 377 : "=r"(out) 378 : "r"(d1), "f"(d2)); 379 return out; 380#endif 381} 382 383_VISATTR 384static __inline vis_addr 385vis_array32(vis_u64 d1, vis_s32 d2) 386{ 387#if defined(__VIS__) && defined(__GNUC__) 388 return __builtin_vis_array32(d1, d2); 389#else 390 vis_addr out; 391 392 __asm("array32 %1,%2,%0" 393 : "=r"(out) 394 : "r"(d1), "r"(d2)); 395 return out; 396#endif 397} 398 399/* 4.3.1 Graphics Status Register manipulation */ 400 401_VISATTR 402static __inline vis_u64 403vis_read_gsr64(void) 404{ 405#if defined(__VIS__) && defined(__GNUC__) 406 return __builtin_vis_read_gsr(); 407#else 408 vis_u64 out; 409 410 __asm("rd %%gsr,%0" 411 : "=r"(out)); 412 return out; 413#endif 414} 415 416_VISATTR 417static __inline void 418vis_write_gsr64(vis_u64 gsr) 419{ 420#if defined(__VIS__) && defined(__GNUC__) 421 __builtin_vis_write_gsr(gsr); 422#else 423 __asm("mov %0,%%gsr" 424 : 425 : "r"(gsr)); 426#endif 427} 428 429_VISATTR 430static __inline vis_u32 431vis_read_gsr32(void) 432{ 433 return vis_read_gsr64(); 434} 435 436_VISATTR 437static __inline void 438vis_write_gsr32(vis_u32 gsr) 439{ 440 vis_write_gsr64(gsr); 441} 442 443/* 4.3.2 Read and write to upper/lower components */ 444 445_VISATTR 446static __inline vis_f32 447vis_read_hi(vis_d64 var) 448{ 449 vis_u64 reg = *((vis_u64 *)&var); 450 vis_u32 hi = (reg >> 32) & 0xffffffff; 451 vis_f32 out = *((vis_f32 *)&hi); 452 return out; 453} 454 455_VISATTR 456static __inline vis_f32 457vis_read_lo(vis_d64 var) 458{ 459 vis_u64 reg = *((vis_u64 *)&var); 460 vis_u32 lo = reg & 0xffffffff; 461 vis_f32 out = *((vis_f32 *)&lo); 462 return out; 463} 464 465_VISATTR 466static __inline vis_d64 467vis_write_lo(vis_d64 in, vis_f32 lower) 468{ 469 vis_u64 out = *((vis_u64 *)&in); 470 vis_u32 hi = (out >> 32) & 0xffffffff; 471 vis_u32 lo = *((vis_u32 *)&lower); 472 473 out = ((vis_u64)hi << 32ULL) | lo; 474 return *((vis_d64 *)&out); 475} 476 477_VISATTR 478static __inline vis_d64 479vis_write_hi(vis_d64 in, vis_f32 upper) 480{ 481 vis_u64 out = *((vis_u64 *)&in); 482 vis_u32 hi = *((vis_u32 *)&upper); 483 vis_u32 lo = out & 0xffffffff; 484 485 out = ((vis_u64)hi << 32ULL) | lo; 486 return *((vis_d64 *)&out); 487} 488 489/* 4.3.3 Join two variables into a single */ 490 491_VISATTR 492static __inline vis_d64 493vis_freg_pair(vis_f32 f1, vis_f32 f2) 494{ 495 vis_u64 out; 496 vis_u32 r1 = *((vis_u32 *)&f1); 497 vis_u32 r2 = *((vis_u32 *)&f2); 498 499 out = ((vis_u64)r1 << 32ULL) | r2; 500 return *((vis_d64 *)&out); 501} 502 503/* 4.3.4 Place ints into FP register */ 504 505_VISATTR 506static __inline vis_f32 507vis_to_float(vis_u32 data) 508{ 509 return *((vis_f32 *)&data); 510} 511 512_VISATTR 513static __inline vis_d64 514vis_to_double(vis_u32 d1, vis_u32 d2) 515{ 516 vis_u64 out; 517 518 out = ((vis_u64)d1 << 32ULL) | d2; 519 return *((vis_d64 *)&out); 520} 521 522_VISATTR 523static __inline vis_d64 524vis_to_double_dup(vis_u32 data) 525{ 526 return vis_to_double(data, data); 527} 528 529_VISATTR 530static __inline vis_d64 531vis_ll_to_double(vis_u64 data) 532{ 533 return *((vis_d64 *)&data); 534} 535 536/* 4.6.2 Arithmetic - multiplication */ 537 538_VISATTR 539static __inline vis_d64 540vis_fmul8x16(vis_f32 pixels, vis_d64 scale) 541{ 542 vis_d64 out; 543 544 __asm("fmul8x16 %1,%2,%0" 545 : "=f"(out) 546 : "f"(pixels), "f"(scale)); 547 return out; 548} 549 550_VISATTR 551static __inline vis_d64 552vis_fmul8x16au(vis_f32 pixels, vis_f32 scale) 553{ 554 vis_d64 out; 555 556 __asm("fmul8x16au %1,%2,%0" 557 : "=f"(out) 558 : "f"(pixels), "f"(scale)); 559 return out; 560} 561 562_VISATTR 563static __inline vis_d64 564vis_fmul8x16al(vis_f32 pixels, vis_f32 scale) 565{ 566 vis_d64 out; 567 568 __asm("fmul8x16al %1,%2,%0" 569 : "=f"(out) 570 : "f"(pixels), "f"(scale)); 571 return out; 572} 573 574_VISATTR 575static __inline vis_d64 576vis_fmul8sux16(vis_d64 d1, vis_d64 d2) 577{ 578 vis_d64 out; 579 580 __asm("fmul8sux16 %1,%2,%0" 581 : "=f"(out) 582 : "f"(d1), "f"(d2)); 583 return out; 584} 585 586_VISATTR 587static __inline vis_d64 588vis_fmul8ulx16(vis_d64 d1, vis_d64 d2) 589{ 590 vis_d64 out; 591 592 __asm("fmul8ulx16 %1,%2,%0" 593 : "=f"(out) 594 : "f"(d1), "f"(d2)); 595 return out; 596} 597 598_VISATTR 599static __inline vis_d64 600vis_fmuld8sux16(vis_f32 d1, vis_f32 d2) 601{ 602 vis_d64 out; 603 604 __asm("fmuld8sux16 %1,%2,%0" 605 : "=f"(out) 606 : "f"(d1), "f"(d2)); 607 return out; 608} 609 610_VISATTR 611static __inline vis_d64 612vis_fmuld8ulx16(vis_f32 d1, vis_f32 d2) 613{ 614 vis_d64 out; 615 616 __asm("fmuld8ulx16 %1,%2,%0" 617 : "=f"(out) 618 : "f"(d1), "f"(d2)); 619 return out; 620} 621 622/* 4.5 Pixel compare */ 623 624_VISATTR 625static __inline int 626vis_fcmpgt16(vis_d64 d1, vis_d64 d2) 627{ 628 int out; 629 630 __asm("fcmpgt16 %1,%2,%0" 631 : "=r"(out) 632 : "f"(d1), "f"(d2)); 633 return out; 634} 635 636_VISATTR 637static __inline int 638vis_fcmple16(vis_d64 d1, vis_d64 d2) 639{ 640 int out; 641 642 __asm("fcmple16 %1,%2,%0" 643 : "=r"(out) 644 : "f"(d1), "f"(d2)); 645 return out; 646} 647 648_VISATTR 649static __inline int 650vis_fcmpeq16(vis_d64 d1, vis_d64 d2) 651{ 652 int out; 653 654 __asm("fcmpeq16 %1,%2,%0" 655 : "=r"(out) 656 : "f"(d1), "f"(d2)); 657 return out; 658} 659 660_VISATTR 661static __inline int 662vis_fcmpne16(vis_d64 d1, vis_d64 d2) 663{ 664 int out; 665 666 __asm("fcmpne16 %1,%2,%0" 667 : "=r"(out) 668 : "f"(d1), "f"(d2)); 669 return out; 670} 671 672_VISATTR 673static __inline int 674vis_fcmpgt32(vis_d64 d1, vis_d64 d2) 675{ 676 int out; 677 678 __asm("fcmpgt32 %1,%2,%0" 679 : "=r"(out) 680 : "f"(d1), "f"(d2)); 681 return out; 682} 683 684_VISATTR 685static __inline int 686vis_fcmple32(vis_d64 d1, vis_d64 d2) 687{ 688 int out; 689 690 __asm("fcmple32 %1,%2,%0" 691 : "=r"(out) 692 : "f"(d1), "f"(d2)); 693 return out; 694} 695 696_VISATTR 697static __inline int 698vis_fcmpeq32(vis_d64 d1, vis_d64 d2) 699{ 700 int out; 701 702 __asm("fcmpeq32 %1,%2,%0" 703 : "=r"(out) 704 : "f"(d1), "f"(d2)); 705 return out; 706} 707 708_VISATTR 709static __inline int 710vis_fcmpne32(vis_d64 d1, vis_d64 d2) 711{ 712 int out; 713 714 __asm("fcmpne32 %1,%2,%0" 715 : "=r"(out) 716 : "f"(d1), "f"(d2)); 717 return out; 718} 719 720_VISATTR 721static __inline int 722vis_fcmplt16(vis_d64 d1, vis_d64 d2) 723{ 724 return vis_fcmpgt16(d2, d1); 725} 726 727_VISATTR 728static __inline int 729vis_fcmpge16(vis_d64 d1, vis_d64 d2) 730{ 731 return vis_fcmple16(d2, d1); 732} 733 734_VISATTR 735static __inline int 736vis_fcmplt32(vis_d64 d1, vis_d64 d2) 737{ 738 return vis_fcmpgt32(d2, d1); 739} 740 741_VISATTR 742static __inline int 743vis_fcmpge32(vis_d64 d1, vis_d64 d2) 744{ 745 return vis_fcmple32(d2, d1); 746} 747 748/* 4.10 Pixel distance */ 749 750_VISATTR 751static __inline vis_d64 752vis_pdist(vis_d64 pixels1, vis_d64 pixels2, vis_d64 acc) 753{ 754 __asm("pdist %1,%2,%0" 755 : "+f"(acc) 756 : "f"(pixels1), "f"(pixels2)); 757 758 return acc; 759} 760 761/* 4.4.1 Logical instructions - fill variables */ 762 763_VISATTR 764static __inline vis_d64 765vis_fzero(void) 766{ 767 vis_d64 out; 768 769 __asm("fzero %0" 770 : "=f"(out)); 771 return out; 772} 773 774_VISATTR 775static __inline vis_d64 776vis_fone(void) 777{ 778 vis_d64 out; 779 780 __asm("fone %0" 781 : "=f"(out)); 782 return out; 783} 784 785_VISATTR 786static __inline vis_f32 787vis_fzeros(void) 788{ 789 vis_f32 out; 790 791 __asm("fzeros %0" 792 : "=f"(out)); 793 return out; 794} 795 796_VISATTR 797static __inline vis_f32 798vis_fones(void) 799{ 800 vis_f32 out; 801 802 __asm("fones %0" 803 : "=f"(out)); 804 return out; 805} 806 807/* 4.4.2 Logical instructions - copies and complements */ 808 809_VISATTR 810static __inline vis_d64 811vis_fsrc(vis_d64 r1) 812{ 813 vis_d64 out; 814 815 __asm("fsrc1 %1,%0" 816 : "=f"(out) 817 : "f"(r1)); 818 return out; 819} 820 821_VISATTR 822static __inline vis_d64 823vis_fnot(vis_d64 r1) 824{ 825 vis_d64 out; 826 827 __asm("fnot1 %1,%0" 828 : "=f"(out) 829 : "f"(r1)); 830 return out; 831} 832 833_VISATTR 834static __inline vis_f32 835vis_fsrcs(vis_f32 r1) 836{ 837 vis_f32 out; 838 839 __asm("fsrc1s %1,%0" 840 : "=f"(out) 841 : "f"(r1)); 842 return out; 843} 844 845_VISATTR 846static __inline vis_f32 847vis_fnots(vis_f32 r1) 848{ 849 vis_f32 out; 850 851 __asm("fnot1s %1,%0" 852 : "=f"(out) 853 : "f"(r1)); 854 return out; 855} 856 857/* 4.3 Logical instructions - bitwise */ 858 859_VISATTR 860static __inline vis_d64 861vis_for(vis_d64 r1, vis_d64 r2) 862{ 863 vis_d64 out; 864 __asm("for %1,%2,%0" 865 : "=f"(out) 866 : "f"(r1), "f"(r2)); 867 return out; 868} 869 870_VISATTR 871static __inline vis_d64 872vis_fand(vis_d64 r1, vis_d64 r2) 873{ 874 vis_d64 out; 875 __asm("fand %1,%2,%0" 876 : "=f"(out) 877 : "f"(r1), "f"(r2)); 878 return out; 879} 880 881_VISATTR 882static __inline vis_d64 883vis_fxor(vis_d64 r1, vis_d64 r2) 884{ 885 vis_d64 out; 886 __asm("fxor %1,%2,%0" 887 : "=f"(out) 888 : "f"(r1), "f"(r2)); 889 return out; 890} 891 892_VISATTR 893static __inline vis_d64 894vis_fnor(vis_d64 r1, vis_d64 r2) 895{ 896 vis_d64 out; 897 __asm("fnor %1,%2,%0" 898 : "=f"(out) 899 : "f"(r1), "f"(r2)); 900 return out; 901} 902 903_VISATTR 904static __inline vis_d64 905vis_fnand(vis_d64 r1, vis_d64 r2) 906{ 907 vis_d64 out; 908 __asm("fnand %1,%2,%0" 909 : "=f"(out) 910 : "f"(r1), "f"(r2)); 911 return out; 912} 913 914_VISATTR 915static __inline vis_d64 916vis_fxnor(vis_d64 r1, vis_d64 r2) 917{ 918 vis_d64 out; 919 __asm("fxnor %1,%2,%0" 920 : "=f"(out) 921 : "f"(r1), "f"(r2)); 922 return out; 923} 924 925_VISATTR 926static __inline vis_d64 927vis_fornot(vis_d64 r1, vis_d64 r2) 928{ 929 vis_d64 out; 930 __asm("fornot1 %1,%2,%0" 931 : "=f"(out) 932 : "f"(r1), "f"(r2)); 933 return out; 934} 935 936_VISATTR 937static __inline vis_d64 938vis_fandnot(vis_d64 r1, vis_d64 r2) 939{ 940 vis_d64 out; 941 __asm("fandnot1 %1,%2,%0" 942 : "=f"(out) 943 : "f"(r1), "f"(r2)); 944 return out; 945} 946 947_VISATTR 948static __inline vis_f32 949vis_fors(vis_f32 r1, vis_f32 r2) 950{ 951 vis_f32 out; 952 __asm("fors %1,%2,%0" 953 : "=f"(out) 954 : "f"(r1), "f"(r2)); 955 return out; 956} 957 958_VISATTR 959static __inline vis_f32 960vis_fands(vis_f32 r1, vis_f32 r2) 961{ 962 vis_f32 out; 963 __asm("fands %1,%2,%0" 964 : "=f"(out) 965 : "f"(r1), "f"(r2)); 966 return out; 967} 968 969_VISATTR 970static __inline vis_f32 971vis_fxors(vis_f32 r1, vis_f32 r2) 972{ 973 vis_f32 out; 974 __asm("fxors %1,%2,%0" 975 : "=f"(out) 976 : "f"(r1), "f"(r2)); 977 return out; 978} 979 980_VISATTR 981static __inline vis_f32 982vis_fnors(vis_f32 r1, vis_f32 r2) 983{ 984 vis_f32 out; 985 __asm("fnors %1,%2,%0" 986 : "=f"(out) 987 : "f"(r1), "f"(r2)); 988 return out; 989} 990 991_VISATTR 992static __inline vis_f32 993vis_fnands(vis_f32 r1, vis_f32 r2) 994{ 995 vis_f32 out; 996 __asm("fnands %1,%2,%0" 997 : "=f"(out) 998 : "f"(r1), "f"(r2)); 999 return out; 1000} 1001 1002_VISATTR 1003static __inline vis_f32 1004vis_fxnors(vis_f32 r1, vis_f32 r2) 1005{ 1006 vis_f32 out; 1007 __asm("fxnors %1,%2,%0" 1008 : "=f"(out) 1009 : "f"(r1), "f"(r2)); 1010 return out; 1011} 1012 1013_VISATTR 1014static __inline vis_f32 1015vis_fornots(vis_f32 r1, vis_f32 r2) 1016{ 1017 vis_f32 out; 1018 __asm("fornot1s %1,%2,%0" 1019 : "=f"(out) 1020 : "f"(r1), "f"(r2)); 1021 return out; 1022} 1023 1024_VISATTR 1025static __inline vis_f32 1026vis_fandnots(vis_f32 r1, vis_f32 r2) 1027{ 1028 vis_f32 out; 1029 __asm("fandnot1s %1,%2,%0" 1030 : "=f"(out) 1031 : "f"(r1), "f"(r2)); 1032 return out; 1033} 1034 1035/* 4.8.1 Partial Stores */ 1036 1037_VISATTR 1038static __inline void 1039vis_pst_8(vis_d64 data, void *addr, vis_u8 mask) 1040{ 1041 __asm("stda %1,[%0]%2,0xc0" 1042 : "=r"(addr) 1043 : "f"(data), "r"(mask)); 1044} 1045 1046_VISATTR 1047static __inline void 1048vis_pst_16(vis_d64 data, void *addr, vis_u8 mask) 1049{ 1050 __asm("stda %1,[%0]%2,0xc2" 1051 : "=r"(addr) 1052 : "f"(data), "r"(mask)); 1053} 1054 1055_VISATTR 1056static __inline void 1057vis_pst_32(vis_d64 data, void *addr, vis_u8 mask) 1058{ 1059 __asm("stda %1,[%0]%2,0xc4" 1060 : "=r"(addr) 1061 : "f"(data), "r"(mask)); 1062} 1063 1064/* 4.8.2 Byte/Short Loads and Stores */ 1065 1066_VISATTR 1067static __inline void 1068vis_st_u8(vis_u64 data, void *addr) 1069{ 1070 __asm("stda %1,[%0]0xd0" 1071 : "=r"(addr) 1072 : "f"(data)); 1073} 1074 1075_VISATTR 1076static __inline void 1077vis_st_u8_le(vis_d64 data, void *addr) 1078{ 1079 __asm("stda %1,[%0]0xd8" 1080 : "=r"(addr) 1081 : "f"(data)); 1082} 1083 1084_VISATTR 1085static __inline void 1086vis_st_u16(vis_d64 data, void *addr) 1087{ 1088 __asm("stda %1,[%0]0xd2" 1089 : "=r"(addr) 1090 : "f"(data)); 1091} 1092 1093_VISATTR 1094static __inline void 1095vis_st_u16_le(vis_d64 data, void *addr) 1096{ 1097 __asm("stda %1,[%0]0xda" 1098 : "=r"(addr) 1099 : "f"(data)); 1100} 1101 1102_VISATTR 1103static __inline void 1104vis_st_u8_i(vis_d64 data, void *addr, long idx) 1105{ 1106 vis_u8 *ptr = addr; 1107 vis_st_u8(data, ptr + idx); 1108} 1109 1110_VISATTR 1111static __inline void 1112vis_st_u16_i(vis_d64 data, void *addr, long idx) 1113{ 1114 vis_u8 *ptr = addr; 1115 vis_st_u16(data, ptr + idx); 1116} 1117 1118_VISATTR 1119static __inline vis_d64 1120vis_ld_u8(void *addr) 1121{ 1122 vis_u8 val; 1123 vis_d64 out; 1124 1125 val = *((vis_u8 *)addr); 1126 *((vis_u8 *)&out) = val; 1127 1128 return out; 1129} 1130 1131_VISATTR 1132static __inline vis_d64 1133vis_ld_u16(void *addr) 1134{ 1135 vis_u16 val; 1136 vis_d64 out; 1137 1138 val = *((vis_u16 *)addr); 1139 *((vis_u16 *)&out) = val; 1140 1141 return out; 1142} 1143 1144_VISATTR 1145static __inline vis_d64 1146vis_ld_u8_i(void *addr, long idx) 1147{ 1148 vis_u8 *ptr = addr; 1149 return vis_ld_u8(ptr + idx); 1150} 1151 1152_VISATTR 1153static __inline vis_d64 1154vis_ld_u16_i(void *addr, long idx) 1155{ 1156 vis_u8 *ptr = addr; 1157 return vis_ld_u16(ptr + idx); 1158} 1159 1160/* 1161 * VIS 2.0 instructions 1162 */ 1163 1164_VISATTR 1165static __inline vis_u32 1166vis_read_bmask(void) 1167{ 1168 vis_u32 out; 1169 1170 __asm("rd %%gsr,%0" 1171 "srlx %0,32,%0" 1172 : "+f"(out)); 1173 return out; 1174} 1175 1176_VISATTR 1177static __inline void 1178vis_write_bmask(vis_u32 mask1, vis_u32 mask2) 1179{ 1180#if defined(__VIS__) && __VIS__ >= 0x200 && defined(__GNUC__) 1181 (void)__builtin_vis_bmask(mask1, mask2); 1182#else 1183 vis_u32 out; 1184 1185 __asm("bmask %1,%2,%0" 1186 : "=r"(out) 1187 : "r"(mask1), "r"(mask2)); 1188 1189 (void)out; 1190#endif 1191} 1192 1193_VISATTR 1194static __inline vis_d64 1195vis_bshuffle(vis_d64 pixels1, vis_d64 pixels2) 1196{ 1197 vis_d64 out; 1198 1199 __asm("bshuffle %1,%2,%0" 1200 : "=f"(out) 1201 : "f"(pixels1), "f"(pixels2)); 1202 return out; 1203} 1204 1205_VISATTR 1206static __inline vis_s32 1207vis_edge8n(void *a1, void *a2) 1208{ 1209#if defined(__VIS__) && __VIS__ >= 0x200 && defined(__GNUC__) 1210 return __builtin_vis_edge8n(a1, a2); 1211#else 1212 vis_s32 out; 1213 1214 __asm("edge8n %1,%2,%0" 1215 : "=r"(out) 1216 : "r"(a1), "r"(a2)); 1217 return out; 1218#endif 1219} 1220 1221_VISATTR 1222static __inline vis_s32 1223vis_edge16n(void *a1, void *a2) 1224{ 1225#if defined(__VIS__) && __VIS__ >= 0x200 && defined(__GNUC__) 1226 return __builtin_vis_edge16n(a1, a2); 1227#else 1228 vis_s32 out; 1229 1230 __asm("edge16n %1,%2,%0" 1231 : "=r"(out) 1232 : "r"(a1), "r"(a2)); 1233 return out; 1234#endif 1235} 1236 1237_VISATTR 1238static __inline vis_s32 1239vis_edge32n(void *a1, void *a2) 1240{ 1241#if defined(__VIS__) && __VIS__ >= 0x200 && defined(__GNUC__) 1242 return __builtin_vis_edge32n(a1, a2); 1243#else 1244 vis_s32 out; 1245 1246 __asm("edge32n %1,%2,%0" 1247 : "=r"(out) 1248 : "r"(a1), "r"(a2)); 1249 return out; 1250#endif 1251} 1252 1253_VISATTR 1254static __inline vis_s32 1255vis_edge8ln(void *a1, void *a2) 1256{ 1257#if defined(__VIS__) && __VIS__ >= 0x200 && defined(__GNUC__) 1258 return __builtin_vis_edge8ln(a1, a2); 1259#else 1260 vis_s32 out; 1261 1262 __asm("edge8ln %1,%2,%0" 1263 : "=r"(out) 1264 : "r"(a1), "r"(a2)); 1265 return out; 1266#endif 1267} 1268 1269_VISATTR 1270static __inline vis_s32 1271vis_edge16ln(void *a1, void *a2) 1272{ 1273#if defined(__VIS__) && __VIS__ >= 0x200 && defined(__GNUC__) 1274 return __builtin_vis_edge16ln(a1, a2); 1275#else 1276 vis_s32 out; 1277 1278 __asm("edge16ln %1,%2,%0" 1279 : "=r"(out) 1280 : "r"(a1), "r"(a2)); 1281 return out; 1282#endif 1283} 1284 1285_VISATTR 1286static __inline vis_s32 1287vis_edge32ln(void *a1, void *a2) 1288{ 1289#if defined(__VIS__) && __VIS__ >= 0x200 && defined(__GNUC__) 1290 return __builtin_vis_edge32ln(a1, a2); 1291#else 1292 vis_s32 out; 1293 1294 __asm("edge32ln %1,%2,%0" 1295 : "=r"(out) 1296 : "r"(a1), "r"(a2)); 1297 return out; 1298#endif 1299} 1300 1301#ifdef __cplusplus 1302} 1303#endif 1304 1305#endif 1306