Home | History | Annotate | Line # | Download | only in rs6000
      1   1.1  mrg /* Cell BEA specific SPU intrinsics to PPU/VMX intrinsics
      2  1.12  mrg    Copyright (C) 2007-2022 Free Software Foundation, Inc.
      3   1.1  mrg 
      4   1.1  mrg    This file is free software; you can redistribute it and/or modify it under
      5   1.1  mrg    the terms of the GNU General Public License as published by the Free
      6   1.1  mrg    Software Foundation; either version 3 of the License, or (at your option)
      7   1.1  mrg    any later version.
      8   1.1  mrg 
      9   1.1  mrg    This file is distributed in the hope that it will be useful, but WITHOUT
     10   1.1  mrg    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
     11   1.1  mrg    FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     12   1.1  mrg    for more details.
     13   1.1  mrg 
     14   1.1  mrg    Under Section 7 of GPL version 3, you are granted additional
     15   1.1  mrg    permissions described in the GCC Runtime Library Exception, version
     16   1.1  mrg    3.1, as published by the Free Software Foundation.
     17   1.1  mrg 
     18   1.1  mrg    You should have received a copy of the GNU General Public License and
     19   1.1  mrg    a copy of the GCC Runtime Library Exception along with this program;
     20   1.1  mrg    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     21   1.1  mrg    <http://www.gnu.org/licenses/>.  */
     22   1.1  mrg 
     23   1.1  mrg #ifndef _SI2VMX_H_
     24   1.1  mrg #define _SI2VMX_H_	1
     25   1.1  mrg 
     26   1.1  mrg #ifndef __SPU__
     27   1.1  mrg 
     28   1.1  mrg #include <stdlib.h>
     29   1.1  mrg #include <vec_types.h>
     30   1.1  mrg 
     31   1.1  mrg 
     32   1.1  mrg /* Specify a default halt action for spu_hcmpeq and spu_hcmpgt intrinsics.
     33   1.1  mrg  * Users can override the action by defining it prior to including this
     34   1.1  mrg  * header file.
     35   1.1  mrg  */
     36   1.1  mrg #ifndef SPU_HALT_ACTION
     37   1.1  mrg #define SPU_HALT_ACTION		abort()
     38   1.1  mrg #endif
     39   1.1  mrg 
     40   1.1  mrg /* Specify a default stop action for the spu_stop intrinsic.
     41   1.1  mrg  * Users can override the action by defining it prior to including this
     42   1.1  mrg  * header file.
     43   1.1  mrg  */
     44   1.1  mrg #ifndef SPU_STOP_ACTION
     45   1.1  mrg #define SPU_STOP_ACTION		abort()
     46   1.1  mrg #endif
     47   1.1  mrg 
     48   1.1  mrg 
     49   1.1  mrg /* Specify a default action for unsupported intrinsic.
     50   1.1  mrg  * Users can override the action by defining it prior to including this
     51   1.1  mrg  * header file.
     52   1.1  mrg  */
     53   1.1  mrg #ifndef SPU_UNSUPPORTED_ACTION
     54   1.1  mrg #define SPU_UNSUPPORTED_ACTION	abort()
     55   1.1  mrg #endif
     56   1.1  mrg 
     57   1.1  mrg 
     58   1.1  mrg /* Casting intrinsics - from scalar to quadword
     59   1.1  mrg  */
     60   1.1  mrg 
     61   1.1  mrg static __inline qword si_from_uchar(unsigned char c) {
     62   1.1  mrg   union {
     63   1.1  mrg     qword q;
     64   1.1  mrg     unsigned char c[16];
     65   1.1  mrg   } x;
     66   1.1  mrg   x.c[3] = c;
     67   1.1  mrg   return (x.q);
     68   1.1  mrg }
     69   1.1  mrg 
     70   1.1  mrg static __inline qword si_from_char(signed char c) {
     71   1.1  mrg   union {
     72   1.1  mrg     qword q;
     73   1.1  mrg     signed char c[16];
     74   1.1  mrg   } x;
     75   1.1  mrg   x.c[3] = c;
     76   1.1  mrg   return (x.q);
     77   1.1  mrg }
     78   1.1  mrg 
     79   1.1  mrg static __inline qword si_from_ushort(unsigned short s) {
     80   1.1  mrg   union {
     81   1.1  mrg     qword q;
     82   1.1  mrg     unsigned short s[8];
     83   1.1  mrg   } x;
     84   1.1  mrg   x.s[1] = s;
     85   1.1  mrg   return (x.q);
     86   1.1  mrg }
     87   1.1  mrg 
     88   1.1  mrg static __inline qword si_from_short(short s) {
     89   1.1  mrg   union {
     90   1.1  mrg     qword q;
     91   1.1  mrg     short s[8];
     92   1.1  mrg   } x;
     93   1.1  mrg   x.s[1] = s;
     94   1.1  mrg   return (x.q);
     95   1.1  mrg }
     96   1.1  mrg 
     97   1.1  mrg 
     98   1.1  mrg static __inline qword si_from_uint(unsigned int i) {
     99   1.1  mrg   union {
    100   1.1  mrg     qword q;
    101   1.1  mrg     unsigned int i[4];
    102   1.1  mrg   } x;
    103   1.1  mrg   x.i[0] = i;
    104   1.1  mrg   return (x.q);
    105   1.1  mrg }
    106   1.1  mrg 
    107   1.1  mrg static __inline qword si_from_int(int i) {
    108   1.1  mrg   union {
    109   1.1  mrg     qword q;
    110   1.1  mrg     int i[4];
    111   1.1  mrg   } x;
    112   1.1  mrg   x.i[0] = i;
    113   1.1  mrg   return (x.q);
    114   1.1  mrg }
    115   1.1  mrg 
    116   1.1  mrg static __inline qword si_from_ullong(unsigned long long l) {
    117   1.1  mrg   union {
    118   1.1  mrg     qword q;
    119   1.1  mrg     unsigned long long l[2];
    120   1.1  mrg   } x;
    121   1.1  mrg   x.l[0] = l;
    122   1.1  mrg   return (x.q);
    123   1.1  mrg }
    124   1.1  mrg 
    125   1.1  mrg static __inline qword si_from_llong(long long l) {
    126   1.1  mrg   union {
    127   1.1  mrg     qword q;
    128   1.1  mrg     long long l[2];
    129   1.1  mrg   } x;
    130   1.1  mrg   x.l[0] = l;
    131   1.1  mrg   return (x.q);
    132   1.1  mrg }
    133   1.1  mrg 
    134   1.1  mrg static __inline qword si_from_float(float f) {
    135   1.1  mrg   union {
    136   1.1  mrg     qword q;
    137   1.1  mrg     float f[4];
    138   1.1  mrg   } x;
    139   1.1  mrg   x.f[0] = f;
    140   1.1  mrg   return (x.q);
    141   1.1  mrg }
    142   1.1  mrg 
    143   1.1  mrg static __inline qword si_from_double(double d) {
    144   1.1  mrg   union {
    145   1.1  mrg     qword q;
    146   1.1  mrg     double d[2];
    147   1.1  mrg   } x;
    148   1.1  mrg   x.d[0] = d;
    149   1.1  mrg   return (x.q);
    150   1.1  mrg }
    151   1.1  mrg 
    152   1.1  mrg static __inline qword si_from_ptr(void *ptr) {
    153   1.1  mrg   union {
    154   1.1  mrg     qword q;
    155   1.1  mrg     void *p;
    156   1.1  mrg   } x;
    157   1.1  mrg   x.p = ptr;
    158   1.1  mrg   return (x.q);
    159   1.1  mrg }
    160   1.1  mrg 
    161   1.1  mrg 
    162   1.1  mrg /* Casting intrinsics - from quadword to scalar
    163   1.1  mrg  */
    164   1.1  mrg static __inline unsigned char si_to_uchar(qword q) {
    165   1.1  mrg   union {
    166   1.1  mrg     qword q;
    167   1.1  mrg     unsigned char c[16];
    168   1.1  mrg   } x;
    169   1.1  mrg   x.q = q;
    170   1.1  mrg   return (x.c[3]);
    171   1.1  mrg }
    172   1.1  mrg 
    173   1.1  mrg static __inline signed char si_to_char(qword q) {
    174   1.1  mrg   union {
    175   1.1  mrg     qword q;
    176   1.1  mrg     signed char c[16];
    177   1.1  mrg   } x;
    178   1.1  mrg   x.q = q;
    179   1.1  mrg   return (x.c[3]);
    180   1.1  mrg }
    181   1.1  mrg 
    182   1.1  mrg static __inline unsigned short si_to_ushort(qword q) {
    183   1.1  mrg   union {
    184   1.1  mrg     qword q;
    185   1.1  mrg     unsigned short s[8];
    186   1.1  mrg   } x;
    187   1.1  mrg   x.q = q;
    188   1.1  mrg   return (x.s[1]);
    189   1.1  mrg }
    190   1.1  mrg 
    191   1.1  mrg static __inline short si_to_short(qword q) {
    192   1.1  mrg   union {
    193   1.1  mrg     qword q;
    194   1.1  mrg     short s[8];
    195   1.1  mrg   } x;
    196   1.1  mrg   x.q = q;
    197   1.1  mrg   return (x.s[1]);
    198   1.1  mrg }
    199   1.1  mrg 
    200   1.1  mrg static __inline unsigned int si_to_uint(qword q) {
    201   1.1  mrg   union {
    202   1.1  mrg     qword q;
    203   1.1  mrg     unsigned int i[4];
    204   1.1  mrg   } x;
    205   1.1  mrg   x.q = q;
    206   1.1  mrg   return (x.i[0]);
    207   1.1  mrg }
    208   1.1  mrg 
    209   1.1  mrg static __inline int si_to_int(qword q) {
    210   1.1  mrg   union {
    211   1.1  mrg     qword q;
    212   1.1  mrg     int i[4];
    213   1.1  mrg   } x;
    214   1.1  mrg   x.q = q;
    215   1.1  mrg   return (x.i[0]);
    216   1.1  mrg }
    217   1.1  mrg 
    218   1.1  mrg static __inline unsigned long long si_to_ullong(qword q) {
    219   1.1  mrg   union {
    220   1.1  mrg     qword q;
    221   1.1  mrg     unsigned long long l[2];
    222   1.1  mrg   } x;
    223   1.1  mrg   x.q = q;
    224   1.1  mrg   return (x.l[0]);
    225   1.1  mrg }
    226   1.1  mrg 
    227   1.1  mrg static __inline long long si_to_llong(qword q) {
    228   1.1  mrg   union {
    229   1.1  mrg     qword q;
    230   1.1  mrg     long long l[2];
    231   1.1  mrg   } x;
    232   1.1  mrg   x.q = q;
    233   1.1  mrg   return (x.l[0]);
    234   1.1  mrg }
    235   1.1  mrg 
    236   1.1  mrg static __inline float si_to_float(qword q) {
    237   1.1  mrg   union {
    238   1.1  mrg     qword q;
    239   1.1  mrg     float f[4];
    240   1.1  mrg   } x;
    241   1.1  mrg   x.q = q;
    242   1.1  mrg   return (x.f[0]);
    243   1.1  mrg }
    244   1.1  mrg 
    245   1.1  mrg static __inline double si_to_double(qword q) {
    246   1.1  mrg   union {
    247   1.1  mrg     qword q;
    248   1.1  mrg     double d[2];
    249   1.1  mrg   } x;
    250   1.1  mrg   x.q = q;
    251   1.1  mrg   return (x.d[0]);
    252   1.1  mrg }
    253   1.1  mrg 
    254   1.1  mrg static __inline void * si_to_ptr(qword q) {
    255   1.1  mrg   union {
    256   1.1  mrg     qword q;
    257   1.1  mrg     void *p;
    258   1.1  mrg   } x;
    259   1.1  mrg   x.q = q;
    260   1.1  mrg   return (x.p);
    261   1.1  mrg }
    262   1.1  mrg 
    263   1.1  mrg 
    264   1.1  mrg /* Absolute difference
    265   1.1  mrg  */
    266   1.1  mrg static __inline qword si_absdb(qword a, qword b)
    267   1.1  mrg {
    268   1.1  mrg   vec_uchar16 ac, bc, dc;
    269   1.1  mrg 
    270   1.1  mrg   ac = (vec_uchar16)(a);
    271   1.1  mrg   bc = (vec_uchar16)(b);
    272   1.1  mrg   dc = vec_sel(vec_sub(bc, ac), vec_sub(ac, bc), vec_cmpgt(ac, bc));
    273   1.1  mrg 
    274   1.1  mrg   return ((qword)(dc));
    275   1.1  mrg }
    276   1.1  mrg 
/* Add intrinsics
 */

/* si_a: element-wise add of four 32-bit words.  */
#define si_a(_a, _b)		((qword)(vec_add((vec_uint4)(_a), (vec_uint4)(_b))))

/* si_ah: element-wise add of eight 16-bit halfwords.  */
#define si_ah(_a, _b)		((qword)(vec_add((vec_ushort8)(_a), (vec_ushort8)(_b))))
    282   1.1  mrg 
    283   1.1  mrg static __inline qword si_ai(qword a, int b)
    284   1.1  mrg {
    285   1.1  mrg   return ((qword)(vec_add((vec_int4)(a),
    286   1.1  mrg 			  vec_splat((vec_int4)(si_from_int(b)), 0))));
    287   1.1  mrg }
    288   1.1  mrg 
    289   1.1  mrg 
    290   1.1  mrg static __inline qword si_ahi(qword a, short b)
    291   1.1  mrg {
    292   1.1  mrg   return ((qword)(vec_add((vec_short8)(a),
    293   1.1  mrg 			  vec_splat((vec_short8)(si_from_short(b)), 1))));
    294   1.1  mrg }
    295   1.1  mrg 

/* si_fa: element-wise add of four single-precision floats.  */
#define si_fa(_a, _b)	((qword)(vec_add((vec_float4)(_a), (vec_float4)(_b))))
    298   1.1  mrg 
    299   1.1  mrg 
    300   1.1  mrg static __inline qword si_dfa(qword a, qword b)
    301   1.1  mrg {
    302   1.1  mrg   union {
    303   1.1  mrg     vec_double2 v;
    304   1.1  mrg     double d[2];
    305   1.1  mrg   } ad, bd, dd;
    306   1.1  mrg 
    307   1.1  mrg   ad.v = (vec_double2)(a);
    308   1.1  mrg   bd.v = (vec_double2)(b);
    309   1.1  mrg   dd.d[0] = ad.d[0] + bd.d[0];
    310   1.1  mrg   dd.d[1] = ad.d[1] + bd.d[1];
    311   1.1  mrg 
    312   1.1  mrg   return ((qword)(dd.v));
    313   1.1  mrg }
    314   1.1  mrg 
/* Add word extended
 * Adds the word elements of _a and _b plus the carry bit held in the
 * least-significant bit of each word element of _c.
 */
#define si_addx(_a, _b, _c)	((qword)(vec_add(vec_add((vec_uint4)(_a), (vec_uint4)(_b)), 	\
						 vec_and((vec_uint4)(_c), vec_splat_u32(1)))))


/* Bit-wise AND
 */
#define si_and(_a, _b)		((qword)(vec_and((vec_uint4)(_a), (vec_uint4)(_b))))
    325   1.1  mrg 
    326   1.1  mrg static __inline qword si_andbi(qword a, signed char b)
    327   1.1  mrg {
    328   1.1  mrg   return ((qword)(vec_and((vec_char16)(a),
    329   1.1  mrg 			  vec_splat((vec_char16)(si_from_char(b)), 3))));
    330   1.1  mrg }
    331   1.1  mrg 
    332   1.1  mrg static __inline qword si_andhi(qword a, signed short b)
    333   1.1  mrg {
    334   1.1  mrg   return ((qword)(vec_and((vec_short8)(a),
    335   1.1  mrg 			  vec_splat((vec_short8)(si_from_short(b)), 1))));
    336   1.1  mrg }
    337   1.1  mrg 
    338   1.1  mrg 
    339   1.1  mrg static __inline qword si_andi(qword a, signed int b)
    340   1.1  mrg {
    341   1.1  mrg   return ((qword)(vec_and((vec_int4)(a),
    342   1.1  mrg 			  vec_splat((vec_int4)(si_from_int(b)), 0))));
    343   1.1  mrg }
    344   1.1  mrg 

/* Bit-wise AND with complement
 */
#define si_andc(_a, _b)		((qword)(vec_andc((vec_uchar16)(_a), (vec_uchar16)(_b))))


/* Average byte vectors
 */
#define si_avgb(_a, _b)		((qword)(vec_avg((vec_uchar16)(_a), (vec_uchar16)(_b))))


/* Branch indirect and set link on external data
 * These SPU channel/branch forms have no PPU/VMX equivalent, so the
 * macros intentionally expand to nothing.
 */
#define si_bisled(_func)	/* not mappable */
#define si_bisledd(_func)	/* not mappable */
#define si_bislede(_func)	/* not mappable */


/* Borrow generate
 * si_bg produces 1 in each word where no borrow occurs for _a - _b
 * (vec_subc computes the carry of _b + ~_a + 1).
 */
#define si_bg(_a, _b)		((qword)(vec_subc((vec_uint4)(_b), (vec_uint4)(_a))))

/* si_bgx: borrow generate extended — also considers the incoming
 * borrow in the least-significant bit of each word of _c.
 */
#define si_bgx(_a, _b, _c)	((qword)(vec_and(vec_or(vec_cmpgt((vec_uint4)(_b), (vec_uint4)(_a)),		\
							vec_and(vec_cmpeq((vec_uint4)(_b), (vec_uint4)(_a)), 	\
								(vec_uint4)(_c))), vec_splat_u32(1))))
    370   1.1  mrg 
    371   1.1  mrg /* Compare absolute equal
    372   1.1  mrg  */
    373   1.1  mrg static __inline qword si_fcmeq(qword a, qword b)
    374   1.1  mrg {
    375   1.1  mrg   vec_float4 msb = (vec_float4)((vec_uint4){0x80000000, 0x80000000, 0x80000000, 0x80000000});
    376   1.1  mrg 
    377   1.1  mrg   return ((qword)(vec_cmpeq(vec_andc((vec_float4)(a), msb),
    378   1.1  mrg 				  vec_andc((vec_float4)(b), msb))));
    379   1.1  mrg }
    380   1.1  mrg 
/* Double-precision compare absolute-magnitude equal.  Works on the
 * raw 64-bit bit patterns (sign bits masked off), detecting NaN
 * operands in A so they compare unequal.  Each 64-bit result lane is
 * all ones when |a| == |b|, else all zeros.  NOTE(review): only A is
 * screened for NaN here; a NaN in B with a matching bit pattern in A
 * cannot occur since a NaN in A would already clear the result.  */
static __inline qword si_dfcmeq(qword a, qword b)
{
  /* Per-doubleword mask that clears the IEEE-754 sign bit.  */
  vec_uint4 sign_mask= (vec_uint4) { 0x7FFFFFFF, 0xFFFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF };
  /* High word of +Inf: exponent all ones, mantissa-high zero.  */
  vec_uint4 nan_mask = (vec_uint4) { 0x7FF00000, 0x00000000, 0x7FF00000, 0x00000000 };
  /* Permute pattern that replicates each doubleword's high-word
     result into both 32-bit halves of that doubleword.  */
  vec_uchar16 hihi_promote = (vec_uchar16) { 0,1,2,3,  16,17,18,19,  8,9,10,11, 24,25,26,27};

  vec_uint4 biteq;
  vec_uint4 aabs;
  vec_uint4 babs;
  vec_uint4 a_gt;
  vec_uint4 ahi_inf;
  vec_uint4 anan;
  vec_uint4 result;

  union {
    vec_uchar16 v;
    int i[4];
  } x;

  /* Shift 4 bytes  */
  /* vec_slo takes its shift count in bits from the low vector element,
     so 4 bytes = 32 bits; used to align low-word info with high words.  */
  x.i[3] = 4 << 3;

  /*  Mask out sign bits */
  aabs = vec_and((vec_uint4)a,sign_mask);
  babs = vec_and((vec_uint4)b,sign_mask);

  /*  A)  Check for bit equality, store in high word */
  biteq = (vec_uint4) vec_cmpeq((vec_uint4)aabs,(vec_uint4)babs);
  biteq = vec_and(biteq,(vec_uint4)vec_slo((vec_uchar16)biteq,x.v));

  /*
      B)  Check if a is NaN, store in high word

      B1) If the high word is greater than max_exp (indicates a NaN)
      B2) If the low word is greater than 0
  */
  a_gt = (vec_uint4)vec_cmpgt(aabs,nan_mask);

  /*  B3) Check if the high word is equal to the inf exponent */
  ahi_inf = (vec_uint4)vec_cmpeq(aabs,nan_mask);

  /*  anan = B1[hi] or (B2[lo] and B3[hi]) */
  anan = (vec_uint4)vec_or(a_gt,vec_and((vec_uint4)vec_slo((vec_uchar16)a_gt,x.v),ahi_inf));

  /*  result = A and not B  */
  result = vec_andc(biteq, anan);

  /*  Promote high words to 64 bits and return  */
  return ((qword)(vec_perm((vec_uchar16)result, (vec_uchar16)result, hihi_promote)));
}
    431   1.1  mrg 
    432   1.1  mrg 
    433   1.1  mrg /* Compare absolute greater than
    434   1.1  mrg  */
    435   1.1  mrg static __inline qword si_fcmgt(qword a, qword b)
    436   1.1  mrg {
    437   1.1  mrg   vec_float4 msb = (vec_float4)((vec_uint4){0x80000000, 0x80000000, 0x80000000, 0x80000000});
    438   1.1  mrg 
    439   1.1  mrg   return ((qword)(vec_cmpgt(vec_andc((vec_float4)(a), msb),
    440   1.1  mrg 				  vec_andc((vec_float4)(b), msb))));
    441   1.1  mrg }
    442   1.1  mrg 
/* Double-precision compare absolute-magnitude greater-than.  Compares
 * the sign-masked 64-bit bit patterns of A and B as unsigned integers
 * (valid for IEEE-754 magnitudes), with NaN operands in either input
 * forcing a false result.  Each 64-bit lane of the result is all ones
 * when |a| > |b|, else all zeros.  */
static __inline qword si_dfcmgt(qword a, qword b)
{
  /* Replicates each doubleword's high 32-bit word into both halves.  */
  vec_uchar16 splat_hi = (vec_uchar16) { 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 };
  /* High word of +Inf: exponent all ones, mantissa-high zero.  */
  vec_uint4 nan_mask = (vec_uint4) { 0x7FF00000, 0x0, 0x7FF00000, 0x0 };
  /* Clears the IEEE-754 sign bit of each doubleword.  */
  vec_uint4 sign_mask = (vec_uint4) { 0x7FFFFFFF, 0xFFFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF };

  union {
    vec_uchar16 v;
    int i[4];
  } x;

  /* Shift 4 bytes  */
  /* vec_slo shift count in bits: 4 bytes = 32 bits, aligns low words
     with their doubleword's high word.  */
  x.i[3] = 4 << 3;

  // absolute value of a,b
  vec_uint4 aabs = vec_and((vec_uint4)a, sign_mask);
  vec_uint4 babs = vec_and((vec_uint4)b, sign_mask);

  // check if a is nan
  // NaN when high word > inf pattern, or high word == inf and low word > 0.
  vec_uint4 a_inf = (vec_uint4)vec_cmpeq(aabs, nan_mask);
  vec_uint4 a_nan = (vec_uint4)vec_cmpgt(aabs, nan_mask);
  a_nan = vec_or(a_nan, vec_and((vec_uint4)vec_slo((vec_uchar16)a_nan,x.v),a_inf));
  a_nan = (vec_uint4)vec_perm((vec_uchar16)a_nan, (vec_uchar16)a_nan, splat_hi);

  // check if b is nan
  vec_uint4 b_inf = (vec_uint4)vec_cmpeq(babs, nan_mask);
  vec_uint4 b_nan = (vec_uint4)vec_cmpgt(babs, nan_mask);
  b_nan = vec_or(b_nan, vec_and((vec_uint4)vec_slo((vec_uchar16)b_nan,x.v),b_inf));
  b_nan = (vec_uint4)vec_perm((vec_uchar16)b_nan, (vec_uchar16)b_nan, splat_hi);

  // A) Check if the exponents are different
  vec_uint4 gt_hi = (vec_uint4)vec_cmpgt(aabs,babs);

  // B) Check if high word equal, and low word greater
  vec_uint4 gt_lo = (vec_uint4)vec_cmpgt((vec_uint4)aabs, (vec_uint4)babs);
  vec_uint4 eq = (vec_uint4)vec_cmpeq(aabs, babs);
  vec_uint4 eqgt = vec_and(eq,vec_slo(gt_lo,x.v));

  //  If either A or B is true, return true (unless NaNs detected)
  vec_uint4 r = vec_or(gt_hi, eqgt);

  // splat the high words of the comparison step
  r = (vec_uint4)vec_perm((vec_uchar16)r,(vec_uchar16)r,splat_hi);

  // correct for NaNs in input
  return ((qword)vec_andc(r,vec_or(a_nan,b_nan)));
}
    490   1.1  mrg 
    491   1.1  mrg 
    492   1.1  mrg /* Compare equal
    493   1.1  mrg  */
    494   1.1  mrg static __inline qword si_ceqb(qword a, qword b)
    495   1.1  mrg {
    496   1.1  mrg   return ((qword)(vec_cmpeq((vec_uchar16)(a), (vec_uchar16)(b))));
    497   1.1  mrg }
    498   1.1  mrg 
    499   1.1  mrg static __inline qword si_ceqh(qword a, qword b)
    500   1.1  mrg {
    501   1.1  mrg   return ((qword)(vec_cmpeq((vec_ushort8)(a), (vec_ushort8)(b))));
    502   1.1  mrg }
    503   1.1  mrg 
    504   1.1  mrg static __inline qword si_ceq(qword a, qword b)
    505   1.1  mrg {
    506   1.1  mrg   return ((qword)(vec_cmpeq((vec_uint4)(a), (vec_uint4)(b))));
    507   1.1  mrg }
    508   1.1  mrg 
    509   1.1  mrg static __inline qword si_fceq(qword a, qword b)
    510   1.1  mrg {
    511   1.1  mrg   return ((qword)(vec_cmpeq((vec_float4)(a), (vec_float4)(b))));
    512   1.1  mrg }
    513   1.1  mrg 
    514   1.1  mrg static __inline qword si_ceqbi(qword a, signed char b)
    515   1.1  mrg {
    516   1.1  mrg   return ((qword)(vec_cmpeq((vec_char16)(a),
    517   1.1  mrg 			    vec_splat((vec_char16)(si_from_char(b)), 3))));
    518   1.1  mrg }
    519   1.1  mrg 
    520   1.1  mrg static __inline qword si_ceqhi(qword a, signed short b)
    521   1.1  mrg {
    522   1.1  mrg   return ((qword)(vec_cmpeq((vec_short8)(a),
    523   1.1  mrg 			  vec_splat((vec_short8)(si_from_short(b)), 1))));
    524   1.1  mrg }
    525   1.1  mrg 
    526   1.1  mrg static __inline qword si_ceqi(qword a, signed int b)
    527   1.1  mrg {
    528   1.1  mrg   return ((qword)(vec_cmpeq((vec_int4)(a),
    529   1.1  mrg 			  vec_splat((vec_int4)(si_from_int(b)), 0))));
    530   1.1  mrg }
    531   1.1  mrg 
/* Double-precision compare equal.  Tests the raw 64-bit bit patterns
 * of A and B for equality, additionally treating +0 == -0 as equal,
 * and forcing false when A is NaN.  Each 64-bit lane of the result is
 * all ones or all zeros.  NOTE(review): only A is screened for NaN; a
 * NaN in B can only match bit-for-bit if A is NaN too, which is
 * already handled.  */
static __inline qword si_dfceq(qword a, qword b)
{
  /* Clears the IEEE-754 sign bit of each doubleword.  */
  vec_uint4 sign_mask= (vec_uint4) { 0x7FFFFFFF, 0xFFFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF };
  /* High word of +Inf: exponent all ones, mantissa-high zero.  */
  vec_uint4 nan_mask = (vec_uint4) { 0x7FF00000, 0x00000000, 0x7FF00000, 0x00000000 };
  /* Replicates each doubleword's high-word result into both halves.  */
  vec_uchar16 hihi_promote = (vec_uchar16) { 0,1,2,3,  16,17,18,19,  8,9,10,11, 24,25,26,27};

  vec_uint4 biteq;
  vec_uint4 aabs;
  vec_uint4 babs;
  vec_uint4 a_gt;
  vec_uint4 ahi_inf;
  vec_uint4 anan;
  vec_uint4 iszero;
  vec_uint4 result;

  union {
    vec_uchar16 v;
    int i[4];
  } x;

  /* Shift 4 bytes  */
  /* vec_slo shift count in bits: 4 bytes = 32 bits, aligns each low
     word with its doubleword's high word.  */
  x.i[3] = 4 << 3;

  /*  A)  Check for bit equality, store in high word */
  biteq = (vec_uint4) vec_cmpeq((vec_uint4)a,(vec_uint4)b);
  biteq = vec_and(biteq,(vec_uint4)vec_slo((vec_uchar16)biteq,x.v));

  /*  Mask out sign bits */
  aabs = vec_and((vec_uint4)a,sign_mask);
  babs = vec_and((vec_uint4)b,sign_mask);

  /*
      B)  Check if a is NaN, store in high word

      B1) If the high word is greater than max_exp (indicates a NaN)
      B2) If the low word is greater than 0
  */
  a_gt = (vec_uint4)vec_cmpgt(aabs,nan_mask);

  /*  B3) Check if the high word is equal to the inf exponent */
  ahi_inf = (vec_uint4)vec_cmpeq(aabs,nan_mask);

  /*  anan = B1[hi] or (B2[lo] and B3[hi]) */
  anan = (vec_uint4)vec_or(a_gt,vec_and((vec_uint4)vec_slo((vec_uchar16)a_gt,x.v),ahi_inf));

  /*  C)  Check for 0 = -0 special case */
  iszero =(vec_uint4)vec_cmpeq((vec_uint4)vec_or(aabs,babs),(vec_uint4)vec_splat_u32(0));
  iszero = vec_and(iszero,(vec_uint4)vec_slo((vec_uchar16)iszero,x.v));

  /*  result = (A or C) and not B  */
  result = vec_or(biteq,iszero);
  result = vec_andc(result, anan);

  /*  Promote high words to 64 bits and return  */
  return ((qword)(vec_perm((vec_uchar16)result, (vec_uchar16)result, hihi_promote)));
}
    588   1.1  mrg 
    589   1.1  mrg 
    590   1.1  mrg /* Compare greater than
    591   1.1  mrg  */
    592   1.1  mrg static __inline qword si_cgtb(qword a, qword b)
    593   1.1  mrg {
    594   1.1  mrg   return ((qword)(vec_cmpgt((vec_char16)(a), (vec_char16)(b))));
    595   1.1  mrg }
    596   1.1  mrg 
    597   1.1  mrg static __inline qword si_cgth(qword a, qword b)
    598   1.1  mrg {
    599   1.1  mrg   return ((qword)(vec_cmpgt((vec_short8)(a), (vec_short8)(b))));
    600   1.1  mrg }
    601   1.1  mrg 
    602   1.1  mrg static __inline qword si_cgt(qword a, qword b)
    603   1.1  mrg {
    604   1.1  mrg   return ((qword)(vec_cmpgt((vec_int4)(a), (vec_int4)(b))));
    605   1.1  mrg }
    606   1.1  mrg 
    607   1.1  mrg static __inline qword si_clgtb(qword a, qword b)
    608   1.1  mrg {
    609   1.1  mrg   return ((qword)(vec_cmpgt((vec_uchar16)(a), (vec_uchar16)(b))));
    610   1.1  mrg }
    611   1.1  mrg 
    612   1.1  mrg static __inline qword si_clgth(qword a, qword b)
    613   1.1  mrg {
    614   1.1  mrg   return ((qword)(vec_cmpgt((vec_ushort8)(a), (vec_ushort8)(b))));
    615   1.1  mrg }
    616   1.1  mrg 
    617   1.1  mrg static __inline qword si_clgt(qword a, qword b)
    618   1.1  mrg {
    619   1.1  mrg   return ((qword)(vec_cmpgt((vec_uint4)(a), (vec_uint4)(b))));
    620   1.1  mrg }
    621   1.1  mrg 
    622   1.1  mrg static __inline qword si_fcgt(qword a, qword b)
    623   1.1  mrg {
    624   1.1  mrg   return ((qword)(vec_cmpgt((vec_float4)(a), (vec_float4)(b))));
    625   1.1  mrg }
    626   1.1  mrg 
/* Double-precision compare greater-than.  Converts each IEEE-754
 * doubleword to a two's-complement ordered integer (negating the
 * magnitude when the sign bit is set), then compares 64-bit values via
 * a high-word/low-word comparison pair.  NaN in either operand forces
 * a false result.  Each 64-bit result lane is all ones or zeros.  */
static __inline qword si_dfcgt(qword a, qword b)
{
  /* Replicates each doubleword's high 32-bit word into both halves.  */
  vec_uchar16 splat_hi = (vec_uchar16) { 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 };
  /* Moves each doubleword's low-word borrow up to the high-word slot;
     indices >= 0x80 select zero via the pat/vec_sel fixup below.  */
  vec_uchar16 borrow_shuffle = (vec_uchar16) { 4,5,6,7, 192,192,192,192, 12,13,14,15, 192,192,192,192 };
  /* High word of +Inf: exponent all ones, mantissa-high zero.  */
  vec_uint4 nan_mask = (vec_uint4) { 0x7FF00000, 0x0, 0x7FF00000, 0x0 };
  /* Clears the IEEE-754 sign bit of each doubleword.  */
  vec_uint4 sign_mask = (vec_uint4) { 0x7FFFFFFF, 0xFFFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF };

  union {
    vec_uchar16 v;
    int i[4];
  } x;

  /* Shift 4 bytes  */
  /* vec_slo shift count in bits: 4 bytes = 32 bits.  */
  x.i[3] = 4 << 3;

  // absolute value of a,b
  vec_uint4 aabs = vec_and((vec_uint4)a, sign_mask);
  vec_uint4 babs = vec_and((vec_uint4)b, sign_mask);

  // check if a is nan
  // NaN when high word > inf pattern, or high word == inf and low word > 0.
  vec_uint4 a_inf = (vec_uint4)vec_cmpeq(aabs, nan_mask);
  vec_uint4 a_nan = (vec_uint4)vec_cmpgt(aabs, nan_mask);
  a_nan = vec_or(a_nan, vec_and((vec_uint4)vec_slo((vec_uchar16)a_nan,x.v),a_inf));
  a_nan = (vec_uint4)vec_perm((vec_uchar16)a_nan, (vec_uchar16)a_nan, splat_hi);

  // check if b is nan
  vec_uint4 b_inf = (vec_uint4)vec_cmpeq(babs, nan_mask);
  vec_uint4 b_nan = (vec_uint4)vec_cmpgt(babs, nan_mask);
  b_nan = vec_or(b_nan, vec_and((vec_uint4)vec_slo((vec_uchar16)b_nan,x.v),b_inf));
  b_nan = (vec_uint4)vec_perm((vec_uchar16)b_nan, (vec_uchar16)b_nan, splat_hi);

  // sign of a
  // Arithmetic shift by 31 replicates the sign bit across each word.
  vec_uint4 asel = (vec_uint4)vec_sra((vec_int4)(a), (vec_uint4)vec_splat(((vec_uint4)si_from_int(31)), 0));
  asel = (vec_uint4)vec_perm((vec_uchar16)asel,(vec_uchar16)asel,splat_hi);

  // sign of b
  vec_uint4 bsel = (vec_uint4)vec_sra((vec_int4)(b), (vec_uint4)vec_splat(((vec_uint4)si_from_int(31)), 0));
  bsel = (vec_uint4)vec_perm((vec_uchar16)bsel,(vec_uchar16)bsel,splat_hi);

  // negative a
  // 64-bit two's-complement negate of |a|: invert and add the borrow
  // propagated from the low word (the pat/vec_sel pair builds a permute
  // that replaces the out-of-range 192 indices with constant bytes).
  vec_uint4 abor = vec_subc((vec_uint4)vec_splat_u32(0), aabs);
  vec_uchar16 pat = vec_sel(((vec_uchar16){0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15}), vec_sr(borrow_shuffle, vec_splat_u8(3)), vec_sra(borrow_shuffle, vec_splat_u8(7)));
  abor = (vec_uint4)(vec_perm(vec_perm((vec_uchar16)abor, (vec_uchar16)abor, borrow_shuffle),((vec_uchar16){0, 0, 0, 0, 0, 0, 0, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0x80, 0x80, 0x80, 0x80}),pat));
  vec_uint4 aneg = vec_add(vec_add(vec_splat_u32(0), vec_nor(aabs, aabs)), vec_and(abor, vec_splat_u32(1)));

  // pick the one we want
  vec_int4 aval = (vec_int4)vec_sel((vec_uchar16)aabs, (vec_uchar16)aneg, (vec_uchar16)asel);

  // negative b
  vec_uint4 bbor = vec_subc((vec_uint4)vec_splat_u32(0), babs);
  bbor = (vec_uint4)(vec_perm(vec_perm((vec_uchar16)bbor, (vec_uchar16)bbor, borrow_shuffle),((vec_uchar16){0, 0, 0, 0, 0, 0, 0, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0x80, 0x80, 0x80, 0x80}),pat));
  vec_uint4 bneg = vec_add(vec_nor(babs, babs), vec_and(bbor, vec_splat_u32(1)));

  // pick the one we want
  vec_int4 bval=(vec_int4)vec_sel((vec_uchar16)babs, (vec_uchar16)bneg, (vec_uchar16)bsel);

  // A) Check if the exponents are different
  // Signed compare of high words, unsigned compare for low words.
  vec_uint4 gt_hi = (vec_uint4)vec_cmpgt(aval,bval);

  // B) Check if high word equal, and low word greater
  vec_uint4 gt_lo = (vec_uint4)vec_cmpgt((vec_uint4)aval, (vec_uint4)bval);
  vec_uint4 eq = (vec_uint4)vec_cmpeq(aval, bval);
  vec_uint4 eqgt = vec_and(eq,vec_slo(gt_lo,x.v));

  //  If either A or B is true, return true (unless NaNs detected)
  vec_uint4 r = vec_or(gt_hi, eqgt);

  // splat the high words of the comparison step
  r = (vec_uint4)vec_perm((vec_uchar16)r,(vec_uchar16)r,splat_hi);

  // correct for NaNs in input
  return ((qword)vec_andc(r,vec_or(a_nan,b_nan)));
}
    700   1.1  mrg 
    701   1.1  mrg static __inline qword si_cgtbi(qword a, signed char b)
    702   1.1  mrg {
    703   1.1  mrg   return ((qword)(vec_cmpgt((vec_char16)(a),
    704   1.1  mrg 			    vec_splat((vec_char16)(si_from_char(b)), 3))));
    705   1.1  mrg }
    706   1.1  mrg 
    707   1.1  mrg static __inline qword si_cgthi(qword a, signed short b)
    708   1.1  mrg {
    709   1.1  mrg   return ((qword)(vec_cmpgt((vec_short8)(a),
    710   1.1  mrg 			    vec_splat((vec_short8)(si_from_short(b)), 1))));
    711   1.1  mrg }
    712   1.1  mrg 
    713   1.1  mrg static __inline qword si_cgti(qword a, signed int b)
    714   1.1  mrg {
    715   1.1  mrg   return ((qword)(vec_cmpgt((vec_int4)(a),
    716   1.1  mrg 			    vec_splat((vec_int4)(si_from_int(b)), 0))));
    717   1.1  mrg }
    718   1.1  mrg 
    719   1.1  mrg static __inline qword si_clgtbi(qword a, unsigned char b)
    720   1.1  mrg {
    721   1.1  mrg   return ((qword)(vec_cmpgt((vec_uchar16)(a),
    722   1.1  mrg 			    vec_splat((vec_uchar16)(si_from_uchar(b)), 3))));
    723   1.1  mrg }
    724   1.1  mrg 
    725   1.1  mrg static __inline qword si_clgthi(qword a, unsigned short b)
    726   1.1  mrg {
    727   1.1  mrg   return ((qword)(vec_cmpgt((vec_ushort8)(a),
    728   1.1  mrg 			    vec_splat((vec_ushort8)(si_from_ushort(b)), 1))));
    729   1.1  mrg }
    730   1.1  mrg 
    731   1.1  mrg static __inline qword si_clgti(qword a, unsigned int b)
    732   1.1  mrg {
    733   1.1  mrg   return ((qword)(vec_cmpgt((vec_uint4)(a),
    734   1.1  mrg 			    vec_splat((vec_uint4)(si_from_uint(b)), 0))));
    735   1.1  mrg }
    736   1.1  mrg 
    737   1.1  mrg static __inline qword si_dftsv(qword a, char b)
    738   1.1  mrg {
    739   1.1  mrg   vec_uchar16 splat_hi = (vec_uchar16) { 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 };
    740   1.1  mrg   vec_uint4 sign_mask = (vec_uint4) { 0x7FFFFFFF, 0xFFFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF };
    741   1.1  mrg   vec_uint4 result = (vec_uint4){0};
    742   1.1  mrg   vec_uint4 sign = (vec_uint4)vec_sra((vec_int4)(a), (vec_uint4)vec_splat(((vec_uint4)si_from_int(31)), 0));
    743   1.1  mrg   sign = (vec_uint4)vec_perm((vec_uchar16)sign,(vec_uchar16)sign,splat_hi);
    744   1.1  mrg   vec_uint4 aabs = vec_and((vec_uint4)a,sign_mask);
    745   1.1  mrg 
    746   1.1  mrg   union {
    747   1.1  mrg     vec_uchar16 v;
    748   1.1  mrg     int i[4];
    749   1.1  mrg   } x;
    750   1.1  mrg 
    751   1.1  mrg   /* Shift 4 bytes  */
    752   1.1  mrg   x.i[3] = 4 << 3;
    753   1.1  mrg 
    754   1.1  mrg   /* Nan or +inf or -inf  */
    755   1.1  mrg   if (b & 0x70)
    756   1.1  mrg   {
    757   1.1  mrg     vec_uint4 nan_mask = (vec_uint4) { 0x7FF00000, 0x0, 0x7FF00000, 0x0 };
    758   1.1  mrg     vec_uint4 a_inf = (vec_uint4)vec_cmpeq(aabs, nan_mask);
    759   1.1  mrg      /* NaN  */
    760   1.1  mrg      if (b & 0x40)
    761   1.1  mrg      {
    762   1.1  mrg        vec_uint4 a_nan = (vec_uint4)vec_cmpgt(aabs, nan_mask);
    763   1.1  mrg        a_nan = vec_or(a_nan, vec_and((vec_uint4)vec_slo((vec_uchar16)a_nan,x.v),a_inf));
    764   1.1  mrg        a_nan = (vec_uint4)vec_perm((vec_uchar16)a_nan, (vec_uchar16)a_nan, splat_hi);
    765   1.1  mrg        result = vec_or(result, a_nan);
    766   1.1  mrg      }
    767   1.1  mrg      /* inf  */
    768   1.1  mrg      if (b & 0x30)
    769   1.1  mrg      {
    770   1.1  mrg        a_inf = vec_and((vec_uint4)vec_slo((vec_uchar16)a_inf,x.v), a_inf);
    771   1.1  mrg        a_inf = (vec_uint4)vec_perm((vec_uchar16)a_inf, (vec_uchar16)a_inf, splat_hi);
    772   1.1  mrg         /* +inf  */
    773   1.1  mrg         if (b & 0x20)
    774   1.1  mrg           result = vec_or(vec_andc(a_inf, sign), result);
    775   1.1  mrg         /* -inf  */
    776   1.1  mrg         if (b & 0x10)
    777   1.1  mrg           result = vec_or(vec_and(a_inf, sign), result);
    778   1.1  mrg      }
    779   1.1  mrg   }
    780   1.1  mrg   /* 0 or denorm  */
    781   1.1  mrg   if (b & 0xF)
    782   1.1  mrg   {
    783   1.1  mrg     vec_uint4 iszero =(vec_uint4)vec_cmpeq(aabs,(vec_uint4)vec_splat_u32(0));
    784   1.1  mrg     iszero = vec_and(iszero,(vec_uint4)vec_slo((vec_uchar16)iszero,x.v));
    785   1.1  mrg     /* denorm  */
    786   1.1  mrg     if (b & 0x3)
    787   1.1  mrg     {
    788   1.1  mrg       vec_uint4 denorm_mask = (vec_uint4){0xFFFFF, 0xFFFFF, 0xFFFFF, 0xFFFFF};
    789   1.1  mrg       vec_uint4 isdenorm = vec_nor((vec_uint4)vec_cmpgt(aabs, denorm_mask), iszero);
    790   1.1  mrg       isdenorm = (vec_uint4)vec_perm((vec_uchar16)isdenorm, (vec_uchar16)isdenorm, splat_hi);
    791   1.1  mrg       /* +denorm  */
    792   1.1  mrg      if (b & 0x2)
    793   1.1  mrg         result = vec_or(vec_andc(isdenorm, sign), result);
    794   1.1  mrg       /* -denorm  */
    795   1.1  mrg      if (b & 0x1)
    796   1.1  mrg         result = vec_or(vec_and(isdenorm, sign), result);
    797   1.1  mrg     }
    798   1.1  mrg     /* 0  */
    799   1.1  mrg     if (b & 0xC)
    800   1.1  mrg     {
    801   1.1  mrg       iszero = (vec_uint4)vec_perm((vec_uchar16)iszero, (vec_uchar16)iszero, splat_hi);
    802   1.1  mrg       /* +0  */
    803   1.1  mrg      if (b & 0x8)
    804   1.1  mrg         result = vec_or(vec_andc(iszero, sign), result);
    805   1.1  mrg       /* -0  */
    806   1.1  mrg      if (b & 0x4)
    807   1.1  mrg         result = vec_or(vec_and(iszero, sign), result);
    808   1.1  mrg     }
    809   1.1  mrg   }
    810   1.1  mrg   return ((qword)result);
    811   1.1  mrg }
    812   1.1  mrg 
    813   1.1  mrg 
    814   1.1  mrg /* Carry generate
    815   1.1  mrg  */
    816   1.1  mrg #define si_cg(_a, _b)		((qword)(vec_addc((vec_uint4)(_a), (vec_uint4)(_b))))
    817   1.1  mrg 
    818   1.1  mrg #define si_cgx(_a, _b, _c)	((qword)(vec_or(vec_addc((vec_uint4)(_a), (vec_uint4)(_b)), 		\
    819   1.1  mrg 						vec_addc(vec_add((vec_uint4)(_a), (vec_uint4)(_b)),	\
    820   1.1  mrg 							 vec_and((vec_uint4)(_c), vec_splat_u32(1))))))
    821   1.1  mrg 
    822   1.1  mrg 
    823   1.1  mrg /* Count ones for bytes
    824   1.1  mrg  */
    825   1.1  mrg static __inline qword si_cntb(qword a)
    826   1.1  mrg {
    827   1.1  mrg   vec_uchar16 nib_cnt = (vec_uchar16){0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4};
    828   1.1  mrg   vec_uchar16 four = { 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 };
    829   1.1  mrg   vec_uchar16 av;
    830   1.1  mrg 
    831   1.1  mrg   av = (vec_uchar16)(a);
    832   1.1  mrg 
    833   1.1  mrg   return ((qword)(vec_add(vec_perm(nib_cnt, nib_cnt, av),
    834   1.1  mrg 			  vec_perm(nib_cnt, nib_cnt, vec_sr (av, four)))));
    835   1.1  mrg }
    836   1.1  mrg 
    837   1.1  mrg /* Count ones for bytes
    838   1.1  mrg  */
    839   1.1  mrg static __inline qword si_clz(qword a)
    840   1.1  mrg {
    841   1.1  mrg   vec_uchar16 av;
    842   1.1  mrg   vec_uchar16 cnt_hi, cnt_lo, cnt, tmp1, tmp2, tmp3;
    843   1.1  mrg   vec_uchar16 four    = vec_splat_u8(4);
    844   1.1  mrg   vec_uchar16 nib_cnt = (vec_uchar16){4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0};
    845   1.1  mrg   vec_uchar16 eight   = vec_splat_u8(8);
    846   1.1  mrg   vec_uchar16 sixteen = (vec_uchar16){16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16};
    847   1.1  mrg   vec_uchar16 twentyfour = (vec_uchar16){24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24};
    848   1.1  mrg 
    849   1.1  mrg   av = (vec_uchar16)(a);
    850   1.1  mrg 
    851   1.1  mrg   cnt_hi = vec_perm(nib_cnt, nib_cnt, vec_sr(av, four));
    852   1.1  mrg   cnt_lo = vec_perm(nib_cnt, nib_cnt, av);
    853   1.1  mrg 
    854   1.1  mrg   cnt = vec_add(cnt_hi, vec_and(cnt_lo, vec_cmpeq(cnt_hi, four)));
    855   1.1  mrg 
    856   1.1  mrg   tmp1 = (vec_uchar16)vec_sl((vec_uint4)(cnt), (vec_uint4)(eight));
    857   1.1  mrg   tmp2 = (vec_uchar16)vec_sl((vec_uint4)(cnt), (vec_uint4)(sixteen));
    858   1.1  mrg   tmp3 = (vec_uchar16)vec_sl((vec_uint4)(cnt), (vec_uint4)(twentyfour));
    859   1.1  mrg 
    860   1.1  mrg   cnt = vec_add(cnt, vec_and(tmp1, vec_cmpeq(cnt, eight)));
    861   1.1  mrg   cnt = vec_add(cnt, vec_and(tmp2, vec_cmpeq(cnt, sixteen)));
    862   1.1  mrg   cnt = vec_add(cnt, vec_and(tmp3, vec_cmpeq(cnt, twentyfour)));
    863   1.1  mrg 
    864   1.1  mrg   return (qword)((vec_sr((vec_uint4)(cnt), (vec_uint4)(twentyfour))));
    865   1.1  mrg }
    866   1.1  mrg 
    867   1.1  mrg /* Convert to float
    868   1.1  mrg  */
    869   1.1  mrg #define si_cuflt(_a, _b)	((qword)(vec_ctf((vec_uint4)(_a), _b)))
    870   1.1  mrg #define si_csflt(_a, _b)	((qword)(vec_ctf((vec_int4)(_a), _b)))
    871   1.1  mrg 
    872   1.1  mrg /* Convert to signed int
    873   1.1  mrg  */
    874   1.1  mrg #define si_cflts(_a, _b)	((qword)(vec_cts((vec_float4)(_a), _b)))
    875   1.1  mrg 
    876   1.1  mrg /* Convert to unsigned int
    877   1.1  mrg  */
    878   1.1  mrg #define si_cfltu(_a, _b)	((qword)(vec_ctu((vec_float4)(_a), _b)))
    879   1.1  mrg 
    880   1.1  mrg /* Synchronize
    881   1.1  mrg  */
    882   1.1  mrg #define si_dsync()		/* do nothing */
    883   1.1  mrg #define si_sync()		/* do nothing */
    884   1.1  mrg #define si_syncc()		/* do nothing */
    885   1.1  mrg 
    886   1.1  mrg 
    887   1.1  mrg /* Equivalence
    888   1.1  mrg  */
    889   1.1  mrg static __inline qword si_eqv(qword a, qword b)
    890   1.1  mrg {
    891   1.1  mrg   vec_uchar16 d;
    892   1.1  mrg 
    893   1.1  mrg   d = vec_xor((vec_uchar16)(a), (vec_uchar16)(b));
    894   1.1  mrg   return ((qword)(vec_nor(d, d)));
    895   1.1  mrg }
    896   1.1  mrg 
    897   1.1  mrg /* Extend
    898   1.1  mrg  */
    899   1.1  mrg static __inline qword si_xsbh(qword a)
    900   1.1  mrg {
    901   1.1  mrg   vec_char16 av;
    902   1.1  mrg 
    903   1.1  mrg   av = (vec_char16)(a);
    904   1.1  mrg   return ((qword)(vec_unpackh(vec_perm(av, av, ((vec_uchar16){1, 3, 5, 7, 9,11,13,15,
    905   1.1  mrg 						              0, 0, 0, 0, 0, 0, 0, 0})))));
    906   1.1  mrg }
    907   1.1  mrg 
    908   1.1  mrg static __inline qword si_xshw(qword a)
    909   1.1  mrg {
    910   1.1  mrg   vec_short8 av;
    911   1.1  mrg 
    912   1.1  mrg   av = (vec_short8)(a);
    913   1.1  mrg   return ((qword)(vec_unpackh(vec_perm(av, av, ((vec_uchar16){2, 3, 6, 7,
    914   1.1  mrg 					                      10,11,14,15,
    915   1.1  mrg 							      0, 0, 0, 0,
    916   1.1  mrg 						              0, 0, 0, 0})))));
    917   1.1  mrg }
    918   1.1  mrg 
    919   1.1  mrg static __inline qword si_xswd(qword a)
    920   1.1  mrg {
    921   1.1  mrg   vec_int4 av;
    922   1.1  mrg 
    923   1.1  mrg   av = (vec_int4)(a);
    924   1.1  mrg   return ((qword)(vec_perm(av, vec_sra(av, ((vec_uint4){31,31,31,31})),
    925   1.1  mrg 			   ((vec_uchar16){20, 21, 22, 23,
    926   1.1  mrg 					   4,  5,  6,  7,
    927   1.1  mrg 				          28, 29, 30, 31,
    928   1.1  mrg 				          12, 13, 14, 15}))));
    929   1.1  mrg }
    930   1.1  mrg 
    931   1.1  mrg static __inline qword si_fesd(qword a)
    932   1.1  mrg {
    933   1.1  mrg   union {
    934   1.1  mrg     double d[2];
    935   1.1  mrg     vec_double2	vd;
    936   1.1  mrg   } out;
    937   1.1  mrg   union {
    938   1.1  mrg     float f[4];
    939   1.1  mrg     vec_float4 vf;
    940   1.1  mrg   } in;
    941   1.1  mrg 
    942   1.1  mrg   in.vf = (vec_float4)(a);
    943   1.1  mrg   out.d[0] = (double)(in.f[0]);
    944   1.1  mrg   out.d[1] = (double)(in.f[2]);
    945   1.1  mrg   return ((qword)(out.vd));
    946   1.1  mrg }
    947   1.1  mrg 
    948   1.1  mrg /* Gather
    949   1.1  mrg  */
    950   1.1  mrg static __inline qword si_gbb(qword a)
    951   1.1  mrg {
    952   1.1  mrg   vec_uchar16 bits;
    953   1.1  mrg   vec_uint4   bytes;
    954   1.1  mrg 
    955   1.1  mrg   bits  = vec_sl(vec_and((vec_uchar16)(a), vec_splat_u8(1)), ((vec_uchar16){7, 6, 5, 4, 3, 2, 1, 0,
    956   1.1  mrg 								            7, 6, 5, 4, 3, 2, 1, 0}));
    957   1.1  mrg   bytes = (vec_uint4)vec_sum2s((vec_int4)(vec_sum4s(bits, ((vec_uint4){0}))), ((vec_int4){0}));
    958   1.1  mrg 
    959   1.1  mrg   return ((qword)(vec_perm(bytes, bytes, ((vec_uchar16){0, 0, 7,15, 0, 0, 0, 0,
    960   1.1  mrg 					                0, 0, 0, 0, 0, 0, 0, 0}))));
    961   1.1  mrg }
    962   1.1  mrg 
    963   1.1  mrg 
    964   1.1  mrg static __inline qword si_gbh(qword a)
    965   1.1  mrg {
    966   1.1  mrg   vec_ushort8 bits;
    967   1.1  mrg   vec_uint4   bytes;
    968   1.1  mrg 
    969   1.1  mrg   bits  = vec_sl(vec_and((vec_ushort8)(a), vec_splat_u16(1)), ((vec_ushort8){7, 6, 5, 4, 3, 2, 1, 0}));
    970   1.1  mrg 
    971   1.1  mrg   bytes = (vec_uint4)vec_sums((vec_int4)(vec_sum4s((vec_short8)(bits), (vec_int4){0})), (vec_int4){0});
    972   1.1  mrg 
    973   1.1  mrg   return ((qword)(vec_sld(bytes, bytes, 12)));
    974   1.1  mrg }
    975   1.1  mrg 
    976   1.1  mrg static __inline qword si_gb(qword a)
    977   1.1  mrg {
    978   1.1  mrg   vec_uint4 bits;
    979   1.1  mrg   vec_uint4 bytes;
    980   1.1  mrg 
    981   1.1  mrg   bits  = vec_sl(vec_and((vec_uint4)(a), vec_splat_u32(1)), ((vec_uint4){3, 2, 1, 0}));
    982   1.1  mrg   bytes = (vec_uint4)vec_sums((vec_int4)(bits), ((vec_int4){0}));
    983   1.1  mrg   return ((qword)(vec_sld(bytes, bytes, 12)));
    984   1.1  mrg }
    985   1.1  mrg 
    986   1.1  mrg 
    987   1.1  mrg /* Compare and halt
    988   1.1  mrg  */
    989   1.1  mrg static __inline void si_heq(qword a, qword b)
    990   1.1  mrg {
    991   1.1  mrg   union {
    992   1.1  mrg     vector unsigned int v;
    993   1.1  mrg     unsigned int i[4];
    994   1.1  mrg   } aa, bb;
    995   1.1  mrg 
    996   1.1  mrg   aa.v = (vector unsigned int)(a);
    997   1.1  mrg   bb.v = (vector unsigned int)(b);
    998   1.1  mrg 
    999   1.1  mrg   if (aa.i[0] == bb.i[0]) { SPU_HALT_ACTION; };
   1000   1.1  mrg }
   1001   1.1  mrg 
   1002   1.1  mrg static __inline void si_heqi(qword a, unsigned int b)
   1003   1.1  mrg {
   1004   1.1  mrg   union {
   1005   1.1  mrg     vector unsigned int v;
   1006   1.1  mrg     unsigned int i[4];
   1007   1.1  mrg   } aa;
   1008   1.1  mrg 
   1009   1.1  mrg   aa.v = (vector unsigned int)(a);
   1010   1.1  mrg 
   1011   1.1  mrg   if (aa.i[0] == b) { SPU_HALT_ACTION; };
   1012   1.1  mrg }
   1013   1.1  mrg 
   1014   1.1  mrg static __inline void si_hgt(qword a, qword b)
   1015   1.1  mrg {
   1016   1.1  mrg   union {
   1017   1.1  mrg     vector signed int v;
   1018   1.1  mrg     signed int i[4];
   1019   1.1  mrg   } aa, bb;
   1020   1.1  mrg 
   1021   1.1  mrg   aa.v = (vector signed int)(a);
   1022   1.1  mrg   bb.v = (vector signed int)(b);
   1023   1.1  mrg 
   1024   1.1  mrg   if (aa.i[0] > bb.i[0]) { SPU_HALT_ACTION; };
   1025   1.1  mrg }
   1026   1.1  mrg 
   1027   1.1  mrg static __inline void si_hgti(qword a, signed int b)
   1028   1.1  mrg {
   1029   1.1  mrg   union {
   1030   1.1  mrg     vector signed int v;
   1031   1.1  mrg     signed int i[4];
   1032   1.1  mrg   } aa;
   1033   1.1  mrg 
   1034   1.1  mrg   aa.v = (vector signed int)(a);
   1035   1.1  mrg 
   1036   1.1  mrg   if (aa.i[0] > b) { SPU_HALT_ACTION; };
   1037   1.1  mrg }
   1038   1.1  mrg 
   1039   1.1  mrg static __inline void si_hlgt(qword a, qword b)
   1040   1.1  mrg {
   1041   1.1  mrg   union {
   1042   1.1  mrg     vector unsigned int v;
   1043   1.1  mrg     unsigned int i[4];
   1044   1.1  mrg   } aa, bb;
   1045   1.1  mrg 
   1046   1.1  mrg   aa.v = (vector unsigned int)(a);
   1047   1.1  mrg   bb.v = (vector unsigned int)(b);
   1048   1.1  mrg 
   1049   1.1  mrg   if (aa.i[0] > bb.i[0]) { SPU_HALT_ACTION; };
   1050   1.1  mrg }
   1051   1.1  mrg 
   1052   1.1  mrg static __inline void si_hlgti(qword a, unsigned int b)
   1053   1.1  mrg {
   1054   1.1  mrg   union {
   1055   1.1  mrg     vector unsigned int v;
   1056   1.1  mrg     unsigned int i[4];
   1057   1.1  mrg   } aa;
   1058   1.1  mrg 
   1059   1.1  mrg   aa.v = (vector unsigned int)(a);
   1060   1.1  mrg 
   1061   1.1  mrg   if (aa.i[0] > b) { SPU_HALT_ACTION; };
   1062   1.1  mrg }
   1063   1.1  mrg 
   1064   1.1  mrg 
   1065   1.1  mrg /* Multiply and Add
   1066   1.1  mrg  */
   1067   1.1  mrg static __inline qword si_mpya(qword a, qword b, qword c)
   1068   1.1  mrg {
   1069   1.1  mrg   return ((qword)(vec_msum(vec_and((vec_short8)(a),
   1070   1.1  mrg 				   ((vec_short8){0, -1, 0, -1, 0, -1, 0, -1})),
   1071   1.1  mrg 			   (vec_short8)(b), (vec_int4)(c))));
   1072   1.1  mrg }
   1073   1.1  mrg 
   1074   1.1  mrg static __inline qword si_fma(qword a, qword b, qword c)
   1075   1.1  mrg {
   1076   1.1  mrg   return ((qword)(vec_madd((vec_float4)(a), (vec_float4)(b), (vec_float4)(c))));
   1077   1.1  mrg }
   1078   1.1  mrg 
   1079   1.1  mrg static __inline qword si_dfma(qword a, qword b, qword c)
   1080   1.1  mrg {
   1081   1.1  mrg   union {
   1082   1.1  mrg     vec_double2 v;
   1083   1.1  mrg     double d[2];
   1084   1.1  mrg   } aa, bb, cc, dd;
   1085   1.1  mrg 
   1086   1.1  mrg   aa.v = (vec_double2)(a);
   1087   1.1  mrg   bb.v = (vec_double2)(b);
   1088   1.1  mrg   cc.v = (vec_double2)(c);
   1089   1.1  mrg   dd.d[0] = aa.d[0] * bb.d[0] + cc.d[0];
   1090   1.1  mrg   dd.d[1] = aa.d[1] * bb.d[1] + cc.d[1];
   1091   1.1  mrg   return ((qword)(dd.v));
   1092   1.1  mrg }
   1093   1.1  mrg 
   1094   1.1  mrg /* Form Mask
   1095   1.1  mrg  */
   1096   1.1  mrg #define si_fsmbi(_a)	si_fsmb(si_from_int(_a))
   1097   1.1  mrg 
   1098   1.1  mrg static __inline qword si_fsmb(qword a)
   1099   1.1  mrg {
   1100   1.1  mrg   vec_char16 mask;
   1101   1.1  mrg   vec_ushort8 in;
   1102   1.1  mrg 
   1103   1.1  mrg   in = (vec_ushort8)(a);
   1104   1.1  mrg   mask = (vec_char16)(vec_perm(in, in, ((vec_uchar16){2, 2, 2, 2, 2, 2, 2, 2,
   1105   1.1  mrg 					              3, 3, 3, 3, 3, 3, 3, 3})));
   1106   1.1  mrg   return ((qword)(vec_sra(vec_sl(mask, ((vec_uchar16){0, 1, 2, 3, 4, 5, 6, 7,
   1107   1.1  mrg 				                      0, 1, 2, 3, 4, 5, 6, 7})),
   1108   1.1  mrg 			  vec_splat_u8(7))));
   1109   1.1  mrg }
   1110   1.1  mrg 
   1111   1.1  mrg 
   1112   1.1  mrg static __inline qword si_fsmh(qword a)
   1113   1.1  mrg {
   1114   1.1  mrg   vec_uchar16 in;
   1115   1.1  mrg   vec_short8 mask;
   1116   1.1  mrg 
   1117   1.1  mrg   in = (vec_uchar16)(a);
   1118   1.1  mrg   mask = (vec_short8)(vec_splat(in, 3));
   1119   1.1  mrg   return ((qword)(vec_sra(vec_sl(mask, ((vec_ushort8){0, 1, 2, 3, 4, 5, 6, 7})),
   1120   1.1  mrg 			  vec_splat_u16(15))));
   1121   1.1  mrg }
   1122   1.1  mrg 
   1123   1.1  mrg static __inline qword si_fsm(qword a)
   1124   1.1  mrg {
   1125   1.1  mrg   vec_uchar16 in;
   1126   1.1  mrg   vec_int4 mask;
   1127   1.1  mrg 
   1128   1.1  mrg   in = (vec_uchar16)(a);
   1129   1.1  mrg   mask = (vec_int4)(vec_splat(in, 3));
   1130   1.1  mrg   return ((qword)(vec_sra(vec_sl(mask, ((vec_uint4){28, 29, 30, 31})),
   1131   1.1  mrg 			  ((vec_uint4){31,31,31,31}))));
   1132   1.1  mrg }
   1133   1.1  mrg 
   1134   1.1  mrg /* Move from/to registers
   1135   1.1  mrg  */
   1136   1.1  mrg #define si_fscrrd()		((qword)((vec_uint4){0}))
   1137   1.1  mrg #define si_fscrwr(_a)
   1138   1.1  mrg 
   1139   1.1  mrg #define si_mfspr(_reg)		((qword)((vec_uint4){0}))
   1140   1.1  mrg #define si_mtspr(_reg, _a)
   1141   1.1  mrg 
   1142   1.1  mrg /* Multiply High High Add
   1143   1.1  mrg  */
   1144   1.1  mrg static __inline qword si_mpyhha(qword a, qword b, qword c)
   1145   1.1  mrg {
   1146   1.1  mrg   return ((qword)(vec_add(vec_mule((vec_short8)(a), (vec_short8)(b)), (vec_int4)(c))));
   1147   1.1  mrg }
   1148   1.1  mrg 
   1149   1.1  mrg static __inline qword si_mpyhhau(qword a, qword b, qword c)
   1150   1.1  mrg {
   1151   1.1  mrg   return ((qword)(vec_add(vec_mule((vec_ushort8)(a), (vec_ushort8)(b)), (vec_uint4)(c))));
   1152   1.1  mrg }
   1153   1.1  mrg 
   1154   1.1  mrg /* Multiply Subtract
   1155   1.1  mrg  */
   1156   1.1  mrg static __inline qword si_fms(qword a, qword b, qword c)
   1157   1.1  mrg {
   1158   1.1  mrg   return ((qword)(vec_madd((vec_float4)(a), (vec_float4)(b),
   1159   1.1  mrg 			   vec_sub(((vec_float4){0.0f}), (vec_float4)(c)))));
   1160   1.1  mrg }
   1161   1.1  mrg 
   1162   1.1  mrg static __inline qword si_dfms(qword a, qword b, qword c)
   1163   1.1  mrg {
   1164   1.1  mrg   union {
   1165   1.1  mrg     vec_double2 v;
   1166   1.1  mrg     double d[2];
   1167   1.1  mrg   } aa, bb, cc, dd;
   1168   1.1  mrg 
   1169   1.1  mrg   aa.v = (vec_double2)(a);
   1170   1.1  mrg   bb.v = (vec_double2)(b);
   1171   1.1  mrg   cc.v = (vec_double2)(c);
   1172   1.1  mrg   dd.d[0] = aa.d[0] * bb.d[0] - cc.d[0];
   1173   1.1  mrg   dd.d[1] = aa.d[1] * bb.d[1] - cc.d[1];
   1174   1.1  mrg   return ((qword)(dd.v));
   1175   1.1  mrg }
   1176   1.1  mrg 
   1177   1.1  mrg /* Multiply
   1178   1.1  mrg  */
   1179   1.1  mrg static __inline qword si_fm(qword a, qword b)
   1180   1.1  mrg {
   1181   1.1  mrg   return ((qword)(vec_madd((vec_float4)(a), (vec_float4)(b), ((vec_float4){0.0f}))));
   1182   1.1  mrg }
   1183   1.1  mrg 
   1184   1.1  mrg static __inline qword si_dfm(qword a, qword b)
   1185   1.1  mrg {
   1186   1.1  mrg   union {
   1187   1.1  mrg     vec_double2 v;
   1188   1.1  mrg     double d[2];
   1189   1.1  mrg   } aa, bb, dd;
   1190   1.1  mrg 
   1191   1.1  mrg   aa.v = (vec_double2)(a);
   1192   1.1  mrg   bb.v = (vec_double2)(b);
   1193   1.1  mrg   dd.d[0] = aa.d[0] * bb.d[0];
   1194   1.1  mrg   dd.d[1] = aa.d[1] * bb.d[1];
   1195   1.1  mrg   return ((qword)(dd.v));
   1196   1.1  mrg }
   1197   1.1  mrg 
   1198   1.1  mrg /* Multiply High
   1199   1.1  mrg  */
   1200   1.1  mrg static __inline qword si_mpyh(qword a, qword b)
   1201   1.1  mrg {
   1202   1.1  mrg   vec_uint4 sixteen = (vec_uint4){16, 16, 16, 16};
   1203   1.1  mrg 
   1204   1.1  mrg   return ((qword)(vec_sl(vec_mule((vec_short8)(a), (vec_short8)(vec_sl((vec_uint4)(b), sixteen))), sixteen)));
   1205   1.1  mrg }
   1206   1.1  mrg 
   1207   1.1  mrg 
   1208   1.1  mrg /* Multiply High High
   1209   1.1  mrg  */
   1210   1.1  mrg static __inline qword si_mpyhh(qword a, qword b)
   1211   1.1  mrg {
   1212   1.1  mrg   return ((qword)(vec_mule((vec_short8)(a), (vec_short8)(b))));
   1213   1.1  mrg }
   1214   1.1  mrg 
   1215   1.1  mrg static __inline qword si_mpyhhu(qword a, qword b)
   1216   1.1  mrg {
   1217   1.1  mrg   return ((qword)(vec_mule((vec_ushort8)(a), (vec_ushort8)(b))));
   1218   1.1  mrg }
   1219   1.1  mrg 
   1220   1.1  mrg /* Multiply Odd
   1221   1.1  mrg  */
   1222   1.1  mrg static __inline qword si_mpy(qword a, qword b)
   1223   1.1  mrg {
   1224   1.1  mrg   return ((qword)(vec_mulo((vec_short8)(a), (vec_short8)(b))));
   1225   1.1  mrg }
   1226   1.1  mrg 
   1227   1.1  mrg static __inline qword si_mpyu(qword a, qword b)
   1228   1.1  mrg {
   1229   1.1  mrg   return ((qword)(vec_mulo((vec_ushort8)(a), (vec_ushort8)(b))));
   1230   1.1  mrg }
   1231   1.1  mrg 
   1232   1.1  mrg static __inline qword si_mpyi(qword a, short b)
   1233   1.1  mrg {
   1234   1.1  mrg   return ((qword)(vec_mulo((vec_short8)(a),
   1235   1.1  mrg 			   vec_splat((vec_short8)(si_from_short(b)), 1))));
   1236   1.1  mrg }
   1237   1.1  mrg 
   1238   1.1  mrg static __inline qword si_mpyui(qword a, unsigned short b)
   1239   1.1  mrg {
   1240   1.1  mrg   return ((qword)(vec_mulo((vec_ushort8)(a),
   1241   1.1  mrg 			   vec_splat((vec_ushort8)(si_from_ushort(b)), 1))));
   1242   1.1  mrg }
   1243   1.1  mrg 
   1244   1.1  mrg /* Multiply and Shift Right
   1245   1.1  mrg  */
   1246   1.1  mrg static __inline qword si_mpys(qword a, qword b)
   1247   1.1  mrg {
   1248   1.1  mrg   return ((qword)(vec_sra(vec_mulo((vec_short8)(a), (vec_short8)(b)), ((vec_uint4){16,16,16,16}))));
   1249   1.1  mrg }
   1250   1.1  mrg 
   1251   1.1  mrg /* Nand
   1252   1.1  mrg  */
   1253   1.1  mrg static __inline qword si_nand(qword a, qword b)
   1254   1.1  mrg {
   1255   1.1  mrg   vec_uchar16 d;
   1256   1.1  mrg 
   1257   1.1  mrg   d = vec_and((vec_uchar16)(a), (vec_uchar16)(b));
   1258   1.1  mrg   return ((qword)(vec_nor(d, d)));
   1259   1.1  mrg }
   1260   1.1  mrg 
   1261   1.1  mrg /* Negative Multiply Add
   1262   1.1  mrg  */
   1263   1.1  mrg static __inline qword si_dfnma(qword a, qword b, qword c)
   1264   1.1  mrg {
   1265   1.1  mrg   union {
   1266   1.1  mrg     vec_double2 v;
   1267   1.1  mrg     double d[2];
   1268   1.1  mrg   } aa, bb, cc, dd;
   1269   1.1  mrg 
   1270   1.1  mrg   aa.v = (vec_double2)(a);
   1271   1.1  mrg   bb.v = (vec_double2)(b);
   1272   1.1  mrg   cc.v = (vec_double2)(c);
   1273   1.1  mrg   dd.d[0] = -cc.d[0] - aa.d[0] * bb.d[0];
   1274   1.1  mrg   dd.d[1] = -cc.d[1] - aa.d[1] * bb.d[1];
   1275   1.1  mrg   return ((qword)(dd.v));
   1276   1.1  mrg }
   1277   1.1  mrg 
   1278   1.1  mrg /* Negative Multiply and Subtract
   1279   1.1  mrg  */
   1280   1.1  mrg static __inline qword si_fnms(qword a, qword b, qword c)
   1281   1.1  mrg {
   1282   1.1  mrg   return ((qword)(vec_nmsub((vec_float4)(a), (vec_float4)(b), (vec_float4)(c))));
   1283   1.1  mrg }
   1284   1.1  mrg 
   1285   1.1  mrg static __inline qword si_dfnms(qword a, qword b, qword c)
   1286   1.1  mrg {
   1287   1.1  mrg   union {
   1288   1.1  mrg     vec_double2 v;
   1289   1.1  mrg     double d[2];
   1290   1.1  mrg   } aa, bb, cc, dd;
   1291   1.1  mrg 
   1292   1.1  mrg   aa.v = (vec_double2)(a);
   1293   1.1  mrg   bb.v = (vec_double2)(b);
   1294   1.1  mrg   cc.v = (vec_double2)(c);
   1295   1.1  mrg   dd.d[0] = cc.d[0] - aa.d[0] * bb.d[0];
   1296   1.1  mrg   dd.d[1] = cc.d[1] - aa.d[1] * bb.d[1];
   1297   1.1  mrg   return ((qword)(dd.v));
   1298   1.1  mrg }
   1299   1.1  mrg 
   1300   1.1  mrg /* Nor
   1301   1.1  mrg  */
   1302   1.1  mrg static __inline qword si_nor(qword a, qword b)
   1303   1.1  mrg {
   1304   1.1  mrg   return ((qword)(vec_nor((vec_uchar16)(a), (vec_uchar16)(b))));
   1305   1.1  mrg }
   1306   1.1  mrg 
   1307   1.1  mrg /* Or
   1308   1.1  mrg  */
   1309   1.1  mrg static __inline qword si_or(qword a, qword b)
   1310   1.1  mrg {
   1311   1.1  mrg   return ((qword)(vec_or((vec_uchar16)(a), (vec_uchar16)(b))));
   1312   1.1  mrg }
   1313   1.1  mrg 
   1314   1.1  mrg static __inline qword si_orbi(qword a, unsigned char b)
   1315   1.1  mrg {
   1316   1.1  mrg   return ((qword)(vec_or((vec_uchar16)(a),
   1317   1.1  mrg 			 vec_splat((vec_uchar16)(si_from_uchar(b)), 3))));
   1318   1.1  mrg }
   1319   1.1  mrg 
   1320   1.1  mrg static __inline qword si_orhi(qword a, unsigned short b)
   1321   1.1  mrg {
   1322   1.1  mrg   return ((qword)(vec_or((vec_ushort8)(a),
   1323   1.1  mrg 			  vec_splat((vec_ushort8)(si_from_ushort(b)), 1))));
   1324   1.1  mrg }
   1325   1.1  mrg 
   1326   1.1  mrg static __inline qword si_ori(qword a, unsigned int b)
   1327   1.1  mrg {
   1328   1.1  mrg   return ((qword)(vec_or((vec_uint4)(a),
   1329   1.1  mrg 			  vec_splat((vec_uint4)(si_from_uint(b)), 0))));
   1330   1.1  mrg }
   1331   1.1  mrg 
   1332   1.1  mrg /* Or Complement
   1333   1.1  mrg  */
   1334   1.1  mrg static __inline qword si_orc(qword a, qword b)
   1335   1.1  mrg {
   1336   1.1  mrg   return ((qword)(vec_or((vec_uchar16)(a), vec_nor((vec_uchar16)(b), (vec_uchar16)(b)))));
   1337   1.1  mrg }
   1338   1.1  mrg 
   1339   1.1  mrg 
   1340   1.1  mrg /* Or Across
   1341   1.1  mrg  */
   1342   1.1  mrg static __inline qword si_orx(qword a)
   1343   1.1  mrg {
   1344   1.1  mrg   vec_uchar16 tmp;
   1345   1.1  mrg   tmp = (vec_uchar16)(a);
   1346   1.1  mrg   tmp = vec_or(tmp, vec_sld(tmp, tmp, 8));
   1347   1.1  mrg   tmp = vec_or(tmp, vec_sld(tmp, tmp, 4));
   1348   1.1  mrg   return ((qword)(vec_and(tmp, ((vec_uchar16){0xFF,0xFF,0xFF,0xFF, 0x00,0x00,0x00,0x00,
   1349   1.1  mrg 				              0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00}))));
   1350   1.1  mrg }
   1351   1.1  mrg 
   1352   1.1  mrg 
   1353   1.1  mrg /* Estimates
   1354   1.1  mrg  */
   1355   1.1  mrg static __inline qword si_frest(qword a)
   1356   1.1  mrg {
   1357   1.1  mrg   return ((qword)(vec_re((vec_float4)(a))));
   1358   1.1  mrg }
   1359   1.1  mrg 
   1360   1.1  mrg static __inline qword si_frsqest(qword a)
   1361   1.1  mrg {
   1362   1.1  mrg   return ((qword)(vec_rsqrte((vec_float4)(a))));
   1363   1.1  mrg }
   1364   1.1  mrg 
   1365   1.1  mrg #define si_fi(_a, _d)		(_d)
   1366   1.1  mrg 
   1367   1.1  mrg /* Channel Read and Write
   1368   1.1  mrg  */
   1369   1.1  mrg #define si_rdch(_channel)		((qword)(vec_splat_u8(0)))	/* not mappable */
   1370   1.1  mrg #define si_rchcnt(_channel)		((qword)(vec_splat_u8(0)))	/* not mappable */
   1371   1.1  mrg #define si_wrch(_channel, _a)		/* not mappable */
   1372   1.1  mrg 
   1373   1.1  mrg /* Rotate Left
   1374   1.1  mrg  */
   1375   1.1  mrg static __inline qword si_roth(qword a, qword b)
   1376   1.1  mrg {
   1377   1.1  mrg   return ((qword)(vec_rl((vec_ushort8)(a), (vec_ushort8)(b))));
   1378   1.1  mrg }
   1379   1.1  mrg 
   1380   1.1  mrg static __inline qword si_rot(qword a, qword b)
   1381   1.1  mrg {
   1382   1.1  mrg   return ((qword)(vec_rl((vec_uint4)(a), (vec_uint4)(b))));
   1383   1.1  mrg }
   1384   1.1  mrg 
   1385   1.1  mrg static __inline qword si_rothi(qword a, int b)
   1386   1.1  mrg {
   1387   1.1  mrg   return ((qword)(vec_rl((vec_ushort8)(a),
   1388   1.1  mrg 			 vec_splat((vec_ushort8)(si_from_int(b)), 1))));
   1389   1.1  mrg }
   1390   1.1  mrg 
   1391   1.1  mrg static __inline qword si_roti(qword a, int b)
   1392   1.1  mrg {
   1393   1.1  mrg   return ((qword)(vec_rl((vec_uint4)(a),
   1394   1.1  mrg 			 vec_splat((vec_uint4)(si_from_int(b)), 0))));
   1395   1.1  mrg }
   1396   1.1  mrg 
   1397   1.1  mrg /* Rotate Left with Mask
   1398   1.1  mrg  */
   1399   1.1  mrg static __inline qword si_rothm(qword a, qword b)
   1400   1.1  mrg {
   1401   1.1  mrg   vec_ushort8 neg_b;
   1402   1.1  mrg   vec_ushort8 mask;
   1403   1.1  mrg 
   1404   1.1  mrg   neg_b = (vec_ushort8)vec_sub(vec_splat_s16(0), (vec_short8)(b));
   1405   1.1  mrg   mask = vec_sra(vec_sl(neg_b, vec_splat_u16(11)), vec_splat_u16(15));
   1406   1.1  mrg   return ((qword)(vec_andc(vec_sr((vec_ushort8)(a), neg_b), mask)));
   1407   1.1  mrg }
   1408   1.1  mrg 
   1409   1.1  mrg static __inline qword si_rotm(qword a, qword b)
   1410   1.1  mrg {
   1411   1.1  mrg   vec_uint4 neg_b;
   1412   1.1  mrg   vec_uint4 mask;
   1413   1.1  mrg 
   1414   1.1  mrg   neg_b = (vec_uint4)vec_sub(vec_splat_s32(0), (vec_int4)(b));
   1415   1.1  mrg   mask = vec_sra(vec_sl(neg_b, ((vec_uint4){26,26,26,26})), ((vec_uint4){31,31,31,31}));
   1416   1.1  mrg   return ((qword)(vec_andc(vec_sr((vec_uint4)(a), neg_b), mask)));
   1417   1.1  mrg }
   1418   1.1  mrg 
   1419   1.1  mrg static __inline qword si_rothmi(qword a, int b)
   1420   1.1  mrg {
   1421   1.1  mrg   vec_ushort8 neg_b;
   1422   1.1  mrg   vec_ushort8 mask;
   1423   1.1  mrg 
   1424   1.1  mrg   neg_b = vec_splat((vec_ushort8)(si_from_int(-b)), 1);
   1425   1.1  mrg   mask = vec_sra(vec_sl(neg_b, vec_splat_u16(11)), vec_splat_u16(15));
   1426   1.1  mrg   return ((qword)(vec_andc(vec_sr((vec_ushort8)(a), neg_b), mask)));
   1427   1.1  mrg }
   1428   1.1  mrg 
   1429   1.1  mrg static __inline qword si_rotmi(qword a, int b)
   1430   1.1  mrg {
   1431   1.1  mrg   vec_uint4 neg_b;
   1432   1.1  mrg   vec_uint4 mask;
   1433   1.1  mrg 
   1434   1.1  mrg   neg_b = vec_splat((vec_uint4)(si_from_int(-b)), 0);
   1435   1.1  mrg   mask = vec_sra(vec_sl(neg_b, ((vec_uint4){26,26,26,26})), ((vec_uint4){31,31,31,31}));
   1436   1.1  mrg   return ((qword)(vec_andc(vec_sr((vec_uint4)(a), neg_b), mask)));
   1437   1.1  mrg }
   1438   1.1  mrg 
   1439   1.1  mrg 
   1440   1.1  mrg /* Rotate Left Algebraic with Mask
   1441   1.1  mrg  */
/* Arithmetic right shift of each halfword of A by (0 - B).
   When the negated count is >= 16, MASK is all-ones; OR-ing it into
   the count saturates the shift (vec_sra counts are taken modulo 16),
   so out-of-range lanes fill completely with the sign bit.  */
static __inline qword si_rotmah(qword a, qword b)
{
  vec_ushort8 neg_b;
  vec_ushort8 mask;

  neg_b = (vec_ushort8)vec_sub(vec_splat_s16(0), (vec_short8)(b));
  mask = vec_sra(vec_sl(neg_b, vec_splat_u16(11)), vec_splat_u16(15));
  return ((qword)(vec_sra((vec_short8)(a), (vec_ushort8)vec_or(neg_b, mask))));
}
   1451   1.1  mrg 
/* Word variant of si_rotmah: arithmetic right shift of each word of A
   by (0 - B), with out-of-range counts (>= 32) saturated via the mask
   so the lane fills with the sign bit.  */
static __inline qword si_rotma(qword a, qword b)
{
  vec_uint4 neg_b;
  vec_uint4 mask;

  neg_b = (vec_uint4)vec_sub(vec_splat_s32(0), (vec_int4)(b));
  mask = vec_sra(vec_sl(neg_b, ((vec_uint4){26,26,26,26})), ((vec_uint4){31,31,31,31}));
  return ((qword)(vec_sra((vec_int4)(a), (vec_uint4)vec_or(neg_b, mask))));
}
   1461   1.1  mrg 
   1462   1.1  mrg 
/* Immediate form of si_rotmah: -B splatted from halfword slot 1;
   out-of-range counts saturate so lanes fill with the sign bit.  */
static __inline qword si_rotmahi(qword a, int b)
{
  vec_ushort8 neg_b;
  vec_ushort8 mask;

  neg_b = vec_splat((vec_ushort8)(si_from_int(-b)), 1);
  mask = vec_sra(vec_sl(neg_b, vec_splat_u16(11)), vec_splat_u16(15));
  return ((qword)(vec_sra((vec_short8)(a), (vec_ushort8)vec_or(neg_b, mask))));
}
   1472   1.1  mrg 
/* Immediate form of si_rotma: -B splatted from word slot 0;
   out-of-range counts saturate so lanes fill with the sign bit.  */
static __inline qword si_rotmai(qword a, int b)
{
  vec_uint4 neg_b;
  vec_uint4 mask;

  neg_b = vec_splat((vec_uint4)(si_from_int(-b)), 0);
  mask = vec_sra(vec_sl(neg_b, ((vec_uint4){26,26,26,26})), ((vec_uint4){31,31,31,31}));
  return ((qword)(vec_sra((vec_int4)(a), (vec_uint4)vec_or(neg_b, mask))));
}
   1482   1.1  mrg 
   1483   1.1  mrg 
   1484   1.1  mrg /* Rotate Left Quadword by Bytes with Mask
   1485   1.1  mrg  */
/* Shift the quadword A right by (0 - COUNT) bytes; COUNT is expected
   to be non-positive, matching the SPU rotqmbyi convention.
   Only x.i[3] is written: vec_sro reads just the low-order bits of its
   shift operand, so the other union words may stay uninitialized.
   Shifts of 16 bytes or more (bit 4 of the negated count) force the
   result to zero via MASK.  */
static __inline qword si_rotqmbyi(qword a, int count)
{
  union {
    vec_uchar16 v;
    int i[4];
  } x;
  vec_uchar16 mask;

  count = 0 - count;
  x.i[3] = count << 3;   /* byte count -> bit count for vec_sro */
  mask = (count & 0x10) ? vec_splat_u8(0) : vec_splat_u8(-1);

  return ((qword)(vec_and(vec_sro((vec_uchar16)(a), x.v), mask)));
}
   1500   1.1  mrg 
   1501   1.1  mrg 
/* Vector-count form of si_rotqmbyi: the byte count is taken from the
   preferred word (i[0]) of COUNT, negated and converted to bits, then
   its low byte is splatted so vec_sro sees it in the position it
   reads.  CNT & 0x80 (>= 16 bytes in bit units) zeroes the result.  */
static __inline qword si_rotqmby(qword a, qword count)
{
  union {
    vec_uchar16 v;
    int i[4];
  } x;
  int cnt;
  vec_uchar16 mask;

  x.v = (vec_uchar16)(count);
  x.i[0] = cnt = (0 - x.i[0]) << 3;

  x.v = vec_splat(x.v, 3);   /* low byte of word 0 across all lanes */
  mask = (cnt & 0x80) ? vec_splat_u8(0) : vec_splat_u8(-1);

  return ((qword)(vec_and(vec_sro((vec_uchar16)(a), x.v), mask)));
}
   1519   1.1  mrg 
   1520   1.1  mrg 
   1521   1.1  mrg /* Rotate Left Quadword by Bytes
   1522   1.1  mrg  */
/* Rotate the quadword A left by COUNT bytes: OR of a left octet shift
   by COUNT and a right octet shift by (16 - COUNT).  Only the last
   union word of each shift vector is written; vec_slo/vec_sro read
   only the low-order byte of their shift operand.  */
static __inline qword si_rotqbyi(qword a, int count)
{
  union {
    vec_uchar16 v;
    int i[4];
  } left, right;

  count <<= 3;   /* bytes -> bits */
  left.i[3] = count;
  right.i[3] = 0 - count;
  return ((qword)(vec_or(vec_slo((vec_uchar16)(a), left.v), vec_sro((vec_uchar16)(a), right.v))));
}
   1535   1.1  mrg 
/* Vector-count form of si_rotqbyi: splat the byte count (low byte of
   the preferred word), scale to bits (<< 3), and combine complementary
   left/right octet shifts to form the rotate.  */
static __inline qword si_rotqby(qword a, qword count)
{
  vec_uchar16 left, right;

  left = vec_sl(vec_splat((vec_uchar16)(count), 3), vec_splat_u8(3));
  right = vec_sub(vec_splat_u8(0), left);
  return ((qword)(vec_or(vec_slo((vec_uchar16)(a), left), vec_sro((vec_uchar16)(a), right))));
}
   1544   1.1  mrg 
   1545   1.1  mrg /* Rotate Left Quadword by Bytes Bit Count
   1546   1.1  mrg  */
/* Rotate A left by the byte count embedded in a bit-denominated COUNT
   (only the byte field that vec_slo/vec_sro consume is significant).
   RIGHT = 7 - LEFT produces the complementary (16 - n) byte amount in
   that field, completing the rotate.  */
static __inline qword si_rotqbybi(qword a, qword count)
{
  vec_uchar16 left, right;

  left = vec_splat((vec_uchar16)(count), 3);
  right = vec_sub(vec_splat_u8(7), left);
  return ((qword)(vec_or(vec_slo((vec_uchar16)(a), left), vec_sro((vec_uchar16)(a), right))));
}
   1555   1.1  mrg 
   1556   1.1  mrg 
   1557   1.1  mrg /* Rotate Left Quadword by Bytes Bit Count
   1558   1.1  mrg  */
/* Rotate the quadword A left by (COUNT & 7) bits.  Y recovers the
   wrap-around bits: A shifted right 120 bits (15 bytes) leaves the
   original byte 0 in the last lane, which is then shifted right by
   (8 - count) and OR'd with the whole-register left shift of A.  */
static __inline qword si_rotqbii(qword a, int count)
{
  vec_uchar16 x, y;
  vec_uchar16 result;

  x = vec_splat((vec_uchar16)(si_from_int(count & 7)), 3);
  y = (vec_uchar16)(vec_sr((vec_uint4)vec_sro((vec_uchar16)(a), ((vec_uchar16)((vec_uint4){0,0,0,120}))),
			   (vec_uint4)vec_sub(vec_splat_u8(8), x)));
  result = vec_or(vec_sll((qword)(a), x), y);
  return ((qword)(result));
}
   1570   1.1  mrg 
/* Vector-count form of si_rotqbii: rotate A left by the low 3 bits of
   COUNT's preferred-word byte; the wrap-around bits are recovered the
   same way as in si_rotqbii and OR'd with the left shift.  */
static __inline qword si_rotqbi(qword a, qword count)
{
  vec_uchar16 x, y;
  vec_uchar16 result;

  x = vec_and(vec_splat((vec_uchar16)(count), 3), vec_splat_u8(7));
  y = (vec_uchar16)(vec_sr((vec_uint4)vec_sro((vec_uchar16)(a), ((vec_uchar16)((vec_uint4){0,0,0,120}))),
			   (vec_uint4)vec_sub(vec_splat_u8(8), x)));

  result = vec_or(vec_sll((qword)(a), x), y);
  return ((qword)(result));
}
   1583   1.1  mrg 
   1584   1.1  mrg 
   1585   1.1  mrg /* Rotate Left Quadword and Mask by Bits
   1586   1.1  mrg  */
/* Shift the quadword A right by (0 - COUNT) bits; COUNT is expected
   to be non-positive per the SPU rotqmbii convention.  vec_srl uses
   only the low 3 bits of the splatted shift value.  */
static __inline qword si_rotqmbii(qword a, int count)
{
  return ((qword)(vec_srl((vec_uchar16)(a), vec_splat((vec_uchar16)(si_from_int(0 - count)), 3))));
}
   1591   1.1  mrg 
/* Vector-count form of si_rotqmbii: shift A right by the negated
   bit count taken from COUNT's preferred-word byte.  */
static __inline qword si_rotqmbi(qword a, qword count)
{
  return ((qword)(vec_srl((vec_uchar16)(a), vec_sub(vec_splat_u8(0), vec_splat((vec_uchar16)(count), 3)))));
}
   1596   1.1  mrg 
   1597   1.1  mrg 
   1598   1.1  mrg /* Rotate Left Quadword and Mask by Bytes with Bit Count
   1599   1.1  mrg  */
/* Shift A right by the byte portion of a bit-denominated COUNT:
   the sub-byte bits are discarded (& ~7) before negation, the low
   byte of the bit amount is splatted for vec_sro, and shifts of 16
   bytes or more (cnt bit 7, i.e. 128 bits) zero the result.  */
static __inline qword si_rotqmbybi(qword a, qword count)
{
  union {
    vec_uchar16 v;
    int i[4];
  } x;
  int cnt;
  vec_uchar16 mask;

  x.v = (vec_uchar16)(count);
  x.i[0] = cnt = 0 - (x.i[0] & ~7);
  x.v = vec_splat(x.v, 3);
  mask = (cnt & 0x80) ? vec_splat_u8(0) : vec_splat_u8(-1);

  return ((qword)(vec_and(vec_sro((vec_uchar16)(a), x.v), mask)));
}
   1616   1.1  mrg 
   1617   1.1  mrg 
   1618   1.1  mrg 
   1619   1.1  mrg 
   1620   1.1  mrg /* Round Double to Float
   1621   1.1  mrg  */
   1622   1.1  mrg static __inline qword si_frds(qword a)
   1623   1.1  mrg {
   1624   1.1  mrg   union {
   1625   1.1  mrg     vec_float4 v;
   1626   1.1  mrg     float f[4];
   1627   1.1  mrg   } d;
   1628   1.1  mrg   union {
   1629   1.1  mrg     vec_double2 v;
   1630   1.1  mrg     double d[2];
   1631   1.1  mrg   } in;
   1632   1.1  mrg 
   1633   1.1  mrg   in.v = (vec_double2)(a);
   1634   1.1  mrg   d.v = (vec_float4){0.0f};
   1635   1.1  mrg   d.f[0] = (float)in.d[0];
   1636   1.1  mrg   d.f[2] = (float)in.d[1];
   1637   1.1  mrg 
   1638   1.1  mrg   return ((qword)(d.v));
   1639   1.1  mrg }
   1640   1.1  mrg 
   1641   1.1  mrg /* Select Bits
   1642   1.1  mrg  */
   1643   1.1  mrg static __inline qword si_selb(qword a, qword b, qword c)
   1644   1.1  mrg {
   1645   1.1  mrg   return ((qword)(vec_sel((vec_uchar16)(a), (vec_uchar16)(b), (vec_uchar16)(c))));
   1646   1.1  mrg }
   1647   1.1  mrg 
   1648   1.1  mrg 
   1649   1.1  mrg /* Shuffle Bytes
   1650   1.1  mrg  */
/* SPU shufb, including its special control codes.  The inner vec_perm
   performs the ordinary A/B byte selection.  For pattern bytes with
   the high bit set, PAT is redirected (pattern >> 3, values 16..31)
   into the constant vector, yielding 0x00 for 10xxxxxx, 0xFF for
   110xxxxx and 0x80 for 111xxxxx; otherwise PAT keeps the identity
   index so the inner permute's byte passes through.  */
static __inline qword si_shufb(qword a, qword b, qword pattern)
{
  vec_uchar16 pat;

  pat = vec_sel(((vec_uchar16){0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15}),
		vec_sr((vec_uchar16)(pattern), vec_splat_u8(3)),
		vec_sra((vec_uchar16)(pattern), vec_splat_u8(7)));
  return ((qword)(vec_perm(vec_perm(a, b, pattern),
			   ((vec_uchar16){0, 0, 0, 0, 0, 0, 0, 0,
				          0xFF, 0xFF, 0xFF, 0xFF, 0x80, 0x80, 0x80, 0x80}),
			   pat)));
}
   1663   1.1  mrg 
   1664   1.1  mrg 
   1665   1.1  mrg /* Shift Left
   1666   1.1  mrg  */
/* Shift each halfword of A left by B.  The mask replicates bit 4 of
   the count (<< 11, arithmetic >> 15), so counts >= 16 zero the lane
   via vec_andc instead of wrapping modulo 16.  */
static __inline qword si_shlh(qword a, qword b)
{
  vec_ushort8 mask;

  mask = (vec_ushort8)vec_sra(vec_sl((vec_ushort8)(b), vec_splat_u16(11)), vec_splat_u16(15));
  return ((qword)(vec_andc(vec_sl((vec_ushort8)(a), (vec_ushort8)(b)), mask)));
}
   1674   1.1  mrg 
/* Shift each word of A left by B; counts >= 32 (bit 5 of the count)
   zero the lane via the computed mask.  */
static __inline qword si_shl(qword a, qword b)
{
  vec_uint4 mask;

  mask = (vec_uint4)vec_sra(vec_sl((vec_uint4)(b), ((vec_uint4){26,26,26,26})), ((vec_uint4){31,31,31,31}));
  return ((qword)(vec_andc(vec_sl((vec_uint4)(a), (vec_uint4)(b)), mask)));
}
   1682   1.1  mrg 
   1683   1.1  mrg 
/* Immediate form of si_shlh: the count is splatted from halfword
   slot 1 of the preferred word; counts >= 16 zero the lanes.  */
static __inline qword si_shlhi(qword a, unsigned int b)
{
  vec_ushort8 mask;
  vec_ushort8 bv;

  bv = vec_splat((vec_ushort8)(si_from_int(b)), 1);
  mask = (vec_ushort8)vec_sra(vec_sl(bv, vec_splat_u16(11)), vec_splat_u16(15));
  return ((qword)(vec_andc(vec_sl((vec_ushort8)(a), bv), mask)));
}
   1693   1.1  mrg 
/* Immediate form of si_shl: the count is splatted from word slot 0;
   counts >= 32 zero the lanes.  */
static __inline qword si_shli(qword a, unsigned int b)
{
  vec_uint4 bv;
  vec_uint4 mask;

  bv = vec_splat((vec_uint4)(si_from_uint(b)), 0);
  mask = (vec_uint4)vec_sra(vec_sl(bv, ((vec_uint4){26,26,26,26})), ((vec_uint4){31,31,31,31}));
  return ((qword)(vec_andc(vec_sl((vec_uint4)(a), bv), mask)));
}
   1703   1.1  mrg 
   1704   1.1  mrg 
   1705   1.1  mrg /* Shift Left Quadword
   1706   1.1  mrg  */
   1707   1.1  mrg static __inline qword si_shlqbii(qword a, unsigned int count)
   1708   1.1  mrg {
   1709   1.1  mrg   vec_uchar16 x;
   1710   1.1  mrg 
   1711   1.1  mrg   x = vec_splat((vec_uchar16)(si_from_uint(count)), 3);
   1712   1.1  mrg   return ((qword)(vec_sll((vec_uchar16)(a), x)));
   1713   1.1  mrg }
   1714   1.1  mrg 
   1715   1.1  mrg static __inline qword si_shlqbi(qword a, qword count)
   1716   1.1  mrg {
   1717   1.1  mrg   vec_uchar16 x;
   1718   1.1  mrg 
   1719   1.1  mrg   x = vec_splat((vec_uchar16)(count), 3);
   1720   1.1  mrg   return ((qword)(vec_sll((vec_uchar16)(a), x)));
   1721   1.1  mrg }
   1722   1.1  mrg 
   1723   1.1  mrg 
   1724   1.1  mrg /* Shift Left Quadword by Bytes
   1725   1.1  mrg  */
/* Shift the quadword A left by COUNT bytes.  Only x.i[3] is written
   because vec_slo reads just the low-order bits of its shift operand;
   counts >= 16 bytes zero the result via MASK.  */
static __inline qword si_shlqbyi(qword a, unsigned int count)
{
  union {
    vec_uchar16 v;
    int i[4];
  } x;
  vec_uchar16 mask;

  x.i[3] = count << 3;   /* bytes -> bits for vec_slo */
  mask = (count & 0x10) ? vec_splat_u8(0) : vec_splat_u8(-1);
  return ((qword)(vec_and(vec_slo((vec_uchar16)(a), x.v), mask)));
}
   1738   1.1  mrg 
/* Vector-count form of si_shlqbyi: the byte count (low byte of the
   preferred word of COUNT) is splatted and scaled to bits; counts of
   16 bytes or more (bit 7 of the bit amount) zero the result.  */
static __inline qword si_shlqby(qword a, qword count)
{
  union {
    vec_uchar16 v;
    unsigned int i[4];
  } x;
  unsigned int cnt;
  vec_uchar16 mask;

  x.v = vec_sl(vec_splat((vec_uchar16)(count), 3), vec_splat_u8(3));
  cnt = x.i[0];
  mask = (cnt & 0x80) ? vec_splat_u8(0) : vec_splat_u8(-1);
  return ((qword)(vec_and(vec_slo((vec_uchar16)(a), x.v), mask)));
}
   1753   1.1  mrg 
   1754   1.1  mrg /* Shift Left Quadword by Bytes with Bit Count
   1755   1.1  mrg  */
/* Shift A left by the byte portion of a bit-denominated COUNT:
   the count byte is splatted unscaled (vec_slo consumes its byte
   field directly); bit amounts of 128 or more (cnt bit 7) zero the
   result via MASK.  */
static __inline qword si_shlqbybi(qword a, qword count)
{
  union {
    vec_uchar16 v;
    int i[4];
  } x;
  unsigned int cnt;
  vec_uchar16 mask;

  x.v = vec_splat((vec_uchar16)(count), 3);
  cnt = x.i[0];
  mask = (cnt & 0x80) ? vec_splat_u8(0) : vec_splat_u8(-1);
  return ((qword)(vec_and(vec_slo((vec_uchar16)(a), x.v), mask)));
}
   1770   1.1  mrg 
   1771   1.1  mrg 
   1772   1.1  mrg /* Stop and Signal
   1773   1.1  mrg  */
/* Both halt variants expand to the user-overridable stop action
   (SPU_STOP_ACTION, definable before including this header); the
   type/argument operands are ignored on the PPU.  */
#define si_stop(_type)		SPU_STOP_ACTION
#define si_stopd(a, b, c)	SPU_STOP_ACTION
   1776   1.1  mrg 
   1777   1.1  mrg 
   1778   1.1  mrg /* Subtract
   1779   1.1  mrg  */
   1780   1.1  mrg static __inline qword si_sfh(qword a, qword b)
   1781   1.1  mrg {
   1782   1.1  mrg   return ((qword)(vec_sub((vec_ushort8)(b), (vec_ushort8)(a))));
   1783   1.1  mrg }
   1784   1.1  mrg 
   1785   1.1  mrg static __inline qword si_sf(qword a, qword b)
   1786   1.1  mrg {
   1787   1.1  mrg   return ((qword)(vec_sub((vec_uint4)(b), (vec_uint4)(a))));
   1788   1.1  mrg }
   1789   1.1  mrg 
   1790   1.1  mrg static __inline qword si_fs(qword a, qword b)
   1791   1.1  mrg {
   1792   1.1  mrg   return ((qword)(vec_sub((vec_float4)(a), (vec_float4)(b))));
   1793   1.1  mrg }
   1794   1.1  mrg 
   1795   1.1  mrg static __inline qword si_dfs(qword a, qword b)
   1796   1.1  mrg {
   1797   1.1  mrg   union {
   1798   1.1  mrg     vec_double2 v;
   1799   1.1  mrg     double d[2];
   1800   1.1  mrg   } aa, bb, dd;
   1801   1.1  mrg 
   1802   1.1  mrg   aa.v = (vec_double2)(a);
   1803   1.1  mrg   bb.v = (vec_double2)(b);
   1804   1.1  mrg   dd.d[0] = aa.d[0] - bb.d[0];
   1805   1.1  mrg   dd.d[1] = aa.d[1] - bb.d[1];
   1806   1.1  mrg   return ((qword)(dd.v));
   1807   1.1  mrg }
   1808   1.1  mrg 
   1809   1.1  mrg static __inline qword si_sfhi(qword a, short b)
   1810   1.1  mrg {
   1811   1.1  mrg   return ((qword)(vec_sub(vec_splat((vec_short8)(si_from_short(b)), 1),
   1812   1.1  mrg 			  (vec_short8)(a))));
   1813   1.1  mrg }
   1814   1.1  mrg 
   1815   1.1  mrg static __inline qword si_sfi(qword a, int b)
   1816   1.1  mrg {
   1817   1.1  mrg   return ((qword)(vec_sub(vec_splat((vec_int4)(si_from_int(b)), 0),
   1818   1.1  mrg 			  (vec_int4)(a))));
   1819   1.1  mrg }
   1820   1.1  mrg 
   1821   1.1  mrg /* Subtract word extended
   1822   1.1  mrg  */
/* Subtract extended: _b + ~_a + (_c & 1), i.e. B - A with the
   carry-in taken from the least significant bit of each word of _c
   (~_a is built as vec_nor(_a, _a)).  */
#define si_sfx(_a, _b, _c)	((qword)(vec_add(vec_add((vec_uint4)(_b), 				\
							 vec_nor((vec_uint4)(_a), (vec_uint4)(_a))), 	\
						 vec_and((vec_uint4)(_c), vec_splat_u32(1)))))
   1826   1.1  mrg 
   1827   1.1  mrg 
   1828   1.1  mrg /* Sum Bytes into Shorts
   1829   1.1  mrg  */
/* Sum Bytes into Shorts: vec_sum4s produces one word per 4-byte group
   for each of A and B; the permute then packs, for each result word,
   the low halfword of B's sum into the upper half and the low
   halfword of A's sum into the lower half.  */
static __inline qword si_sumb(qword a, qword b)
{
  vec_uint4 zero = (vec_uint4){0};
  vec_ushort8 sum_a, sum_b;

  sum_a = (vec_ushort8)vec_sum4s((vec_uchar16)(a), zero);
  sum_b = (vec_ushort8)vec_sum4s((vec_uchar16)(b), zero);

  return ((qword)(vec_perm(sum_a, sum_b, ((vec_uchar16){18, 19,  2,  3, 22, 23,  6,  7,
					                26, 27, 10, 11, 30, 31, 14, 15}))));
}
   1841   1.1  mrg 
   1842   1.1  mrg /* Exclusive OR
   1843   1.1  mrg  */
   1844   1.1  mrg static __inline qword si_xor(qword a, qword b)
   1845   1.1  mrg {
   1846   1.1  mrg   return ((qword)(vec_xor((vec_uchar16)(a), (vec_uchar16)(b))));
   1847   1.1  mrg }
   1848   1.1  mrg 
   1849   1.1  mrg static __inline qword si_xorbi(qword a, unsigned char b)
   1850   1.1  mrg {
   1851   1.1  mrg   return ((qword)(vec_xor((vec_uchar16)(a),
   1852   1.1  mrg 			  vec_splat((vec_uchar16)(si_from_uchar(b)), 3))));
   1853   1.1  mrg }
   1854   1.1  mrg 
   1855   1.1  mrg static __inline qword si_xorhi(qword a, unsigned short b)
   1856   1.1  mrg {
   1857   1.1  mrg   return ((qword)(vec_xor((vec_ushort8)(a),
   1858   1.1  mrg 			  vec_splat((vec_ushort8)(si_from_ushort(b)), 1))));
   1859   1.1  mrg }
   1860   1.1  mrg 
   1861   1.1  mrg static __inline qword si_xori(qword a, unsigned int b)
   1862   1.1  mrg {
   1863   1.1  mrg   return ((qword)(vec_xor((vec_uint4)(a),
   1864   1.1  mrg 			  vec_splat((vec_uint4)(si_from_uint(b)), 0))));
   1865   1.1  mrg }
   1866   1.1  mrg 
   1867   1.1  mrg 
   1868   1.1  mrg /* Generate Controls for Sub-Quadword Insertion
   1869   1.1  mrg  */
   1870   1.1  mrg static __inline qword si_cbd(qword a, int imm)
   1871   1.1  mrg {
   1872   1.1  mrg   union {
   1873   1.1  mrg     vec_uint4 v;
   1874   1.1  mrg     unsigned char c[16];
   1875   1.1  mrg   } shmask;
   1876   1.1  mrg 
   1877   1.1  mrg   shmask.v = ((vec_uint4){0x10111213, 0x14151617, 0x18191A1B, 0x1C1D1E1F});
   1878   1.1  mrg   shmask.c[(si_to_uint(a) + (unsigned int)(imm)) & 0xF] = 0x03;
   1879   1.1  mrg   return ((qword)(shmask.v));
   1880   1.1  mrg }
   1881   1.1  mrg 
   1882   1.1  mrg static __inline qword si_cdd(qword a, int imm)
   1883   1.1  mrg {
   1884   1.1  mrg   union {
   1885   1.1  mrg     vec_uint4 v;
   1886   1.1  mrg     unsigned long long ll[2];
   1887   1.1  mrg   } shmask;
   1888   1.1  mrg 
   1889   1.1  mrg   shmask.v = ((vec_uint4){0x10111213, 0x14151617, 0x18191A1B, 0x1C1D1E1F});
   1890   1.1  mrg   shmask.ll[((si_to_uint(a) + (unsigned int)(imm)) >> 3) & 0x1] = 0x0001020304050607ULL;
   1891   1.1  mrg   return ((qword)(shmask.v));
   1892   1.1  mrg }
   1893   1.1  mrg 
   1894   1.1  mrg static __inline qword si_chd(qword a, int imm)
   1895   1.1  mrg {
   1896   1.1  mrg   union {
   1897   1.1  mrg     vec_uint4 v;
   1898   1.1  mrg     unsigned short s[8];
   1899   1.1  mrg   } shmask;
   1900   1.1  mrg 
   1901   1.1  mrg   shmask.v = ((vec_uint4){0x10111213, 0x14151617, 0x18191A1B, 0x1C1D1E1F});
   1902   1.1  mrg   shmask.s[((si_to_uint(a) + (unsigned int)(imm)) >> 1) & 0x7] = 0x0203;
   1903   1.1  mrg   return ((qword)(shmask.v));
   1904   1.1  mrg }
   1905   1.1  mrg 
   1906   1.1  mrg static __inline qword si_cwd(qword a, int imm)
   1907   1.1  mrg {
   1908   1.1  mrg   union {
   1909   1.1  mrg     vec_uint4 v;
   1910   1.1  mrg     unsigned int i[4];
   1911   1.1  mrg   } shmask;
   1912   1.1  mrg 
   1913   1.1  mrg   shmask.v = ((vec_uint4){0x10111213, 0x14151617, 0x18191A1B, 0x1C1D1E1F});
   1914   1.1  mrg   shmask.i[((si_to_uint(a) + (unsigned int)(imm)) >> 2) & 0x3] = 0x00010203;
   1915   1.1  mrg   return ((qword)(shmask.v));
   1916   1.1  mrg }
   1917   1.1  mrg 
   1918   1.1  mrg static __inline qword si_cbx(qword a, qword b)
   1919   1.1  mrg {
   1920   1.1  mrg   union {
   1921   1.1  mrg     vec_uint4 v;
   1922   1.1  mrg     unsigned char c[16];
   1923   1.1  mrg   } shmask;
   1924   1.1  mrg 
   1925   1.1  mrg   shmask.v = ((vec_uint4){0x10111213, 0x14151617, 0x18191A1B, 0x1C1D1E1F});
   1926   1.1  mrg   shmask.c[si_to_uint((qword)(vec_add((vec_uint4)(a), (vec_uint4)(b)))) & 0xF] = 0x03;
   1927   1.1  mrg   return ((qword)(shmask.v));
   1928   1.1  mrg }
   1929   1.1  mrg 
   1930   1.1  mrg 
   1931   1.1  mrg static __inline qword si_cdx(qword a, qword b)
   1932   1.1  mrg {
   1933   1.1  mrg   union {
   1934   1.1  mrg     vec_uint4 v;
   1935   1.1  mrg     unsigned long long ll[2];
   1936   1.1  mrg   } shmask;
   1937   1.1  mrg 
   1938   1.1  mrg   shmask.v = ((vec_uint4){0x10111213, 0x14151617, 0x18191A1B, 0x1C1D1E1F});
   1939   1.1  mrg   shmask.ll[(si_to_uint((qword)(vec_add((vec_uint4)(a), (vec_uint4)(b)))) >> 3) & 0x1] = 0x0001020304050607ULL;
   1940   1.1  mrg   return ((qword)(shmask.v));
   1941   1.1  mrg }
   1942   1.1  mrg 
   1943   1.1  mrg static __inline qword si_chx(qword a, qword b)
   1944   1.1  mrg {
   1945   1.1  mrg   union {
   1946   1.1  mrg     vec_uint4 v;
   1947   1.1  mrg     unsigned short s[8];
   1948   1.1  mrg   } shmask;
   1949   1.1  mrg 
   1950   1.1  mrg   shmask.v = ((vec_uint4){0x10111213, 0x14151617, 0x18191A1B, 0x1C1D1E1F});
   1951   1.1  mrg   shmask.s[(si_to_uint((qword)(vec_add((vec_uint4)(a), (vec_uint4)(b)))) >> 1) & 0x7] = 0x0203;
   1952   1.1  mrg   return ((qword)(shmask.v));
   1953   1.1  mrg }
   1954   1.1  mrg 
   1955   1.1  mrg static __inline qword si_cwx(qword a, qword b)
   1956   1.1  mrg {
   1957   1.1  mrg   union {
   1958   1.1  mrg     vec_uint4 v;
   1959   1.1  mrg     unsigned int i[4];
   1960   1.1  mrg   } shmask;
   1961   1.1  mrg 
   1962   1.1  mrg   shmask.v = ((vec_uint4){0x10111213, 0x14151617, 0x18191A1B, 0x1C1D1E1F});
   1963   1.1  mrg   shmask.i[(si_to_uint((qword)(vec_add((vec_uint4)(a), (vec_uint4)(b)))) >> 2) & 0x3] = 0x00010203;
   1964   1.1  mrg   return ((qword)(shmask.v));
   1965   1.1  mrg }
   1966   1.1  mrg 
   1967   1.1  mrg 
   1968   1.1  mrg /* Constant Formation
   1969   1.1  mrg  */
   1970   1.1  mrg static __inline qword si_il(signed short imm)
   1971   1.1  mrg {
   1972   1.1  mrg   return ((qword)(vec_splat((vec_int4)(si_from_int((signed int)(imm))), 0)));
   1973   1.1  mrg }
   1974   1.1  mrg 
   1975   1.1  mrg 
   1976   1.1  mrg static __inline qword si_ila(unsigned int imm)
   1977   1.1  mrg {
   1978   1.1  mrg   return ((qword)(vec_splat((vec_uint4)(si_from_uint(imm)), 0)));
   1979   1.1  mrg }
   1980   1.1  mrg 
   1981   1.1  mrg static __inline qword si_ilh(signed short imm)
   1982   1.1  mrg {
   1983   1.1  mrg   return ((qword)(vec_splat((vec_short8)(si_from_short(imm)), 1)));
   1984   1.1  mrg }
   1985   1.1  mrg 
   1986   1.1  mrg static __inline qword si_ilhu(signed short imm)
   1987   1.1  mrg {
   1988   1.1  mrg   return ((qword)(vec_splat((vec_uint4)(si_from_uint((unsigned int)(imm) << 16)), 0)));
   1989   1.1  mrg }
   1990   1.1  mrg 
   1991   1.1  mrg static __inline qword si_iohl(qword a, unsigned short imm)
   1992   1.1  mrg {
   1993   1.1  mrg   return ((qword)(vec_or((vec_uint4)(a), vec_splat((vec_uint4)(si_from_uint((unsigned int)(imm))), 0))));
   1994   1.1  mrg }
   1995   1.1  mrg 
   1996   1.1  mrg /* No Operation
   1997   1.1  mrg  */
/* The SPU nop/lnop scheduling no-ops have no PPU counterpart; both
   expand to nothing.  */
#define si_lnop()		/* do nothing */
#define si_nop()		/* do nothing */
   2000   1.1  mrg 
   2001   1.1  mrg 
   2002   1.1  mrg /* Memory Load and Store
   2003   1.1  mrg  */
   2004   1.1  mrg static __inline qword si_lqa(unsigned int imm)
   2005   1.1  mrg {
   2006   1.1  mrg   return ((qword)(vec_ld(0, (vector unsigned char *)(imm))));
   2007   1.1  mrg }
   2008   1.1  mrg 
   2009   1.1  mrg static __inline qword si_lqd(qword a, unsigned int imm)
   2010   1.1  mrg {
   2011   1.1  mrg   return ((qword)(vec_ld(si_to_uint(a) & ~0xF, (vector unsigned char *)(imm))));
   2012   1.1  mrg }
   2013   1.1  mrg 
   2014   1.1  mrg static __inline qword si_lqr(unsigned int imm)
   2015   1.1  mrg {
   2016   1.1  mrg   return ((qword)(vec_ld(0, (vector unsigned char *)(imm))));
   2017   1.1  mrg }
   2018   1.1  mrg 
   2019   1.1  mrg static __inline qword si_lqx(qword a, qword b)
   2020   1.1  mrg {
   2021   1.1  mrg   return ((qword)(vec_ld(si_to_uint((qword)(vec_add((vec_uint4)(a), (vec_uint4)(b)))), (vector unsigned char *)(0))));
   2022   1.1  mrg }
   2023   1.1  mrg 
   2024   1.1  mrg static __inline void si_stqa(qword a, unsigned int imm)
   2025   1.1  mrg {
   2026   1.1  mrg   vec_st((vec_uchar16)(a), 0, (vector unsigned char *)(imm));
   2027   1.1  mrg }
   2028   1.1  mrg 
   2029   1.1  mrg static __inline void si_stqd(qword a, qword b, unsigned int imm)
   2030   1.1  mrg {
   2031   1.1  mrg   vec_st((vec_uchar16)(a), si_to_uint(b) & ~0xF, (vector unsigned char *)(imm));
   2032   1.1  mrg }
   2033   1.1  mrg 
   2034   1.1  mrg static __inline void si_stqr(qword a, unsigned int imm)
   2035   1.1  mrg {
   2036   1.1  mrg   vec_st((vec_uchar16)(a), 0, (vector unsigned char *)(imm));
   2037   1.1  mrg }
   2038   1.1  mrg 
   2039   1.1  mrg static __inline void si_stqx(qword a, qword b, qword c)
   2040   1.1  mrg {
   2041   1.1  mrg   vec_st((vec_uchar16)(a),
   2042   1.1  mrg 	 si_to_uint((qword)(vec_add((vec_uint4)(b), (vec_uint4)(c)))),
   2043   1.1  mrg 	 (vector unsigned char *)(0));
   2044   1.1  mrg }
   2045   1.1  mrg 
   2046   1.1  mrg #endif /* !__SPU__ */
   2047   1.1  mrg #endif /* !_SI2VMX_H_ */
   2048   1.1  mrg 
   2049