Home | History | Annotate | Line # | Download | only in x86_64
      1 dnl  AMD64 logops.
      2 
      3 dnl  Copyright 2004-2017 Free Software Foundation, Inc.
      4 
      5 dnl  This file is part of the GNU MP Library.
      6 dnl
      7 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
      8 dnl  it under the terms of either:
      9 dnl
     10 dnl    * the GNU Lesser General Public License as published by the Free
     11 dnl      Software Foundation; either version 3 of the License, or (at your
     12 dnl      option) any later version.
     13 dnl
     14 dnl  or
     15 dnl
     16 dnl    * the GNU General Public License as published by the Free Software
     17 dnl      Foundation; either version 2 of the License, or (at your option) any
     18 dnl      later version.
     19 dnl
     20 dnl  or both in parallel, as here.
     21 dnl
     22 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
     23 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     24 dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     25 dnl  for more details.
     26 dnl
     27 dnl  You should have received copies of the GNU General Public License and the
     28 dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
     29 dnl  see https://www.gnu.org/licenses/.
     30 
     31 include(`../config.m4')
     32 
     33 
     34 C		c/l	c/l	c/l	good
     35 C	       var-1   var-2   var-3  for cpu?
     36 C AMD K8,K9	 1.5	 1.5	 1.5	 y
     37 C AMD K10	 1.5	 1.5	 1.5	 y
     38 C AMD bd1
     39 C AMD bd2
     40 C AMD bd3
     41 C AMD bd4
     42 C AMD bt1	 2.67	~2.79	~2.67
     43 C AMD bt2	 2.0	 2.28	 2.28	 y
     44 C AMD zen	 1.5	 1.5	 1.5	 =
     45 C Intel P4	 2.8	 3.35	 3.6
     46 C Intel PNR	 2.0	 2.0	 2.0	 =
     47 C Intel NHM	 2.0	 2.0	 2.0	 =
     48 C Intel SBR	 1.5	 1.75	 1.75	 n
     49 C Intel IBR	 1.48	 1.71	 1.72	 n
     50 C Intel HWL	 1.5	 1.5	 1.5	 n
     51 C Intel BWL	 1.5	 1.5	 1.5	 n
     52 C Intel SKL	 1.5	 1.5	 1.5	 n
     53 C Intel atom	 3.82	 3.82	 3.82	 n
     54 C Intel SLM	 3.0	 3.0	 3.0	 =
     55 C VIA nano	 3.25
     56 
     57 ifdef(`OPERATION_and_n',`
     58   define(`func',`mpn_and_n')	C rp[i] = up[i] & vp[i]
     59   define(`VARIANT_1')		C variant 1: operands are combined as they are
     60   define(`LOGOP',`and')')
     61 ifdef(`OPERATION_andn_n',`
     62   define(`func',`mpn_andn_n')	C rp[i] = up[i] & ~vp[i]
     63   define(`VARIANT_2')		C variant 2: each vp word is complemented before LOGOP
     64   define(`LOGOP',`and')')
     65 ifdef(`OPERATION_nand_n',`
     66   define(`func',`mpn_nand_n')	C rp[i] = ~(up[i] & vp[i])
     67   define(`VARIANT_3')		C variant 3: the result is complemented after LOGOP
     68   define(`LOGOP',`and')')
     69 ifdef(`OPERATION_ior_n',`
     70   define(`func',`mpn_ior_n')	C rp[i] = up[i] | vp[i]
     71   define(`VARIANT_1')
     72   define(`LOGOP',`or')')
     73 ifdef(`OPERATION_iorn_n',`
     74   define(`func',`mpn_iorn_n')	C rp[i] = up[i] | ~vp[i]
     75   define(`VARIANT_2')
     76   define(`LOGOP',`or')')
     77 ifdef(`OPERATION_nior_n',`
     78   define(`func',`mpn_nior_n')	C rp[i] = ~(up[i] | vp[i])
     79   define(`VARIANT_3')
     80   define(`LOGOP',`or')')
     81 ifdef(`OPERATION_xor_n',`
     82   define(`func',`mpn_xor_n')	C rp[i] = up[i] ^ vp[i]
     83   define(`VARIANT_1')
     84   define(`LOGOP',`xor')')
     85 ifdef(`OPERATION_xnor_n',`
     86   define(`func',`mpn_xnor_n')	C rp[i] = up[i] ^ ~vp[i], which equals ~(up[i] ^ vp[i])
     87   define(`VARIANT_2')		C so the complement-first variant serves xnor as well
     88   define(`LOGOP',`xor')')
     89 
     90 
     91 MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)
     92 
     93 C INPUT PARAMETERS
     94 define(`rp',`%rdi')	C destination pointer; results are stored through it
     95 define(`up',`%rsi')	C source that LOGOP reads directly as its memory operand
     96 define(`vp',`%rdx')	C source that is loaded into r8/r9 before LOGOP
     97 define(`n',`%rcx')	C count of 8-byte words; negated below and used as the index
     98 
     99 ABI_SUPPORT(DOS64)
    100 ABI_SUPPORT(STD64)
    101 
    102 ASM_START()
    103 
    104 ifdef(`VARIANT_1',`	C Variant 1: rp[i] = up[i] LOGOP vp[i], four words per loop pass
    105 	TEXT
    106 	ALIGN(32)
    107 PROLOGUE(func)
    108 	FUNC_ENTRY(4)
    109 	mov	(vp), %r8		C preload vp[0]; NOTE(review): read even if n is 0, presumably callers pass n >= 1
    110 	mov	R32(%rcx), R32(%rax)	C low 32 bits of the count, for the mod 4 dispatch
    111 	lea	(vp,n,8), vp		C move all three pointers to one past the last word
    112 	lea	(up,n,8), up
    113 	lea	(rp,n,8), rp
    114 	neg	n			C n is now a negative index that counts up to zero
    115 	and	$3, R32(%rax)		C rax = count mod 4
    116 	je	L(b00)			C remainder 0
    117 	cmp	$2, R32(%rax)
    118 	jc	L(b01)			C remainder 1
    119 	je	L(b10)			C remainder 2
    120 C remainder 3 falls through: do one word, then join the second half of the loop
    121 L(b11):	LOGOP	(up,n,8), %r8
    122 	mov	%r8, (rp,n,8)
    123 	dec	n			C bias n so offsets 16 and 24 at e11 reach words 1 and 2
    124 	jmp	L(e11)
    125 L(b10):	add	$-2, n			C bias n so offsets 16 and 24 at e10 reach words 0 and 1
    126 	jmp	L(e10)			C r8 still holds the preloaded vp[0]
    127 L(b01):	LOGOP	(up,n,8), %r8		C do the single leftover word first
    128 	mov	%r8, (rp,n,8)
    129 	inc	n
    130 	jz	L(ret)			C that was the only word
    131 C main loop; n is a multiple of 4 whenever the closing add executes
    132 L(top):	mov	(vp,n,8), %r8
    133 L(b00):	mov	8(vp,n,8), %r9		C remainder 0 enters here since r8 is already loaded
    134 	LOGOP	(up,n,8), %r8
    135 	LOGOP	8(up,n,8), %r9
    136 	nop				C K8/K9/K10 concession
    137 	mov	%r8, (rp,n,8)
    138 	mov	%r9, 8(rp,n,8)
    139 L(e11):	mov	16(vp,n,8), %r8		C remainder 3 enters here after its first word
    140 L(e10):	mov	24(vp,n,8), %r9		C remainder 2 enters here with r8 preloaded
    141 	LOGOP	16(up,n,8), %r8
    142 	LOGOP	24(up,n,8), %r9
    143 	mov	%r8, 16(rp,n,8)
    144 	mov	%r9, 24(rp,n,8)
    145 	add	$4, n
    146 	jnc	L(top)			C carry out means n wrapped to zero, so all words are done
    147 
    148 L(ret):	FUNC_EXIT()
    149 	ret
    150 EPILOGUE()
    151 ')
    152 
    153 ifdef(`VARIANT_2',`	C Variant 2: rp[i] = up[i] LOGOP ~vp[i], four words per loop pass
    154 	TEXT
    155 	ALIGN(32)
    156 PROLOGUE(func)
    157 	FUNC_ENTRY(4)
    158 	mov	(vp), %r8		C preload vp[0]; NOTE(review): read even if n is 0, presumably callers pass n >= 1
    159 	not	%r8			C complement it now, so entry paths can skip the first load and not
    160 	mov	R32(%rcx), R32(%rax)	C low 32 bits of the count, for the mod 4 dispatch
    161 	lea	(vp,n,8), vp		C move all three pointers to one past the last word
    162 	lea	(up,n,8), up
    163 	lea	(rp,n,8), rp
    164 	neg	n			C n is now a negative index that counts up to zero
    165 	and	$3, R32(%rax)		C rax = count mod 4
    166 	je	L(b00)			C remainder 0
    167 	cmp	$2, R32(%rax)
    168 	jc	L(b01)			C remainder 1
    169 	je	L(b10)			C remainder 2
    170 C remainder 3 falls through: do one word, then join the second half of the loop
    171 L(b11):	LOGOP	(up,n,8), %r8
    172 	mov	%r8, (rp,n,8)
    173 	dec	n			C bias n so offsets 16 and 24 at e11 reach words 1 and 2
    174 	jmp	L(e11)
    175 L(b10):	add	$-2, n			C bias n so offsets 16 and 24 at e10 reach words 0 and 1
    176 	jmp	L(e10)			C r8 still holds the complemented vp[0]
    177 	.byte	0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90	C 13 one-byte nops, never executed (they follow a jmp); presumably alignment padding - confirm
    178 L(b01):	LOGOP	(up,n,8), %r8		C do the single leftover word first
    179 	mov	%r8, (rp,n,8)
    180 	inc	n
    181 	jz	L(ret)			C that was the only word
    182 C main loop; n is a multiple of 4 whenever the closing add executes
    183 L(top):	mov	(vp,n,8), %r8
    184 	not	%r8
    185 L(b00):	mov	8(vp,n,8), %r9		C remainder 0 enters here: r8 already holds ~vp[0]
    186 	not	%r9
    187 	LOGOP	(up,n,8), %r8
    188 	LOGOP	8(up,n,8), %r9
    189 	mov	%r8, (rp,n,8)
    190 	mov	%r9, 8(rp,n,8)
    191 L(e11):	mov	16(vp,n,8), %r8		C remainder 3 enters here after its first word
    192 	not	%r8
    193 L(e10):	mov	24(vp,n,8), %r9		C remainder 2 enters here: r8 already holds ~vp[0]
    194 	not	%r9
    195 	LOGOP	16(up,n,8), %r8
    196 	LOGOP	24(up,n,8), %r9
    197 	mov	%r8, 16(rp,n,8)
    198 	mov	%r9, 24(rp,n,8)
    199 	add	$4, n
    200 	jnc	L(top)			C carry out means n wrapped to zero, so all words are done
    201 
    202 L(ret):	FUNC_EXIT()
    203 	ret
    204 EPILOGUE()
    205 ')
    206 
    207 ifdef(`VARIANT_3',`	C Variant 3: rp[i] = ~(up[i] LOGOP vp[i]), four words per loop pass
    208 	TEXT
    209 	ALIGN(32)
    210 PROLOGUE(func)
    211 	FUNC_ENTRY(4)
    212 	mov	(vp), %r8		C preload vp[0]; NOTE(review): read even if n is 0, presumably callers pass n >= 1
    213 	mov	R32(%rcx), R32(%rax)	C low 32 bits of the count, for the mod 4 dispatch
    214 	lea	(vp,n,8), vp		C move all three pointers to one past the last word
    215 	lea	(up,n,8), up
    216 	lea	(rp,n,8), rp
    217 	neg	n			C n is now a negative index that counts up to zero
    218 	and	$3, R32(%rax)		C rax = count mod 4
    219 	je	L(b00)			C remainder 0
    220 	cmp	$2, R32(%rax)
    221 	jc	L(b01)			C remainder 1
    222 	je	L(b10)			C remainder 2
    223 C remainder 3 falls through: do one word, then join the second half of the loop
    224 L(b11):	LOGOP	(up,n,8), %r8
    225 	not	%r8			C complement the combined word before storing it
    226 	mov	%r8, (rp,n,8)
    227 	dec	n			C bias n so offsets 16 and 24 at e11 reach words 1 and 2
    228 	jmp	L(e11)
    229 L(b10):	add	$-2, n			C bias n so offsets 16 and 24 at e10 reach words 0 and 1
    230 	jmp	L(e10)			C r8 still holds the preloaded vp[0]
    231 	.byte	0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90	C 10 one-byte nops, never executed (they follow a jmp); presumably alignment padding - confirm
    232 L(b01):	LOGOP	(up,n,8), %r8		C do the single leftover word first
    233 	not	%r8
    234 	mov	%r8, (rp,n,8)
    235 	inc	n
    236 	jz	L(ret)			C that was the only word
    237 C main loop; n is a multiple of 4 whenever the closing add executes
    238 L(top):	mov	(vp,n,8), %r8
    239 L(b00):	mov	8(vp,n,8), %r9		C remainder 0 enters here since r8 is already loaded
    240 	LOGOP	(up,n,8), %r8
    241 	not	%r8
    242 	LOGOP	8(up,n,8), %r9
    243 	not	%r9
    244 	mov	%r8, (rp,n,8)
    245 	mov	%r9, 8(rp,n,8)
    246 L(e11):	mov	16(vp,n,8), %r8		C remainder 3 enters here after its first word
    247 L(e10):	mov	24(vp,n,8), %r9		C remainder 2 enters here with r8 preloaded
    248 	LOGOP	16(up,n,8), %r8
    249 	not	%r8
    250 	LOGOP	24(up,n,8), %r9
    251 	not	%r9
    252 	mov	%r8, 16(rp,n,8)
    253 	mov	%r9, 24(rp,n,8)
    254 	add	$4, n
    255 	jnc	L(top)			C carry out means n wrapped to zero, so all words are done
    256 
    257 L(ret):	FUNC_EXIT()
    258 	ret
    259 EPILOGUE()
    260 ')
    261