Home | History | Annotate | Line # | Download | only in atom
      1 dnl  X86 mpn_cnd_sub_n optimised for Intel Atom.
      2 
      3 dnl  Copyright 2013 Free Software Foundation, Inc.
      4 
      5 dnl  This file is part of the GNU MP Library.
      6 dnl
      7 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
      8 dnl  it under the terms of either:
      9 dnl
     10 dnl    * the GNU Lesser General Public License as published by the Free
     11 dnl      Software Foundation; either version 3 of the License, or (at your
     12 dnl      option) any later version.
     13 dnl
     14 dnl  or
     15 dnl
     16 dnl    * the GNU General Public License as published by the Free Software
     17 dnl      Foundation; either version 2 of the License, or (at your option) any
     18 dnl      later version.
     19 dnl
     20 dnl  or both in parallel, as here.
     21 dnl
     22 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
     23 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     24 dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     25 dnl  for more details.
     26 dnl
     27 dnl  You should have received copies of the GNU General Public License and the
     28 dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
     29 dnl  see https://www.gnu.org/licenses/.
     30 
     31 include(`../config.m4')
     32 
     33 C			    cycles/limb
     34 C P5				 ?
     35 C P6 model 0-8,10-12		 ?
     36 C P6 model 9   (Banias)		 ?
     37 C P6 model 13  (Dothan)		 ?
     38 C P4 model 0-1 (Willamette)	 ?
     39 C P4 model 2   (Northwood)	 ?
     40 C P4 model 3-4 (Prescott)	 ?
     41 C Intel atom			 5.67
     42 C AMD K6			 ?
     43 C AMD K7			 ?
     44 C AMD K8			 ?
     45 
     46 
     47 define(`rp',  `%edi')		C destination limb pointer
     48 define(`up',  `%esi')		C minuend limb pointer
     49 define(`vp',  `%ebp')		C subtrahend limb pointer; its limbs are ANDed with the cnd mask
     50 define(`n',   `%ecx')		C limb count on entry, then reused as the loop counter
     51 define(`cnd', `20(%esp)')	C cnd stack slot once 4 registers are pushed (4*4 + return address); later holds the mask
     52 
     53 ASM_START()
     54 	TEXT
     55 	ALIGN(16)
	C mpn_cnd_sub_n -- conditional subtraction of two n-limb vectors.
	C
	C Stack arguments, in order: cnd, rp, up, vp, n.  They are read below from
	C 20(%esp) through 36(%esp), i.e. after the four register pushes.
	C
	C For i = 0 .. n-1 this stores up[i] - (vp[i] & mask) - borrow into rp[i],
	C where mask is all ones when cnd is nonzero and zero otherwise.  With
	C cnd = 0 the up limbs are therefore just copied to rp.  The borrow out of
	C the most significant limb (0 or 1) is returned in %eax.
	C
	C n must be at least 1: with n = 0 one limb is still processed and the
	C loop counter then wraps around.
	C
	C The only conditional branches test the parity of n and the loop counter.
	C cnd takes effect purely through the AND mask, never through a branch.
	C
	C Register use: %eax and %ebx hold masked vp limbs; %edx holds the current
	C up limb / result limb, and between subtractions the saved borrow (0 or -1).
	C The borrow is kept live in CF across dec, lea and mov, none of which
	C modify CF, so the instruction order here must not be changed casually.
     56 PROLOGUE(mpn_cnd_sub_n)
     57 	push	%edi
     58 	push	%esi
     59 	push	%ebx
     60 	push	%ebp			C 4 pushes + return address: arguments now start at 20(%esp)
     61 
     62 	mov	cnd, %eax		C mask (1): load cnd
     63 	mov	24(%esp), rp
     64 	neg	%eax			C mask (1): CF = 1 iff cnd is nonzero
     65 	mov	28(%esp), up
     66 	sbb	%eax, %eax		C mask (1): eax = -CF, i.e. 0 or all ones
     67 	mov	32(%esp), vp
     68 	mov	%eax, cnd		C mask (1): overwrite the cnd stack slot with the mask
     69 	mov	36(%esp), n
     70 
     71 	xor	%edx, %edx		C edx = 0: no incoming borrow if we jump straight to L(ent)
     72 
     73 	inc	n
     74 	shr	n			C n = (n+1)/2; CF = 1 iff the original n was even
     75 	jnc	L(ent)			C odd n: skip the single-limb step
     76 
     77 	mov	0(vp), %eax		C even n: handle one limb up front
     78 	and	cnd, %eax
     79 	lea	4(vp), vp
     80 	mov	0(up), %edx
     81 	sub	%eax, %edx		C first limb, so plain sub (no borrow in)
     82 	lea	4(rp), rp
     83 	lea	4(up), up
     84 	mov	%edx, -4(rp)		C rp was already advanced, hence the -4
     85 	sbb	%edx, %edx		C save cy
     86 
     87 L(ent):	mov	0(vp), %ebx		C preload masked vp limb for the next sbb
     88 	and	cnd, %ebx
     89 	add	%edx, %edx		C restore cy
     90 	mov	0(up), %edx		C preload up limb for the next sbb
     91 	dec	n			C dec leaves CF (the borrow) intact
     92 	je	L(end)			C only one limb left: finish at L(end)
     93 
     94 L(top):	sbb	%ebx, %edx		C first limb of the pair
     95 	mov	4(vp), %eax
     96 	mov	%edx, 0(rp)
     97 	sbb	%edx, %edx		C save cy
     98 	mov	8(vp), %ebx		C preload vp limb for the sbb at L(top) or L(end); in range since L(end) always handles one more limb
     99 	lea	8(up), up
    100 	and	cnd, %ebx
    101 	and	cnd, %eax		C and clobbers CF, hence the save/restore around the masking
    102 	add	%edx, %edx		C restore cy
    103 	mov	-4(up), %edx		C up was already advanced: second up limb of the pair
    104 	lea	8(rp), rp
    105 	sbb	%eax, %edx		C second limb of the pair
    106 	mov	%edx, -4(rp)
    107 	dec	n			C sets ZF for the jne below, CF untouched
    108 	mov	0(up), %edx		C preload up limb for the next sbb
    109 	lea	8(vp), vp
    110 	jne	L(top)			C mov and lea above leave the flags from dec intact
    111 
    112 L(end):	sbb	%ebx, %edx		C final limb
    113 	mov	%edx, 0(rp)
    114 
    115 	mov	$0, %eax		C mov rather than xor: CF must survive for the adc
    116 	adc	%eax, %eax		C return value = borrow out (0 or 1)
    117 
    118 	pop	%ebp			C restore in reverse push order
    119 	pop	%ebx
    120 	pop	%esi
    121 	pop	%edi
    122 	ret
    123 EPILOGUE()
    124 ASM_END()
    125