Home | History | Annotate | Line # | Download | only in p6
      1 dnl  x86 mpn_gcd_11 optimised for processors with fast BSF.
      2 
      3 dnl  Based on the K7 gcd_1.asm, by Kevin Ryde.  Rehacked by Torbjorn Granlund.
      4 
      5 dnl  Copyright 2000-2002, 2005, 2009, 2011, 2012, 2015 Free Software
      6 dnl  Foundation, Inc.
      7 
      8 dnl  This file is part of the GNU MP Library.
      9 dnl
     10 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     11 dnl  it under the terms of either:
     12 dnl
     13 dnl    * the GNU Lesser General Public License as published by the Free
     14 dnl      Software Foundation; either version 3 of the License, or (at your
     15 dnl      option) any later version.
     16 dnl
     17 dnl  or
     18 dnl
     19 dnl    * the GNU General Public License as published by the Free Software
     20 dnl      Foundation; either version 2 of the License, or (at your option) any
     21 dnl      later version.
     22 dnl
     23 dnl  or both in parallel, as here.
     24 dnl
     25 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
     26 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     27 dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     28 dnl  for more details.
     29 dnl
     30 dnl  You should have received copies of the GNU General Public License and the
     31 dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
     32 dnl  see https://www.gnu.org/licenses/.
     33 
     34 include(`../config.m4')
     35 
     36 
     37 C	     cycles/bit (approx)
     38 C AMD K7	 7.80
     39 C AMD K8,K9	 7.79
     40 C AMD K10	 4.08
     41 C AMD bd1	 ?
     42 C AMD bobcat	 7.82
     43 C Intel P4-2	14.9
     44 C Intel P4-3/4	14.0
     45 C Intel P6/13	 5.09
     46 C Intel core2	 4.22
     47 C Intel NHM	 5.00
     48 C Intel SBR	 5.00
     49 C Intel atom	17.1
     50 C VIA nano	?
     51 C Numbers measured with: speed -CD -s16-32 -t16 mpn_gcd_1
     52 
     53 
      54 define(`u0',    `%eax')	C u: first operand, kept in %eax
      55 define(`v0',    `%edx')	C v: second operand, kept in %edx
         C NOTE: the routine visible in this file spells out %eax and %edx
         C directly; u0 and v0 are not referenced by the code shown here.
     56 
      57 ASM_START()
      58 	TEXT
      59 	ALIGN(16)
         C mpn_gcd_11 -- greatest common divisor of two single-word operands,
         C computed by repeated subtract-and-shift (binary GCD).
         C
         C Inputs:   two 32-bit stack arguments, read below as 12(%esp) and
         C           16(%esp) once %edi and %esi have been pushed (that is,
         C           4(%esp) and 8(%esp) at entry).  The first goes to %eax (u),
         C           the second to %edx (v).
         C Output:   %eax.
         C Saved:    %edi, %esi (pushed on entry, popped before ret).
         C Modified: %ecx, %edx, flags.
         C
         C NOTE(review): the loop is entered at L(odd) and only ever strips
         C trailing zero bits from the difference, which suggests both operands
         C are expected to be odd and nonzero -- confirm against the callers.
         C
         C State on arrival at L(top): the flags are those of the sub that
         C computed u - v, so CF is set exactly when u < v (unsigned); %esi holds
         C v - u, %edi holds the old u, and %cl holds the index of the lowest set
         C bit of v - u (the same bit position as in u - v).
      60 PROLOGUE(mpn_gcd_11)
      61 	push	%edi		C save; used as scratch for the old u
      62 	push	%esi		C save; used as scratch for v - u
      63 
      64 	mov	12(%esp), %eax	C u = first argument
      65 	mov	16(%esp), %edx	C v = second argument
      66 	jmp	L(odd)		C enter the loop past the select-and-shift step
      67 
         C The trailing number columns give per-CPU timing annotations for each
         C instruction.  cmovc() is an m4 macro supplied by the included
         C definitions (not visible in this file); it is used here as a
         C conditional move taken when CF is set.
      68 	ALIGN(16)		C               K10   BD    C2    NHM   SBR
      69 L(top):	cmovc(	%esi, %eax)	C u = |v - u|   0,3   0,3   0,6   0,5   0,5
      70 	cmovc(	%edi, %edx)	C v = min(u,v)  0,3   0,3   2,8   1,7   1,7
      71 	shr	%cl, %eax	C               1,7   1,6   2,8   2,8   2,8
         C Both differences are formed on every pass.  The final sub is the last
         C flag-writing instruction before jnz, so its ZF ends the loop and its CF
         C drives the two cmovc at L(top); bsf is placed before it so that the
         C flags bsf writes are overwritten.
      72 L(odd):	mov	%edx, %esi	C               1     1     4     3     3
      73 	sub	%eax, %esi	C               2     2     5     4     4
      74 	bsf	%esi, %ecx	C               3     3     6     5     5
      75 	mov	%eax, %edi	C               2     2     3     3     4
      76 	sub	%edx, %eax	C               2     2     4     3     4
      77 	jnz	L(top)		C repeat until u == v
      78 
         C Here u - v was zero, so %edx holds the common value of u and v.
      79 L(end):	mov	%edx, %eax	C return value = v
      80 	pop	%esi		C restore in reverse push order
      81 	pop	%edi
      82 	ret
      83 EPILOGUE()
     84