Home | History | Annotate | Line # | Download | only in bd2
      1      1.1  mrg dnl  AMD64 mpn_gcd_22.  Assumes useless bsf, useless shrd, tzcnt, no shlx.
      2      1.1  mrg 
      3      1.1  mrg dnl  Copyright 2019 Free Software Foundation, Inc.
      4      1.1  mrg 
      5      1.1  mrg dnl  This file is part of the GNU MP Library.
      6      1.1  mrg dnl
      7      1.1  mrg dnl  The GNU MP Library is free software; you can redistribute it and/or modify
      8      1.1  mrg dnl  it under the terms of either:
      9      1.1  mrg dnl
     10      1.1  mrg dnl    * the GNU Lesser General Public License as published by the Free
     11      1.1  mrg dnl      Software Foundation; either version 3 of the License, or (at your
     12      1.1  mrg dnl      option) any later version.
     13      1.1  mrg dnl
     14      1.1  mrg dnl  or
     15      1.1  mrg dnl
     16      1.1  mrg dnl    * the GNU General Public License as published by the Free Software
     17      1.1  mrg dnl      Foundation; either version 2 of the License, or (at your option) any
     18      1.1  mrg dnl      later version.
     19      1.1  mrg dnl
     20      1.1  mrg dnl  or both in parallel, as here.
     21      1.1  mrg dnl
     22      1.1  mrg dnl  The GNU MP Library is distributed in the hope that it will be useful, but
     23      1.1  mrg dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     24      1.1  mrg dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     25      1.1  mrg dnl  for more details.
     26      1.1  mrg dnl
     27      1.1  mrg dnl  You should have received copies of the GNU General Public License and the
     28      1.1  mrg dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
     29      1.1  mrg dnl  see https://www.gnu.org/licenses/.
     30      1.1  mrg 
     31      1.1  mrg include(`../config.m4')
     32      1.1  mrg 
     33      1.1  mrg 
     34      1.1  mrg C	     cycles/bit
     35      1.1  mrg C AMD K8,K9	12.3
     36      1.1  mrg C AMD K10	 8.0
     37      1.1  mrg C AMD bd1	10.0
     38  1.1.1.2  mrg C AMD bd2	 7.2
     39      1.1  mrg C AMD bd3	 ?
     40      1.1  mrg C AMD bd4	 6.7
     41      1.1  mrg C AMD bt1	13.6
     42      1.1  mrg C AMD bt2	 8.9
     43      1.1  mrg C AMD zn1	 5.7
     44      1.1  mrg C AMD zn2	 5.6
     45      1.1  mrg C Intel P4	 ?
     46      1.1  mrg C Intel CNR	 9.7
     47      1.1  mrg C Intel PNR	 9.7
     48      1.1  mrg C Intel NHM	 9.4
     49      1.1  mrg C Intel WSM	 9.5
     50      1.1  mrg C Intel SBR	10.3
     51      1.1  mrg C Intel IBR	 ?
     52      1.1  mrg C Intel HWL	 8.2
     53      1.1  mrg C Intel BWL	 7.4
     54      1.1  mrg C Intel SKL	 7.3
     55      1.1  mrg C Intel atom	26.5
     56      1.1  mrg C Intel SLM	17.4
     57      1.1  mrg C Intel GLM	13.4
     58      1.1  mrg C Intel GLM+	12.4
     59      1.1  mrg C VIA nano	 ?
     60      1.1  mrg 
     61      1.1  mrg 
     62      1.1  mrg define(`u1',    `%rdi')	C high limb of u; 1st integer argument register under STD64
     63      1.1  mrg define(`u0',    `%rsi')	C low limb of u; 2nd integer argument register
     64      1.1  mrg define(`v1',    `%rdx')	C high limb of v; 3rd integer argument register
     65      1.1  mrg define(`v0_param', `%rcx')	C low limb of v as passed in; 4th integer argument register
     66      1.1  mrg C v0 is copied out of %rcx at function entry so that %rcx can serve as cnt.
     67      1.1  mrg define(`v0',    `%rax')	C low limb of v while the function runs
     68      1.1  mrg define(`cnt',   `%rcx')	C shift count, passed to shr/shl as R8(cnt)
     69      1.1  mrg C Scratch: s1,,s0 keep a copy of u; t1,,t0 hold the difference taken as v - u.
     70      1.1  mrg define(`s0',    `%r8')
     71      1.1  mrg define(`s1',    `%r9')
     72      1.1  mrg define(`t0',    `%r10')
     73      1.1  mrg define(`t1',    `%r11')
     74      1.1  mrg 
     75      1.1  mrg dnl ABI_SUPPORT(DOS64)	C returns mp_double_limb_t in memory
     76      1.1  mrg ABI_SUPPORT(STD64)
     77      1.1  mrg C mpn_gcd_22: reduce the two-limb values u = (u1,,u0) and v = (v1,,v0) until both high limbs are zero, then continue in mpn_gcd_11; if u = v is reached first, v is returned as it stands (v0 in %rax, v1 in %rdx).
     78      1.1  mrg ASM_START()
     79      1.1  mrg 	TEXT
     80      1.1  mrg 	ALIGN(64)
     81      1.1  mrg PROLOGUE(mpn_gcd_22)
     82      1.1  mrg 	FUNC_ENTRY(4)		C macro defined outside this file; presumably ABI entry glue for 4 arguments
     83      1.1  mrg 	mov	v0_param, v0	C keep v0 in %rax so that %rcx is free for cnt
     84      1.1  mrg C Each pass replaces u by |u - v| shifted right by its trailing zero count, and v by min(u,v).
     85      1.1  mrg 	ALIGN(16)
     86      1.1  mrg L(top):	mov	v0, t0
     87      1.1  mrg 	sub	u0, t0		C t0 = v0 - u0, borrow left in carry
     88      1.1  mrg 	jz	L(lowz)		C	jump when low limb result = 0
     89      1.1  mrg 	mov	v1, t1
     90      1.1  mrg 	sbb	u1, t1		C (t1,,t0) = v - u
     91      1.1  mrg C cnt = trailing zero count of the low limb of the difference (t0 is nonzero here).
     92      1.1  mrg 	rep;bsf	t0, cnt		C tzcnt!
     93      1.1  mrg 	mov	u0, s0		C (s1,,s0) = u, kept for the u < v case
     94      1.1  mrg 	mov	u1, s1
     95      1.1  mrg 
     96      1.1  mrg 	sub	v0, u0		C (u1,,u0) = u - v
     97      1.1  mrg 	sbb	v1, u1		C carry set iff u < v
     98      1.1  mrg 
     99      1.1  mrg L(bck):	cmovc	t0, u0		C u = |u - v|
    100      1.1  mrg 	cmovc	t1, u1		C u = |u - v|
    101      1.1  mrg 	cmovc	s0, v0		C v = min(u,v)
    102      1.1  mrg 	cmovc	s1, v1		C v = min(u,v)
    103      1.1  mrg 
    104      1.1  mrg C Rightshift (u1,,u0) into (u1,,u0)
    105      1.1  mrg L(shr):	shr	R8(cnt), u0	C low limb >> cnt
    106      1.1  mrg 	mov	u1, t1
    107      1.1  mrg 	shr	R8(cnt), u1
    108      1.1  mrg 	neg	cnt		C 64-bit shifts mask the count to 6 bits, so -cnt acts as 64 - cnt
    109      1.1  mrg 	shl	R8(cnt), t1	C bits of the old u1 that cross into u0
    110      1.1  mrg 	or	t1, u0
    111      1.1  mrg C NOTE(review): the shl/or pair is only correct when cnt != 0 or u1 = 0; presumably callers pass odd u and v - confirm.
    112      1.1  mrg 	test	v1, v1
    113      1.1  mrg 	jnz	L(top)		C loop while v still has a nonzero high limb
    114      1.1  mrg 	test	u1, u1
    115      1.1  mrg 	jnz	L(top)		C ... or u does
    116      1.1  mrg 
    117      1.1  mrg L(gcd_11):
    118      1.1  mrg 	mov	v0, %rdi	C 1st argument = v0; u0 is already in %rsi
    119      1.1  mrg C	mov	u0, %rsi
    120      1.1  mrg 	TCALL(	mpn_gcd_11)	C TCALL is defined outside this file; presumably a tail call
    121      1.1  mrg 
    122      1.1  mrg L(lowz):C We come here when v0 - u0 = 0
    123      1.1  mrg 	C 1. If v1 - u1 = 0, then gcd is u = v.
    124      1.1  mrg 	C 2. Else compute gcd_21({v1,v0}, |u1-v1|)
    125      1.1  mrg 	mov	v1, t0
    126      1.1  mrg 	sub	u1, t0		C t0 = v1 - u1
    127      1.1  mrg 	je	L(end)		C u = v: the result is v, already in %rax/%rdx
    128      1.1  mrg 
    129      1.1  mrg 	xor	t1, t1		C (t1,,t0) = (0,,v1 - u1): the difference moved down one limb
    130      1.1  mrg 	rep;bsf	t0, cnt		C tzcnt!
    131      1.1  mrg 	mov	u0, s0
    132      1.1  mrg 	mov	u1, s1		C (s1,,s0) = u
    133      1.1  mrg 	mov	u1, u0
    134      1.1  mrg 	xor	u1, u1
    135      1.1  mrg 	sub	v1, u0		C (u1,,u0) = (0,,u1 - v1), carry set iff u1 < v1
    136      1.1  mrg 	jmp	L(bck)		C reuse the select-and-shift code with these values
    137      1.1  mrg 
    138      1.1  mrg L(end):	C mov	v0, %rax
    139      1.1  mrg 	C mov	v1, %rdx
    140      1.1  mrg 	FUNC_EXIT()
    141      1.1  mrg 	ret
    142      1.1  mrg EPILOGUE()
    143