dnl x86 mpn_gcd_11 optimised for processors with fast BSF.

dnl Based on the K7 gcd_1.asm, by Kevin Ryde. Rehacked by Torbjorn Granlund.

dnl Copyright 2000-2002, 2005, 2009, 2011, 2012, 2015 Free Software
dnl Foundation, Inc.

dnl This file is part of the GNU MP Library.
dnl
dnl The GNU MP Library is free software; you can redistribute it and/or modify
dnl it under the terms of either:
dnl
dnl   * the GNU Lesser General Public License as published by the Free
dnl     Software Foundation; either version 3 of the License, or (at your
dnl     option) any later version.
dnl
dnl or
dnl
dnl   * the GNU General Public License as published by the Free Software
dnl     Foundation; either version 2 of the License, or (at your option) any
dnl     later version.
dnl
dnl or both in parallel, as here.
dnl
dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
dnl for more details.
dnl
dnl You should have received copies of the GNU General Public License and the
dnl GNU Lesser General Public License along with the GNU MP Library. If not,
dnl see https://www.gnu.org/licenses/.

include(`../config.m4')


C		cycles/bit (approx)
C AMD K7	 7.80
C AMD K8,K9	 7.79
C AMD K10	 4.08
C AMD bd1	 ?
C AMD bobcat	 7.82
C Intel P4-2	14.9
C Intel P4-3/4	14.0
C Intel P6/13	 5.09
C Intel core2	 4.22
C Intel NHM	 5.00
C Intel SBR	 5.00
C Intel atom	17.1
C VIA nano	 ?
C Numbers measured with: speed -CD -s16-32 -t16 mpn_gcd_1


C Symbolic names for the two running operands.
define(`u0', `%eax')
define(`v0', `%edx')

C mpn_gcd_11 -- subtract-and-shift gcd of two single 32-bit values.
C
C Syntax: AT&T operand order, wrapped in the m4 macros pulled in by config.m4.
C
C In:    two 32-bit values on the stack, read from 12(%esp) and 16(%esp)
C        after the two pushes below, i.e. 4(%esp) and 8(%esp) at entry.
C Out:   %eax = final value of v0, reached when u0 equals v0.
C Saves: %esi and %edi are pushed on entry and popped before ret.
C Clobb: %ecx, %edx, flags.
C
C NOTE(review): execution enters at the label the original called odd, and
C the loop only terminates once the operands become equal, so both inputs
C are presumably required to be odd and nonzero.  The prototype is not
C visible here -- confirm against the declaration used by callers.
C
C Register roles:
C   u0 = %eax   running operand u
C   v0 = %edx   running operand v
C   %esi        v - u, the replacement for u when u < v
C   %edi        copy of u, the replacement for v when u < v
C   %ecx        bit index produced by bsf, used as shift count via %cl

ASM_START()
	TEXT
	ALIGN(16)
PROLOGUE(mpn_gcd_11)
	push	%edi
	push	%esi

	mov	12(%esp), u0
	mov	16(%esp), v0
	jmp	L(diff)

C Loop invariant on arrival at loop: flags still hold the result of the
C sub v0, u0 executed just before the jnz, so CF set means u < v.
C The figures in the right-hand columns are carried over unchanged from the
C original author, one column per CPU named on the ALIGN line.

	ALIGN(16)			C K10   BD    C2    NHM   SBR
L(loop):
C	u = |v - u|: when the subtraction borrowed, take v - u instead.
	cmovc(	%esi, u0)		C 0,3   0,3   0,6   0,5   0,5
C	v = min(u,v): when the subtraction borrowed, the old u is smaller.
	cmovc(	%edi, v0)		C 0,3   0,3   2,8   1,7   1,7
C	Shift out the low zero bits counted by the bsf below.
	shr	%cl, u0			C 1,7   1,6   2,8   2,8   2,8
L(diff):
	mov	v0, %esi		C 1     1     4     3     3
	sub	u0, %esi		C 2     2     5     4     4
C	Lowest set bit of v - u, which is the same index as for u - v.
C	When u equals v the result is unused because the loop exits.
	bsf	%esi, %ecx		C 3     3     6     5     5
	mov	u0, %edi		C 2     2     3     3     4
C	u - v: CF set when u < v, ZF set when u equals v.
	sub	v0, u0			C 2     2     4     3     4
	jnz	L(loop)

L(done):
	mov	v0, %eax
	pop	%esi
	pop	%edi
	ret
EPILOGUE()