dnl  AMD64 mpn_gcd_22.  Assumes useless bsf, useless shrd, tzcnt, no shlx.

dnl  Copyright 2019 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')


C                cycles/bit
C AMD K8,K9        12.3
C AMD K10           8.0
C AMD bd1          10.0
C AMD bd2           7.2
C AMD bd3           ?
C AMD bd4           6.7
C AMD bt1          13.6
C AMD bt2           8.9
C AMD zn1           5.7
C AMD zn2           5.6
C Intel P4          ?
C Intel CNR         9.7
C Intel PNR         9.7
C Intel NHM         9.4
C Intel WSM         9.5
C Intel SBR        10.3
C Intel IBR         ?
C Intel HWL         8.2
C Intel BWL         7.4
C Intel SKL         7.3
C Intel atom       26.5
C Intel SLM        17.4
C Intel GLM        13.4
C Intel GLM+       12.4
C VIA nano          ?
C mpn_gcd_22 -- gcd of two 2-limb operands u = (u1,,u0) and v = (v1,,v0).
C
C Register assignment, as established by the defines below (STD64 ABI):
C   in:      u1 = %rdi   u0 = %rsi   v1 = %rdx   v0 = %rcx (v0_param)
C   working: v0 is moved to %rax at entry so that %rcx can hold cnt
C   scratch: (s1,,s0) = copy of u,  (t1,,t0) = v - u,  cnt = shift count
C
C Result: when the loop ends with u = v, the 2-limb result is (v1,,v0),
C which already sits in %rdx,%rax.  When both high limbs have become zero
C the remaining 1-limb problem is passed to mpn_gcd_11 by tail call.
C
C NOTE(review): the shift step assumes u0 and v0 are odd on entry, so that
C v0 - u0 has at least one trailing zero bit inside the main loop --
C confirm against the documented mpn_gcd_22 contract.

define(`u1',	`%rdi')
define(`u0',	`%rsi')
define(`v1',	`%rdx')
define(`v0_param', `%rcx')

define(`v0',	`%rax')
define(`cnt',	`%rcx')

define(`s0',	`%r8')
define(`s1',	`%r9')
define(`t0',	`%r10')
define(`t1',	`%r11')

dnl ABI_SUPPORT(DOS64)	C returns mp_double_limb_t in memory
ABI_SUPPORT(STD64)

ASM_START()
	TEXT
	ALIGN(64)
PROLOGUE(mpn_gcd_22)
	FUNC_ENTRY(4)
	mov	v0_param, v0		C free %rcx, it must hold the shift count

	ALIGN(16)
L(top):	mov	v0, t0
	sub	u0, t0			C t0 = v0 - u0, CF = borrow
	jz	L(lowz)			C jump when low limb result = 0
	mov	v1, t1			C mov leaves CF intact for the sbb
	sbb	u1, t1			C (t1,,t0) = v - u

	rep;bsf	t0, cnt			C tzcnt! (t0 is nonzero here)
	mov	u0, s0			C (s1,,s0) = u, kept as candidate for v
	mov	u1, s1

	sub	v0, u0
	sbb	v1, u1			C u = u - v, CF set iff u < v

C On borrow (u < v) take the other difference and let v become the old u.
L(bck):	cmovc	t0, u0			C u = |u - v|
	cmovc	t1, u1			C u = |u - v|
	cmovc	s0, v0			C v = min(u,v)
	cmovc	s1, v1			C v = min(u,v)

C Rightshift (u1,,u0) into (u1,,u0)
C The shift count is taken mod 64, so after the neg the shl shifts by
C 64 - cnt.  When cnt = 0 (only reached via lowz, where u1 = 0) both shifts
C are no-ops and the or adds nothing.
L(shr):	shr	R8(cnt), u0
	mov	u1, t1			C t1 is free again after the cmovs
	shr	R8(cnt), u1
	neg	cnt
	shl	R8(cnt), t1		C bits of u1 that drop into u0
	or	t1, u0

	test	v1, v1
	jnz	L(top)
	test	u1, u1
	jnz	L(top)			C loop while either high limb is nonzero

C Both high limbs are zero: finish with the 1-limb routine.  u0 is already
C in %rsi, so only v0 has to be moved into place.
C NOTE(review): %rdx (v1) is zero here and is not reloaded after the call;
C the high result limb presumably relies on mpn_gcd_11 returning with
C %rdx = 0 -- verify against that routine.
L(gcd_11):
	mov	v0, %rdi
C	mov	u0, %rsi
	TCALL(	mpn_gcd_11)

L(lowz):C We come here when v0 - u0 = 0
	C 1. If v1 - u1 = 0, then gcd is u = v.
	C 2. Else compute gcd_21({v1,v0}, |u1-v1|)
	mov	v1, t0
	sub	u1, t0			C t0 = v1 - u1
	je	L(end)

	xor	t1, t1			C (t1,,t0) = (0,,v1 - u1)
	rep;bsf	t0, cnt			C tzcnt!
	mov	u0, s0			C (s1,,s0) = u, as in the main path
	mov	u1, s1
	mov	u1, u0
	xor	u1, u1
	sub	v1, u0			C u = (0,,u1 - v1), CF set iff u1 < v1
	jmp	L(bck)

C u = v: the result (v1,,v0) is already in %rdx,%rax, so no moves needed.
L(end):	C mov	v0, %rax
	C mov	v1, %rdx
	FUNC_EXIT()
	ret
EPILOGUE()