dnl  AMD64 mpn_copyd optimised for AMD bobcat.

dnl  Copyright 2003, 2005, 2007, 2011, 2012 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C	     cycles/limb
C AMD K8,K9	 1
C AMD K10	 1-2  (alignment fluctuations)
C AMD bd1	 ?
C AMD bobcat	 1.5
C Intel P4	 2.8
C Intel core2	 1
C Intel NHM	 1-1.25
C Intel SBR	 1
C Intel atom	 2.87
C VIA nano	 2

C INPUT PARAMETERS
C rp	rdi
C up	rsi
C n	rdx

define(`rp',`%rdi')
define(`up',`%rsi')
define(`n',`%rdx')

ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

C mpn_copyd (rp, up, n)
C
C Copy n 64-bit limbs from the area at up to the area at rp, moving the limb
C with the highest index first and working downwards.
C
C In:     rp = destination, up = source, n = limb count (see the defines above)
C Out:    nothing is placed in a return register by this code
C Clobb:  %r8 (sole scratch register), n (consumed as the running index), flags
C
C The main loop moves four limbs per iteration; the code at L(end) moves the
C remaining n mod 4 limbs.
C
C NOTE(review): the tail tests assume n >= 0 on entry, so that n - 4 lies in
C [-4,-1] whenever L(end) is reached -- confirm against callers.
C NOTE(review): FUNC_ENTRY, FUNC_EXIT and R32 are defined outside this file;
C presumably FUNC_ENTRY(3)/FUNC_EXIT() adapt the DOS64 argument registers to
C the STD64 ones used here, and R32(n) names the low 32 bits of n -- verify in
C the shared m4 definitions.

ASM_START()
	TEXT
	ALIGN(16)
PROLOGUE(mpn_copyd)
	FUNC_ENTRY(3)
	sub	$4, n			C n = index of lowest limb in top group of 4
	jl	L(end)			C fewer than 4 limbs: tail only
	ALIGN(16)
L(top):	mov	24(up,n,8), %r8		C limb n+3 (highest of the group) first
	mov	%r8, 24(rp,n,8)
	mov	16(up,n,8), %r8		C limb n+2
	mov	%r8, 16(rp,n,8)
	mov	8(up,n,8), %r8		C limb n+1
	mov	%r8, 8(rp,n,8)
	mov	(up,n,8), %r8		C limb n
	mov	%r8, (rp,n,8)
L(ent):	sub	$4, n			C L(ent) is not a branch target in this file
	jge	L(top)			C another full group of 4 remains

C Here n = (limbs still to copy) - 4, i.e. -4, -3, -2 or -1.
L(end):	cmp	$-4, R32(n)
	jz	L(ret)			C 0 limbs left
	mov	24(up,n,8), %r8		C limb n+3 = highest remaining limb
	mov	%r8, 24(rp,n,8)
	cmp	$-3, R32(n)
	jz	L(ret)			C exactly 1 limb was left
	mov	16(up,n,8), %r8		C limb n+2
	mov	%r8, 16(rp,n,8)
	cmp	$-2, R32(n)
	jz	L(ret)			C exactly 2 limbs were left
	mov	8(up,n,8), %r8		C n = -1: limb n+1 = limb 0, the last one
	mov	%r8, 8(rp,n,8)

L(ret):	FUNC_EXIT()
	ret
EPILOGUE()