Home | History | Annotate | Line # | Download | only in bt1
      1  1.1  mrg dnl  AMD64 mpn_copyd optimised for AMD bobcat.
      2  1.1  mrg 
      3  1.1  mrg dnl  Copyright 2003, 2005, 2007, 2011, 2012 Free Software Foundation, Inc.
      4  1.1  mrg 
      5  1.1  mrg dnl  This file is part of the GNU MP Library.
      6  1.1  mrg dnl
      7  1.1  mrg dnl  The GNU MP Library is free software; you can redistribute it and/or modify
      8  1.1  mrg dnl  it under the terms of either:
      9  1.1  mrg dnl
     10  1.1  mrg dnl    * the GNU Lesser General Public License as published by the Free
     11  1.1  mrg dnl      Software Foundation; either version 3 of the License, or (at your
     12  1.1  mrg dnl      option) any later version.
     13  1.1  mrg dnl
     14  1.1  mrg dnl  or
     15  1.1  mrg dnl
     16  1.1  mrg dnl    * the GNU General Public License as published by the Free Software
     17  1.1  mrg dnl      Foundation; either version 2 of the License, or (at your option) any
     18  1.1  mrg dnl      later version.
     19  1.1  mrg dnl
     20  1.1  mrg dnl  or both in parallel, as here.
     21  1.1  mrg dnl
     22  1.1  mrg dnl  The GNU MP Library is distributed in the hope that it will be useful, but
     23  1.1  mrg dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     24  1.1  mrg dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     25  1.1  mrg dnl  for more details.
     26  1.1  mrg dnl
     27  1.1  mrg dnl  You should have received copies of the GNU General Public License and the
     28  1.1  mrg dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
     29  1.1  mrg dnl  see https://www.gnu.org/licenses/.
     30  1.1  mrg 
     31  1.1  mrg include(`../config.m4')
     32  1.1  mrg 
     33  1.1  mrg C	     cycles/limb
     34  1.1  mrg C AMD K8,K9	 1
     35  1.1  mrg C AMD K10	 1-2  (alignment fluctuations)
     36  1.1  mrg C AMD bd1	 ?
     37  1.1  mrg C AMD bobcat	 1.5
     38  1.1  mrg C Intel P4	 2.8
     39  1.1  mrg C Intel core2	 1
     40  1.1  mrg C Intel NHM	 1-1.25
     41  1.1  mrg C Intel SBR	 1
     42  1.1  mrg C Intel atom	 2.87
     43  1.1  mrg C VIA nano	 2
     44  1.1  mrg 
     45  1.1  mrg C INPUT PARAMETERS
     46  1.1  mrg C rp	rdi
     47  1.1  mrg C up	rsi
     48  1.1  mrg C n	rdx
     49  1.1  mrg 
     50  1.1  mrg define(`rp',`%rdi')
     51  1.1  mrg define(`up',`%rsi')
     52  1.1  mrg define(`n',`%rdx')
                  C The three m4 names above let the code refer to its operands by role
                  C (rp, up, n) instead of by raw register name; the mapping is the one
                  C listed under INPUT PARAMETERS.
     53  1.1  mrg 
                  C NOTE(review): ABI_SUPPORT is not defined in this file.  Presumably it
                  C declares the calling conventions (DOS64 and STD64) this file can be
                  C built for; confirm against the macro definitions pulled in by config.m4.
     54  1.1  mrg ABI_SUPPORT(DOS64)
     55  1.1  mrg ABI_SUPPORT(STD64)
     56  1.1  mrg 
     57  1.1  mrg ASM_START()
     58  1.1  mrg 	TEXT
     59  1.1  mrg 	ALIGN(16)
                  C mpn_copyd: copy n limbs (8 bytes each) from up to rp, taking the
                  C highest-indexed limb first and working down towards index 0.
                  C
                  C The instructions written out below change only the registers n and
                  C %r8 plus the flags; rp and up serve purely as base addresses.
                  C PROLOGUE, FUNC_ENTRY, FUNC_EXIT, EPILOGUE, L and R32 are macros defined
                  C outside this file, so whatever they expand to is not described here.
                  C
                  C Method: n is biased by -4 on entry.  While the biased value is >= 0 at
                  C least four limbs remain and the unrolled loop moves them.  Afterwards
                  C the biased value equals (limbs left) - 4 and the tail moves the last
                  C zero to three limbs.  The branches are signed (jl/jge), so n is
                  C treated as a signed count.
     60  1.1  mrg PROLOGUE(mpn_copyd)
                  C NOTE(review): FUNC_ENTRY(3) presumably moves the three DOS64 argument
                  C registers into rdi/rsi/rdx and expands to nothing for STD64, with
                  C FUNC_EXIT undoing it; confirm in the macro definitions.
     61  1.1  mrg 	FUNC_ENTRY(3)
     62  1.1  mrg 	sub	$4, n
                  C Fewer than four limbs in total: skip the loop and go to the tail.
     63  1.1  mrg 	jl	L(end)
                  C Presumably pads so that L(top) starts on a 16-byte boundary.
     64  1.1  mrg 	ALIGN(16)
                  C Main loop.  With n holding the biased index, copy limbs n+3, n+2, n+1
                  C and n in that order, each as a load into %r8 followed by a store.
     65  1.1  mrg L(top):	mov	24(up,n,8), %r8
     66  1.1  mrg 	mov	%r8, 24(rp,n,8)
     67  1.1  mrg 	mov	16(up,n,8), %r8
     68  1.1  mrg 	mov	%r8, 16(rp,n,8)
     69  1.1  mrg 	mov	8(up,n,8), %r8
     70  1.1  mrg 	mov	%r8, 8(rp,n,8)
     71  1.1  mrg 	mov	(up,n,8), %r8
     72  1.1  mrg 	mov	%r8, (rp,n,8)
                  C Step down four limbs and repeat while a full group of four remains.
                  C The label L(ent) is not the target of any jump in this file.
     73  1.1  mrg L(ent):	sub	$4, n
     74  1.1  mrg 	jge	L(top)
     75  1.1  mrg 
                  C Tail.  Assuming a non-negative count on entry, n is now -4, -3, -2 or
                  C -1, meaning 0, 1, 2 or 3 limbs are left.  Only the low 32 bits are
                  C compared (R32(n) is presumably the 32-bit name of the n register),
                  C which is enough to tell these four values apart.
     76  1.1  mrg L(end):	cmp	$-4, R32(n)
     77  1.1  mrg 	jz	L(ret)
                  C At least one limb left: copy the highest remaining one.
     78  1.1  mrg 	mov	24(up,n,8), %r8
     79  1.1  mrg 	mov	%r8, 24(rp,n,8)
     80  1.1  mrg 	cmp	$-3, R32(n)
     81  1.1  mrg 	jz	L(ret)
                  C At least two limbs were left: copy the next one down.
     82  1.1  mrg 	mov	16(up,n,8), %r8
     83  1.1  mrg 	mov	%r8, 16(rp,n,8)
     84  1.1  mrg 	cmp	$-2, R32(n)
     85  1.1  mrg 	jz	L(ret)
                  C Three limbs were left, so n is -1 and this last copy moves limb 0.
                  C No further compare is needed before falling through to the return.
     86  1.1  mrg 	mov	8(up,n,8), %r8
     87  1.1  mrg 	mov	%r8, 8(rp,n,8)
     88  1.1  mrg 
     89  1.1  mrg L(ret):	FUNC_EXIT()
     90  1.1  mrg 	ret
     91  1.1  mrg EPILOGUE()
     92