Home | History | Annotate | Line # | Download | only in pentium
com.asm revision 1.1.1.1
      1 dnl  Intel Pentium mpn_com -- mpn ones complement.
      2 
      3 dnl  Copyright 1996, 2001, 2002, 2006 Free Software Foundation, Inc.
      4 dnl
      5 dnl  This file is part of the GNU MP Library.
      6 dnl
      7 dnl  The GNU MP Library is free software; you can redistribute it and/or
      8 dnl  modify it under the terms of the GNU Lesser General Public License as
      9 dnl  published by the Free Software Foundation; either version 3 of the
     10 dnl  License, or (at your option) any later version.
     11 dnl
     12 dnl  The GNU MP Library is distributed in the hope that it will be useful,
     13 dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
     14 dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     15 dnl  Lesser General Public License for more details.
     16 dnl
     17 dnl  You should have received a copy of the GNU Lesser General Public License
     18 dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
     19 
     20 include(`../config.m4')
     21 
     22 
     23 C P5: 1.75 cycles/limb
     24 
     25 
     26 NAILS_SUPPORT(0-31)
        C NOTE(review): NAILS_SUPPORT is defined outside this file; presumably it
        C declares that this code works with 0 to 31 nail bits, which would fit
        C the use of GMP_NUMB_MASK in the xorl instructions below -- confirm.
     27 
     28 
     29 C void mpn_com (mp_ptr dst, mp_srcptr src, mp_size_t size);
     30 C
     31 C This code is similar to mpn_copyi; basically there are just some "xorl
     32 C $GMP_NUMB_MASK"s inserted.
     33 C
     34 C Alternatives:
     35 C
     36 C On P55 some MMX code could be 1.25 c/l (8 limb unrolled) if src and dst
     37 C are the same alignment mod 8, but it doesn't seem worth the trouble for
     38 C just that case (there'd need to be some plain integer code available too
     39 C for the unaligned case).
     40 
     41 defframe(PARAM_SIZE,12)
     42 defframe(PARAM_SRC, 8)
     43 defframe(PARAM_DST, 4)
        C The offsets 4, 8 and 12 follow the dst, src, size order of the C
        C prototype.  NOTE(review): defframe is defined outside this file;
        C presumably it names stack-argument offsets relative to FRAME -- confirm.
     44 
        C mpn_com stores, for each of the size limbs at src, that limb xored
        C with GMP_NUMB_MASK into the matching limb at dst.
        C
        C Shape of the code: L(top) handles 8 limbs per pass while at least 8
        C remain; after L(end) at most one 4-limb block, one 2-limb block and
        C one single limb deal with the 0 to 7 limbs left over.
        C
        C Registers: eax, ecx and edx are overwritten.  esi and edi are pushed
        C on entry and popped again before each ret.  ebx and ebp are not used.
     45 	TEXT
     46 	ALIGN(8)
     47 PROLOGUE(mpn_com)
     48 deflit(`FRAME',0)
     49 
     50 	movl	PARAM_SRC, %eax
     51 	movl	PARAM_SIZE, %ecx
     52 
     53 	pushl	%esi	FRAME_pushl()
     54 	pushl	%edi	FRAME_pushl()
     55 
     56 	leal	(%eax,%ecx,4), %eax	C eax = &src[size]
     57 	xorl	$-1, %ecx		C -size-1
     58 
        	C PARAM_DST is read after the two pushes.  NOTE(review): presumably
        	C FRAME_pushl() adjusts FRAME so the offset stays right -- confirm.
     59 	movl	PARAM_DST, %edx
     60 	addl	$8, %ecx		C -size+7
     61 
     62 	jns	L(end)			C size <= 7, skip the unrolled loop
     63 
     64 	movl	(%edx), %esi		C fetch destination cache line
     65 	nop				C NOTE(review): presumably a filler for pairing or alignment
     66 
     67 L(top):
     68 	C eax	&src[size]
     69 	C ebx
     70 	C ecx	counter, limbs, negative
     71 	C edx	dst, incrementing
     72 	C esi	scratch
     73 	C edi	scratch
     74 	C ebp
        	C
        	C ecx is 7 minus the number of limbs still to do, so
        	C -28(%eax,%ecx,4) is the next unprocessed source limb and
        	C (%eax,%ecx,4) is the limb seven places after it.
        	C
        	C edx is advanced by 32 before the stores, hence the store offsets
        	C -32 to -4.  The value read by the prefetch load is discarded, esi
        	C is reloaded straight afterwards.
     75 
     76 	movl	28(%edx), %esi		C destination prefetch
     77 	addl	$32, %edx
     78 
     79 	movl	-28(%eax,%ecx,4), %esi
     80 	movl	-24(%eax,%ecx,4), %edi
     81 	xorl	$GMP_NUMB_MASK, %esi
     82 	xorl	$GMP_NUMB_MASK, %edi
     83 	movl	%esi, -32(%edx)
     84 	movl	%edi, -28(%edx)
     85 
     86 	movl	-20(%eax,%ecx,4), %esi
     87 	movl	-16(%eax,%ecx,4), %edi
     88 	xorl	$GMP_NUMB_MASK, %esi
     89 	xorl	$GMP_NUMB_MASK, %edi
     90 	movl	%esi, -24(%edx)
     91 	movl	%edi, -20(%edx)
     92 
     93 	movl	-12(%eax,%ecx,4), %esi
     94 	movl	-8(%eax,%ecx,4), %edi
     95 	xorl	$GMP_NUMB_MASK, %esi
     96 	xorl	$GMP_NUMB_MASK, %edi
     97 	movl	%esi, -16(%edx)
     98 	movl	%edi, -12(%edx)
     99 
    100 	movl	-4(%eax,%ecx,4), %esi
    101 	movl	(%eax,%ecx,4), %edi
    102 	xorl	$GMP_NUMB_MASK, %esi
    103 	xorl	$GMP_NUMB_MASK, %edi
    104 	movl	%esi, -8(%edx)
    105 	movl	%edi, -4(%edx)
    106 
    107 	addl	$8, %ecx
    108 	js	L(top)			C loop while 8 or more limbs remain
    109 
    110 
    111 L(end):
    112 	C eax	&src[size]
    113 	C ecx	0 to 7, representing respectively 7 to 0 limbs remaining
    114 	C edx	dst, next location to store
    115 
    116 	subl	$4, %ecx
    117 	nop
    118 
    119 	jns	L(no4)			C ecx was 4 to 7, fewer than 4 limbs remain
    120 
    121 	movl	-12(%eax,%ecx,4), %esi
    122 	movl	-8(%eax,%ecx,4), %edi
    123 	xorl	$GMP_NUMB_MASK, %esi
    124 	xorl	$GMP_NUMB_MASK, %edi
    125 	movl	%esi, (%edx)
    126 	movl	%edi, 4(%edx)
    127 
    128 	movl	-4(%eax,%ecx,4), %esi
    129 	movl	(%eax,%ecx,4), %edi
    130 	xorl	$GMP_NUMB_MASK, %esi
    131 	xorl	$GMP_NUMB_MASK, %edi
    132 	movl	%esi, 8(%edx)
    133 	movl	%edi, 12(%edx)
    134 
    135 	addl	$16, %edx
    136 	addl	$4, %ecx		C undo the subl $4
    137 L(no4):
        	C ecx	0 to 3, representing respectively 3 to 0 limbs remaining
    138 
    139 	subl	$2, %ecx
    140 	nop
    141 
    142 	jns	L(no2)			C ecx was 2 or 3, fewer than 2 limbs remain
    143 
    144 	movl	-4(%eax,%ecx,4), %esi
    145 	movl	(%eax,%ecx,4), %edi
    146 	xorl	$GMP_NUMB_MASK, %esi
    147 	xorl	$GMP_NUMB_MASK, %edi
    148 	movl	%esi, (%edx)
    149 	movl	%edi, 4(%edx)
    150 
    151 	addl	$8, %edx
    152 	addl	$2, %ecx		C undo the subl $2
    153 L(no2):
        	C ecx	0 or 1, representing respectively 1 or 0 limbs remaining
        	C
        	C The zero flag still reflects ecx here (set by the addl $2 or the
        	C subl $2 above), and popl does not change the flags, so the jnz
        	C below tests ecx.
    154 
    155 	popl	%edi
    156 	jnz	L(done)			C ecx != 0, nothing left to do
    157 
    158 	movl	-4(%eax), %ecx		C last limb, src[size-1]
    159 
    160 	xorl	$GMP_NUMB_MASK, %ecx
    161 	popl	%esi
    162 
    163 	movl	%ecx, (%edx)
    164 	ret
    165 
    166 L(done):
    167 	popl	%esi
    168 	ret
    169 
    170 EPILOGUE()
    171