Home | History | Annotate | Line # | Download | only in pentium
      1 dnl  Intel Pentium mpn_com -- mpn ones complement.
      2 
      3 dnl  Copyright 1996, 2001, 2002, 2006 Free Software Foundation, Inc.
      4 
      5 dnl  This file is part of the GNU MP Library.
      6 dnl
      7 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
      8 dnl  it under the terms of either:
      9 dnl
     10 dnl    * the GNU Lesser General Public License as published by the Free
     11 dnl      Software Foundation; either version 3 of the License, or (at your
     12 dnl      option) any later version.
     13 dnl
     14 dnl  or
     15 dnl
     16 dnl    * the GNU General Public License as published by the Free Software
     17 dnl      Foundation; either version 2 of the License, or (at your option) any
     18 dnl      later version.
     19 dnl
     20 dnl  or both in parallel, as here.
     21 dnl
     22 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
     23 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     24 dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     25 dnl  for more details.
     26 dnl
     27 dnl  You should have received copies of the GNU General Public License and the
     28 dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
     29 dnl  see https://www.gnu.org/licenses/.
     30 
     31 include(`../config.m4')
     32 
     33 
     34 C P5: 1.75 cycles/limb
     35 
     36 
     37 NAILS_SUPPORT(0-31)
     38 
     39 
     40 C void mpn_com (mp_ptr dst, mp_srcptr src, mp_size_t size);
     41 C
     42 C This code is similar to mpn_copyi, basically there's just some "xorl
     43 C $GMP_NUMB_MASK"s inserted.
     44 C
     45 C Alternatives:
     46 C
     47 C On P55 some MMX code could be 1.25 c/l (8 limb unrolled) if src and dst
     48 C are the same alignment mod 8, but it doesn't seem worth the trouble for
     49 C just that case (plain integer code would still be needed too, for
     50 C the unaligned case).
     51 
     52 defframe(PARAM_SIZE,12)
     53 defframe(PARAM_SRC, 8)
     54 defframe(PARAM_DST, 4)
        C
        C mpn_com body.  Writes src[i] xor GMP_NUMB_MASK to dst[i] for each of the
        C size limbs, lowest index first.
        C
        C Structure: an 8-limb unrolled loop handles size >= 8, then the 0 to 7
        C limbs left over are done as optional groups of 4, 2 and 1.  With size
        C equal to 0 every test falls through and nothing is stored.
        C
        C Registers: esi and edi are pushed on entry and popped before each ret;
        C eax, ecx and edx are overwritten without being saved.
        C
        C PARAM_SIZE, PARAM_SRC and PARAM_DST name the three arguments of the
        C prototype.  NOTE(review): the numbers given to defframe are presumably
        C byte offsets of those arguments from the stack pointer at entry --
        C confirm against the defframe definition.
        C
        C NOTE(review): the nop instructions do not change any register used
        C here; presumably they exist for Pentium pairing or alignment -- confirm.
     55 
     56 	TEXT
     57 	ALIGN(8)
     58 PROLOGUE(mpn_com)
     59 deflit(`FRAME',0)
     60 
     61 	movl	PARAM_SRC, %eax
     62 	movl	PARAM_SIZE, %ecx
     63 
     64 	pushl	%esi	FRAME_pushl()
     65 	pushl	%edi	FRAME_pushl()
     66 
     67 	leal	(%eax,%ecx,4), %eax	C eax = &src[size], 4 bytes per limb
     68 	xorl	$-1, %ecx		C -size-1
     69 
     70 	movl	PARAM_DST, %edx
     71 	addl	$8, %ecx		C -size+7
     72 
     73 	jns	L(end)			C size <= 7: skip the unrolled loop
     74 
     75 	movl	(%edx), %esi		C fetch destination cache line
     76 	nop
     77 
     78 L(top):
     79 	C eax	&src[size]
     80 	C ebx
     81 	C ecx	counter, limbs, negative
     82 	C edx	dst, incrementing
     83 	C esi	scratch
     84 	C edi	scratch
     85 	C ebp
        	C
        	C Each pass complements 8 limbs.  Source limbs are addressed from
        	C the end of src through the negative counter in ecx; edx is
        	C advanced by 32 first, so the stores use offsets -32 to -4.
     86 
     87 	movl	28(%edx), %esi		C destination prefetch (loaded value is discarded)
     88 	addl	$32, %edx		C step dst past the 8 limbs stored below
     89 
     90 	movl	-28(%eax,%ecx,4), %esi	C limbs 0 and 1 of this group
     91 	movl	-24(%eax,%ecx,4), %edi
     92 	xorl	$GMP_NUMB_MASK, %esi
     93 	xorl	$GMP_NUMB_MASK, %edi
     94 	movl	%esi, -32(%edx)
     95 	movl	%edi, -28(%edx)
     96 
     97 	movl	-20(%eax,%ecx,4), %esi	C limbs 2 and 3
     98 	movl	-16(%eax,%ecx,4), %edi
     99 	xorl	$GMP_NUMB_MASK, %esi
    100 	xorl	$GMP_NUMB_MASK, %edi
    101 	movl	%esi, -24(%edx)
    102 	movl	%edi, -20(%edx)
    103 
    104 	movl	-12(%eax,%ecx,4), %esi	C limbs 4 and 5
    105 	movl	-8(%eax,%ecx,4), %edi
    106 	xorl	$GMP_NUMB_MASK, %esi
    107 	xorl	$GMP_NUMB_MASK, %edi
    108 	movl	%esi, -16(%edx)
    109 	movl	%edi, -12(%edx)
    110 
    111 	movl	-4(%eax,%ecx,4), %esi	C limbs 6 and 7
    112 	movl	(%eax,%ecx,4), %edi
    113 	xorl	$GMP_NUMB_MASK, %esi
    114 	xorl	$GMP_NUMB_MASK, %edi
    115 	movl	%esi, -8(%edx)
    116 	movl	%edi, -4(%edx)
    117 
    118 	addl	$8, %ecx		C 8 limbs done
    119 	js	L(top)			C still negative: at least 8 more limbs
    120 
    121 
    122 L(end):
    123 	C eax	&src[size]
    124 	C ecx	0 to 7, representing respectively 7 to 0 limbs remaining
    125 	C edx	dst, next location to store
    126 
    127 	subl	$4, %ecx		C ecx = 3 - remaining
    128 	nop
    129 
    130 	jns	L(no4)			C fewer than 4 limbs remaining
    131 
    132 	movl	-12(%eax,%ecx,4), %esi	C next 4 remaining limbs, two at a time
    133 	movl	-8(%eax,%ecx,4), %edi
    134 	xorl	$GMP_NUMB_MASK, %esi
    135 	xorl	$GMP_NUMB_MASK, %edi
    136 	movl	%esi, (%edx)
    137 	movl	%edi, 4(%edx)
    138 
    139 	movl	-4(%eax,%ecx,4), %esi
    140 	movl	(%eax,%ecx,4), %edi
    141 	xorl	$GMP_NUMB_MASK, %esi
    142 	xorl	$GMP_NUMB_MASK, %edi
    143 	movl	%esi, 8(%edx)
    144 	movl	%edi, 12(%edx)
    145 
    146 	addl	$16, %edx
    147 	addl	$4, %ecx		C 4 limbs done, ecx = 3 - remaining again
    148 L(no4):
    149 
    150 	subl	$2, %ecx		C ecx = 1 - remaining
    151 	nop
    152 
    153 	jns	L(no2)			C fewer than 2 limbs remaining
    154 
    155 	movl	-4(%eax,%ecx,4), %esi	C next 2 remaining limbs
    156 	movl	(%eax,%ecx,4), %edi
    157 	xorl	$GMP_NUMB_MASK, %esi
    158 	xorl	$GMP_NUMB_MASK, %edi
    159 	movl	%esi, (%edx)
    160 	movl	%edi, 4(%edx)
    161 
    162 	addl	$8, %edx
    163 	addl	$2, %ecx		C 2 limbs done, ecx = 1 - remaining again
    164 L(no2):
        	C ecx is 1 - remaining here on both paths, so the zero flag left by
        	C the subl or addl above is set exactly when one limb remains.
    165 
    166 	popl	%edi			C popl leaves the flags unchanged
    167 	jnz	L(done)			C ecx non-zero: no limb remains
    168 
    169 	movl	-4(%eax), %ecx		C last limb, src[size-1]
    170 
    171 	xorl	$GMP_NUMB_MASK, %ecx
    172 	popl	%esi
    173 
    174 	movl	%ecx, (%edx)
    175 	ret
    176 
    177 L(done):
    178 	popl	%esi
    179 	ret
    180 
    181 EPILOGUE()
    182