Home | History | Annotate | Line # | Download | only in pentium
      1 dnl  Intel Pentium mpn_addmul_1, mpn_submul_1 -- add or subtract mpn by limb product.
      2 
      3 dnl  Copyright 1992, 1994, 1996, 1999, 2000, 2002 Free Software Foundation,
      4 dnl  Inc.
      5 
      6 dnl  This file is part of the GNU MP Library.
      7 dnl
      8 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
      9 dnl  it under the terms of either:
     10 dnl
     11 dnl    * the GNU Lesser General Public License as published by the Free
     12 dnl      Software Foundation; either version 3 of the License, or (at your
     13 dnl      option) any later version.
     14 dnl
     15 dnl  or
     16 dnl
     17 dnl    * the GNU General Public License as published by the Free Software
     18 dnl      Foundation; either version 2 of the License, or (at your option) any
     19 dnl      later version.
     20 dnl
     21 dnl  or both in parallel, as here.
     22 dnl
     23 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
     24 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     25 dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     26 dnl  for more details.
     27 dnl
     28 dnl  You should have received copies of the GNU General Public License and the
     29 dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
     30 dnl  see https://www.gnu.org/licenses/.
     31 
     32 include(`../config.m4')
     33 
     34 
     35 C P5: 14.0 cycles/limb
     36 
     37 
        C Choose which variant this source is assembled as, based on which of
        C OPERATION_addmul_1 or OPERATION_submul_1 is defined:
        C
        C   M4_inst         instruction that combines the low product word with
        C                   the destination limb: addl or subl
        C   M4_function_1   name of the entry point without a carry-in argument
        C   M4_function_1c  name of the entry point taking an initial carry limb
        C
        C If neither symbol is defined, m4_error is invoked with a message
        C saying one of them is needed.
     38 ifdef(`OPERATION_addmul_1', `
     39       define(M4_inst,        addl)
     40       define(M4_function_1,  mpn_addmul_1)
     41       define(M4_function_1c, mpn_addmul_1c)
     42 
     43 ',`ifdef(`OPERATION_submul_1', `
     44       define(M4_inst,        subl)
     45       define(M4_function_1,  mpn_submul_1)
     46       define(M4_function_1c, mpn_submul_1c)
     47 
     48 ',`m4_error(`Need OPERATION_addmul_1 or OPERATION_submul_1
     49 ')')')
     50 
        C The four function names this one source file can provide are listed
        C here.  NOTE(review): MULFUNC_PROLOGUE is defined outside this file;
        C presumably it is consumed by the build machinery - confirm.
     51 MULFUNC_PROLOGUE(mpn_addmul_1 mpn_addmul_1c mpn_submul_1 mpn_submul_1c)
     52 
     53 
     54 C mp_limb_t mpn_addmul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
     55 C                         mp_limb_t mult);
     56 C mp_limb_t mpn_addmul_1c (mp_ptr dst, mp_srcptr src, mp_size_t size,
     57 C                          mp_limb_t mult, mp_limb_t carry);
     58 C
     59 C mp_limb_t mpn_submul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
     60 C                         mp_limb_t mult);
     61 C mp_limb_t mpn_submul_1c (mp_ptr dst, mp_srcptr src, mp_size_t size,
     62 C                          mp_limb_t mult, mp_limb_t carry);
     63 C
     64 
        C Names for the five arguments of the prototypes above: dst is given
        C offset 4, src 8, size 12, mult 16 and carry 20.  PARAM_CARRY is read
        C only by the 1c entry point.
        C NOTE(review): defframe is defined outside this file; the numbers are
        C presumably byte offsets from the stack pointer at function entry,
        C adjusted by the FRAME value that each FRAME_pushl updates - confirm.
     65 defframe(PARAM_CARRY,     20)
     66 defframe(PARAM_MULTIPLIER,16)
     67 defframe(PARAM_SIZE,      12)
     68 defframe(PARAM_SRC,       8)
     69 defframe(PARAM_DST,       4)
     70 
     71 	TEXT
     72 
        C M4_function_1c: entry point with a caller-supplied initial carry limb
        C (named mpn_addmul_1c or mpn_submul_1c by the definitions above).
        C
        C It only sets up what differs from M4_function_1 and then joins that
        C routine at L(start_1c):
        C   ecx  = the carry argument, instead of zero
        C   esi  is pushed, mirroring the pushl that M4_function_1 executes
        C        before L(start_1c), so the stack layout and FRAME value are the
        C        same on both paths
        C There is no ret here; the shared code in M4_function_1 returns to the
        C caller.
     73 	ALIGN(8)
     74 PROLOGUE(M4_function_1c)
     75 deflit(`FRAME',0)
     76 
     77 	movl	PARAM_CARRY, %ecx	C ecx = initial carry limb
     78 	pushl	%esi		FRAME_pushl()
     79 
     80 	jmp	L(start_1c)		C continue in the shared body
     81 
     82 EPILOGUE()
     83 
     84 
        C M4_function_1: for each of the size limbs, lowest first, multiply the
        C src limb by the multiplier, add the running carry limb, and combine
        C the low word of that sum with the dst limb using M4_inst (addl or
        C subl), storing the result back to dst.
        C
        C Returns in eax the final carry limb: the high word left over from the
        C last limb plus the carry flag produced by the last M4_inst.
        C
        C Registers: esi, edi and ebx are saved on entry and restored before
        C ret; eax, ecx and edx are used without being saved.
        C
        C size is assumed to be at least 1.  The loop body runs before the
        C counter is tested, and with size 0 the counter would start at zero, so
        C the loop would not stop at the end of the operands.
        C
        C The loop depends on the carry flag travelling from M4_inst at the
        C bottom of one pass to the adcl at the top of the next; the
        C instructions between them (movl, incl, jnz) do not modify the carry
        C flag.
        C NOTE(review): instructions are laid out in groups of two separated by
        C blank lines, presumably for Pentium dual-issue pairing - keep the
        C order when editing.
     85 	ALIGN(8)
     86 PROLOGUE(M4_function_1)
     87 deflit(`FRAME',0)
     88 
     89 	xorl	%ecx, %ecx		C no carry-in: initial carry limb is 0
     90 	pushl	%esi		FRAME_pushl()
     91 
     92 L(start_1c):
        C Shared with M4_function_1c.  Here ecx holds the initial carry limb
        C and esi has already been pushed.
     93 	movl	PARAM_SRC, %esi		C esi = src
     94 	movl	PARAM_SIZE, %eax	C eax = size
     95 
     96 	pushl	%edi		FRAME_pushl()
     97 	pushl	%ebx		FRAME_pushl()
     98 
     99 	movl	PARAM_DST, %edi		C edi = dst
    100 	leal	-1(%eax), %ebx		C size-1
    101 
    102 	leal	(%esi,%eax,4), %esi	C esi = src + 4*size, end of src
    103 	xorl	$-1, %ebx		C -size, and clear carry
    104 
        C leal does not modify flags, so the carry cleared by the xorl above is
        C still clear when the first adcl at L(top) executes.
    105 	leal	(%edi,%eax,4), %edi	C edi = dst + 4*size, end of dst
    106 
    107 L(top):
    108 	C eax	scratch: src limb, then low product word
    109 	C ebx	counter, negative
    110 	C ecx	carry
    111 	C edx	scratch: high product word
    112 	C esi	src end
    113 	C edi	dst end
    114 	C ebp	not used
    115 
    116 	adcl	$0, %ecx		C fold pending carry flag into carry limb
    117 	movl	(%esi,%ebx,4), %eax	C eax = current src limb
    118 
    119 	mull	PARAM_MULTIPLIER	C edx:eax = src limb * multiplier
    120 
    121 	addl	%ecx, %eax		C low word += carry limb
    122 	movl	(%edi,%ebx,4), %ecx	C ecx = current dst limb
    123 
    124 	adcl	$0, %edx		C propagate into high word
    125 	M4_inst	%eax, %ecx		C dst limb +/- low word, sets carry flag
    126 
    127 	movl	%ecx, (%edi,%ebx,4)	C store updated dst limb
    128 	incl	%ebx			C next limb; carry flag is left unchanged
    129 
    130 	movl	%edx, %ecx		C high word becomes the next carry limb
    131 	jnz	L(top)			C loop until the counter reaches zero
    132 
    133 
    134 	adcl	$0, %ecx		C add the carry flag from the last M4_inst
    135 	popl	%ebx
    136 
    137 	movl	%ecx, %eax		C return value = final carry limb
    138 	popl	%edi
    139 
    140 	popl	%esi
    141 
    142 	ret
    143 
    144 EPILOGUE()
    145