Home | History | Annotate | Line # | Download | only in x86
dive_1.asm revision 1.1
      1 dnl  x86 mpn_divexact_1 -- mpn by limb exact division.
      2 
      3 dnl  Copyright 2001, 2002, 2007 Free Software Foundation, Inc.
      4 dnl
      5 dnl  This file is part of the GNU MP Library.
      6 dnl
      7 dnl  The GNU MP Library is free software; you can redistribute it and/or
      8 dnl  modify it under the terms of the GNU Lesser General Public License as
      9 dnl  published by the Free Software Foundation; either version 3 of the
     10 dnl  License, or (at your option) any later version.
     11 dnl
     12 dnl  The GNU MP Library is distributed in the hope that it will be useful,
     13 dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
     14 dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     15 dnl  Lesser General Public License for more details.
     16 dnl
     17 dnl  You should have received a copy of the GNU Lesser General Public License
     18 dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
     19 
     20 include(`../config.m4')
     21 
     22 
     23 C     cycles/limb
     24 C P54    30.0
     25 C P55    29.0
     26 C P6     13.0 odd divisor, 12.0 even (strangely)
     27 C K6     14.0
     28 C K7     12.0
     29 C P4     42.0
     30 
     31 
     32 C mp_limb_t mpn_divexact_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
     33 C                           mp_limb_t divisor);
     34 C
     35 
     36 defframe(PARAM_DIVISOR,16)	C 4th argument: divisor
     37 defframe(PARAM_SIZE,   12)	C 3rd argument: size, counted in limbs
     38 defframe(PARAM_SRC,    8)	C 2nd argument: src pointer
     39 defframe(PARAM_DST,    4)	C 1st argument: dst pointer
        dnl  NOTE(review): defframe is defined outside this file.  Presumably the
        dnl  numbers are byte offsets from the stack pointer at function entry and
        dnl  the resulting operands are corrected by the current FRAME value.
     40 
     41 dnl  re-use parameter space
        dnl  VAR_INVERSE is an alias for the PARAM_SRC stack slot.  The function
        dnl  copies src into a register before it stores the inverse there, so the
        dnl  slot is free by then and no extra stack space is needed.
     42 define(VAR_INVERSE,`PARAM_SRC')
     43 
     44 	TEXT
     45 
     46 	ALIGN(16)
        C mpn_divexact_1 -- divide the size-limb number at src by divisor and
        C store size quotient limbs at dst.  No remainder is computed or checked,
        C so the result is only meaningful when the division is exact.
        C
        C Method, as implemented below:
        C   1. Count the trailing zero bits of divisor into ecx and form the odd
        C      part d, which is written back over the PARAM_DIVISOR slot.
        C   2. Build inv with d*inv == 1 mod 2^GMP_LIMB_BITS, using 32-bit
        C      registers: a start value from binvert_limb_table followed by two
        C      steps of inv = 2*inv - inv*inv*d.
        C   3. Walk the limbs from low to high.  Each source limb is shifted
        C      right by ecx with bits pulled in from the next limb, the pending
        C      carry is subtracted, and the low half of the product with inv is
        C      the quotient limb q.  The high half of q*d becomes the carry limb
        C      for the next position.
        C
        C Arguments are read from the stack through the PARAM_ operands.  ebp,
        C edi, ebx and esi are saved and restored; eax, ecx and edx are
        C overwritten.  At the ret, eax still holds the last quotient limb that
        C was stored.
        C
        C Assumptions that are not checked here: divisor is nonzero (step 1 would
        C never terminate otherwise) and size is at least 1.
     47 PROLOGUE(mpn_divexact_1)
     48 deflit(`FRAME',0)
     49 
        	C Fetch divisor and size while saving the four registers that are
        	C restored before returning.  NOTE(review): FRAME_pushl and FRAME_popl
        	C are defined outside this file; presumably they keep FRAME in step
        	C with the stack pointer so the PARAM_ operands stay valid.
     50 	movl	PARAM_DIVISOR, %eax
     51 	pushl	%ebp	FRAME_pushl()
     52 
     53 	movl	PARAM_SIZE, %ebp
     54 	pushl	%edi	FRAME_pushl()
     55 
     56 	pushl	%ebx	FRAME_pushl()
     57 	movl	$-1, %ecx		C shift count
     58 
     59 	pushl	%esi	FRAME_pushl()
     60 
        	C Shift the divisor right one bit at a time until a 1 bit falls out
        	C into the carry flag.  ecx starts at -1 and is incremented once per
        	C shift, so it ends as the number of trailing zero bits, and eax ends
        	C as the odd part of the divisor shifted right by one.  A zero
        	C divisor would never leave this loop.
     61 L(strip_twos):
     62 	incl	%ecx
     63 
     64 	shrl	%eax
     65 	jnc	L(strip_twos)
     66 
        	C Rebuild the odd part as 2*eax+1, then keep bits 1 to 7 of it as the
        	C table index.
     67 	leal	1(%eax,%eax), %ebx	C d without twos
     68 	andl	$127, %eax		C d/2, 7 bits
     69 
        	C Load one zero-extended byte from binvert_limb_table as the starting
        	C inverse.  The PIC variant obtains the table address through the LEA
        	C macro first.  The table itself is defined outside this file.
     70 ifdef(`PIC',`
     71 	LEA(	binvert_limb_table, %edx)
     72 	movzbl	(%eax,%edx), %eax		C inv 8 bits
     73 ',`
     74 	movzbl	binvert_limb_table(%eax), %eax	C inv 8 bits
     75 ')
     76 
        	C First refinement step, inv = 2*inv - inv*inv*d.  It is interleaved
        	C with storing the odd d over the divisor argument and with loading
        	C the src and dst pointers.
     77 	leal	(%eax,%eax), %edx	C 2*inv
     78 	movl	%ebx, PARAM_DIVISOR	C d without twos
     79 
     80 	imull	%eax, %eax		C inv*inv
     81 
     82 	movl	PARAM_SRC, %esi
     83 	movl	PARAM_DST, %edi
     84 
     85 	imull	%ebx, %eax		C inv*inv*d
     86 
     87 	subl	%eax, %edx		C inv = 2*inv - inv*inv*d
        	C Second refinement step, same formula, with the roles of eax and
        	C edx swapped so the final inverse lands in eax.
     88 	leal	(%edx,%edx), %eax	C 2*inv
     89 
     90 	imull	%edx, %edx		C inv*inv
     91 
        	C Point esi and edi just past the last limb and turn ebp into a
        	C negative index.  Limbs are then addressed upwards from the low end
        	C and the loop can finish when ebp reaches zero.
     92 	leal	(%esi,%ebp,4), %esi	C src end
     93 	leal	(%edi,%ebp,4), %edi	C dst end
     94 	negl	%ebp			C -size
     95 
     96 	imull	%ebx, %edx		C inv*inv*d
     97 
     98 	subl	%edx, %eax		C inv = 2*inv - inv*inv*d
     99 
        	C Self-check of the inverse against the odd d now held in the
        	C PARAM_DIVISOR slot.  eax is saved and restored around the check.
        	C The ASSERT macro is defined outside this file.
    100 	ASSERT(e,`	C expect d*inv == 1 mod 2^GMP_LIMB_BITS
    101 	pushl	%eax	FRAME_pushl()
    102 	imull	PARAM_DIVISOR, %eax
    103 	cmpl	$1, %eax
    104 	popl	%eax	FRAME_popl()')
    105 
        	C Park the inverse in the stack slot that held src, which is already
        	C copied to esi, and fetch the lowest source limb.
    106 	movl	%eax, VAR_INVERSE
    107 	movl	(%esi,%ebp,4), %eax	C src[0]
    108 
        	C Nothing has been carried yet: clear the carry bit and carry limb.
    109 	xorl	%ebx, %ebx
    110 	xorl	%edx, %edx
    111 
        	C A one-limb operand skips the loop.  L(one) then runs with src[0]
        	C in eax and both carries zero.
    112 	incl	%ebp
    113 	jz	L(one)
    114 
        	C First limb of a longer operand: shift src[0] right by cl and fill
        	C the vacated high bits from src[1].  There is no carry to subtract,
        	C so join the loop at L(entry) with the inverse in edx.
    115 	movl	(%esi,%ebp,4), %edx	C src[1]
    116 
    117 	shrdl(	%cl, %edx, %eax)
    118 
    119 	movl	VAR_INVERSE, %edx
    120 	jmp	L(entry)
    121 
    122 
    123 	ALIGN(8)
    124 	nop	C k6 code alignment
    125 	nop
    126 L(top):
    127 	C eax	q
    128 	C ebx	carry bit, 0 or -1
    129 	C ecx	shift
    130 	C edx	carry limb
    131 	C esi	src end
    132 	C edi	dst end
    133 	C ebp	counter, limbs, negative
    134 
        	C ebp was already incremented for this pass, so -4(%esi,%ebp,4) is
        	C the current source limb and (%esi,%ebp,4) is the next one.
        	C Subtracting ebx, which is 0 or -1, adds the carry bit to the carry
        	C limb.
    135 	movl	-4(%esi,%ebp,4), %eax
    136 	subl	%ebx, %edx		C accumulate carry bit
    137 
        	C ebx is free again and is reused for the next source limb, which
        	C supplies the high bits of the shifted current limb.
    138 	movl	(%esi,%ebp,4), %ebx
    139 
    140 	shrdl(	%cl, %ebx, %eax)
    141 
        	C The borrow out of this subtraction stays in the carry flag across
        	C the movl, and the sbbl then turns it into 0 or -1 in ebx.
    142 	subl	%edx, %eax		C apply carry limb
    143 	movl	VAR_INVERSE, %edx
    144 
    145 	sbbl	%ebx, %ebx
    146 
    147 L(entry):
        	C Low half of limb*inverse is the quotient limb q.  Store it, then
        	C multiply q by the odd d: the high half left in edx is the carry
        	C limb for the next position.  The low half in eax is overwritten
        	C before it is read again.
    148 	imull	%edx, %eax
    149 
    150 	movl	%eax, -4(%edi,%ebp,4)
    151 	movl	PARAM_DIVISOR, %edx
    152 
    153 	mull	%edx
    154 
    155 	incl	%ebp
    156 	jnz	L(top)
    157 
    158 
        	C Highest limb: there is no further limb to shift bits in from, so a
        	C plain shrl is used.  Both carries are then applied and the final
        	C quotient limb is stored, interleaved with restoring registers.
    159 	movl	-4(%esi), %eax		C src high limb
    160 L(one):
    161 	shrl	%cl, %eax
    162 	popl	%esi	FRAME_popl()
    163 
    164 	addl	%ebx, %eax		C apply carry bit
    165 	popl	%ebx	FRAME_popl()
    166 
    167 	subl	%edx, %eax		C apply carry limb
    168 
    169 	imull	VAR_INVERSE, %eax
    170 
    171 	movl	%eax, -4(%edi)
    172 
        	C The last two pops carry no FRAME_popl.  No PARAM_ or VAR_ operand
        	C is used after this point.
    173 	popl	%edi
    174 	popl	%ebp
    175 
    176 	ret
    177 
    178 EPILOGUE()
    179