Home | History | Annotate | Line # | Download | only in x86
      1      1.1  mrg dnl  x86 mpn_divexact_1 -- mpn by limb exact division.
      2      1.1  mrg 
      3      1.1  mrg dnl  Copyright 2001, 2002, 2007 Free Software Foundation, Inc.
      4  1.1.1.2  mrg 
      5      1.1  mrg dnl  This file is part of the GNU MP Library.
      6      1.1  mrg dnl
      7  1.1.1.2  mrg dnl  The GNU MP Library is free software; you can redistribute it and/or modify
      8  1.1.1.2  mrg dnl  it under the terms of either:
      9  1.1.1.2  mrg dnl
     10  1.1.1.2  mrg dnl    * the GNU Lesser General Public License as published by the Free
     11  1.1.1.2  mrg dnl      Software Foundation; either version 3 of the License, or (at your
     12  1.1.1.2  mrg dnl      option) any later version.
     13  1.1.1.2  mrg dnl
     14  1.1.1.2  mrg dnl  or
     15  1.1.1.2  mrg dnl
     16  1.1.1.2  mrg dnl    * the GNU General Public License as published by the Free Software
     17  1.1.1.2  mrg dnl      Foundation; either version 2 of the License, or (at your option) any
     18  1.1.1.2  mrg dnl      later version.
     19  1.1.1.2  mrg dnl
     20  1.1.1.2  mrg dnl  or both in parallel, as here.
     21  1.1.1.2  mrg dnl
     22  1.1.1.2  mrg dnl  The GNU MP Library is distributed in the hope that it will be useful, but
     23  1.1.1.2  mrg dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     24  1.1.1.2  mrg dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     25  1.1.1.2  mrg dnl  for more details.
     26      1.1  mrg dnl
     27  1.1.1.2  mrg dnl  You should have received copies of the GNU General Public License and the
     28  1.1.1.2  mrg dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
     29  1.1.1.2  mrg dnl  see https://www.gnu.org/licenses/.
     30      1.1  mrg 
     31      1.1  mrg include(`../config.m4')
     32      1.1  mrg 
     33      1.1  mrg 
     34      1.1  mrg C     cycles/limb
     35      1.1  mrg C P54    30.0
     36      1.1  mrg C P55    29.0
     37      1.1  mrg C P6     13.0 odd divisor, 12.0 even (strangely)
     38      1.1  mrg C K6     14.0
     39      1.1  mrg C K7     12.0
     40      1.1  mrg C P4     42.0
     41      1.1  mrg 
     42      1.1  mrg 
     43      1.1  mrg C void mpn_divexact_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
     44      1.1  mrg C                      mp_limb_t divisor);
     45      1.1  mrg C
     46      1.1  mrg 
     47      1.1  mrg defframe(PARAM_DIVISOR,16)
     48      1.1  mrg defframe(PARAM_SIZE,   12)
     49      1.1  mrg defframe(PARAM_SRC,    8)
     50      1.1  mrg defframe(PARAM_DST,    4)
     51      1.1  mrg 
                      C The four names above are used further down as memory operands for
                      C the function arguments dst, src, size and divisor.  The second
                      C defframe argument is presumably the byte offset of each argument
                      C from the stack pointer at function entry - confirm against the
                      C defframe definition, which is not part of this file.
                      C
                      C VAR_INVERSE below is an alias for the PARAM_SRC slot.  The function
                      C copies PARAM_SRC into a register before it first writes VAR_INVERSE,
                      C so the slot is free to hold the computed inverse from then on.
     52      1.1  mrg dnl  re-use parameter space
     53      1.1  mrg define(VAR_INVERSE,`PARAM_SRC')
     54      1.1  mrg 
     55      1.1  mrg 	TEXT
     56      1.1  mrg 
     57      1.1  mrg 	ALIGN(16)
                      C mpn_divexact_1 -- write to dst the size-limb quotient of src divided
                      C by the single limb divisor, for a division that is exact.
                      C
                      C Outline of the code below:
                      C   1. Strip the trailing zero bits from the divisor.  ecx gets the
                      C      number of bits stripped, ebx the remaining odd divisor d.
                      C   2. Compute inv, the inverse of d modulo 2^GMP_LIMB_BITS.  An 8-bit
                      C      starting value comes from binvert_limb_table and is refined by
                      C      two steps of inv = 2*inv - inv*inv*d.
                      C   3. Walk src from the low limb to the high limb.  Each limb is
                      C      shifted right by ecx bits (taking its top bits from the next
                      C      limb), has the carry from the previous step subtracted, and is
                      C      multiplied by inv to give one quotient limb q.  The high half of
                      C      q*d is the carry limb for the next step.
                      C
                      C ebp, edi, ebx and esi are pushed on entry and popped before ret.
                      C
                      C NOTE(review): a zero divisor would never leave L(strip_twos), and
                      C src[0] is read before size is tested, so callers presumably
                      C guarantee divisor != 0 and size >= 1 - confirm.
     58      1.1  mrg PROLOGUE(mpn_divexact_1)
     59      1.1  mrg deflit(`FRAME',0)
     60      1.1  mrg 
     61      1.1  mrg 	movl	PARAM_DIVISOR, %eax
     62      1.1  mrg 	pushl	%ebp	FRAME_pushl()
     63      1.1  mrg 
     64      1.1  mrg 	movl	PARAM_SIZE, %ebp
     65      1.1  mrg 	pushl	%edi	FRAME_pushl()
     66      1.1  mrg 
     67      1.1  mrg 	pushl	%ebx	FRAME_pushl()
     68      1.1  mrg 	movl	$-1, %ecx		C shift count
     69      1.1  mrg 
     70      1.1  mrg 	pushl	%esi	FRAME_pushl()
     71      1.1  mrg 
                      	C Each pass shifts the divisor right one bit and loops while the bit
                      	C shifted out (the carry flag) is zero.  ecx starts at -1 and is
                      	C incremented once per pass, so on exit ecx is the number of trailing
                      	C zero bits and eax is the odd part of the divisor shifted right by
                      	C one more bit.
     72      1.1  mrg L(strip_twos):
     73      1.1  mrg 	incl	%ecx
     74      1.1  mrg 
     75      1.1  mrg 	shrl	%eax
     76      1.1  mrg 	jnc	L(strip_twos)
     77      1.1  mrg 
                      	C 2*eax+1 puts back the low one bit that the last shift removed.
     78      1.1  mrg 	leal	1(%eax,%eax), %ebx	C d without twos
     79      1.1  mrg 	andl	$127, %eax		C d/2, 7 bits
     80      1.1  mrg 
                      	C Table lookup indexed by the low 7 bits of d/2.  The PIC variant
                      	C first gets the table address into edx with the LEA macro.
     81      1.1  mrg ifdef(`PIC',`
     82      1.1  mrg 	LEA(	binvert_limb_table, %edx)
     83      1.1  mrg 	movzbl	(%eax,%edx), %eax		C inv 8 bits
     84      1.1  mrg ',`
     85      1.1  mrg 	movzbl	binvert_limb_table(%eax), %eax	C inv 8 bits
     86      1.1  mrg ')
     87      1.1  mrg 
                      	C First refinement step.  The argument slot for the divisor is
                      	C overwritten with the odd divisor, which is what the main loop
                      	C multiplies by.  The src and dst pointer loads are interleaved with
                      	C the multiplies.
     88      1.1  mrg 	leal	(%eax,%eax), %edx	C 2*inv
     89      1.1  mrg 	movl	%ebx, PARAM_DIVISOR	C d without twos
     90      1.1  mrg 
     91      1.1  mrg 	imull	%eax, %eax		C inv*inv
     92      1.1  mrg 
     93      1.1  mrg 	movl	PARAM_SRC, %esi
     94      1.1  mrg 	movl	PARAM_DST, %edi
     95      1.1  mrg 
     96      1.1  mrg 	imull	%ebx, %eax		C inv*inv*d
     97      1.1  mrg 
     98      1.1  mrg 	subl	%eax, %edx		C inv = 2*inv - inv*inv*d
                      	C Second refinement step, same formula, result in eax.
     99      1.1  mrg 	leal	(%edx,%edx), %eax	C 2*inv
    100      1.1  mrg 
    101      1.1  mrg 	imull	%edx, %edx		C inv*inv
    102      1.1  mrg 
                      	C esi and edi are moved to just past the last limb of src and dst
                      	C and ebp becomes a negative index counting up towards zero, so that
                      	C (%esi,%ebp,4) addresses a limb relative to the end.
    103      1.1  mrg 	leal	(%esi,%ebp,4), %esi	C src end
    104      1.1  mrg 	leal	(%edi,%ebp,4), %edi	C dst end
    105      1.1  mrg 	negl	%ebp			C -size
    106      1.1  mrg 
    107      1.1  mrg 	imull	%ebx, %edx		C inv*inv*d
    108      1.1  mrg 
    109      1.1  mrg 	subl	%edx, %eax		C inv = 2*inv - inv*inv*d
    110      1.1  mrg 
    111      1.1  mrg 	ASSERT(e,`	C expect d*inv == 1 mod 2^GMP_LIMB_BITS
    112      1.1  mrg 	pushl	%eax	FRAME_pushl()
    113      1.1  mrg 	imull	PARAM_DIVISOR, %eax
    114      1.1  mrg 	cmpl	$1, %eax
    115      1.1  mrg 	popl	%eax	FRAME_popl()')
    116      1.1  mrg 
                      	C Save the inverse in the slot that held the src pointer (already
                      	C copied to esi), then fetch the low source limb.
    117      1.1  mrg 	movl	%eax, VAR_INVERSE
    118      1.1  mrg 	movl	(%esi,%ebp,4), %eax	C src[0]
    119      1.1  mrg 
                      	C No carry bit and no carry limb yet.
    120      1.1  mrg 	xorl	%ebx, %ebx
    121      1.1  mrg 	xorl	%edx, %edx
    122      1.1  mrg 
                      	C ebp is now 1-size, which is zero exactly when size is 1.  In that
                      	C case there is no next limb to shift bits in from, so go straight to
                      	C the final-limb code.
    123      1.1  mrg 	incl	%ebp
    124      1.1  mrg 	jz	L(one)
    125      1.1  mrg 
    126      1.1  mrg 	movl	(%esi,%ebp,4), %edx	C src[1]
    127      1.1  mrg 
                      	C eax = src[0] shifted right by cl bits, with the vacated top bits
                      	C filled from the low bits of src[1].
    128      1.1  mrg 	shrdl(	%cl, %edx, %eax)
    129      1.1  mrg 
                      	C Join the loop at the multiply by the inverse.  The first limb has
                      	C no incoming carry to subtract.
    130      1.1  mrg 	movl	VAR_INVERSE, %edx
    131      1.1  mrg 	jmp	L(entry)
    132      1.1  mrg 
    133      1.1  mrg 
    134      1.1  mrg 	ALIGN(8)
    135      1.1  mrg 	nop	C k6 code alignment
    136      1.1  mrg 	nop
    137      1.1  mrg L(top):
    138      1.1  mrg 	C eax	q
    139      1.1  mrg 	C ebx	carry bit, 0 or -1
    140      1.1  mrg 	C ecx	shift
    141      1.1  mrg 	C edx	carry limb
    142      1.1  mrg 	C esi	src end
    143      1.1  mrg 	C edi	dst end
    144      1.1  mrg 	C ebp	counter, limbs, negative
                      	C
                      	C ebp was incremented at the bottom of the previous pass, so the limb
                      	C to process is at -4(%esi,%ebp,4) and the following limb, which
                      	C supplies the bits shifted in, is at (%esi,%ebp,4).
    145      1.1  mrg 
    146      1.1  mrg 	movl	-4(%esi,%ebp,4), %eax
                      	C ebx is 0 or -1, so this subtraction adds 0 or 1 to the carry limb.
    147      1.1  mrg 	subl	%ebx, %edx		C accumulate carry bit
    148      1.1  mrg 
    149      1.1  mrg 	movl	(%esi,%ebp,4), %ebx
    150      1.1  mrg 
    151      1.1  mrg 	shrdl(	%cl, %ebx, %eax)
    152      1.1  mrg 
    153      1.1  mrg 	subl	%edx, %eax		C apply carry limb
    154      1.1  mrg 	movl	VAR_INVERSE, %edx
    155      1.1  mrg 
                      	C The movl above leaves the flags alone, so this still sees the
                      	C borrow from the subl and turns it into 0 or -1 for the next pass.
    156      1.1  mrg 	sbbl	%ebx, %ebx
    157      1.1  mrg 
    158      1.1  mrg L(entry):
                      	C Quotient limb q = eax * inverse, stored to the dst limb matching
                      	C the src limb just processed.
    159      1.1  mrg 	imull	%edx, %eax
    160      1.1  mrg 
    161      1.1  mrg 	movl	%eax, -4(%edi,%ebp,4)
    162      1.1  mrg 	movl	PARAM_DIVISOR, %edx
    163      1.1  mrg 
                      	C edx:eax = q * d.  Only the high half in edx is used afterwards, as
                      	C the carry limb.
    164      1.1  mrg 	mull	%edx
    165      1.1  mrg 
    166      1.1  mrg 	incl	%ebp
    167      1.1  mrg 	jnz	L(top)
    168      1.1  mrg 
    169      1.1  mrg 
                      	C Final limb.  There is no higher limb to shift bits in from, so a
                      	C plain shrl is used in place of shrdl.  The register restores are
                      	C interleaved with the arithmetic: esi is not needed after the load
                      	C below, and ebx is popped only after its carry bit has been applied.
    170      1.1  mrg 	movl	-4(%esi), %eax		C src high limb
    171      1.1  mrg L(one):
    172      1.1  mrg 	shrl	%cl, %eax
    173      1.1  mrg 	popl	%esi	FRAME_popl()
    174      1.1  mrg 
                      	C ebx is 0 or -1, so adding it subtracts the pending borrow.
    175      1.1  mrg 	addl	%ebx, %eax		C apply carry bit
    176      1.1  mrg 	popl	%ebx	FRAME_popl()
    177      1.1  mrg 
    178      1.1  mrg 	subl	%edx, %eax		C apply carry limb
    179      1.1  mrg 
    180      1.1  mrg 	imull	VAR_INVERSE, %eax
    181      1.1  mrg 
                      	C Store the highest quotient limb, the last limb of dst.
    182      1.1  mrg 	movl	%eax, -4(%edi)
    183      1.1  mrg 
                      	C No stack-relative operand is used after this point.
    184      1.1  mrg 	popl	%edi
    185      1.1  mrg 	popl	%ebp
    186      1.1  mrg 
    187      1.1  mrg 	ret
    188      1.1  mrg 
    189      1.1  mrg EPILOGUE()
    190  1.1.1.2  mrg ASM_END()
    191