Home | History | Annotate | Line # | Download | only in x86
      1      1.1  mrg dnl  x86 mpn_bdiv_q_1 -- mpn by limb exact division.
      2      1.1  mrg 
      3  1.1.1.2  mrg dnl  Rearranged from mpn/x86/dive_1.asm by Marco Bodrato.
      4  1.1.1.2  mrg 
      5      1.1  mrg dnl  Copyright 2001, 2002, 2007, 2011 Free Software Foundation, Inc.
      6  1.1.1.2  mrg 
      7      1.1  mrg dnl  This file is part of the GNU MP Library.
      8      1.1  mrg dnl
      9  1.1.1.2  mrg dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     10  1.1.1.2  mrg dnl  it under the terms of either:
     11      1.1  mrg dnl
     12  1.1.1.2  mrg dnl    * the GNU Lesser General Public License as published by the Free
     13  1.1.1.2  mrg dnl      Software Foundation; either version 3 of the License, or (at your
     14  1.1.1.2  mrg dnl      option) any later version.
     15      1.1  mrg dnl
     16  1.1.1.2  mrg dnl  or
     17  1.1.1.2  mrg dnl
     18  1.1.1.2  mrg dnl    * the GNU General Public License as published by the Free Software
     19  1.1.1.2  mrg dnl      Foundation; either version 2 of the License, or (at your option) any
     20  1.1.1.2  mrg dnl      later version.
     21  1.1.1.2  mrg dnl
     22  1.1.1.2  mrg dnl  or both in parallel, as here.
     23  1.1.1.2  mrg dnl
     24  1.1.1.2  mrg dnl  The GNU MP Library is distributed in the hope that it will be useful, but
     25  1.1.1.2  mrg dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     26  1.1.1.2  mrg dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     27  1.1.1.2  mrg dnl  for more details.
     28  1.1.1.2  mrg dnl
     29  1.1.1.2  mrg dnl  You should have received copies of the GNU General Public License and the
     30  1.1.1.2  mrg dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
     31  1.1.1.2  mrg dnl  see https://www.gnu.org/licenses/.
     32      1.1  mrg 
     33      1.1  mrg include(`../config.m4')
     34      1.1  mrg 
     35      1.1  mrg 
     36      1.1  mrg C     cycles/limb
     37      1.1  mrg C P54    30.0
     38      1.1  mrg C P55    29.0
     39      1.1  mrg C P6     13.0 odd divisor, 12.0 even (strangely)
     40      1.1  mrg C K6     14.0
     41      1.1  mrg C K7     12.0
     42      1.1  mrg C P4     42.0
     43      1.1  mrg 
     44      1.1  mrg MULFUNC_PROLOGUE(mpn_bdiv_q_1 mpn_pi1_bdiv_q_1)
     45      1.1  mrg C Stack argument slots, 4 bytes apart, in the argument order of the mpn_pi1_bdiv_q_1 prototype.
     46      1.1  mrg defframe(PARAM_SHIFT,  24)	C read only by mpn_pi1_bdiv_q_1
     47      1.1  mrg defframe(PARAM_INVERSE,20)	C read only by mpn_pi1_bdiv_q_1
     48      1.1  mrg defframe(PARAM_DIVISOR,16)	C mpn_bdiv_q_1 overwrites this slot with the divisor without twos
     49      1.1  mrg defframe(PARAM_SIZE,   12)
     50      1.1  mrg defframe(PARAM_SRC,    8)
     51      1.1  mrg defframe(PARAM_DST,    4)
     52      1.1  mrg 
     53      1.1  mrg dnl  re-use parameter space
     54      1.1  mrg define(VAR_INVERSE,`PARAM_SRC')
     55      1.1  mrg C The inverse is written to this slot only after src has been loaded into esi, so the pointer is not lost.
     56      1.1  mrg 	TEXT
     57      1.1  mrg 
     58      1.1  mrg C mp_limb_t
     59      1.1  mrg C mpn_pi1_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor,
     60      1.1  mrg C		    mp_limb_t inverse, int shift)
     61      1.1  mrg C Works from the low limb up: each dst limb is (src shifted right by shift, minus carry) * inverse.
     62      1.1  mrg 	ALIGN(16)
     63      1.1  mrg PROLOGUE(mpn_pi1_bdiv_q_1)
     64      1.1  mrg deflit(`FRAME',0)
     65      1.1  mrg C inverse is presumably required to satisfy divisor*inverse == 1 mod 2^GMP_LIMB_BITS, as the ASSERT in mpn_bdiv_q_1 checks.
     66      1.1  mrg 	movl	PARAM_SHIFT, %ecx	C shift count, used through cl by shrdl and shrl
     67      1.1  mrg 	pushl	%ebp	FRAME_pushl()
     68      1.1  mrg C eax = inverse and ebp = size form the register state that L(common) relies on.
     69      1.1  mrg 	movl	PARAM_INVERSE, %eax
     70      1.1  mrg 	movl	PARAM_SIZE, %ebp
     71      1.1  mrg 	pushl	%ebx	FRAME_pushl()
     72      1.1  mrg L(common):	C mpn_bdiv_q_1 joins here with eax = inverse, ecx = shift, ebp = size and ebp, ebx already pushed
     73      1.1  mrg 	pushl	%edi	FRAME_pushl()
     74      1.1  mrg 	pushl	%esi	FRAME_pushl()
     75      1.1  mrg 
     76      1.1  mrg 	movl	PARAM_SRC, %esi
     77      1.1  mrg 	movl	PARAM_DST, %edi
     78      1.1  mrg C Point esi and edi just past the last limb and let ebp count up from -size towards zero.
     79      1.1  mrg 	leal	(%esi,%ebp,4), %esi	C src end
     80      1.1  mrg 	leal	(%edi,%ebp,4), %edi	C dst end
     81      1.1  mrg 	negl	%ebp			C -size
     82      1.1  mrg C src now lives in esi, so its stack slot is free to hold the inverse.
     83      1.1  mrg 	movl	%eax, VAR_INVERSE
     84      1.1  mrg 	movl	(%esi,%ebp,4), %eax	C src[0]
     85      1.1  mrg C No carry yet: clear both the carry bit (ebx) and the carry limb (edx).
     86      1.1  mrg 	xorl	%ebx, %ebx
     87      1.1  mrg 	xorl	%edx, %edx
     88      1.1  mrg 
     89      1.1  mrg 	incl	%ebp
     90      1.1  mrg 	jz	L(one)	C size == 1, only the high limb code is needed
     91      1.1  mrg 
     92      1.1  mrg 	movl	(%esi,%ebp,4), %edx	C src[1]
     93      1.1  mrg C eax = src[0] shifted right, with the low bits of src[1] shifted in at the top.
     94      1.1  mrg 	shrdl(	%cl, %edx, %eax)
     95      1.1  mrg 
     96      1.1  mrg 	movl	VAR_INVERSE, %edx
     97      1.1  mrg 	jmp	L(entry)	C the first limb has no carry to subtract
     98      1.1  mrg 
     99      1.1  mrg 
    100      1.1  mrg 	ALIGN(8)
    101      1.1  mrg 	nop	C k6 code alignment
    102      1.1  mrg 	nop
    103      1.1  mrg L(top):
    104      1.1  mrg 	C eax	q
    105      1.1  mrg 	C ebx	carry bit, 0 or -1
    106      1.1  mrg 	C ecx	shift
    107      1.1  mrg 	C edx	carry limb
    108      1.1  mrg 	C esi	src end
    109      1.1  mrg 	C edi	dst end
    110      1.1  mrg 	C ebp	counter, limbs, negative
    111      1.1  mrg C edx arrives holding the high limb of the previous q*divisor product.
    112      1.1  mrg 	movl	-4(%esi,%ebp,4), %eax	C current src limb
    113      1.1  mrg 	subl	%ebx, %edx		C accumulate carry bit
    114      1.1  mrg 
    115      1.1  mrg 	movl	(%esi,%ebp,4), %ebx	C next src limb, supplies the bits shifted in
    116      1.1  mrg 
    117      1.1  mrg 	shrdl(	%cl, %ebx, %eax)
    118      1.1  mrg 
    119      1.1  mrg 	subl	%edx, %eax		C apply carry limb
    120      1.1  mrg 	movl	VAR_INVERSE, %edx
    121      1.1  mrg C movl leaves the flags alone, so sbbl turns the borrow of the subtract above into ebx = 0 or -1.
    122      1.1  mrg 	sbbl	%ebx, %ebx
    123      1.1  mrg 
    124      1.1  mrg L(entry):
    125      1.1  mrg 	imull	%edx, %eax	C q = limb * inverse, low limb of the product
    126      1.1  mrg 
    127      1.1  mrg 	movl	%eax, -4(%edi,%ebp,4)	C store quotient limb
    128      1.1  mrg 	movl	PARAM_DIVISOR, %edx
    129      1.1  mrg C edx:eax = q * divisor; only the high limb in edx is used afterwards, as the next carry limb.
    130      1.1  mrg 	mull	%edx
    131      1.1  mrg 
    132      1.1  mrg 	incl	%ebp
    133      1.1  mrg 	jnz	L(top)
    134      1.1  mrg 
    135      1.1  mrg C Loop finished: one limb remains and there is no higher limb to shift in.
    136      1.1  mrg 	movl	-4(%esi), %eax		C src high limb
    137      1.1  mrg L(one):
    138      1.1  mrg 	shrl	%cl, %eax
    139      1.1  mrg 	popl	%esi	FRAME_popl()
    140      1.1  mrg 
    141      1.1  mrg 	addl	%ebx, %eax		C apply carry bit
    142      1.1  mrg 
    143      1.1  mrg 	subl	%edx, %eax		C apply carry limb
    144      1.1  mrg 
    145      1.1  mrg 	imull	VAR_INVERSE, %eax
    146      1.1  mrg 
    147      1.1  mrg 	movl	%eax, -4(%edi)
    148      1.1  mrg C No stack parameter is referenced below, which is presumably why these pops carry no FRAME_popl.
    149      1.1  mrg 	popl	%edi
    150      1.1  mrg 	popl	%ebx
    151      1.1  mrg 	popl	%ebp
    152      1.1  mrg 
    153      1.1  mrg 	ret	C eax still holds the last quotient limb stored; NOTE(review): confirm whether callers use it
    154      1.1  mrg 
    155      1.1  mrg EPILOGUE()
    156      1.1  mrg 
    157      1.1  mrg C mp_limb_t mpn_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
    158      1.1  mrg C                           mp_limb_t divisor);
    159      1.1  mrg C Counts the low zero bits of divisor as the shift, computes the inverse of the remaining odd part,
    160      1.1  mrg C then continues in mpn_pi1_bdiv_q_1 at L(common) to produce the quotient limbs.
    161      1.1  mrg 	ALIGN(16)
    162      1.1  mrg PROLOGUE(mpn_bdiv_q_1)
    163      1.1  mrg deflit(`FRAME',0)
    164      1.1  mrg C divisor must be nonzero: L(strip_twos) exits only once a set bit has been shifted out.
    165      1.1  mrg 	movl	PARAM_DIVISOR, %eax
    166      1.1  mrg 	pushl	%ebp	FRAME_pushl()
    167      1.1  mrg 
    168      1.1  mrg 	movl	$-1, %ecx		C shift count
    169      1.1  mrg 	movl	PARAM_SIZE, %ebp
    170      1.1  mrg C Same two pushes, ebp then ebx, that mpn_pi1_bdiv_q_1 makes before reaching L(common).
    171      1.1  mrg 	pushl	%ebx	FRAME_pushl()
    172      1.1  mrg 
    173      1.1  mrg L(strip_twos):
    174      1.1  mrg 	incl	%ecx
    175      1.1  mrg 
    176      1.1  mrg 	shrl	%eax	C CF = bit shifted out
    177      1.1  mrg 	jnc	L(strip_twos)
    178      1.1  mrg C Here ecx = number of low zero bits and eax = divisor with those bits and its lowest set bit shifted off.
    179      1.1  mrg 	leal	1(%eax,%eax), %ebx	C d without twos
    180      1.1  mrg 	andl	$127, %eax		C d/2, 7 bits
    181      1.1  mrg C Those 7 bits index the byte table to give a starting inverse.
    182      1.1  mrg ifdef(`PIC',`
    183      1.1  mrg 	LEA(	binvert_limb_table, %edx)
    184      1.1  mrg 	movzbl	(%eax,%edx), %eax		C inv 8 bits
    185      1.1  mrg ',`
    186      1.1  mrg 	movzbl	binvert_limb_table(%eax), %eax	C inv 8 bits
    187      1.1  mrg ')
    188      1.1  mrg C Two Newton steps follow, each doubling the number of valid low bits: 8 to 16, then 16 to 32.
    189      1.1  mrg 	leal	(%eax,%eax), %edx	C 2*inv
    190      1.1  mrg 	movl	%ebx, PARAM_DIVISOR	C d without twos
    191      1.1  mrg 	imull	%eax, %eax		C inv*inv
    192      1.1  mrg 	imull	%ebx, %eax		C inv*inv*d
    193      1.1  mrg 	subl	%eax, %edx		C inv = 2*inv - inv*inv*d
    194      1.1  mrg C Second step, with the roles of eax and edx swapped so the result lands in eax.
    195      1.1  mrg 	leal	(%edx,%edx), %eax	C 2*inv
    196      1.1  mrg 	imull	%edx, %edx		C inv*inv
    197      1.1  mrg 	imull	%ebx, %edx		C inv*inv*d
    198      1.1  mrg 	subl	%edx, %eax		C inv = 2*inv - inv*inv*d
    199      1.1  mrg 
    200      1.1  mrg 	ASSERT(e,`	C expect d*inv == 1 mod 2^GMP_LIMB_BITS
    201      1.1  mrg 	pushl	%eax	FRAME_pushl()
    202      1.1  mrg 	imull	PARAM_DIVISOR, %eax
    203      1.1  mrg 	cmpl	$1, %eax
    204      1.1  mrg 	popl	%eax	FRAME_popl()')
    205      1.1  mrg C Hand-off state: eax = inverse, ecx = shift, ebp = size, PARAM_DIVISOR = d without twos.
    206      1.1  mrg 	jmp	L(common)
    207      1.1  mrg EPILOGUE()
    208  1.1.1.2  mrg ASM_END()
    209