Home | History | Annotate | Line # | Download | only in x86_64
      1 dnl  x86-64 mpn_div_qr_2n_pi1
      2 dnl  -- Divide an mpn number by a normalized 2-limb number,
      3 dnl     using a single-limb inverse.
      4 
      5 dnl  Copyright 2007, 2008, 2010-2012 Free Software Foundation, Inc.
      6 
      7 dnl  This file is part of the GNU MP Library.
      8 dnl
      9 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     10 dnl  it under the terms of either:
     11 dnl
     12 dnl    * the GNU Lesser General Public License as published by the Free
     13 dnl      Software Foundation; either version 3 of the License, or (at your
     14 dnl      option) any later version.
     15 dnl
     16 dnl  or
     17 dnl
     18 dnl    * the GNU General Public License as published by the Free Software
     19 dnl      Foundation; either version 2 of the License, or (at your option) any
     20 dnl      later version.
     21 dnl
     22 dnl  or both in parallel, as here.
     23 dnl
     24 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
     25 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     26 dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     27 dnl  for more details.
     28 dnl
     29 dnl  You should have received copies of the GNU General Public License and the
     30 dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
     31 dnl  see https://www.gnu.org/licenses/.
     32 
     33 include(`../config.m4')
     34 
     35 
C		c/l
C INPUT PARAMETERS
C Locations of the arguments on entry, STD64 convention.
define(`qp',		`%rdi')		C quotient limbs are stored through this
define(`rp',		`%rsi')		C the two remainder limbs are stored here
define(`up_param',	`%rdx')		C dividend pointer as passed in
define(`un',		`%rcx')		C dividend limb count, then loop index
define(`d1',		`%r8')		C high divisor limb
define(`d0',		`%r9')		C low divisor limb
define(`di_param',	`8(%rsp)')	C inverse limb, on the stack at entry

C Working registers.
define(`di',		`%r10')		C inverse limb loaded from di_param
define(`up',		`%r11')		C copy of up_param, since mul writes rdx
define(`u2',		`%rbx')		C high limb of the running remainder
define(`u1',		`%r12')		C low limb of the running remainder
define(`t1',		`%r13')		C scratch; quotient limb q in the loop
define(`t0',		`%r14')		C scratch; q0 in the loop
define(`md1',		`%r15')		C holds -d1
     53 
     54 C TODO
     55 C * Store qh in the same stack slot as di_param, instead of pushing
     56 C   it. (we could put it in register %rbp, but then we would need to
     57 C   save and restore that instead, which doesn't seem like a win).
     58 
     59 ABI_SUPPORT(DOS64)
     60 ABI_SUPPORT(STD64)
     61 
     62 ASM_START()
     63 	TEXT
     64 	ALIGN(16)
C mpn_div_qr_2n_pi1 -- divide the un-limb number at up by the normalized
C two-limb divisor d1,d0 using the single-limb inverse di.
C
C Arguments, STD64 locations:
C   qp  rdi      quotient limbs are stored here, index un-3 down to 0
C   rp  rsi      remainder, low limb at offset 0 and high limb at offset 8
C   up  rdx      dividend limbs, only read
C   un  rcx      number of dividend limbs; up[un-1] and up[un-2] are read
C                unconditionally, so un must be at least 2
C   d1  r8       high divisor limb
C   d0  r9       low divisor limb
C   di  8(%rsp)  inverse limb, passed on the stack
C Return value in rax: qh, the most significant quotient limb, 0 or 1.
C
C The loop runs un-2 times and produces one quotient limb per iteration.
C rax, rcx, rdx, r10 and r11 are overwritten; rbx and r12-r15 are pushed
C on entry and popped before returning.
C
C NOTE(review): the C prototype is presumably
C   mp_limb_t mpn_div_qr_2n_pi1 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t,
C                                mp_limb_t, mp_limb_t, mp_limb_t)
C -- confirm against gmp-impl.h.
PROLOGUE(mpn_div_qr_2n_pi1)
	FUNC_ENTRY(4)
	C On DOS64 the fifth to seventh arguments come from the stack.  The
	C offsets presumably include what FUNC_ENTRY pushed -- confirm against
	C the macro definition in config.m4.
IFDOS(`	mov	56(%rsp), %r8	')
IFDOS(`	mov	64(%rsp), %r9	')
IFDOS(`define(`di_param', `72(%rsp)')')
	C di_param is rsp-relative, so it is read before the pushes below
	C move rsp.  up gets its own register because mul overwrites rdx.
	mov	di_param, di
	mov	up_param, up
	push	%r15
	push	%r14
	push	%r13
	push	%r12
	push	%rbx

	C The two most significant dividend limbs form the initial remainder.
	mov	-16(up, un, 8), u1
	mov	-8(up, un, 8), u2

	C Trial subtraction u - d.  Without borrow u >= d, so the difference
	C replaces u and qh is 1; with borrow u is kept and qh is 0.  cmov
	C leaves the flags alone, so the borrow is still in carry for the
	C sbb that builds qh.
	mov	u1, t0
	mov	u2, t1
	sub	d0, t0
	sbb	d1, t1
	cmovnc	t0, u1
	cmovnc	t1, u2
	C push qh which is !carry
	sbb	%rax, %rax		C rax = -carry
	inc	%rax			C rax = 1 - carry
	push	%rax
	lea	-2(un), un		C the top two limbs are consumed
	mov	d1, md1
	neg	md1			C md1 = -d1, multiplier for the imul below

	jmp	L(next)

	ALIGN(16)
L(loop):
	C udiv_qr_3by2 (q,u2,u1,u2,u1,n0, d1,d0,di)
	C Based on the optimized divrem_2.asm code.
	C
	C Divides the three limbs u2,u1,up[un] by d1,d0.  On entry u2,u1 is
	C the remainder so far; at L(bck) t1 holds the quotient limb and u2,u1
	C the new remainder.

	mov	di, %rax
	mul	u2			C rdx:rax = di * u2
	mov	u1, t0
	add	%rax, t0	C q0 in t0
	adc	u2, %rdx		C high product limb + u2 + carry
	mov	%rdx, t1	C q in t1
	imul	md1, %rdx		C low limb of -q * d1
	mov	d0, %rax
	lea	(%rdx, u1), u2		C u2 = u1 - q * d1 mod 2^64, flags untouched
	mul	t1			C rdx:rax = q * d0
	mov	(up, un, 8), u1		C next lower dividend limb
	sub	d0, u1			C subtract d once up front, as for q + 1
	sbb	d1, u2
	sub	%rax, u1		C subtract q * d0
	sbb	%rdx, u2
	xor	R32(%rax), R32(%rax)
	xor	R32(%rdx), R32(%rdx)
	cmp	t0, u2			C carry set iff u2 < q0
	cmovnc	d0, %rax		C no carry: rdx:rax = d, to be added back
	cmovnc	d1, %rdx
	adc	$0, t1			C carry: the quotient limb becomes q + 1
	nop				C NOTE(review): presumably padding or scheduling
	add	%rax, u1		C add back d, or zero
	adc	%rdx, u2
	cmp	d1, u2
	jae	L(fix)			C remainder may still be >= d
L(bck):
	mov	t1, (qp, un, 8)		C store the quotient limb
L(next):
	sub	$1, un
	jnc	L(loop)			C continue until un borrows below zero
L(end):
	mov	u2, 8(rp)
	mov	u1, (rp)

	C qh on stack
	pop	%rax

	pop	%rbx
	pop	%r12
	pop	%r13
	pop	%r14
	pop	%r15
	FUNC_EXIT()
	ret

L(fix):	C Unlikely update. u2 >= d1
	C Entered with the flags of cmp d1, u2 still live.  Given u2 >= d1,
	C the remainder is >= d exactly when u2 > d1 or u1 >= d0.
	seta	%dl			C dl = u2 > d1
	cmp	d0, u1
	setae	%al			C al = u1 >= d0
	orb	%dl, %al		C "orb" form to placate Sun tools
	je	L(bck)			C both clear: remainder is already < d
	inc	t1
	sub	d0, u1
	sbb	d1, u2
	jmp	L(bck)
EPILOGUE()
    159