Home | History | Annotate | Line # | Download | only in sse2
      1 dnl  Intel Atom  mpn_bdiv_dbm1c.
      2 
      3 dnl  Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
      4 
      5 dnl  Copyright 2011 Free Software Foundation, Inc.
      6 
      7 dnl  This file is part of the GNU MP Library.
      8 dnl
      9 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     10 dnl  it under the terms of either:
     11 dnl
     12 dnl    * the GNU Lesser General Public License as published by the Free
     13 dnl      Software Foundation; either version 3 of the License, or (at your
     14 dnl      option) any later version.
     15 dnl
     16 dnl  or
     17 dnl
     18 dnl    * the GNU General Public License as published by the Free Software
     19 dnl      Foundation; either version 2 of the License, or (at your option) any
     20 dnl      later version.
     21 dnl
     22 dnl  or both in parallel, as here.
     23 dnl
     24 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
     25 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     26 dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     27 dnl  for more details.
     28 dnl
     29 dnl  You should have received copies of the GNU General Public License and the
     30 dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
     31 dnl  see https://www.gnu.org/licenses/.
     32 
     33 include(`../config.m4')
     34 
     35 C			    cycles/limb
     36 C			    cycles/limb
     37 C P5				 -
     38 C P6 model 0-8,10-12		 -
     39 C P6 model 9  (Banias)		 9.75
     40 C P6 model 13 (Dothan)
     41 C P4 model 0  (Willamette)
     42 C P4 model 1  (?)
     43 C P4 model 2  (Northwood)	 8.25
     44 C P4 model 3  (Prescott)
     45 C P4 model 4  (Nocona)
     46 C Intel Atom			 8
     47 C AMD K6			 -
     48 C AMD K7			 -
     49 C AMD K8
     50 C AMD K10
     51 
     52 C TODO: This code was optimised for atom-32; consider moving it back to the
     53 C	atom dir (atom currently grabs this code), and writing a 4-way version (7 c/l).
     54 
     55 defframe(PARAM_CARRY,20)	C initial value loaded into cy
        C NOTE(review): defframe is defined outside this file; the numbers look
        C like byte offsets of the stack arguments -- confirm.
     56 defframe(PARAM_MUL,  16)	C multiplier, loaded into %mm7
     57 defframe(PARAM_SIZE, 12)	C limb count, loaded into n
     58 defframe(PARAM_SRC,  8)	C source pointer, loaded into up
     59 defframe(PARAM_DST,  4)	C destination pointer, loaded into rp
     60 
     61 dnl  Re-use parameter space: the previous contents of %edi and %esi are kept
        dnl  in argument slots that have already been read, and are restored from
        dnl  there before returning.
     62 define(SAVE_RP,`PARAM_MUL')
     63 define(SAVE_UP,`PARAM_SIZE')
     64 
        C Register roles used throughout the function body.
     65 define(`rp', `%edi')	C destination pointer, advanced as limbs are stored
     66 define(`up', `%esi')	C source pointer, advanced as limbs are loaded
     67 define(`n',  `%ecx')	C loop counter, limb count shifted right by one
     68 define(`reg', `%edx')	C scratch: low or high 32 bits of the current product
     69 define(`cy', `%eax')	C running value that is stored per limb; also the return value
     70 
     71 ASM_START()
     72 	TEXT
     73 	ALIGN(16)
     74 deflit(`FRAME',0)
     75 
        C mpn_bdiv_dbm1c -- arguments in frame order: PARAM_DST, PARAM_SRC,
        C PARAM_SIZE, PARAM_MUL, PARAM_CARRY.
        C
        C For each 32-bit limb read from the source, lowest address first, the code
        C forms the 64-bit product limb * PARAM_MUL with pmuludq and then does
        C	cy -= low 32 bits of the product	[sub, borrow goes to the carry flag]
        C	store cy to the destination
        C	cy -= high 32 bits of the product + borrow	[sbb]
        C The final cy is left in %eax as the return value.
        C
        C The borrow lives in the CPU carry flag between each sub and the sbb that
        C follows it.  Every instruction scheduled in between [mov, lea, dec, movd,
        C movq, psrlq, pmuludq, emms] leaves the carry flag alone, so instructions
        C must not be reordered or replaced without checking their flag effects.
        C
        C %esi and %edi are saved in the PARAM_SIZE and PARAM_MUL slots once those
        C arguments have been read, and are restored before returning.  %ecx, %edx,
        C %mm0, %mm1 and %mm7 are overwritten.
        C
        C NOTE(review): a size of 0 is not special-cased; it takes the same path as
        C size 1 and so reads and writes one limb.  Presumably callers guarantee
        C size >= 1 -- confirm.
     76 PROLOGUE(mpn_bdiv_dbm1c)
     77 	mov	PARAM_SIZE, n		C n = limb count
     78 	mov	up, SAVE_UP		C park %esi in the size slot, which was just read
     79 	mov	PARAM_SRC, up
     80 	movd	PARAM_MUL, %mm7		C mm7 = multiplier for every pmuludq below
     81 	mov	rp, SAVE_RP		C park %edi in the multiplier slot, which was just read
     82 	mov	PARAM_DST, rp
     83 
     84 	movd	(up), %mm0		C mm0 = source limb 0
     85 	pmuludq	%mm7, %mm0		C mm0 = 64-bit product for limb 0
     86 	shr	n			C n = size/2; carry flag = size is odd; zero flag = size < 2
     87 	mov	PARAM_CARRY, cy		C mov keeps the flags produced by shr
     88 	jz	L(eq1)			C one limb only: go straight to the tail
     89 
     90 	movd	4(up), %mm1		C mm1 = source limb 1, not yet multiplied
     91 	jc	L(odd)			C odd size: enter the loop at its midpoint
     92 
        C Even size: process limb 0 here so that the loop plus the tail, which
        C together handle an odd number of limbs, cover the rest.
     93 	lea	4(up), up
     94 	pmuludq	%mm7, %mm1		C mm1 = product for limb 1
     95 	movd	%mm0, reg		C reg = low half of product 0
     96 	psrlq	$32, %mm0
     97 	sub	reg, cy			C cy -= low half; borrow goes to the carry flag
     98 	movd	%mm0, reg		C reg = high half, consumed by the next sbb
     99 	movq	%mm1, %mm0		C product 1 becomes the pending product
    100 	dec	n			C dec does not write the carry flag
    101 	mov	cy, (rp)
    102 	lea	4(rp), rp
    103 	jz	L(end)			C zero flag is from dec: size was 2, only the tail remains
    104 
        C Main loop, two limbs per iteration.  At the top: mm0 holds the pending
        C product, reg holds the high half of the previous product, and the carry
        C flag holds the borrow of the previous sub.
    105 C	ALIGN(16)
    106 L(top):	movd	4(up), %mm1		C next source limb, multiplied below
    107 	sbb	reg, cy			C finish the previous limb: cy -= high half + borrow
    108 L(odd):	movd	%mm0, reg		C reg = low half of the pending product
    109 	psrlq	$32, %mm0
    110 	pmuludq	%mm7, %mm1		C mm1 = product for the second limb of this iteration
    111 	sub	reg, cy
    112 	lea	8(up), up		C lea does not write flags
    113 	movd	%mm0, reg		C reg = high half of the pending product
    114 	movd	(up), %mm0		C load the limb that becomes the next pending product
    115 	mov	cy, (rp)		C store the first result limb of this iteration
    116 	sbb	reg, cy
    117 	movd	%mm1, reg		C reg = low half of the second product
    118 	psrlq	$32, %mm1
    119 	sub	reg, cy
    120 	movd	%mm1, reg		C reg = high half, consumed at the top or at the end label
    121 	pmuludq	%mm7, %mm0		C mm0 = next pending product
    122 	dec	n			C sets the zero flag for jnz, keeps the carry flag
    123 	mov	cy, 4(rp)		C store the second result limb of this iteration
    124 	lea	8(rp), rp
    125 	jnz	L(top)
    126 
    127 L(end):	sbb	reg, cy			C finish the limb stored last
    128 
        C Tail: the last limb, whose product is already in mm0.
    129 L(eq1):	movd	%mm0, reg
    130 	psrlq	$32, %mm0
    131 	mov	SAVE_UP, up		C restore %esi
    132 	sub	reg, cy
    133 	movd	%mm0, reg
    134 	emms				C clear the MMX state before returning
    135 	mov	cy, (rp)		C store the last result limb
    136 	sbb	reg, cy			C final value of cy, returned in %eax
    137 
    138 	mov	SAVE_RP, rp		C restore %edi
    139 	ret
    140 EPILOGUE()
    141 ASM_END()
    142