Home | History | Annotate | Line # | Download | only in atom
lshift.asm revision 1.1
      1  1.1  mrg dnl  Intel Atom mpn_lshift -- mpn left shift.
      2  1.1  mrg 
      3  1.1  mrg dnl  Copyright 2011 Free Software Foundation, Inc.
      4  1.1  mrg 
      5  1.1  mrg dnl  Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
      6  1.1  mrg 
      7  1.1  mrg dnl  This file is part of the GNU MP Library.
      8  1.1  mrg dnl
      9  1.1  mrg dnl  The GNU MP Library is free software; you can redistribute it and/or
     10  1.1  mrg dnl  modify it under the terms of the GNU Lesser General Public License as
     11  1.1  mrg dnl  published by the Free Software Foundation; either version 3 of the
     12  1.1  mrg dnl  License, or (at your option) any later version.
     13  1.1  mrg dnl
     14  1.1  mrg dnl  The GNU MP Library is distributed in the hope that it will be useful,
     15  1.1  mrg dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
     16  1.1  mrg dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     17  1.1  mrg dnl  Lesser General Public License for more details.
     18  1.1  mrg dnl
     19  1.1  mrg dnl  You should have received a copy of the GNU Lesser General Public License
     20  1.1  mrg dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
     21  1.1  mrg 
     22  1.1  mrg include(`../config.m4')
     23  1.1  mrg 
     24  1.1  mrg C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
     25  1.1  mrg C			unsigned cnt);
     26  1.1  mrg 
     27  1.1  mrg C				  cycles/limb
     28  1.1  mrg C				cnt!=1	cnt==1
     29  1.1  mrg C P5
     30  1.1  mrg C P6 model 0-8,10-12
     31  1.1  mrg C P6 model 9  (Banias)
     32  1.1  mrg C P6 model 13 (Dothan)
     33  1.1  mrg C P4 model 0  (Willamette)
     34  1.1  mrg C P4 model 1  (?)
     35  1.1  mrg C P4 model 2  (Northwood)
     36  1.1  mrg C P4 model 3  (Prescott)
     37  1.1  mrg C P4 model 4  (Nocona)
     38  1.1  mrg C Intel Atom			 5	 2.5
     39  1.1  mrg C AMD K6
     40  1.1  mrg C AMD K7
     41  1.1  mrg C AMD K8
     42  1.1  mrg C AMD K10
     43  1.1  mrg 
      44  1.1  mrg defframe(PARAM_CNT, 16)
      45  1.1  mrg defframe(PARAM_SIZE,12)
      46  1.1  mrg defframe(PARAM_SRC,  8)
      47  1.1  mrg defframe(PARAM_DST,  4)
      48  1.1  mrg 
      49  1.1  mrg dnl  Re-use parameter space: once an argument has been loaded into a
                  dnl  register, its stack slot is recycled.  The cnt slot saves the
                  dnl  caller's up register, the size slot holds the loop counter, and
                  dnl  the src and dst slots save the caller's %ebx and %ebp.
      50  1.1  mrg define(SAVE_UP,`PARAM_CNT')
      51  1.1  mrg define(VAR_COUNT,`PARAM_SIZE')
      52  1.1  mrg define(SAVE_EBX,`PARAM_SRC')
      53  1.1  mrg define(SAVE_EBP,`PARAM_DST')
      54  1.1  mrg 
                  dnl  Register aliases used throughout the routine: rp is the
                  dnl  destination pointer, up the source pointer, cnt the shift count
                  dnl  (in %ecx so that %cl can feed the variable shifts).
      55  1.1  mrg define(`rp',  `%edi')
      56  1.1  mrg define(`up',  `%esi')
      57  1.1  mrg define(`cnt',  `%ecx')
     58  1.1  mrg 
     59  1.1  mrg ASM_START()
     60  1.1  mrg 	TEXT
     61  1.1  mrg 	ALIGN(8)
     62  1.1  mrg deflit(`FRAME',0)
      63  1.1  mrg PROLOGUE(mpn_lshift)
                  C Set-up: cnt (%ecx) = shift count, %edx = size, up (%esi) = src,
                  C rp (%edi) = dst.  The caller's %esi goes to SAVE_UP (the cnt
                  C argument slot, already read) and the caller's %edi is pushed.
      64  1.1  mrg 	mov	PARAM_CNT, cnt
      65  1.1  mrg 	mov	PARAM_SIZE, %edx
      66  1.1  mrg 	mov	up, SAVE_UP
      67  1.1  mrg 	mov	PARAM_SRC, up
      68  1.1  mrg 	push	rp			FRAME_pushl()
      69  1.1  mrg 	mov	PARAM_DST, rp
      70  1.1  mrg 
      71  1.1  mrg C We can use faster code for shift-by-1 under certain conditions.
                  C The special code walks from the low limb upwards, whereas the normal
                  C code walks from the high limb downwards, so the special code is only
                  C entered when dst <= src or when dst starts at or beyond src + size.
      72  1.1  mrg 	cmp	$1,cnt
      73  1.1  mrg 	jne	L(normal)
      74  1.1  mrg 	cmpl	rp, up
      75  1.1  mrg 	jnc	L(special)		C jump if s_ptr >= res_ptr
      76  1.1  mrg 	leal	(up,%edx,4),%eax
      77  1.1  mrg 	cmpl	%eax,rp
      78  1.1  mrg 	jnc	L(special)		C jump if res_ptr >= s_ptr + size
      79  1.1  mrg 
      80  1.1  mrg L(normal):
                  C General case.  Point up and rp at the most significant limb.  %edx
                  C becomes size/2 and is kept in VAR_COUNT; the carry out of the shr
                  C tells whether size is odd (the mov instructions leave flags alone).
      81  1.1  mrg 	lea	-4(up,%edx,4), up
      82  1.1  mrg 	mov	%ebx, SAVE_EBX
      83  1.1  mrg 	lea	-4(rp,%edx,4), rp
      84  1.1  mrg 
      85  1.1  mrg 	shr	%edx
      86  1.1  mrg 	mov	(up), %eax
      87  1.1  mrg 	mov	%edx, VAR_COUNT
      88  1.1  mrg 	jnc	L(evn)
      89  1.1  mrg 
                  C Odd size.  cnt is negated back and forth throughout so that %cl
                  C alternately supplies the left shift count and the complementary right
                  C shift count (variable shifts only use the low bits of %cl).
                  C %eax ends up holding the bits shifted out of the top limb, which is
                  C the value returned to the caller.
      90  1.1  mrg 	mov	%eax, %ebx
      91  1.1  mrg 	shl	%cl, %ebx
      92  1.1  mrg 	neg	cnt
      93  1.1  mrg 	shr	%cl, %eax
      94  1.1  mrg 	test	%edx, %edx
      95  1.1  mrg 	jnz	L(gt1)
      96  1.1  mrg 	mov	%ebx, (rp)
      97  1.1  mrg 	jmp	L(quit)
      98  1.1  mrg 
                  C Odd size greater than one: park the return value on the stack and
                  C enter the loop at its midpoint.
      99  1.1  mrg L(gt1):	mov	%ebp, SAVE_EBP
     100  1.1  mrg 	push	%eax
     101  1.1  mrg 	mov	-4(up), %eax
     102  1.1  mrg 	mov	%eax, %ebp
     103  1.1  mrg 	shr	%cl, %eax
     104  1.1  mrg 	jmp	L(lo1)
     105  1.1  mrg 
                  C Even size: load the top two limbs.  %eax becomes the return value;
                  C it is pushed only if the loop is going to run, since the loop reuses
                  C %eax.  The lea instructions keep the flags set by decl for the jz.
     106  1.1  mrg L(evn):	mov	%ebp, SAVE_EBP
     107  1.1  mrg 	neg	cnt
     108  1.1  mrg 	mov	%eax, %ebp
     109  1.1  mrg 	mov	-4(up), %edx
     110  1.1  mrg 	shr	%cl, %eax
     111  1.1  mrg 	mov	%edx, %ebx
     112  1.1  mrg 	shr	%cl, %edx
     113  1.1  mrg 	neg	cnt
     114  1.1  mrg 	decl	VAR_COUNT
     115  1.1  mrg 	lea	4(rp), rp
     116  1.1  mrg 	lea	-4(up), up
     117  1.1  mrg 	jz	L(end)
     118  1.1  mrg 	push	%eax			FRAME_pushl()
     119  1.1  mrg 
                  C Main loop: two result limbs are stored per iteration, working from
                  C high addresses to low, with VAR_COUNT counting the iterations down.
     120  1.1  mrg 	ALIGN(8)
     121  1.1  mrg L(top):	shl	%cl, %ebp
     122  1.1  mrg 	or	%ebp, %edx
     123  1.1  mrg 	shl	%cl, %ebx
     124  1.1  mrg 	neg	cnt
     125  1.1  mrg 	mov	-4(up), %eax
     126  1.1  mrg 	mov	%eax, %ebp
     127  1.1  mrg 	mov	%edx, -4(rp)
     128  1.1  mrg 	shr	%cl, %eax
     129  1.1  mrg 	lea	-8(rp), rp
     130  1.1  mrg L(lo1):	mov	-8(up), %edx
     131  1.1  mrg 	or	%ebx, %eax
     132  1.1  mrg 	mov	%edx, %ebx
     133  1.1  mrg 	shr	%cl, %edx
     134  1.1  mrg 	lea	-8(up), up
     135  1.1  mrg 	neg	cnt
     136  1.1  mrg 	mov	%eax, (rp)
     137  1.1  mrg 	decl	VAR_COUNT
     138  1.1  mrg 	jg	L(top)
     139  1.1  mrg 
     140  1.1  mrg 	pop	%eax			FRAME_popl()
                  C Wind-down: combine and store the last two result limbs; the lowest
                  C limb only receives the left-shifted part.
     141  1.1  mrg L(end):
     142  1.1  mrg 	shl	%cl, %ebp
     143  1.1  mrg 	shl	%cl, %ebx
     144  1.1  mrg 	or	%ebp, %edx
     145  1.1  mrg 	mov	SAVE_EBP, %ebp
     146  1.1  mrg 	mov	%edx, -4(rp)
     147  1.1  mrg 	mov	%ebx, -8(rp)
     148  1.1  mrg 
                  C Restore the saved registers and return with the result in %eax.
     149  1.1  mrg L(quit):
     150  1.1  mrg 	mov	SAVE_UP, up
     151  1.1  mrg 	mov	SAVE_EBX, %ebx
     152  1.1  mrg 	pop	rp			FRAME_popl()
     153  1.1  mrg 	ret
     154  1.1  mrg 
                  C Shift-by-1 code: every limb is doubled with add/adc, the carry flag
                  C passing the top bit of one limb into the next higher limb.  The loop
                  C handles four limbs per pass and %eax = (size + 3) / 4 counts passes;
                  C the code below picks the loop entry point from size mod 4.  The lea,
                  C mov and dec instructions used between the adc's do not modify the
                  C carry flag, so the carry chain survives pointer and counter updates.
     155  1.1  mrg L(special):
     156  1.1  mrg deflit(`FRAME',4)
     157  1.1  mrg 	lea	3(%edx), %eax		C size + 3
     158  1.1  mrg 	dec	%edx			C size - 1
     159  1.1  mrg 	mov	(up), %ecx
     160  1.1  mrg 	shr	$2, %eax		C (size + 3) / 4
     161  1.1  mrg 	and	$3, %edx		C (size - 1) % 4
     162  1.1  mrg 	jz	L(goloop)		C jump if  size == 1 (mod 4)
     163  1.1  mrg 	shr	%edx
     164  1.1  mrg 	jnc	L(odd)			C jump if  size == 3 (mod 4)
     165  1.1  mrg 
                  C size == 0 or 2 (mod 4): shift one limb up front, starting the carry
                  C chain with add.
     166  1.1  mrg 	add	%ecx, %ecx
     167  1.1  mrg 	lea	4(up), up
     168  1.1  mrg 	mov	%ecx, (rp)
     169  1.1  mrg 	mov	(up), %ecx
     170  1.1  mrg 	lea	4(rp), rp
     171  1.1  mrg 
     172  1.1  mrg 	dec	%edx
     173  1.1  mrg 	jnz	L(goloop)		C jump if  size == 2 (mod 4)
     174  1.1  mrg L(odd):	lea	-8(up), up
     175  1.1  mrg 	lea	-8(rp), rp
     176  1.1  mrg 	jmp	L(sentry)		C reached if size == 0 or 3 (mod 4)
     177  1.1  mrg 
     178  1.1  mrg L(sloop):
     179  1.1  mrg 	adc	%ecx, %ecx
     180  1.1  mrg 	mov	4(up), %edx
     181  1.1  mrg 	mov	%ecx, (rp)
     182  1.1  mrg 	adc	%edx, %edx
     183  1.1  mrg 	mov	8(up), %ecx
     184  1.1  mrg 	mov	%edx, 4(rp)
     185  1.1  mrg L(sentry):
     186  1.1  mrg 	adc	%ecx, %ecx
     187  1.1  mrg 	mov	12(up), %edx
     188  1.1  mrg 	mov	%ecx, 8(rp)
     189  1.1  mrg 	adc	%edx, %edx
     190  1.1  mrg 	lea	16(up), up
     191  1.1  mrg 	mov	%edx, 12(rp)
     192  1.1  mrg 	lea	16(rp), rp
     193  1.1  mrg 	mov	(up), %ecx
     194  1.1  mrg L(goloop):
     195  1.1  mrg 	decl	%eax
     196  1.1  mrg 	jnz	L(sloop)
     197  1.1  mrg 
                  C Final limb.  %eax is zero here (the loop counter just ran out), so
                  C the last adc turns the carry out of the top limb into the return value.
     198  1.1  mrg L(squit):
     199  1.1  mrg 	adc	%ecx, %ecx
     200  1.1  mrg 	mov	%ecx, (rp)
     201  1.1  mrg 	adc	%eax, %eax
     202  1.1  mrg 
     203  1.1  mrg 	mov	SAVE_UP, up
     204  1.1  mrg 	pop	rp			FRAME_popl()
     205  1.1  mrg 	ret
     206  1.1  mrg EPILOGUE()
    207  1.1  mrg ASM_END()
    208