Home | History | Annotate | Line # | Download | only in atom
      1      1.1  mrg dnl  Intel Atom mpn_lshift -- mpn left shift.
      2      1.1  mrg 
      3      1.1  mrg dnl  Copyright 2011 Free Software Foundation, Inc.
      4      1.1  mrg 
      5      1.1  mrg dnl  Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
      6      1.1  mrg 
      7      1.1  mrg dnl  This file is part of the GNU MP Library.
      8      1.1  mrg dnl
      9  1.1.1.2  mrg dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     10  1.1.1.2  mrg dnl  it under the terms of either:
     11      1.1  mrg dnl
     12  1.1.1.2  mrg dnl    * the GNU Lesser General Public License as published by the Free
     13  1.1.1.2  mrg dnl      Software Foundation; either version 3 of the License, or (at your
     14  1.1.1.2  mrg dnl      option) any later version.
     15  1.1.1.2  mrg dnl
     16  1.1.1.2  mrg dnl  or
     17  1.1.1.2  mrg dnl
     18  1.1.1.2  mrg dnl    * the GNU General Public License as published by the Free Software
     19  1.1.1.2  mrg dnl      Foundation; either version 2 of the License, or (at your option) any
     20  1.1.1.2  mrg dnl      later version.
     21  1.1.1.2  mrg dnl
     22  1.1.1.2  mrg dnl  or both in parallel, as here.
     23  1.1.1.2  mrg dnl
     24  1.1.1.2  mrg dnl  The GNU MP Library is distributed in the hope that it will be useful, but
     25  1.1.1.2  mrg dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     26  1.1.1.2  mrg dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     27  1.1.1.2  mrg dnl  for more details.
     28  1.1.1.2  mrg dnl
     29  1.1.1.2  mrg dnl  You should have received copies of the GNU General Public License and the
     30  1.1.1.2  mrg dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
     31  1.1.1.2  mrg dnl  see https://www.gnu.org/licenses/.
     32      1.1  mrg 
     33      1.1  mrg include(`../config.m4')
     34      1.1  mrg 
     35      1.1  mrg C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
     36      1.1  mrg C			unsigned cnt);
     37      1.1  mrg 
     38      1.1  mrg C				  cycles/limb
     39      1.1  mrg C				cnt!=1	cnt==1
     40      1.1  mrg C P5
     41      1.1  mrg C P6 model 0-8,10-12
     42      1.1  mrg C P6 model 9  (Banias)
     43      1.1  mrg C P6 model 13 (Dothan)
     44      1.1  mrg C P4 model 0  (Willamette)
     45      1.1  mrg C P4 model 1  (?)
     46      1.1  mrg C P4 model 2  (Northwood)
     47      1.1  mrg C P4 model 3  (Prescott)
     48      1.1  mrg C P4 model 4  (Nocona)
     49      1.1  mrg C Intel Atom			 5	 2.5
     50      1.1  mrg C AMD K6
     51      1.1  mrg C AMD K7
     52      1.1  mrg C AMD K8
     53      1.1  mrg C AMD K10
     54      1.1  mrg 
     55      1.1  mrg defframe(PARAM_CNT, 16)
     56      1.1  mrg defframe(PARAM_SIZE,12)
     57      1.1  mrg defframe(PARAM_SRC,  8)
     58      1.1  mrg defframe(PARAM_DST,  4)
     59      1.1  mrg 
     60      1.1  mrg dnl  re-use parameter space: each slot below is written only after its argument has been loaded into a register
     61      1.1  mrg define(SAVE_UP,`PARAM_CNT')	C holds the saved esi
     62      1.1  mrg define(VAR_COUNT,`PARAM_SIZE')	C loop counter of the general shift path
     63      1.1  mrg define(SAVE_EBX,`PARAM_SRC')	C holds the saved ebx
     64      1.1  mrg define(SAVE_EBP,`PARAM_DST')	C holds the saved ebp
     65      1.1  mrg 
     66      1.1  mrg define(`rp',  `%edi')	C destination pointer
     67      1.1  mrg define(`up',  `%esi')	C source pointer
     68      1.1  mrg define(`cnt',  `%ecx')	C shift count
     69      1.1  mrg C mpn_lshift: shift the size-limb number at src left by cnt bits, store it at dst, and return in eax the bits shifted out of the top limb.
     70      1.1  mrg ASM_START()
     71      1.1  mrg 	TEXT
     72      1.1  mrg 	ALIGN(8)
     73      1.1  mrg deflit(`FRAME',0)
     74      1.1  mrg PROLOGUE(mpn_lshift)
     75      1.1  mrg 	mov	PARAM_CNT, cnt		C ecx = shift count
     76      1.1  mrg 	mov	PARAM_SIZE, %edx	C edx = size in limbs
     77      1.1  mrg 	mov	up, SAVE_UP		C park esi in the cnt slot, which has just been read
     78      1.1  mrg 	mov	PARAM_SRC, up		C esi = src
     79      1.1  mrg 	push	rp			FRAME_pushl()
     80      1.1  mrg 	mov	PARAM_DST, rp		C edi = dst
     81      1.1  mrg 
     82      1.1  mrg C We can use faster code for shift-by-1 when running from the low limb upward cannot overwrite source limbs that are still unread.
     83      1.1  mrg 	cmp	$1,cnt
     84      1.1  mrg 	jne	L(normal)
     85      1.1  mrg 	cmpl	rp, up
     86      1.1  mrg 	jnc	L(special)		C jump if s_ptr >= res_ptr
     87      1.1  mrg 	leal	(up,%edx,4),%eax
     88      1.1  mrg 	cmpl	%eax,rp
     89      1.1  mrg 	jnc	L(special)		C jump if res_ptr >= s_ptr + size
     90      1.1  mrg C General case: work from the most significant limb downward.
     91      1.1  mrg L(normal):
     92      1.1  mrg 	lea	-4(up,%edx,4), up	C esi = address of top source limb
     93      1.1  mrg 	mov	%ebx, SAVE_EBX		C park ebx in the src slot
     94      1.1  mrg 	lea	-4(rp,%edx,4), rp	C edi = address of top destination limb
     95      1.1  mrg 
     96      1.1  mrg 	shr	%edx			C edx = size / 2, CF = 1 if size is odd
     97      1.1  mrg 	mov	(up), %eax		C eax = top source limb
     98      1.1  mrg 	mov	%edx, VAR_COUNT		C loop counter lives in the size slot
     99      1.1  mrg 	jnc	L(evn)			C jump if size is even
    100      1.1  mrg 
    101      1.1  mrg 	mov	%eax, %ebx
    102      1.1  mrg 	shl	%cl, %ebx		C ebx = top limb << cnt
    103      1.1  mrg 	neg	cnt			C shifts use cl mod 32, so -cnt acts as 32 - cnt
    104      1.1  mrg 	shr	%cl, %eax		C eax = bits shifted out of the top limb (return value)
    105      1.1  mrg 	test	%edx, %edx
    106      1.1  mrg 	jnz	L(gt1)			C jump if size > 1
    107      1.1  mrg 	mov	%ebx, (rp)		C size == 1: store the only result limb
    108      1.1  mrg 	jmp	L(quit)
    109      1.1  mrg 
    110      1.1  mrg L(gt1):	mov	%ebp, SAVE_EBP		C park ebp in the dst slot
    111      1.1  mrg 	push	%eax			C keep the return value on the stack until the loop is done
    112      1.1  mrg 	mov	-4(up), %eax
    113      1.1  mrg 	mov	%eax, %ebp		C ebp = unshifted copy of the next limb
    114      1.1  mrg 	shr	%cl, %eax		C eax = next limb >> (32 - cnt)
    115      1.1  mrg 	jmp	L(lo1)			C enter the loop at its midpoint
    116      1.1  mrg 
    117      1.1  mrg L(evn):	mov	%ebp, SAVE_EBP		C park ebp in the dst slot
    118      1.1  mrg 	neg	cnt			C cl now acts as 32 - cnt
    119      1.1  mrg 	mov	%eax, %ebp		C ebp = unshifted top limb
    120      1.1  mrg 	mov	-4(up), %edx
    121      1.1  mrg 	shr	%cl, %eax		C eax = bits shifted out of the top limb (return value)
    122      1.1  mrg 	mov	%edx, %ebx		C ebx = unshifted second limb
    123      1.1  mrg 	shr	%cl, %edx		C edx = second limb >> (32 - cnt)
    124      1.1  mrg 	neg	cnt			C cl back to cnt
    125      1.1  mrg 	decl	VAR_COUNT		C sets ZF for the jz below; the lea in between leaves flags alone
    126      1.1  mrg 	lea	4(rp), rp
    127      1.1  mrg 	lea	-4(up), up
    128      1.1  mrg 	jz	L(end)			C size == 2: no loop passes needed
    129      1.1  mrg 	push	%eax			FRAME_pushl()
    130      1.1  mrg C Main loop: two limbs per pass; neg flips cl between cnt and 32 - cnt.
    131      1.1  mrg 	ALIGN(8)
    132      1.1  mrg L(top):	shl	%cl, %ebp
    133      1.1  mrg 	or	%ebp, %edx		C edx = completed result limb
    134      1.1  mrg 	shl	%cl, %ebx
    135      1.1  mrg 	neg	cnt			C cl acts as 32 - cnt
    136      1.1  mrg 	mov	-4(up), %eax
    137      1.1  mrg 	mov	%eax, %ebp
    138      1.1  mrg 	mov	%edx, -4(rp)
    139      1.1  mrg 	shr	%cl, %eax
    140      1.1  mrg 	lea	-8(rp), rp
    141      1.1  mrg L(lo1):	mov	-8(up), %edx
    142      1.1  mrg 	or	%ebx, %eax		C eax = completed result limb
    143      1.1  mrg 	mov	%edx, %ebx
    144      1.1  mrg 	shr	%cl, %edx
    145      1.1  mrg 	lea	-8(up), up
    146      1.1  mrg 	neg	cnt			C cl back to cnt
    147      1.1  mrg 	mov	%eax, (rp)
    148      1.1  mrg 	decl	VAR_COUNT
    149      1.1  mrg 	jg	L(top)
    150      1.1  mrg 
    151      1.1  mrg 	pop	%eax			FRAME_popl()
    152      1.1  mrg L(end):
    153      1.1  mrg 	shl	%cl, %ebp
    154      1.1  mrg 	shl	%cl, %ebx		C ebx = lowest source limb << cnt
    155      1.1  mrg 	or	%ebp, %edx		C edx = second-lowest result limb
    156      1.1  mrg 	mov	SAVE_EBP, %ebp		C restore ebp
    157      1.1  mrg 	mov	%edx, -4(rp)
    158      1.1  mrg 	mov	%ebx, -8(rp)		C store lowest result limb
    159      1.1  mrg 
    160      1.1  mrg L(quit):
    161      1.1  mrg 	mov	SAVE_UP, up		C restore esi
    162      1.1  mrg 	mov	SAVE_EBX, %ebx		C restore ebx
    163      1.1  mrg 	pop	rp			FRAME_popl()
    164      1.1  mrg 	ret
    165      1.1  mrg C Shift-by-1 path: walk from the low limb upward, doubling each limb with adc so that CF carries the bit between limbs; ebx and ebp are not used here.
    166      1.1  mrg L(special):
    167      1.1  mrg deflit(`FRAME',4)
    168      1.1  mrg 	lea	3(%edx), %eax		C size + 3
    169      1.1  mrg 	dec	%edx			C size - 1
    170      1.1  mrg 	mov	(up), %ecx		C ecx = lowest source limb; the count is no longer needed
    171      1.1  mrg 	shr	$2, %eax		C (size + 3) / 4
    172      1.1  mrg 	and	$3, %edx		C (size - 1) % 4; and also clears CF
    173      1.1  mrg 	jz	L(goloop)		C jump if  size == 1 (mod 4)
    174      1.1  mrg 	shr	%edx			C CF = low bit of (size - 1) % 4
    175      1.1  mrg 	jnc	L(odd)			C jump if  size == 3 (mod 4)
    176      1.1  mrg 
    177      1.1  mrg 	add	%ecx, %ecx		C double the lowest limb; CF = its top bit
    178      1.1  mrg 	lea	4(up), up
    179      1.1  mrg 	mov	%ecx, (rp)
    180      1.1  mrg 	mov	(up), %ecx
    181      1.1  mrg 	lea	4(rp), rp
    182      1.1  mrg 
    183      1.1  mrg 	dec	%edx			C dec leaves CF from the add intact
    184      1.1  mrg 	jnz	L(goloop)		C jump if  size == 2 (mod 4)
    185      1.1  mrg L(odd):	lea	-8(up), up		C bias both pointers for entry at the loop midpoint
    186      1.1  mrg 	lea	-8(rp), rp
    187      1.1  mrg 	jmp	L(sentry)		C reached if size == 0 or 3 (mod 4)
    188      1.1  mrg C Loop: four limbs per pass; only adc writes CF here, so the carry chain survives the mov, lea and dec instructions.
    189      1.1  mrg L(sloop):
    190      1.1  mrg 	adc	%ecx, %ecx
    191      1.1  mrg 	mov	4(up), %edx
    192      1.1  mrg 	mov	%ecx, (rp)
    193      1.1  mrg 	adc	%edx, %edx
    194      1.1  mrg 	mov	8(up), %ecx
    195      1.1  mrg 	mov	%edx, 4(rp)
    196      1.1  mrg L(sentry):
    197      1.1  mrg 	adc	%ecx, %ecx
    198      1.1  mrg 	mov	12(up), %edx
    199      1.1  mrg 	mov	%ecx, 8(rp)
    200      1.1  mrg 	adc	%edx, %edx
    201      1.1  mrg 	lea	16(up), up
    202      1.1  mrg 	mov	%edx, 12(rp)
    203      1.1  mrg 	lea	16(rp), rp
    204      1.1  mrg 	mov	(up), %ecx		C preload the first limb of the next pass
    205      1.1  mrg L(goloop):
    206      1.1  mrg 	decl	%eax			C one pass fewer to go; CF is left untouched
    207      1.1  mrg 	jnz	L(sloop)
    208      1.1  mrg 
    209      1.1  mrg L(squit):
    210      1.1  mrg 	adc	%ecx, %ecx		C double the top limb
    211      1.1  mrg 	mov	%ecx, (rp)
    212      1.1  mrg 	adc	%eax, %eax		C eax is 0 here, so eax = carry out of the top limb (return value)
    213      1.1  mrg 
    214      1.1  mrg 	mov	SAVE_UP, up		C restore esi
    215      1.1  mrg 	pop	rp			FRAME_popl()
    216      1.1  mrg 	ret
    217      1.1  mrg EPILOGUE()
    218      1.1  mrg ASM_END()
    219