Home | History | Annotate | Line # | Download | only in atom
      1 dnl  Intel Atom mpn_lshiftc -- mpn left shift with complement.
      2 
      3 dnl  Copyright 2011 Free Software Foundation, Inc.
      4 
      5 dnl  Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
      6 
      7 dnl  This file is part of the GNU MP Library.
      8 dnl
      9 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     10 dnl  it under the terms of either:
     11 dnl
     12 dnl    * the GNU Lesser General Public License as published by the Free
     13 dnl      Software Foundation; either version 3 of the License, or (at your
     14 dnl      option) any later version.
     15 dnl
     16 dnl  or
     17 dnl
     18 dnl    * the GNU General Public License as published by the Free Software
     19 dnl      Foundation; either version 2 of the License, or (at your option) any
     20 dnl      later version.
     21 dnl
     22 dnl  or both in parallel, as here.
     23 dnl
     24 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
     25 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     26 dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     27 dnl  for more details.
     28 dnl
     29 dnl  You should have received copies of the GNU General Public License and the
     30 dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
     31 dnl  see https://www.gnu.org/licenses/.
     32 
     33 include(`../config.m4')
     34 
     35 C mp_limb_t mpn_lshiftc (mp_ptr dst, mp_srcptr src, mp_size_t size,
     36 C			 unsigned cnt);
     37 
     38 C				cycles/limb
     39 C P5
     40 C P6 model 0-8,10-12
     41 C P6 model 9  (Banias)
     42 C P6 model 13 (Dothan)
     43 C P4 model 0  (Willamette)
     44 C P4 model 1  (?)
     45 C P4 model 2  (Northwood)
     46 C P4 model 3  (Prescott)
     47 C P4 model 4  (Nocona)
     48 C Intel Atom			 5.5
     49 C AMD K6
     50 C AMD K7
     51 C AMD K8
     52 C AMD K10
     53 
        dnl  Stack offsets of the four incoming arguments, in prototype order
        dnl  (dst, src, size, cnt).  defframe comes from the included m4
        dnl  definitions; presumably the offsets are relative to %esp at entry
        dnl  and get adjusted by the running FRAME value -- confirm there.
     54 defframe(PARAM_CNT, 16)
     55 defframe(PARAM_SIZE,12)
     56 defframe(PARAM_SRC,  8)
     57 defframe(PARAM_DST,  4)
     58 
     59 dnl  re-use parameter space
        dnl  Each argument slot is recycled as scratch storage once the code
        dnl  below has loaded that argument into a register:
        dnl    SAVE_UP   (cnt slot)  holds the caller's %esi
        dnl    VAR_COUNT (size slot) holds the in-memory loop counter
        dnl    SAVE_EBX  (src slot)  holds the caller's %ebx
        dnl    SAVE_EBP  (dst slot)  holds the caller's %ebp
     60 define(SAVE_UP,`PARAM_CNT')
     61 define(VAR_COUNT,`PARAM_SIZE')
     62 define(SAVE_EBX,`PARAM_SRC')
     63 define(SAVE_EBP,`PARAM_DST')
     64 
        dnl  Register aliases used by the code below: rp is the destination
        dnl  pointer, up is the source pointer, cnt is the shift count (the
        dnl  shift instructions take it from %cl, the low byte of %ecx).
     65 define(`rp',  `%edi')
     66 define(`up',  `%esi')
     67 define(`cnt',  `%ecx')
     68 
     69 ASM_START()
     70 	TEXT
     71 
        C mpn_lshiftc (dst, src, size, cnt)
        C
        C Shifts the size-limb operand at src left by cnt bits and stores the
        C one's complement of every result limb at dst.  Limbs are 32 bits.
        C Returns in %eax the bits shifted out of the most significant limb
        C (the return value itself is not complemented).
        C
        C Limbs are processed from the most significant down to the least
        C significant, two per loop iteration.
        C
        C NOTE(review): nothing here checks the arguments; the code appears to
        C assume size >= 1 and 1 <= cnt <= 31 -- confirm against the callers.
        C
        C Register use:
        C   cnt (%ecx)  shift count.  It is negated back and forth so that %cl
        C               serves both the left shift by cnt and the right shift
        C               by 32-cnt (32-bit shifts use only the low 5 bits of
        C               %cl).
        C   up, rp      source / destination pointers, walking downwards.
        C   %eax, %edx  limbs being shifted right (carry-in parts).
        C   %ebp, %ebx  copies of those limbs, shifted left one step later.
        C   VAR_COUNT   number of limb pairs still to do, kept in memory.
        C %esi, %edi, %ebx and %ebp are saved on entry and restored before
        C returning (%ebp only on the paths that modify it).
     72 PROLOGUE(mpn_lshiftc)
     73 deflit(`FRAME',0)
     74 	mov	PARAM_CNT, cnt		C cnt = shift count
     75 	mov	PARAM_SIZE, %edx		C edx = size in limbs
     76 	mov	up, SAVE_UP		C save %esi; the cnt slot is free now
     77 	mov	PARAM_SRC, up		C up = src
     78 	push	rp			FRAME_pushl()	C save %edi on the stack
     79 	mov	PARAM_DST, rp		C rp = dst
     80 
     81 	lea	-4(up,%edx,4), up		C up -> most significant source limb
     82 	mov	%ebx, SAVE_EBX		C save %ebx; the src slot is free now
     83 	lea	-4(rp,%edx,4), rp		C rp -> most significant destination limb
     84 
     85 	shr	%edx		C edx = size/2, carry = size & 1
     86 	mov	(up), %eax		C eax = top limb (mov keeps the carry)
     87 	mov	%edx, VAR_COUNT		C loop counter = number of limb pairs
     88 	jnc	L(evn)		C even size
     89 
        C Odd size: handle the top limb on its own first.
     90 	mov	%eax, %ebx
     91 	shl	%cl, %ebx		C ebx = top limb << cnt
     92 	neg	cnt		C %cl now gives the right shift by 32-cnt
     93 	shr	%cl, %eax		C eax = bits shifted out = return value
     94 	test	%edx, %edx
     95 	jnz	L(gt1)		C size > 1
     96 	not	%ebx		C size == 1: complement the only limb
     97 	mov	%ebx, (rp)
     98 	jmp	L(quit)		C %ebp was never touched on this path
     99 
    100 L(gt1):	mov	%ebp, SAVE_EBP		C save %ebp; the dst slot is free now
        C Park the return value on the stack; it is popped after the loop.
        C NOTE(review): this push has no FRAME_pushl().  No frame-relative
        C macro is used between here and the jump to L(lo1), and the push in
        C the L(evn) path below carries the FRAME_pushl() seen by the loop
        C text -- looks intentional, confirm.
    101 	push	%eax
    102 	mov	-4(up), %eax		C eax = second limb from the top
    103 	mov	%eax, %ebp		C keep a copy for its later left shift
    104 	shr	%cl, %eax		C its high bits, merged below the top limb
    105 	jmp	L(lo1)		C enter the loop at its midpoint
    106 
        C Even size: preload the top two limbs.
    107 L(evn):	mov	%ebp, SAVE_EBP		C save %ebp; the dst slot is free now
    108 	neg	cnt		C %cl = right shift by 32-cnt
    109 	mov	%eax, %ebp		C ebp = top limb, shifted left later
    110 	mov	-4(up), %edx		C edx = second limb from the top
    111 	shr	%cl, %eax		C eax = bits shifted out = return value
    112 	mov	%edx, %ebx		C ebx = copy of the second limb
    113 	shr	%cl, %edx		C edx = high bits of the second limb
    114 	neg	cnt		C %cl = left shift by cnt again
    115 	decl	VAR_COUNT		C one pair is already loaded; ZF if size == 2
    116 	lea	4(rp), rp		C bias rp so stores go to -4(rp); lea keeps flags
    117 	lea	-4(up), up		C up -> second limb from the top
    118 	jz	L(end)		C tests ZF from the decl above
    119 	push	%eax			FRAME_pushl()	C park the return value during the loop
    120 
        C Main loop, two limbs per iteration.
        C At L(top): %cl = cnt, ebp = higher limb of the pending pair,
        C   ebx = lower limb of the pair, edx = that lower limb >> (32-cnt).
        C At L(lo1): %cl = 32-cnt, ebx = previous limb << cnt,
        C   eax = current limb >> (32-cnt), ebp = copy of the current limb.
    121 L(top):	shl	%cl, %ebp
    122 	or	%ebp, %edx		C edx = (higher << cnt) | (lower >> (32-cnt))
    123 	shl	%cl, %ebx		C ebx = lower << cnt, used at L(lo1)
    124 	neg	cnt		C %cl = right shift by 32-cnt
    125 	not	%edx		C complement the result limb
    126 	mov	-4(up), %eax		C load the next limb down
    127 	mov	%eax, %ebp		C keep a copy for its later left shift
    128 	mov	%edx, -4(rp)		C store the finished limb
    129 	shr	%cl, %eax
    130 	lea	-8(rp), rp		C destination moves down two limbs
    131 L(lo1):	mov	-8(up), %edx		C load the limb below that one
    132 	or	%ebx, %eax		C eax = (previous << cnt) | (current >> (32-cnt))
    133 	mov	%edx, %ebx		C keep a copy for its later left shift
    134 	shr	%cl, %edx
    135 	not	%eax		C complement the result limb
    136 	lea	-8(up), up		C source moves down two limbs
    137 	neg	cnt		C %cl = left shift by cnt again
    138 	mov	%eax, (rp)		C store the finished limb
    139 	decl	VAR_COUNT
    140 	jg	L(top)		C repeat while limb pairs remain
    141 
    142 	pop	%eax			FRAME_popl()	C recover the return value
        C Wind-down: the last two limbs.  Here %cl = cnt, ebp = second lowest
        C limb, ebx = lowest limb, edx = lowest limb >> (32-cnt).
    143 L(end):
    144 	shl	%cl, %ebp
    145 	shl	%cl, %ebx		C lowest limb << cnt; zeros enter at the bottom
    146 	or	%ebp, %edx
    147 	mov	SAVE_EBP, %ebp		C restore the caller's %ebp
    148 	not	%edx
    149 	not	%ebx		C the low cnt bits become ones here
    150 	mov	%edx, -4(rp)		C second lowest result limb
    151 	mov	%ebx, -8(rp)		C lowest result limb
    152 
        C Common exit: restore the remaining saved registers; eax = return value.
    153 L(quit):
    154 	mov	SAVE_UP, up		C restore the caller's %esi
    155 	mov	SAVE_EBX, %ebx		C restore the caller's %ebx
    156 	pop	rp			FRAME_popl()	C restore the caller's %edi
    157 	ret
    158 EPILOGUE()
    159 ASM_END()
    160