Home | History | Annotate | Line # | Download | only in alpha
      1 dnl  Alpha mpn_sec_tabselect.
      2 
      3 dnl  Contributed to the GNU project by Torbjörn Granlund.
      4 
      5 dnl  Copyright 2011-2013 Free Software Foundation, Inc.
      6 
      7 dnl  This file is part of the GNU MP Library.
      8 dnl
      9 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     10 dnl  it under the terms of either:
     11 dnl
     12 dnl    * the GNU Lesser General Public License as published by the Free
     13 dnl      Software Foundation; either version 3 of the License, or (at your
     14 dnl      option) any later version.
     15 dnl
     16 dnl  or
     17 dnl
     18 dnl    * the GNU General Public License as published by the Free Software
     19 dnl      Foundation; either version 2 of the License, or (at your option) any
     20 dnl      later version.
     21 dnl
     22 dnl  or both in parallel, as here.
     23 dnl
     24 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
     25 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     26 dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     27 dnl  for more details.
     28 dnl
     29 dnl  You should have received copies of the GNU General Public License and the
     30 dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
     31 dnl  see https://www.gnu.org/licenses/.
     32 
     33 include(`../config.m4')
     34 
     35 C      cycles/limb
     36 C EV4:      ?
     37 C EV5:      2.25
     38 C EV6:      1.64
     39 
     40 define(`rp',     `r16')		C destination pointer; the selected words are stored here
     41 define(`tp',     `r17')		C table pointer; advanced by 8*n bytes per entry
     42 define(`n',      `r18')		C number of 64-bit words in each table entry
     43 define(`nents',  `r19')		C number of table entries that are scanned
     44 define(`which',  `r20')		C 0-based index of the entry to copy
     45 
     46 define(`i',      `r21')		C inner-loop countdown, starts at nents
     47 define(`j',      `r22')		C 4-word strip counter, starts at n - 4
     48 define(`stride', `r23')		C not referenced by the routine below
     49 define(`mask',   `r24')		C 0 or all ones; gates the words loaded from an entry
     50 define(`k',      `r25')		C nents - which, the value of i that marks the wanted entry
     51 
     52 
     53 ASM_START()
        C mpn_sec_tabselect: copy entry number which (counted from 0) of the
        C table at tp to rp.
        C
        C Inputs, using the register aliases defined earlier in this file:
        C   rp     destination; n words are stored
        C   tp     table base; consecutive entries are n words (8*n bytes) apart
        C   n      words per entry
        C   nents  number of entries; every inner loop decrements i before
        C          testing it, so nents = 0 is not handled (the count would wrap)
        C   which  index of the wanted entry; if no iteration matches, zeros
        C          are stored
        C
        C The table is walked in column strips: 4 words wide while at least 4
        C words remain, then one 2-word strip if bit 1 of n is set, then one
        C 1-word strip if bit 0 of n is set.  Within a strip all nents entries
        C are loaded; each is ANDed with a mask that is all ones only for the
        C wanted entry and ORed into accumulators.  which feeds only the mask
        C computation, never a branch condition or a load address.
        C
        C Scratch use: r0-r3 accumulators, r4-r7 loaded words, r8 saved strip
        C start.  NOTE(review): these and r21-r25 are assumed to be caller-saved
        C under the Alpha calling standard, since nothing is saved here - confirm.
     54 PROLOGUE(mpn_sec_tabselect)
     55 	subq	n, 4, j			C j = n - 4; negative means no full 4-word strip
     56 
     57 	blt	j, L(outer_end)		C fewer than 4 words: only the tail strips run
     58 L(outer_top):
        C One 4-word strip: r0-r3 collect words 0-3 of the wanted entry.
     59 	mov	tp, r8			C remember strip start; tp walks through the entries
     60 	lda	r0, 0(r31)		C clear the accumulators (r31 always reads as zero)
     61 	lda	r1, 0(r31)
     62 	lda	r2, 0(r31)
     63 	lda	r3, 0(r31)
     64 	subq	j, 4, j			C j -= 4; tested by the bge at the bottom of the strip
     65 	subq	nents, which, k		C k = value i holds while entry which is being read
     66 	mov	nents, i		C i counts down nents, nents-1, ..., 1
     67 
     68 	ALIGN(16)
     69 L(top):	ldq	r4, 0(tp)		C words 0 and 1 of the current entry
     70 	ldq	r5, 8(tp)
     71 	cmpeq	k, i, mask		C mask = 1 when this is the wanted entry, else 0
     72 	subq	i, 1, i
     73 	subq	r31, mask, mask		C 0 - mask: 1 becomes all ones, 0 stays 0
     74 	ldq	r6, 16(tp)		C words 2 and 3 of the current entry
     75 	ldq	r7, 24(tp)
     76 	and	r4, mask, r4		C zero the words unless this is the wanted entry
     77 	and	r5, mask, r5
     78 	or	r0, r4, r0		C merge into the accumulators
     79 	or	r1, r5, r1
     80 	and	r6, mask, r6
     81 	and	r7, mask, r7
     82 	or	r2, r6, r2
     83 	or	r3, r7, r3
     84 	s8addq	n, tp, tp		C tp += 8*n: same strip, next entry
     85 	bne	i, L(top)		C repeat until all nents entries have been read
     86 
     87 	stq	r0, 0(rp)		C store the four selected words
     88 	stq	r1, 8(rp)
     89 	stq	r2, 16(rp)
     90 	stq	r3, 24(rp)
     91 	addq	r8, 32, tp		C back to the first entry, 4 words further along
     92 	addq	rp, 32, rp
     93 	bge	j, L(outer_top)		C another full 4-word strip remains
     94 L(outer_end):
     95 
     96 	and	n, 2, r0		C bit 1 of n set: a 2-word strip remains
     97 	beq	r0, L(b0x)
     98 L(b1x):	mov	tp, r8			C 2-word strip, same scheme with accumulators r0-r1
     99 	lda	r0, 0(r31)
    100 	lda	r1, 0(r31)
    101 	subq	nents, which, k
    102 	mov	nents, i
    103 	ALIGN(16)
    104 L(tp2):	ldq	r4, 0(tp)
    105 	ldq	r5, 8(tp)
    106 	cmpeq	k, i, mask		C 1 only for the wanted entry
    107 	subq	i, 1, i
    108 	subq	r31, mask, mask		C widen to 0 or all ones
    109 	and	r4, mask, r4
    110 	and	r5, mask, r5
    111 	or	r0, r4, r0
    112 	or	r1, r5, r1
    113 	s8addq	n, tp, tp		C next entry
    114 	bne	i, L(tp2)
    115 	stq	r0, 0(rp)
    116 	stq	r1, 8(rp)
    117 	addq	r8, 16, tp		C back to the first entry, 2 words further along
    118 	addq	rp, 16, rp
    119 
    120 L(b0x):	and	n, 1, r0		C bit 0 of n set: a final 1-word strip remains
    121 	beq	r0, L(b00)
    122 L(b01):	lda	r0, 0(r31)		C last strip, so the strip start is not saved
    123 	subq	nents, which, k
    124 	mov	nents, i
    125 	ALIGN(16)
    126 L(tp1):	ldq	r4, 0(tp)
    127 	cmpeq	k, i, mask		C 1 only for the wanted entry
    128 	subq	i, 1, i
    129 	subq	r31, mask, mask		C widen to 0 or all ones
    130 	and	r4, mask, r4
    131 	or	r0, r4, r0
    132 	s8addq	n, tp, tp		C next entry
    133 	bne	i, L(tp1)
    134 	stq	r0, 0(rp)
    135 
    136 L(b00):	ret	r31, (r26), 1		C return through the address held in r26
    137 EPILOGUE()
    138 
    138