Home | History | Annotate | Line # | Download | only in s390_64
      1 dnl  S/390-64 mpn_add_n and mpn_sub_n.
      2 
      3 dnl  Copyright 2011 Free Software Foundation, Inc.
      4 
      5 dnl  This file is part of the GNU MP Library.
      6 dnl
      7 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
      8 dnl  it under the terms of either:
      9 dnl
     10 dnl    * the GNU Lesser General Public License as published by the Free
     11 dnl      Software Foundation; either version 3 of the License, or (at your
     12 dnl      option) any later version.
     13 dnl
     14 dnl  or
     15 dnl
     16 dnl    * the GNU General Public License as published by the Free Software
     17 dnl      Foundation; either version 2 of the License, or (at your option) any
     18 dnl      later version.
     19 dnl
     20 dnl  or both in parallel, as here.
     21 dnl
     22 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
     23 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     24 dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     25 dnl  for more details.
     26 dnl
     27 dnl  You should have received copies of the GNU General Public License and the
     28 dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
     29 dnl  see https://www.gnu.org/licenses/.
     30 
     31 include(`../config.m4')
     32 
     33 C            cycles/limb
     34 C z900		 5.5
     35 C z990		 3
     36 C z9		 ?
     37 C z10		 6
     38 C z196		 ?
     39 
     40 C TODO
     41 C  * Optimise for small n
     42 C  * Use r0 and save/restore one less register
     43 C  * Using logops_n's v1 inner loop operand order makes the loop about 20%
     44 C    faster, at the expense of highly alignment-dependent performance.
     45 
     46 C INPUT PARAMETERS (register aliases used throughout the function body)
     47 define(`rp',	`%r2')		C destination pointer; result limbs are stored through it
     48 define(`up',	`%r3')		C first source pointer; limbs are loaded from it with lg/lmg
     49 define(`vp',	`%r4')		C second source pointer; memory operand of ADSB/ADSBC
     50 define(`n',	`%r5')		C limb count; r5 is reused for limb data once r1 holds the pass count
     51 
     52 ifdef(`OPERATION_add_n', `		C addition variant: func becomes mpn_add_n
     53   define(ADSB,		alg)		C lowest limb: add with no carry in
     54   define(ADSBCR,	alcgr)		C register add with carry; not referenced in the code shown here
     55   define(ADSBC,		alcg)		C later limbs: add with carry in
     56   define(RETVAL,`dnl
     57 	lghi	%r2, 0			C clear r2; relies on lghi leaving the carry in the condition code
     58 	alcgr	%r2, %r2')		C r2 = 0 + 0 + carry, i.e. the final carry (0 or 1)
     59   define(func,		mpn_add_n)	C name passed to PROLOGUE
     60   define(func_nc,	mpn_add_nc)')	C func_nc is not referenced in the code shown here
     61 ifdef(`OPERATION_sub_n', `		C subtraction variant: func becomes mpn_sub_n
     62   define(ADSB,		slg)		C lowest limb: subtract with no borrow in
     63   define(ADSBCR,	slbgr)		C register subtract with borrow; not referenced in the code shown here
     64   define(ADSBC,		slbg)		C later limbs: subtract with borrow in
     65   define(RETVAL,`dnl
     66 	slbgr	%r2, %r2		C r2 = r2 - r2 - borrow, i.e. 0 or -1
     67 	lcgr	%r2, %r2')		C negate, giving the final borrow as 0 or 1
     68   define(func,		mpn_sub_n)	C name passed to PROLOGUE
     69   define(func_nc,	mpn_sub_nc)')	C func_nc is not referenced in the code shown here
     70 
     71 MULFUNC_PROLOGUE(mpn_add_n mpn_sub_n)
     72 C func: rp[] = up[] +/- vp[] over n limbs of 8 bytes; r2 holds the final carry or borrow on return.
     73 ASM_START()
     74 PROLOGUE(func)
     75 	stmg	%r6, %r8, 48(%r15)	C save r6-r8; they hold limbs below and are reloaded at L(end)
     76 C Dispatch on n mod 4. NOTE(review): n = 0 would give r1 = 0 before brctg; callers presumably pass n >= 1.
     77 	aghi	n, 3			C n += 3 so the shift below rounds up
     78 	lghi	%r7, 3			C mask for the low two bits
     79 	srlg	%r1, n, 2		C r1 = (n + 3) / 4, the pass count consumed by brctg
     80 	ngr	%r7, n			C (n + 3) mod 4, n being the original count
     81 	je	L(b1)			C 0: n = 1 (mod 4)
     82 	cghi	%r7, 2
     83 	jl	L(b2)			C 1: n = 2 (mod 4)
     84 	jne	L(b0)			C 3: n = 0 (mod 4); 2 falls through to L(b3)
     85 C n = 3 (mod 4): handle three limbs, then enter the main loop if r1 is still nonzero.
     86 L(b3):	lmg	%r5, %r7, 0(up)		C overwrites n (r5) and the remainder (r7); neither is read again
     87 	la	up, 24(up)
     88 	ADSB	%r5, 0(vp)		C lowest limb: no carry/borrow in
     89 	ADSBC	%r6, 8(vp)
     90 	ADSBC	%r7, 16(vp)
     91 	la	vp, 24(vp)
     92 	stmg	%r5, %r7, 0(rp)
     93 	la	rp, 24(rp)
     94 	brctg	%r1, L(top)		C decrement the pass count; loop while it is nonzero
     95 	j	L(end)
     96 C n = 0 (mod 4): start a full four-limb group here and join the loop body at L(m0).
     97 L(b0):	lmg	%r5, %r8, 0(up)		C These redundant insns are no mistake,
     98 	la	up, 32(up)		C they are needed to make the main loop
     99 	ADSB	%r5, 0(vp)		C run fast for n = 0 (mod 4).
    100 	ADSBC	%r6, 8(vp)
    101 	j	L(m0)			C the remaining two limbs and the store are shared with the loop
    102 C n = 1 (mod 4): handle a single limb.
    103 L(b1):	lg	%r5, 0(up)
    104 	la	up, 8(up)
    105 	ADSB	%r5, 0(vp)		C lowest limb: no carry/borrow in
    106 	la	vp, 8(vp)
    107 	stg	%r5, 0(rp)
    108 	la	rp, 8(rp)
    109 	brctg	%r1, L(top)		C decrement the pass count; loop while it is nonzero
    110 	j	L(end)
    111 C n = 2 (mod 4): handle two limbs.
    112 L(b2):	lmg	%r5, %r6, 0(up)
    113 	la	up, 16(up)
    114 	ADSB	%r5, 0(vp)		C lowest limb: no carry/borrow in
    115 	ADSBC	%r6, 8(vp)
    116 	la	vp, 16(vp)
    117 	stmg	%r5, %r6, 0(rp)
    118 	la	rp, 16(rp)
    119 	brctg	%r1, L(top)		C decrement the pass count; loop while it is nonzero
    120 	j	L(end)
    121 C Main loop: four limbs per pass, with the carry/borrow chained through ADSBC.
    122 L(top):	lmg	%r5, %r8, 0(up)
    123 	la	up, 32(up)		C la, lmg, stmg and brctg leave the carry in the condition code alone
    124 	ADSBC	%r5, 0(vp)		C continues the carry/borrow from the previous group
    125 	ADSBC	%r6, 8(vp)
    126 L(m0):	ADSBC	%r7, 16(vp)		C entry point from L(b0)
    127 	ADSBC	%r8, 24(vp)
    128 	la	vp, 32(vp)
    129 	stmg	%r5, %r8, 0(rp)
    130 	la	rp, 32(rp)
    131 	brctg	%r1, L(top)		C decrement the pass count; loop while it is nonzero
    132 C Turn the final carry/borrow into the r2 result, restore r6-r8 and return.
    133 L(end):	RETVAL
    134 	lmg	%r6, %r8, 48(%r15)	C reload the registers saved on entry
    135 	br	%r14
    136 EPILOGUE()
    137