Home | History | Annotate | Line # | Download | only in p9
      1 dnl  POWER9 mpn_addmul_1 and mpn_submul_1.
      2 
      3 dnl  Copyright 2018 Free Software Foundation, Inc.
      4 
      5 dnl  This file is part of the GNU MP Library.
      6 dnl
      7 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
      8 dnl  it under the terms of either:
      9 dnl
     10 dnl    * the GNU Lesser General Public License as published by the Free
     11 dnl      Software Foundation; either version 3 of the License, or (at your
     12 dnl      option) any later version.
     13 dnl
     14 dnl  or
     15 dnl
     16 dnl    * the GNU General Public License as published by the Free Software
     17 dnl      Foundation; either version 2 of the License, or (at your option) any
     18 dnl      later version.
     19 dnl
     20 dnl  or both in parallel, as here.
     21 dnl
     22 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
     23 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     24 dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     25 dnl  for more details.
     26 dnl
     27 dnl  You should have received copies of the GNU General Public License and the
     28 dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
     29 dnl  see https://www.gnu.org/licenses/.
     30 
     31 include(`../config.m4')
     32 
     33 C                   mpn_addmul_1    mpn_submul_1
     34 C                   cycles/limb     cycles/limb
     35 C POWER3/PPC630		 -		 -
     36 C POWER4/PPC970		 -		 -
     37 C POWER5		 -		 -
     38 C POWER6		 -		 -
     39 C POWER7		 -		 -
     40 C POWER8		 -		 -
     41 C POWER9		 2.63		 2.63
     42 
     43 C INPUT PARAMETERS
     44 define(`rp', `r3')		C limb vector that is read and updated in place
     45 define(`up', `r4')		C limb vector that is only read
     46 define(`n',  `r5')		C limb count
     47 define(`v0', `r6')		C multiplier limb
     48 
     49 
        C Operation selection.  OPERATION_addmul_1 and OPERATION_submul_1 are
        C not defined in this file; presumably the build defines one of them
        C before this file is processed.  Each branch below defines:
        C   ADDSUBC  adde or subfe, the add or subtract that takes ca in and
        C            leaves ca out
        C   ADDSUB   addc or subfc, the add or subtract that only leaves ca out
        C   func     name of the entry point being assembled
        C   AM       expands to its argument in the addmul_1 build, else to nothing
        C   SM       expands to its argument in the submul_1 build, else to nothing
     50 ifdef(`OPERATION_addmul_1',`
     51   define(`ADDSUBC',	adde)
     52   define(`ADDSUB',	addc)
     53   define(`func',	mpn_addmul_1)
     54   define(`AM',		`$1')
     55   define(`SM',		`')
     56 ')
     57 ifdef(`OPERATION_submul_1',`
     58   define(`ADDSUBC',	subfe)
     59   define(`ADDSUB',	subfc)
     60   define(`func',	mpn_submul_1)
     61   define(`AM',		`')
     62   define(`SM',		`$1')
     63 ')
     64 
        C NOTE(review): the macro used on the next line is defined outside this
        C file; it presumably announces the two entry points this one source
        C file can provide.  Confirm against the build scripts before editing.
     65 MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
     66 
     67 ASM_START()
     68 PROLOGUE(func)
        C func(rp, up, n, v0)
        C
        C For each of the n limbs, multiply up[i] by v0 and add the product to
        C rp[i] in the addmul_1 build, or subtract it from rp[i] in the submul_1
        C build, storing the result back to rp[i].  The value left in r3 is the
        C last high product word plus the carries or borrow still pending.
        C n >= 1 is assumed: up[0] is loaded before n is tested.
        C
        C Two independent carry chains are kept alive across the whole routine:
        C   ov  used by addex with last operand 0, which adds the high word of
        C       the previous product into the low word of the current one
        C   ca  used by ADDSUBC, which combines that sum with the rp limb
        C
        C Register use:
        C   r0, r7    low product words, alternating
        C   r5, r12   high product words, alternating; r5 holds n on entry and
        C             is reused once n has been consumed
        C   r8, r9    limbs loaded from up, alternating
        C   r11       up[0] at first, afterwards the limb loaded from rp
        C   r10       n / 4 for ctr at first, afterwards the limb to store
        C   cr7       n compared with 3
        C   cr0       bit 0 of n, then bit 1 of n
        C
        C Dispatch on n:
        C   n = 1              L(1)
        C   n = 2              L(2), then cj2
        C   n = 3              b11, then end
        C   n mod 4 = 0        b00, enters the loop at lo0
        C   n mod 4 = 1, n>=5  b01, enters the loop at lo1
        C   n mod 4 = 2, n>=6  b10, enters the loop at lo2
        C   n mod 4 = 3, n>=7  b11, enters the loop at top
     69 	cmpdi	cr7, n, 3		C cr7 = n compared with 3
     70 	srdi	r10, n, 2		C r10 = n / 4
     71 	mtctr	r10			C ctr = number of bdnz counts
     72 	rldicl.	r9, n, 0, 63		C cr0 from bit 0 of n
     73 	ld	r11, 0(up)		C r11 = up[0]
     74 	bne	cr0, L(bx1)		C taken when n is odd
     75 
     76 L(bx0):	rldicl. r9, n, 63, 63		C n even; cr0 from bit 1 of n
     77 AM(`	subfzeo	r12, n		')	C ov = 0, ca = 0
     78 AM(`	li	r12, 0		')	C r12 = 0, no high word pending
     79 SM(`	subfco	r12, r12, r12	')	C r12 = 0, ov = 0, ca = 1
     80 	ld	r9, 8(up)		C r9 = up[1]
     81 	mulld	r0, r11, v0		C r0 = low word of up[0] * v0
     82 	mulhdu	r5, r11, v0		C r5 = high word; n is no longer needed
     83 	blt	cr7, L(2)		C taken when n = 2
     84 	ld	r8, 16(up)		C r8 = up[2]
     85 	bne	cr0, L(b10)		C taken when n mod 4 = 2
     86 
     87 L(b00):	addi	rp, rp, -24		C so that 24(rp) at lo0 is rp[0]
     88 	b	L(lo0)
     89 L(b10):	addi	rp, rp, -8		C so that 8(rp) at lo2 is rp[0]
     90 	addi	up, up, 16		C so that 8(up) at lo2 is up[3]
     91 	b	L(lo2)
     92 
     93 L(2):	addi	rp, rp, -8		C so that 8(rp) at cj2 is rp[0]
     94 	b	L(cj2)
     95 
     96 L(bx1):	rldicl. r9, n, 63, 63		C n odd; cr0 from bit 1 of n
     97 AM(`	subfzeo	r5, n		')	C ov = 0, ca = 0
     98 AM(`	li	r5, 0		')	C r5 = 0, no high word pending
     99 SM(`	subfco	r5, r5, r5	')	C r5 = 0, ov = 0, ca = 1
    100 	blt	cr7, L(1)		C taken when n = 1
    101 	ld	r8, 8(up)		C r8 = up[1]
    102 	mulld	r7, r11, v0		C r7 = low word of up[0] * v0
    103 	mulhdu	r12, r11, v0		C r12 = high word of up[0] * v0
    104 	ld	r9, 16(up)		C r9 = up[2]
    105 	bne	cr0, L(b11)		C taken when n mod 4 = 3
    106 
    107 L(b01):	addi	rp, rp, -16		C so that 16(rp) at lo1 is rp[0]
    108 	addi	up, up, 8		C so that 16(up) at lo1 is up[3]
    109 	b	L(lo1)
    110 
        C n = 1: one product, one store, then return.
    111 L(1):	mulld	r7, r11, v0		C r7 = low word of up[0] * v0
    112 	mulhdu	r12, r11, v0		C r12 = high word of up[0] * v0
    113 	ld	r11, 0(rp)		C r11 = rp[0]
    114 	ADDSUB	r10, r7, r11		C rp[0] plus or minus r7, leaves ca
    115 	std	r10, 0(rp)
    116 AM(`	addze	r3, r12		')	C return high word + ca
    117 SM(`	subfe	r0, r0, r0	')	C r0 = ca - 1, that is -1 on borrow else 0
    118 SM(`	sub	r3, r12, r0	')	C return high word + borrow
    119 	blr
    120 
    121 L(b11):	addi	up, up, 24		C so that 0(up) at top is up[3]
    122 	ble	cr7, L(end)		C taken when n = 3, loop is skipped
    123 
        C Main loop, four limbs per pass.  Each of the four stages multiplies
        C one up limb, folds the previous high word into the previous low word
        C through ov, loads the up limb needed two stages later, and combines
        C the folded word with one rp limb through ca.  lo2, lo1 and lo0 are
        C the entry points for the other residues of n mod 4.
    124 	ALIGN(16)
    125 L(top):	ld	r11, 0(rp)		C r11 = rp limb for this stage
    126 	mulld	r0, r8, v0		C low word of the next product
    127 	addex(	r7, r7, r5, 0)		C r7 += previous high word, carry in ov
    128 	mulhdu	r5, r8, v0		C high word of the next product
    129 	ld	r8, 0(up)		C up limb used two stages later
    130 	ADDSUBC	r10, r7, r11		C combine with rp limb, carry in ca
    131 	std	r10, 0(rp)
    132 L(lo2):	ld	r11, 8(rp)
    133 	mulld	r7, r9, v0
    134 	addex(	r0, r0, r12, 0)
    135 	mulhdu	r12, r9, v0
    136 	ld	r9, 8(up)
    137 	ADDSUBC	r10, r0, r11
    138 	std	r10, 8(rp)
    139 L(lo1):	ld	r11, 16(rp)
    140 	mulld	r0, r8, v0
    141 	addex(	r7, r7, r5, 0)
    142 	mulhdu	r5, r8, v0
    143 	ld	r8, 16(up)
    144 	ADDSUBC	r10, r7, r11
    145 	std	r10, 16(rp)
    146 L(lo0):	ld	r11, 24(rp)
    147 	mulld	r7, r9, v0
    148 	addex(	r0, r0, r12, 0)
    149 	mulhdu	r12, r9, v0
    150 	ld	r9, 24(up)
    151 	ADDSUBC	r10, r0, r11
    152 	std	r10, 24(rp)
    153 	addi	up, up, 32		C advance both pointers by four limbs
    154 	addi	rp, rp, 32
    155 	bdnz	L(top)			C decrement ctr, loop while it is nonzero
    156 
        C Wind-down: the last three limbs are stored here with no further
        C loads from up.  r8 and r9 already hold the final two up limbs.
    157 L(end):	ld	r11, 0(rp)
    158 	mulld	r0, r8, v0
    159 	addex(	r7, r7, r5, 0)
    160 	mulhdu	r5, r8, v0
    161 	ADDSUBC	r10, r7, r11
    162 	std	r10, 0(rp)
        C cj2 is also the entry point for n = 2; two limbs remain from here.
    163 L(cj2):	ld	r11, 8(rp)
    164 	mulld	r7, r9, v0		C low word of the last product
    165 	addex(	r0, r0, r12, 0)
    166 	mulhdu	r12, r9, v0		C high word of the last product
    167 	ADDSUBC	r10, r0, r11
    168 	std	r10, 8(rp)
    169 	ld	r11, 16(rp)
    170 	addex(	r7, r7, r5, 0)
    171 	ADDSUBC	r10, r7, r11
    172 	std	r10, 16(rp)		C last limb written to rp
    173 	li	r0, 0
    174 	addex(	r3, r12, r0, 0)		C r3 = last high word + ov
    175 AM(`	addze	r3, r3		')	C return r3 + ca
    176 SM(`	subfe	r0, r0, r0	')	C r0 = ca - 1, that is -1 on borrow else 0
    177 SM(`	sub	r3, r3, r0	')	C return r3 + borrow
    178 	blr
    179 EPILOGUE()
    180