Home | History | Annotate | Line # | Download | only in p9
      1 dnl  Power9 mpn_addmul_1.
      2 
      3 dnl  Copyright 2017, 2018 Free Software Foundation, Inc.
      4 
      5 dnl  This file is part of the GNU MP Library.
      6 dnl
      7 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
      8 dnl  it under the terms of either:
      9 dnl
     10 dnl    * the GNU Lesser General Public License as published by the Free
     11 dnl      Software Foundation; either version 3 of the License, or (at your
     12 dnl      option) any later version.
     13 dnl
     14 dnl  or
     15 dnl
     16 dnl    * the GNU General Public License as published by the Free Software
     17 dnl      Foundation; either version 2 of the License, or (at your option) any
     18 dnl      later version.
     19 dnl
     20 dnl  or both in parallel, as here.
     21 dnl
     22 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
     23 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     24 dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     25 dnl  for more details.
     26 dnl
     27 dnl  You should have received copies of the GNU General Public License and the
     28 dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
     29 dnl  see https://www.gnu.org/licenses/.
     30 
     31 include(`../config.m4')
     32 
     33 C                   cycles/limb
     34 C POWER3/PPC630		 -
     35 C POWER4/PPC970		 -
     36 C POWER5		 -
     37 C POWER6		 -
     38 C POWER7		 -
     39 C POWER8		 -
     40 C POWER9		 2.5
     41 
     42 C TODO
     43 C  * Schedule for Power9 pipeline.
     44 C  * Unroll 4x if that proves beneficial.
     45 C  * This is marginally faster (but much smaller) than ../aorsmul_1.asm.
     46 
     47 C INPUT PARAMETERS
        C  rp (r3)  base of the 64-bit limbs that are loaded, accumulated into and
        C           stored back by the code below
        C  up (r4)  base of the 64-bit limbs that are only loaded and multiplied
        C  n  (r5)  limb count; read only during setup at function entry, after
        C           which r5 is reused as a scratch register
        C  v0 (r6)  multiplier operand of every maddld/maddhdu pair
     48 define(`rp', `r3')
     49 define(`up', `r4')
     50 define(`n',  `r5')
     51 define(`v0', `r6')
     52 
     53 ASM_START()
        C mpn_addmul_1 -- multiply the n limbs at up by the single limb v0, add the
        C products into the n limbs at rp, and leave the carry-out limb in r3.
        C
        C In terms of the code below, for i = 0 .. n-1:
        C   t(i)  = up[i] * v0 + rp[i]        128 bits, from one maddld/maddhdu pair
        C   rp[i] = low(t(i)) + high(t(i-1)) + CA
        C and r3 at blr holds high(t(n-1)) + CA.
        C
        C The maddld and maddhdu macros are defined outside this file (presumably
        C via config.m4).  They are assumed to emit the Power ISA 3.0 instructions
        C of the same names: low resp. high 64 bits of unsigned RA*RB + RC.
        C
        C Register use:
        C   r0,  r11  up limb / rp limb of one pair, product goes to r8 (low), r5 (high)
        C   r12, r10  up limb / rp limb of the other pair, product goes to r9 (low), r7 (high)
        C   r0        additionally used as scratch while ctr and cr0 are set up
        C   r5        aliases n; n is not read again after the setup instructions
        C   ctr       (n-1) >> 1; every pass through L(top) stores two limbs
        C   cr6       n compared with 2; selects the L(1) and L(2) short tails
        C
        C Control flow: odd n starts at L(b1) and enters the loop at L(top) (or goes
        C straight to L(end) when ctr reaches 0); even n starts at L(b0) and enters
        C the loop at L(mid).  L(end) always stores the final three limbs.
        C
        C NOTE(review): n >= 1 appears to be assumed.  With n = 0 the even path
        C would be taken and two limbs accessed -- confirm against callers.
     54 PROLOGUE(mpn_addmul_1)
     55 	cmpdi	cr6, n, 2
        C ctr = (n-1) >> 1
     56 	addi	r0, n, -1	C FIXME: postpone
     57 	srdi	r0, r0, 1	C FIXME: postpone
     58 	mtctr	r0		C FIXME: postpone
     59 	rldicl.	r0, n, 0,63	C r0 = n & 1, set cr0
     60 	bne	cr0, L(b1)	C n odd
     61 
        C Even n: t(0) goes to r9/r7, t(1) to r8/r5.
     62 L(b0):	ld	r10, 0(rp)
     63 	ld	r12, 0(up)
     64 	ld	r11, 8(rp)
     65 	ld	r0, 8(up)
     66 	maddld(	r9, r12, v0, r10)
     67 	maddhdu(r7, r12, v0, r10)
     68 	ble	cr6, L(2)	C n <= 2, i.e. n = 2 for positive even n
     69 	ld	r10, 16(rp)
     70 	ld	r12, 16(up)
     71 	maddld(	r8, r0, v0, r11)
     72 	maddhdu(r5, r0, v0, r11)
        C addic, unlike addi, writes CA.  Adding a small positive constant leaves
        C CA = 0 unless the address wraps, so this presumably serves to start the
        C adde chain below with a clear carry.
     73 	addic	up, up, 16
        C Bias rp by one limb so that the offsets used from L(mid) onward address
        C the right limbs when the loop is entered at its midpoint.
     74 	addi	rp, rp, -8
     75 	b	L(mid)
     76 
        C Odd n: t(0) goes to r8/r5, t(1) to r9/r7, limb 2 is preloaded into r11/r0.
     77 L(b1):	ld	r11, 0(rp)
     78 	ld	r0, 0(up)
     79 	ble	cr6, L(1)	C n <= 2, i.e. n = 1 for positive odd n
     80 	ld	r10, 8(rp)
     81 	ld	r12, 8(up)
     82 	maddld(	r8, r0, v0, r11)
     83 	maddhdu(r5, r0, v0, r11)
     84 	ld	r11, 16(rp)
     85 	ld	r0, 16(up)
     86 	maddld(	r9, r12, v0, r10)
     87 	maddhdu(r7, r12, v0, r10)
        C As in the even path, addic is used so that CA is written (0 unless the
        C address wraps).  up now runs 24 bytes ahead of rp, hence 0(up) pairs
        C with 24(rp) in the loop.
     88 	addic	up, up, 24
     89 	bdz	L(end)		C n = 3: nothing left for the loop
     90 
        C Main loop, two limbs per pass.  Each half loads the operands of a later
        C limb, stores one finished limb, folds the previous high word plus CA
        C into the next low word with adde, and starts the next multiply-add.
        C Only the adde instructions write CA inside the loop, so the carry
        C survives the pointer updates and the bdnz.
     91 	ALIGN(16)
     92 L(top):	ld	r10, 24(rp)
     93 	ld	r12, 0(up)
     94 	std	r8, 0(rp)
     95 	adde	r9, r5, r9
     96 	maddld(	r8, r0, v0, r11)	C W:0,2,4
     97 	maddhdu(r5, r0, v0, r11)	C W:1,3,5
     98 L(mid):	ld	r11, 32(rp)
     99 	ld	r0, 8(up)
    100 	std	r9, 8(rp)
    101 	adde	r8, r7, r8
    102 	maddld(	r9, r12, v0, r10)	C W:1,3,5
    103 	maddhdu(r7, r12, v0, r10)	C W:2,4,6
    104 	addi	rp, rp, 16
    105 	addi	up, up, 16
    106 	bdnz	L(top)
    107 
        C Wind-down: store the last three limbs.  The final multiply-add uses the
        C operands already sitting in r0/r11.  Result: high word of the last
        C product plus the outstanding carry.
    108 L(end):	std	r8, 0(rp)
    109 	maddld(	r8, r0, v0, r11)
    110 	adde	r9, r5, r9
    111 	maddhdu(r5, r0, v0, r11)
    112 	std	r9, 8(rp)
    113 	adde	r8, r7, r8
    114 	std	r8, 16(rp)
    115 	addze	r3, r5
    116 	blr
    117 
        C Two-limb tail (reached from L(b0)): t(0) is already in r9/r7.  addc does
        C not read CA, so no prior carry clearing is needed here.
    118 L(2):	maddld(	r8, r0, v0, r11)
    119 	maddhdu(r5, r0, v0, r11)
    120 	std	r9, 0(rp)
    121 	addc	r8, r7, r8
    122 	std	r8, 8(rp)
    123 	addze	r3, r5
    124 	blr
    125 
        C One-limb tail (reached from L(b1)): store the low word and put the high
        C word directly in r3; with a single limb there is no carry to add.
    126 L(1):	maddld(	r8,  r0, v0, r11)
    127 	std	r8, 0(rp)
    128 	maddhdu(r3, r0, v0, r11)
    129 	blr
    130 EPILOGUE()
    131