dnl  S/390-64 mpn_mul_basecase.

dnl  Copyright 2011 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C            cycles/limb
C z900		 ?
C z990		23
C z9		 ?
C z10		28
C z196		 ?

C TODO
C  * Perhaps add special case for un <= 2.
C  * Replace loops by faster code.  The mul_1 and addmul_1 loops could be sped
C    up by about 10%.
C INPUT PARAMETERS
C   rp  destination limb pointer
C   up  first source operand
C   un  limb count of up
C   vp  second source operand
C   vn  limb count of vp
define(`rp',	`%r2')
define(`up',	`%r3')
define(`un',	`%r4')
define(`vp',	`%r5')
define(`vn',	`%r6')

define(`zero',	`%r8')

C mpn_mul_basecase
C
C Multiplies the un limbs at up by the vn limbs at vp and stores the product
C limbs to rp[0] .. rp[un+vn-1].  The first row is a mul_1 pass (up times
C vp[0], stored to rp).  Every further limb vp[j] contributes one addmul_1
C row: rp is advanced by one limb and up times vp[j] is added in place, with
C the carry-out limb stored just above the row.
C
C NOTE(review): the un < 2 path never reads vn and always stores two limbs,
C so the routine presumably relies on the caller guaranteeing
C un >= vn >= 1 -- confirm against callers.
C
C Register use
C   %r0:%r1    even/odd pair for mlg/mlgr: the odd half holds the multiplicand
C              going in and the low product limb coming out, the even half
C              receives the high product limb
C   %r7        current multiplier limb vp[j]
C   %r8        constant zero, lets alcgr add the carry bit alone
C   %r9        inner loop counter
C   %r10:%r11  pair for the first product of each row; afterwards %r10 holds
C              the carry limb and %r11 is reused for the rp limb being added
C   %r12       byte offset of the current limb
C
C %r6 .. %r12 are stored at 48(%r15) before they are modified and reloaded
C from there before returning (48(%r15) is the %r6 slot of the register save
C area in the s390x ELF ABI).  The un < 2 path returns before the save and
C touches only %r0 and %r1.
C
C The carry bit is kept in the condition code across loop iterations.  Inside
C the loops only alg, algr and alcgr write the condition code; lg, mlgr, lgr,
C stg, la and brctg leave it alone, so the instruction order must not be
C changed without rechecking the carry chain.

ASM_START()
PROLOGUE(mpn_mul_basecase)
	cghi	un, 2
	jhe	L(ge2)

C un = vn = 1: a single 64 x 64 -> 128 bit product
	lg	%r1, 0(vp)
	mlg	%r0, 0(up)		C %r0:%r1 = vp[0] * up[0]
	stg	%r1, 0(rp)		C low limb
	stg	%r0, 8(rp)		C high limb
	br	%r14

C General case, un >= 2.  There is no dedicated un = 2 path.
L(ge2):
C mul_1 =======================================================================
C Stores un+1 limbs at rp: up times vp[0].  (un in these comments is the count
C on entry; the register itself holds un-1 from the aghi below onward.)

	stmg	%r6, %r12, 48(%r15)	C save the registers clobbered below
	lghi	zero, 0
	aghi	un, -1			C limbs left after the first one

	lg	%r7, 0(vp)		C multiplier limb vp[0]
	lg	%r11, 0(up)
	lghi	%r12, 8			C init index register
	mlgr	%r10, %r7		C %r10:%r11 = up[0] * vp[0]
	lgr	%r9, un			C loop count, nonzero since un was >= 2
	stg	%r11, 0(rp)
	cr	%r15, %r15		C clear carry flag

L(tm):	lg	%r1, 0(%r12,up)
	mlgr	%r0, %r7		C %r0:%r1 = up[i] * vp[0]
	alcgr	%r1, %r10		C add carry limb and pending carry bit
	lgr	%r10, %r0		C copy high part to carry limb
	stg	%r1, 0(%r12,rp)
	la	%r12, 8(%r12)		C next limb, condition code untouched
	brctg	%r9, L(tm)

	alcgr	%r0, zero		C fold the last carry bit into the top limb
	stg	%r0, 0(%r12,rp)

C addmul_1 loop ===============================================================
C One row per remaining limb of vp.

	aghi	vn, -1
	je	L(outer_end)		C vn was 1, no further rows
L(outer_loop):

	la	rp, 8(rp)		C rp += 1
	la	vp, 8(vp)		C vp += 1
	lg	%r7, 0(vp)		C multiplier limb for this row
	lg	%r11, 0(up)
	lghi	%r12, 8			C init index register
	mlgr	%r10, %r7		C %r10:%r11 = up[0] * vp[j]
	lgr	%r9, un
	alg	%r11, 0(rp)		C low limb + rp[0], sets the carry bit
	stg	%r11, 0(rp)

L(tam):	lg	%r1, 0(%r12,up)
	lg	%r11, 0(%r12,rp)
	mlgr	%r0, %r7		C %r0:%r1 = up[i] * vp[j]
	alcgr	%r1, %r11		C + rp[i] + carry bit from the previous add
	alcgr	%r0, zero		C propagate that carry into the high limb
	algr	%r1, %r10		C + carry limb; carry bit is used next round
	lgr	%r10, %r0		C high part becomes the next carry limb
	stg	%r1, 0(%r12,rp)
	la	%r12, 8(%r12)
	brctg	%r9, L(tam)

	alcgr	%r0, zero		C fold the last carry bit into the top limb
	stg	%r0, 0(%r12,rp)

	brctg	vn, L(outer_loop)
L(outer_end):

	lmg	%r6, %r12, 48(%r15)	C restore the saved registers
	br	%r14
EPILOGUE()