lshiftc.asm revision 1.1 1 1.1 mrg dnl PowerPC-32 mpn_lshiftc.
2 1.1 mrg
3 1.1 mrg dnl Copyright 1995, 1998, 2000, 2002, 2003, 2004, 2005, 2010 Free Software
4 1.1 mrg dnl Foundation, Inc.
5 1.1 mrg
6 1.1 mrg dnl This file is part of the GNU MP Library.
7 1.1 mrg
8 1.1 mrg dnl The GNU MP Library is free software; you can redistribute it and/or modify
9 1.1 mrg dnl it under the terms of the GNU Lesser General Public License as published
10 1.1 mrg dnl by the Free Software Foundation; either version 3 of the License, or (at
11 1.1 mrg dnl your option) any later version.
12 1.1 mrg
13 1.1 mrg dnl The GNU MP Library is distributed in the hope that it will be useful, but
14 1.1 mrg dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 1.1 mrg dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16 1.1 mrg dnl License for more details.
17 1.1 mrg
18 1.1 mrg dnl You should have received a copy of the GNU Lesser General Public License
19 1.1 mrg dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
20 1.1 mrg
21 1.1 mrg include(`../config.m4')
22 1.1 mrg
23 1.1 mrg C cycles/limb
24 1.1 mrg C 603e: ?
25 1.1 mrg C 604e: 3.0
26 1.1 mrg C 75x (G3): 3.0
27 1.1 mrg C 7400,7410 (G4): 3.0
28 1.1 mrg C 7445,7455 (G4+): 2.5
29 1.1 mrg C 7447,7457 (G4+): 2.25
30 1.1 mrg C power4/ppc970: 2.5
31 1.1 mrg C power5: 2.5
32 1.1 mrg
33 1.1 mrg C INPUT PARAMETERS
34 1.1 mrg C rp r3
35 1.1 mrg C up r4
36 1.1 mrg C n r5
37 1.1 mrg C cnt r6
38 1.1 mrg
39 1.1 mrg ASM_START()
C mpn_lshiftc: rp in r3, up in r4, n in r5, cnt in r6.
C
C Shifts the n-limb operand at up left by cnt bits and stores the bitwise
C complement of every shifted limb at rp.  Limbs are processed from the
C highest address down to the lowest.  Each stored limb is
C     nor (limb << cnt, next_lower_limb >> (32 - cnt))
C and the lowest stored limb is the complement of (lowest_limb << cnt).
C
C Return value (r3): highest_limb >> (32 - cnt), i.e. the bits shifted out
C at the top.  The return value is not complemented.
C
C NOTE(review): nothing here range-checks n or cnt; presumably n >= 1 and
C 1 <= cnt <= 31 are guaranteed by the callers -- confirm.
C
C Two code paths: a loop unrolled twice for n <= 30, and for n > 30 a loop
C unrolled four times that additionally uses r24-r31.
40 1.1 mrg PROLOGUE(mpn_lshiftc)
41 1.1 mrg cmpwi cr0, r5, 30 C more than 30 limbs?
42 1.1 mrg slwi r0, r5, 2 C r0 = 4*n, operand size in bytes
43 1.1 mrg add r4, r4, r0 C make r4 point at end of s1
44 1.1 mrg add r7, r3, r0 C make r7 point at end of res
45 1.1 mrg bgt L(BIG) C branch if more than 30 limbs
46 1.1 mrg
C Small case, n <= 30.  The current source limb alternates between r11 and
C r10 so no register move is needed inside the loop.
47 1.1 mrg mtctr r5 C copy size into CTR
48 1.1 mrg subfic r8, r6, 32 C r8 = 32 - cnt
49 1.1 mrg lwzu r11, -4(r4) C load first s1 limb
50 1.1 mrg srw r3, r11, r8 C compute function return value
51 1.1 mrg bdz L(end1) C n = 1: only the final limb remains
52 1.1 mrg
53 1.1 mrg L(oop): lwzu r10, -4(r4) C next lower limb
54 1.1 mrg slw r9, r11, r6 C high part from the current limb
55 1.1 mrg srw r12, r10, r8 C low part from the next lower limb
56 1.1 mrg nor r9, r9, r12 C combine and complement
57 1.1 mrg stwu r9, -4(r7)
58 1.1 mrg bdz L(end2) C last loaded limb is in r10
59 1.1 mrg lwzu r11, -4(r4) C same step with r10/r11 swapped
60 1.1 mrg slw r9, r10, r6
61 1.1 mrg srw r12, r11, r8
62 1.1 mrg nor r9, r9, r12
63 1.1 mrg stwu r9, -4(r7)
64 1.1 mrg bdnz L(oop)
65 1.1 mrg
C Final limb when the last limb loaded is in r11.
66 1.1 mrg L(end1):
67 1.1 mrg slw r0, r11, r6
68 1.1 mrg nor r0, r0, r0 C complement; nothing is shifted in from below
69 1.1 mrg stw r0, -4(r7)
70 1.1 mrg blr
C Final limb when the last limb loaded is in r10.
71 1.1 mrg L(end2):
72 1.1 mrg slw r0, r10, r6
73 1.1 mrg nor r0, r0, r0 C complement; nothing is shifted in from below
74 1.1 mrg stw r0, -4(r7)
75 1.1 mrg blr
76 1.1 mrg
C Large case, n > 30.  r0 carries (previous limb << cnt), the high part of
C the next result limb to be stored.
C r24-r31 are stored in the 32 bytes just below r1; r1 itself is not moved.
C NOTE(review): this relies on memory below the stack pointer staying
C intact until the lmw at the end -- confirm this holds for the target ABI.
77 1.1 mrg L(BIG):
78 1.1 mrg stmw r24, -32(r1) C save registers we are supposed to preserve
79 1.1 mrg lwzu r9, -4(r4) C load the highest limb
80 1.1 mrg subfic r8, r6, 32 C r8 = 32 - cnt
81 1.1 mrg srw r3, r9, r8 C compute function return value
82 1.1 mrg slw r0, r9, r6 C high part of the first result limb
83 1.1 mrg addi r5, r5, -1 C r5 = n - 1 limbs still to load
84 1.1 mrg
C Handle (n - 1) mod 4 limbs one at a time so that the count left for the
C unrolled loop is a multiple of 4.
85 1.1 mrg andi. r10, r5, 3 C count for spill loop
86 1.1 mrg beq L(e) C already a multiple of 4
87 1.1 mrg mtctr r10
88 1.1 mrg lwzu r28, -4(r4)
89 1.1 mrg bdz L(xe0) C exactly one limb: skip the loop
90 1.1 mrg
91 1.1 mrg L(loop0):
92 1.1 mrg slw r12, r28, r6 C high part for the following result limb
93 1.1 mrg srw r24, r28, r8 C low part for this result limb
94 1.1 mrg lwzu r28, -4(r4)
95 1.1 mrg nor r24, r0, r24
96 1.1 mrg stwu r24, -4(r7)
97 1.1 mrg mr r0, r12
98 1.1 mrg bdnz L(loop0) C taken at most once!
99 1.1 mrg
C Last single limb; same step as L(loop0) without the load.
100 1.1 mrg L(xe0): slw r12, r28, r6
101 1.1 mrg srw r24, r28, r8
102 1.1 mrg nor r24, r0, r24
103 1.1 mrg stwu r24, -4(r7)
104 1.1 mrg mr r0, r12
105 1.1 mrg
C CTR is one less than the number of 4-limb groups because the last group
C is handled by the straight-line code after L(loopU).  The first group is
C preloaded into r28-r31.
106 1.1 mrg L(e): srwi r5, r5, 2 C count for unrolled loop
107 1.1 mrg addi r5, r5, -1
108 1.1 mrg mtctr r5
109 1.1 mrg lwz r28, -4(r4)
110 1.1 mrg lwz r29, -8(r4)
111 1.1 mrg lwz r30, -12(r4)
112 1.1 mrg lwzu r31, -16(r4) C also steps r4 down by 16 bytes
113 1.1 mrg
C Each pass turns the four limbs already in r28-r31 into four result limbs
C while loading the next four.  r9-r12 hold the left-shifted parts and
C r24-r27 the right-shifted parts.
114 1.1 mrg L(loopU):
115 1.1 mrg slw r9, r28, r6
116 1.1 mrg srw r24, r28, r8
117 1.1 mrg lwz r28, -4(r4)
118 1.1 mrg slw r10, r29, r6
119 1.1 mrg srw r25, r29, r8
120 1.1 mrg lwz r29, -8(r4)
121 1.1 mrg slw r11, r30, r6
122 1.1 mrg srw r26, r30, r8
123 1.1 mrg lwz r30, -12(r4)
124 1.1 mrg slw r12, r31, r6
125 1.1 mrg srw r27, r31, r8
126 1.1 mrg lwzu r31, -16(r4) C also steps r4 down by 16 bytes
127 1.1 mrg nor r24, r0, r24 C r0 is the high part carried in
128 1.1 mrg stw r24, -4(r7)
129 1.1 mrg nor r25, r9, r25
130 1.1 mrg stw r25, -8(r7)
131 1.1 mrg nor r26, r10, r26
132 1.1 mrg stw r26, -12(r7)
133 1.1 mrg nor r27, r11, r27
134 1.1 mrg stwu r27, -16(r7) C also steps r7 down by 16 bytes
135 1.1 mrg mr r0, r12 C carry the high part into the next pass
136 1.1 mrg bdnz L(loopU)
137 1.1 mrg
C Last group of four: same computation with no further loads, followed by
C the final limb, which has nothing shifted in from below.
138 1.1 mrg slw r9, r28, r6
139 1.1 mrg srw r24, r28, r8
140 1.1 mrg slw r10, r29, r6
141 1.1 mrg srw r25, r29, r8
142 1.1 mrg slw r11, r30, r6
143 1.1 mrg srw r26, r30, r8
144 1.1 mrg slw r12, r31, r6
145 1.1 mrg srw r27, r31, r8
146 1.1 mrg nor r24, r0, r24
147 1.1 mrg stw r24, -4(r7)
148 1.1 mrg nor r25, r9, r25
149 1.1 mrg stw r25, -8(r7)
150 1.1 mrg nor r26, r10, r26
151 1.1 mrg stw r26, -12(r7)
152 1.1 mrg nor r27, r11, r27
153 1.1 mrg stw r27, -16(r7)
154 1.1 mrg nor r12, r12, r12 C final limb: complement of the left-shifted part only
155 1.1 mrg stw r12, -20(r7)
156 1.1 mrg lmw r24, -32(r1) C restore registers
157 1.1 mrg blr
158 1.1 mrg EPILOGUE()
159