/*
 * bn_mul_mont -- Montgomery multiplication for DEC Alpha (GNU as syntax,
 * passed through the C preprocessor for the regdef.h register names).
 *
 * Presumed C prototype (NOTE(review): confirm against the caller):
 *   int bn_mul_mont(rp, ap, bp, np, n0, num);
 *
 * In:
 *   a0  rp   destination, num 64-bit words are written
 *   a1  ap   first operand, 64-bit words
 *   a2  bp   second operand, one word bp[i] consumed per outer pass
 *   a3  np   word vector reduced against (presumably the modulus)
 *   a4  n0   pointer to one 64-bit word; it is loaded once and the value
 *            replaces the pointer in a4 (presumably the Montgomery
 *            constant -- confirm against the caller)
 *   a5  num  word count, treated as a signed 32-bit value (sextl)
 *
 * Out:
 *   v0 = 0 and nothing is written when num < 4,
 *   v0 = 1 otherwise, with the result stored at rp[0..num-1].
 *
 * Frame (48 bytes, fp-based):
 *   0(sp) ra   8(sp) s3   16(sp) s4   24(sp) s5   32(sp) fp
 * Below that frame 8*num+16 bytes of scratch ("tp") are carved out and sp
 * is rounded down to a 4096-byte boundary; fp keeps the frame address so
 * the epilogue can undo both steps with a single mov.
 *
 * Register roles inside the routine:
 *   t5       current bp word
 *   s5       low 64 bits of (accumulated low word * n0 value)
 *   t0 / t1  low / high running words of the ap*bp column
 *   t2 / t3  low / high running words of the np*s5 column
 *   t4 / t6  address of, then value of, the next ap / np word
 *   t8 / t9  low / high halves of the ap-word product in flight
 *   t10/ t11 low / high halves of the np-word product in flight
 *   t7       cursor into the scratch vector
 *   t12      scratch word being added in, also reused as the loop flag
 *   s3 / s4  outer / inner word index
 *   AT, v0   carry and borrow bits produced by cmpult
 * The argument registers a0-a4 are overwritten.
 */
#ifdef __linux__
#include <asm/regdef.h>
#else
#include <asm.h>
#include <regdef.h>
#endif

.text

.set    noat
.set    noreorder

.globl  bn_mul_mont
.align  5
.ent    bn_mul_mont
bn_mul_mont:
        lda     sp,-48(sp)
        stq     ra,0(sp)
        stq     s3,8(sp)
        stq     s4,16(sp)
        stq     s5,24(sp)
        stq     fp,32(sp)
        mov     sp,fp
        .mask   0x0400f000,-48
        .frame  fp,48,ra
        .prologue 0

        .align  4
        .set    reorder
        /* Reject num < 4 with v0 = 0; the frame above is still unwound at .Lexit. */
        sextl   a5,a5
        mov     0,v0
        cmplt   a5,4,AT
        bne     AT,.Lexit

        /*
         * Allocate the scratch vector (8*num+16 bytes, sp aligned down to
         * 4096) interleaved with the first operand loads.
         */
        ldq     t1,0(a1)        # ap[0]
        s8addq  a5,16,AT
        ldq     t4,8(a1)
        subq    sp,AT,sp
        ldq     t5,0(a2)        # bp[0]
        lda     AT,-4096(zero)  # mov   -4096,AT
        ldq     a4,0(a4)
        and     sp,AT,sp

        mulq    t1,t5,t0
        ldq     t3,0(a3)        # np[0]
        umulh   t1,t5,t1
        ldq     t6,8(a3)

        /* s5 = low 64 bits of t0 * (word loaded from a4) */
        mulq    t0,a4,s5

        mulq    t3,s5,t2
        umulh   t3,s5,t3

        /* addq followed by cmpult on the sum recovers the carry-out */
        addq    t2,t0,t2
        cmpult  t2,t0,AT
        addq    t3,AT,t3

        mulq    t4,t5,t8
        mov     2,s4
        umulh   t4,t5,t9
        mov     sp,t7

        mulq    t6,s5,t10
        s8addq  s4,a1,t4
        umulh   t6,s5,t11
        s8addq  s4,a3,t6
        /*
         * First pass (bp[0]): each trip folds one ap word times t5 and one
         * np word times s5 into the running carries and stores one word of
         * the scratch vector at -8(t7). The instruction order is hand
         * scheduled, hence .set noreorder and the nop/unop padding.
         */
.align  4
.L1st:
        .set    noreorder
        ldq     t4,0(t4)
        addl    s4,1,s4
        ldq     t6,0(t6)
        lda     t7,8(t7)

        addq    t8,t1,t0
        mulq    t4,t5,t8
        cmpult  t0,t1,AT
        addq    t10,t3,t2

        mulq    t6,s5,t10
        addq    t9,AT,t1
        cmpult  t2,t3,v0
        cmplt   s4,a5,t12

        umulh   t4,t5,t9
        addq    t11,v0,t3
        addq    t2,t0,t2
        s8addq  s4,a1,t4

        umulh   t6,s5,t11
        cmpult  t2,t0,v0
        addq    t3,v0,t3
        s8addq  s4,a3,t6

        stq     t2,-8(t7)
        nop
        unop
        bne     t12,.L1st
        .set    reorder

        /* Drain the products still in flight and store the three top words. */
        addq    t8,t1,t0
        addq    t10,t3,t2
        cmpult  t0,t1,AT
        cmpult  t2,t3,v0
        addq    t9,AT,t1
        addq    t11,v0,t3

        addq    t2,t0,t2
        cmpult  t2,t0,v0
        addq    t3,v0,t3

        stq     t2,0(t7)

        addq    t3,t1,t3
        cmpult  t3,t1,AT
        stq     t3,8(t7)
        stq     AT,16(t7)

        mov     1,s3
        /*
         * Remaining passes, s3 = 1 .. num-1: same shape as the first pass,
         * except that the word already in the scratch vector is added in
         * (t12) before s5 is derived and before each store.
         */
.align  4
.Louter:
        s8addq  s3,a2,t5
        ldq     t1,0(a1)
        ldq     t4,8(a1)
        ldq     t5,0(t5)
        ldq     t3,0(a3)
        ldq     t6,8(a3)
        ldq     t12,0(sp)

        mulq    t1,t5,t0
        umulh   t1,t5,t1

        addq    t0,t12,t0
        cmpult  t0,t12,AT
        addq    t1,AT,t1

        mulq    t0,a4,s5

        mulq    t3,s5,t2
        umulh   t3,s5,t3

        addq    t2,t0,t2
        cmpult  t2,t0,AT
        mov     2,s4
        addq    t3,AT,t3

        mulq    t4,t5,t8
        mov     sp,t7
        umulh   t4,t5,t9

        mulq    t6,s5,t10
        s8addq  s4,a1,t4
        umulh   t6,s5,t11
        /* Hand-scheduled inner loop; the #L0/#L1/#U0/#U1 tags are kept from the original. */
.align  4
.Linner:
        .set    noreorder
        ldq     t12,8(t7)       #L0
        nop                     #U1
        ldq     t4,0(t4)        #L1
        s8addq  s4,a3,t6        #U0

        ldq     t6,0(t6)        #L0
        nop                     #U1
        addq    t8,t1,t0        #L1
        lda     t7,8(t7)

        mulq    t4,t5,t8        #U1
        cmpult  t0,t1,AT        #L0
        addq    t10,t3,t2       #L1
        addl    s4,1,s4

        mulq    t6,s5,t10       #U1
        addq    t9,AT,t1        #L0
        addq    t0,t12,t0       #L1
        cmpult  t2,t3,v0        #U0

        umulh   t4,t5,t9        #U1
        cmpult  t0,t12,AT       #L0
        addq    t2,t0,t2        #L1
        addq    t11,v0,t3       #U0

        umulh   t6,s5,t11       #U1
        s8addq  s4,a1,t4        #L0
        cmpult  t2,t0,v0        #L1
        cmplt   s4,a5,t12       #U0     # borrow t12

        addq    t1,AT,t1        #L0
        addq    t3,v0,t3        #U1
        stq     t2,-8(t7)       #L1
        bne     t12,.Linner     #U0
        .set    reorder

        /* Drain the pipeline for this pass; s4 is free here and carries the low sum. */
        ldq     t12,8(t7)
        addq    t8,t1,t0
        addq    t10,t3,t2
        cmpult  t0,t1,AT
        cmpult  t2,t3,v0
        addq    t9,AT,t1
        addq    t11,v0,t3

        addq    t0,t12,t0
        cmpult  t0,t12,AT
        addq    t1,AT,t1

        ldq     t12,16(t7)
        addq    t2,t0,s4
        cmpult  s4,t0,v0
        addq    t3,v0,t3

        addq    t3,t1,t2
        stq     s4,0(t7)
        cmpult  t2,t1,t3
        addq    t2,t12,t2
        cmpult  t2,t12,AT
        addl    s3,1,s3
        addq    t3,AT,t3
        stq     t2,8(t7)
        cmplt   s3,a5,t12       # borrow t12
        stq     t3,16(t7)
        bne     t12,.Louter

        /*
         * Final reduction, step 1: rp[i] = tp[i] - np[i] - borrow for all
         * num words. t3 still holds the top overflow word stored above.
         * a1 is loaded with sp here but is not read again before return.
         */
        s8addq  a5,sp,t12       # &tp[num]
        mov     a0,a2           # put rp aside
        mov     sp,t7
        mov     sp,a1
        mov     0,t1            # clear borrow bit

.align  4
.Lsub:
        ldq     t0,0(t7)
        ldq     t2,0(a3)
        lda     t7,8(t7)
        lda     a3,8(a3)
        subq    t0,t2,t2        # tp[i]-np[i]
        cmpult  t0,t2,AT
        subq    t2,t1,t0
        cmpult  t2,t0,t1
        or      t1,AT,t1
        stq     t0,0(a0)
        cmpult  t7,t12,v0
        lda     a0,8(a0)
        bne     v0,.Lsub

        subq    t3,t1,t1        # handle upmost overflow bit
        mov     sp,t7
        mov     a2,a0           # restore rp

        /*
         * Step 2: branch-free select. When t1 is zero the difference
         * already in rp is kept, otherwise the tp word overwrites it.
         * Every tp word is zeroed as it is consumed.
         */
.align  4
.Lcopy:
        ldq     t4,0(t7)        # conditional copy
        ldq     t6,0(a0)
        lda     t7,8(t7)
        lda     a0,8(a0)
        cmoveq  t1,t6,t4
        stq     zero,-8(t7)     # zap tp
        cmpult  t7,t12,AT
        stq     t4,-8(a0)
        bne     AT,.Lcopy
        mov     1,v0

        /*
         * Epilogue: fp restores sp past the scratch area. ra is never
         * written in this routine, so its reload stays commented out.
         */
.Lexit:
        .set    noreorder
        mov     fp,sp
        /*ldq   ra,0(sp)*/
        ldq     s3,8(sp)
        ldq     s4,16(sp)
        ldq     s5,24(sp)
        ldq     fp,32(sp)
        lda     sp,48(sp)
        ret     (ra)
.end    bn_mul_mont
/*
 * NOTE(review): the "(at)" in the ident string below looks like e-mail
 * obfuscation added by a source viewer; it is kept byte-for-byte here.
 * Confirm against the upstream file before changing it.
 */
.ascii  "Montgomery Multiplication for Alpha, CRYPTOGAMS by <appro (at) openssl.org>"
.align  2