Home | History | Annotate | Line # | Download | only in mips
      1 #include "mips_arch.h"
      2 
      3 .text	# everything up to the .rdata directive at the end is code
      4 
        # noat: the assembler may not use $1 behind the scenes. The routines in
        # this file use $1 explicitly as their carry / compare scratch register.
      5 .set	noat
        # noreorder: instructions are emitted exactly as written, so the line
        # following each branch or jump is that branch's delay slot. Parts of
        # bn_mul_mont_internal switch to reorder mode and back again.
      6 .set	noreorder
      7 
      8 .align	5
        # bn_mul_mont -- exported entry point; a size gate in front of
        # bn_mul_mont_internal.
        #
        # Only $9 is examined here. bn_mul_mont_internal uses the same register
        # as the operand length in words (num), so the gate reads:
        #   num <  4  (signed compare)  -> return with $2 = 0, no memory touched
        #   num >= 17                   -> return with $2 = 0, no memory touched
        #   otherwise                   -> branch (no link) to bn_mul_mont_internal,
        #                                  which returns straight to the caller via $31
        # $4-$8 are passed through untouched on the accepted path.
        # $2 is the integer return-value register in the MIPS calling conventions.
        # $4 is also zeroed on the rejecting path; the reason is not visible in
        # this file.
        #
        # The code runs under .set noreorder, so the instruction after each
        # branch or jump is its delay slot and executes whether or not the branch
        # is taken.
      9 .globl	bn_mul_mont
     10 .ent	bn_mul_mont
     11 bn_mul_mont:
     12 	slt	$1,$9,4	# $1 = (num < 4), signed
     13 	bnez	$1,1f	# too short: reject
     14 	li	$2,0	# delay slot: preset return value 0 on both paths
     15 	slt	$1,$9,17	# on in-order CPU
     16 	bnez	$1,bn_mul_mont_internal	# 4 <= num < 17: let the worker do it
     17 	nop	# delay slot
     18 1:	jr	$31	# rejecting return
     19 	li	$4,0	# delay slot
     20 .end	bn_mul_mont
     21 
     22 .align	5
        # bn_mul_mont_internal -- word-serial Montgomery multiplication worker.
        # It has no .globl directive in this file; the only reference to it here is
        # the branch in bn_mul_mont.
        #
        # Inputs, as the code below reads them:
        #   $4  rp: destination, written word by word in .Lsub and .Lcopy
        #   $5  ap: first multiplicand (all words are re-read on every outer pass)
        #   $6  bp: second multiplicand (one word consumed per outer pass)
        #   $7  np: modulus words
        #   $8  pointer to a single word; that word times the low accumulator word
        #       gives the per-pass multiplier kept in $23
        #       (presumably the Montgomery constant n0 -- confirm against callers)
        #   $9  num: operand length in 64-bit words; turned into a byte count below
        # Result: $2 and $4 are both set to 1 just before the epilogue.
        #
        # Working registers:
        #   $10/$11  running low word / carry word of the ap[j]*bp[i] chain
        #   $24/$25  running low word / carry word of the np[j]*m chain
        #   $16/$17  product ap[j]*bp[i] waiting to be accumulated (lo/hi)
        #   $18/$19  product np[j]*m waiting to be accumulated (lo/hi)
        #   $12,$14  current ap[j], np[j];  $13 current bp[i];  $23 multiplier m
        #   $15      cursor into tp
        #   $20      tp word being added in; later the address &tp[num]
        #   $21      outer byte offset i;  $22 inner byte offset j
        #   $1,$2    carry / borrow / loop-condition scratch
        #
        # tp is a scratch vector carved out of the stack and addressed through $29:
        # num+2 words are reserved and num+1 (tp[0]..tp[num]) are written.
        # Carries are recovered with sltu after each daddu (sum < addend means the
        # addition wrapped).
        # The parenthesised operand lists on dmultu/mflo/mfhi are not plain
        # instruction syntax; presumably macros from mips_arch.h -- confirm there.
     23 .ent	bn_mul_mont_internal
     24 bn_mul_mont_internal:
     25 	.frame	$30,14*8,$31
     26 	.mask	0x40000000|16711680,-8	# 16711680 = 0x00ff0000 ($16-$23); 0x40000000 is $30
     27 	dsubu $29,14*8	# open a 14-word frame
     28 	sd	$30,(14-1)*8($29)
     29 	sd	$23,(14-2)*8($29)
     30 	sd	$22,(14-3)*8($29)
     31 	sd	$21,(14-4)*8($29)
     32 	sd	$20,(14-5)*8($29)
     33 	sd	$19,(14-6)*8($29)
     34 	sd	$18,(14-7)*8($29)
     35 	sd	$17,(14-8)*8($29)
     36 	sd	$16,(14-9)*8($29)
     37 	move	$30,$29	# remember the frame base; $29 is lowered and realigned below
     38 
     39 	.set	reorder
     40 	ld	$8,0($8)	# replace the pointer in $8 by the word it points at
     41 	ld	$13,0($6)	# bp[0]
     42 	ld	$12,0($5)	# ap[0]
     43 	ld	$14,0($7)	# np[0]
     44 
     45 	dsubu $29,2*8	# place for two extra words
     46 	sll	$9,3	# num: word count -> byte count (8 bytes per word)
     47 	li	$1,-4096
     48 	dsubu $29,$9	# reserve num words for tp
     49 	and	$29,$1	# round tp down to a 4096-byte boundary
     50 
        # ---- First pass (bp[0]): tp is only written, never read ----
     51 	dmultu	($12,$13)	# ap[0]*bp[0]
     52 	ld	$17,8($5)	# ap[1]
     53 	ld	$19,8($7)	# np[1]
     54 	mflo	($10,$12,$13)
     55 	mfhi	($11,$12,$13)
     56 	dmultu	($10,$8)
     57 	mflo	($23,$10,$8)	# m = low word of (low product word * word loaded via $8)
     58 
     59 	dmultu	($17,$13)	# ap[1]*bp[0], kept pending in $16/$17
     60 	mflo	($16,$17,$13)
     61 	mfhi	($17,$17,$13)
     62 
     63 	dmultu	($14,$23)	# np[0]*m
     64 	mflo	($24,$14,$23)
     65 	mfhi	($25,$14,$23)
     66 	dmultu	($19,$23)	# np[1]*m, kept pending in $18/$19
     67 	daddu	$24,$10	# low word of the sum is never stored; only its carry is kept
     68 	sltu	$1,$24,$10
     69 	daddu	$25,$1
     70 	mflo	($18,$19,$23)
     71 	mfhi	($19,$19,$23)
     72 
     73 	move	$15,$29	# tp cursor = &tp[0]
     74 	li	$22,2*8	# j starts at word index 2 (byte offset 16)
     75 .align	4
        # Each trip accumulates the pending products for index j-1, fetches the
        # products for index j, and stores one result word one slot lower in tp.
     76 .L1st:
     77 	.set	noreorder
     78 	daddu $12,$5,$22	# &ap[j]
     79 	daddu $14,$7,$22	# &np[j]
     80 	ld	$12,($12)
     81 	ld	$14,($14)
     82 
     83 	dmultu	($12,$13)	# ap[j]*bp[0]; the adds that follow use only older products
     84 	daddu	$10,$16,$11	# a-chain: pending lo + previous carry word
     85 	daddu	$24,$18,$25	# n-chain: pending lo + previous carry word
     86 	sltu	$1,$10,$11
     87 	sltu	$2,$24,$25
     88 	daddu	$11,$17,$1	# new carry words = pending hi + carry bit
     89 	daddu	$25,$19,$2
     90 	mflo	($16,$12,$13)
     91 	mfhi	($17,$12,$13)
     92 
     93 	daddu	$24,$10	# combine the two chains
     94 	sltu	$1,$24,$10
     95 	dmultu	($14,$23)	# np[j]*m
     96 	daddu	$25,$1
     97 	addu	$22,8	# j advances by one word
     98 	sd	$24,($15)	# store result word
     99 	sltu	$2,$22,$9	# more words left?
    100 	mflo	($18,$14,$23)
    101 	mfhi	($19,$14,$23)
    102 
    103 	bnez	$2,.L1st
    104 	daddu $15,8	# delay slot: advance tp cursor
    105 	.set	reorder
    106 
        # Drain the products still pending after the loop.
    107 	daddu	$10,$16,$11
    108 	sltu	$1,$10,$11
    109 	daddu	$11,$17,$1
    110 
    111 	daddu	$24,$18,$25
    112 	sltu	$2,$24,$25
    113 	daddu	$25,$19,$2
    114 	daddu	$24,$10
    115 	sltu	$1,$24,$10
    116 	daddu	$25,$1
    117 
    118 	sd	$24,($15)	# tp[num-2]
    119 
    120 	daddu	$25,$11	# add the two final carry words
    121 	sltu	$1,$25,$11
    122 	sd	$25,8($15)	# tp[num-1]
    123 	sd	$1,2*8($15)	# tp[num] = carry out of that addition
    124 
    125 	li	$21,8	# i starts at word index 1 (byte offset 8)
    126 .align	4
        # ---- Remaining passes (bp[1] .. bp[num-1]) ----
        # Same structure as the first pass, except that the tp word left by the
        # previous pass is added into the a-chain at every position.
    127 .Louter:
    128 	daddu $13,$6,$21	# &bp[i]
    129 	ld	$13,($13)
    130 	ld	$12,($5)	# ap[0]
    131 	ld	$17,8($5)	# ap[1]
    132 	ld	$20,($29)	# tp[0]
    133 
    134 	dmultu	($12,$13)	# ap[0]*bp[i]
    135 	ld	$14,($7)	# np[0]
    136 	ld	$19,8($7)	# np[1]
    137 	mflo	($10,$12,$13)
    138 	mfhi	($11,$12,$13)
    139 	daddu	$10,$20	# + tp[0]
    140 	dmultu	($10,$8)
    141 	sltu	$1,$10,$20
    142 	daddu	$11,$1
    143 	mflo	($23,$10,$8)	# m for this pass
    144 
    145 	dmultu	($17,$13)	# ap[1]*bp[i], pending
    146 	mflo	($16,$17,$13)
    147 	mfhi	($17,$17,$13)
    148 
    149 	dmultu	($14,$23)	# np[0]*m
    150 	mflo	($24,$14,$23)
    151 	mfhi	($25,$14,$23)
    152 
    153 	dmultu	($19,$23)	# np[1]*m, pending
    154 	daddu	$24,$10	# low word of the sum is not stored; only its carry is kept
    155 	sltu	$1,$24,$10
    156 	daddu	$25,$1
    157 	mflo	($18,$19,$23)
    158 	mfhi	($19,$19,$23)
    159 
    160 	move	$15,$29	# tp cursor = &tp[0]
    161 	li	$22,2*8	# j starts at word index 2
    162 	ld	$20,8($15)	# tp[1]
    163 .align	4
    164 .Linner:
    165 	.set	noreorder
    166 	daddu $12,$5,$22	# &ap[j]
    167 	daddu $14,$7,$22	# &np[j]
    168 	ld	$12,($12)
    169 	ld	$14,($14)
    170 
    171 	dmultu	($12,$13)	# ap[j]*bp[i]
    172 	daddu	$10,$16,$11
    173 	daddu	$24,$18,$25
    174 	sltu	$1,$10,$11
    175 	sltu	$2,$24,$25
    176 	daddu	$11,$17,$1
    177 	daddu	$25,$19,$2
    178 	mflo	($16,$12,$13)
    179 	mfhi	($17,$12,$13)
    180 
    181 	daddu	$10,$20	# add the tp word loaded on the previous trip
    182 	addu	$22,8	# j advances by one word
    183 	dmultu	($14,$23)	# np[j]*m
    184 	sltu	$1,$10,$20
    185 	daddu	$24,$10	# combine the two chains
    186 	daddu	$11,$1
    187 	sltu	$2,$24,$10
    188 	ld	$20,2*8($15)	# fetch the tp word for the next trip before its slot is overwritten
    189 	daddu	$25,$2
    190 	sltu	$1,$22,$9	# more words left?
    191 	mflo	($18,$14,$23)
    192 	mfhi	($19,$14,$23)
    193 	sd	$24,($15)	# store result word
    194 	bnez	$1,.Linner
    195 	daddu $15,8	# delay slot: advance tp cursor
    196 	.set	reorder
    197 
        # Drain the pending products and fold in the two top tp words.
    198 	daddu	$10,$16,$11
    199 	sltu	$1,$10,$11
    200 	daddu	$11,$17,$1
    201 	daddu	$10,$20
    202 	sltu	$2,$10,$20
    203 	daddu	$11,$2
    204 
    205 	ld	$20,2*8($15)	# tp[num], the top word left by the previous pass
    206 	daddu	$24,$18,$25
    207 	sltu	$1,$24,$25
    208 	daddu	$25,$19,$1
    209 	daddu	$24,$10
    210 	sltu	$2,$24,$10
    211 	daddu	$25,$2
    212 	sd	$24,($15)	# tp[num-2]
    213 
    214 	daddu	$24,$25,$11	# sum of the two final carry words
    215 	sltu	$25,$24,$11	# $25 now collects the carry out
    216 	daddu	$24,$20	# + old tp[num]
    217 	sltu	$1,$24,$20
    218 	daddu	$25,$1
    219 	sd	$24,8($15)	# tp[num-1]
    220 	sd	$25,2*8($15)	# tp[num]; $25 is read again after .Lsub
    221 
    222 	addu	$21,8	# i advances by one word
    223 	sltu	$2,$21,$9
    224 	bnez	$2,.Louter	# reorder mode is active: the assembler fills this delay slot
    225 
        # ---- Final reduction: rp = tp - np, then pick tp or rp without branching ----
    226 	.set	noreorder
    228 	daddu $20,$29,$9	# &tp[num]
    229 	move	$15,$29	# tp cursor = &tp[0]
    230 	move	$5,$29	# NOTE(review): $5 is not read again in this routine
    231 	li	$11,0		# clear borrow bit
    232 
    233 .align	4
    234 .Lsub:	ld	$10,($15)	# tp[i]
    235 	ld	$24,($7)	# np[i]
    236 	daddu $15,8
    237 	daddu $7,8
    238 	dsubu	$24,$10,$24	# tp[i]-np[i]
    239 	sgtu	$1,$24,$10	# difference > minuend means this subtraction borrowed
    240 	dsubu	$10,$24,$11	# take off the incoming borrow
    241 	sgtu	$11,$10,$24	# borrow produced by that second subtraction
    242 	sd	$10,($4)	# rp[i] = difference
    243 	or	$11,$1	# outgoing borrow = either of the two
    244 	sltu	$1,$15,$20	# stop once the cursor reaches &tp[num]
    245 	bnez	$1,.Lsub
    246 	daddu $4,8	# delay slot: advance rp
    247 
    248 	dsubu	$11,$25,$11	# handle upmost overflow bit
    249 	move	$15,$29	# rewind tp cursor
    250 	dsubu $4,$9	# restore rp
    251 	not	$25,$11	# $11 selects tp words, $25 (its complement) selects rp words
    252 
        # Mask-based select; also overwrites tp[0]..tp[num-1] with zero.
    253 .Lcopy:	ld	$14,($15)	# conditional move
    254 	ld	$12,($4)
    255 	sd	$0,($15)	# wipe the scratch word just read
    256 	daddu $15,8
    257 	and	$14,$11
    258 	and	$12,$25
    259 	or	$12,$14
    260 	sltu	$1,$15,$20
    261 	sd	$12,($4)	# rp[i] = selected word
    262 	bnez	$1,.Lcopy
    263 	daddu $4,8	# delay slot: advance rp
    264 
    265 	li	$4,1
    266 	li	$2,1	# return value 1
    267 
        # Epilogue. noreorder is already in force from the directive before .Lsub.
    268 	.set	noreorder
    269 	move	$29,$30	# drop tp and return to the frame base saved in the prologue
    270 	ld	$30,(14-1)*8($29)
    271 	ld	$23,(14-2)*8($29)
    272 	ld	$22,(14-3)*8($29)
    273 	ld	$21,(14-4)*8($29)
    274 	ld	$20,(14-5)*8($29)
    275 	ld	$19,(14-6)*8($29)
    276 	ld	$18,(14-7)*8($29)
    277 	ld	$17,(14-8)*8($29)
    278 	ld	$16,(14-9)*8($29)
    279 	jr	$31
    280 	daddu $29,14*8	# delay slot: release the 14-word frame
    281 .end	bn_mul_mont_internal
    282 .rdata	# read-only data section
        # Attribution string, NUL-terminated by .asciiz. It carries no label, so
        # no code in this file can reference it.
    283 .asciiz	"Montgomery Multiplication for MIPS, CRYPTOGAMS by <appro (at) openssl.org>"
    284