#! /usr/bin/env perl
# Copyright 2012-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# SHA512 for C64x+.
#
# January 2012
#
# Performance is 19 cycles per processed byte. Compared to block
# transform function from sha512.c compiled with cl6x with -mv6400+
# -o2 -DOPENSSL_SMALL_FOOTPRINT it's almost 7x faster and 2x smaller.
# Loop unroll won't make it, this implementation, any faster, because
# it's effectively dominated by SHRU||SHL pairs and you can't schedule
# more of them.
#
# !!! Note that this module uses AMR, which means that all interrupt
# service routines are expected to preserve it and for own well-being
# zero it upon entry.

# Output selection: consume command-line arguments until one looks like a
# file name with an extension; that argument, if any, names the output file.
while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
# Redirect STDOUT only when an output file was actually named, and stop
# with a diagnostic if it cannot be opened rather than carrying on after
# an unchecked open. Without a file name the code goes to inherited STDOUT.
if ($output) {
    open STDOUT,">",$output or die "can't open $output: $!";
}

# ---- register allocation ---------------------------------------------
# 64-bit quantities are kept as hi/lo 32-bit halves: "hi" halves live in
# the A register file, "lo" halves in the B register file.
#
# Note the deliberate aliasing below:
#   - $NUM (A6) is also $T1hi, hence the routine copies it to A0 on entry;
#   - $MAJhi/$MAJlo share registers with $T2hi/$T2lo;
#   - $t1hi shares a register with $Khi, and $CTXB shares one with $t1lo.
($CTXA,$INP,$NUM) = ("A4","B4","A6");            # arguments
 $K512="A3";					 # pointer into the K512 table

# Working variables a..h interleaved with their per-block context copies
# (*ctx*), 16 registers per side: A16..A31 for hi, B16..B31 for lo.
($Ahi,$Actxhi,$Bhi,$Bctxhi,$Chi,$Cctxhi,$Dhi,$Dctxhi,
 $Ehi,$Ectxhi,$Fhi,$Fctxhi,$Ghi,$Gctxhi,$Hhi,$Hctxhi)=map("A$_",(16..31));
($Alo,$Actxlo,$Blo,$Bctxlo,$Clo,$Cctxlo,$Dlo,$Dctxlo,
 $Elo,$Ectxlo,$Flo,$Fctxlo,$Glo,$Gctxlo,$Hlo,$Hctxlo)=map("B$_",(16..31));

# Round temporaries: Sigma1/sigma1, Ch, Sigma0/sigma0 and a shift scratch.
($S1hi,$CHhi,$S0hi,$t0hi)=map("A$_",(10..13));
($S1lo,$CHlo,$S0lo,$t0lo)=map("B$_",(10..13));
# T1/T2 accumulators; the lo halves are summed with ADDU into a register
# pair whose upper register ($T1carry/$T2carry) collects the carry.
($T1hi,         $T2hi)=         ("A6","A7");
($T1lo,$T1carry,$T2lo,$T2carry)=("B6","B7","B8","B9");
($Khi,$Klo)=("A9","A8");			# destination pair for K512[i]
($MAJhi,$MAJlo)=($T2hi,$T2lo);
($t1hi,$t1lo)=($Khi,"B2");
 $CTXB=$t1lo;					# B-side copy of the ctx pointer

($Xihi,$Xilo)=("A5","B5");			# circular/ring buffer

# ---- generated assembly ----------------------------------------------
# Everything up to the closing ___ is emitted verbatim (after variable
# interpolation) as the body of _sha512_block_data_order. Arguments are
# the context pointer ($CTXA), the input pointer ($INP) and the block
# count ($NUM). Layout of the routine:
#
#   prologue    return immediately if $NUM is zero; otherwise save FP,
#               A10-A13 and B10-B13, program AMR for circular addressing
#               through $Xihi/$Xilo, carve a 128-byte, 128-byte-aligned
#               ring buffer off the stack and load the context as hi/lo
#               word pairs (word index is flipped on little-endian).
#   outerloop?  once per input block: set up the loop counters and copy
#               a..h into the *ctx* registers.
#   loop0_15?   take the pre-fetched input word as T1 (byte-swapped on
#               little-endian builds) and fall into the round body.
#   loop16_79?  store T1 into the ring buffer and run one round; its tail
#               derives the next T1 from ring-buffer entries.
#   break?      add the *ctx* copies back into a..h with carry, then
#               either loop for the next block or store the context.
#   epilogue    restore saved registers, release the buffer, clear AMR.
#
# The ";;===== branch to ... is taken here" markers record where each
# previously issued branch takes effect.
$code.=<<___;
	.text

	.if	.ASSEMBLER_VERSION<7000000
	.asg	0,__TI_EABI__
	.endif
	.if	__TI_EABI__
	.nocmp
	.asg	sha512_block_data_order,_sha512_block_data_order
	.endif

	.asg	B3,RA
	.asg	A15,FP
	.asg	B15,SP

	.if	.BIG_ENDIAN
	.asg	$Khi,KHI
	.asg	$Klo,KLO
	.else
	.asg	$Khi,KLO
	.asg	$Klo,KHI
	.endif

	.global	_sha512_block_data_order
_sha512_block_data_order:
__sha512_block:
	.asmfunc stack_usage(40+128)
	MV	$NUM,A0				; reassign $NUM
||	MVK	-128,B0
  [!A0]	BNOP	RA				; if ($NUM==0) return;
|| [A0]	STW	FP,*SP--(40)			; save frame pointer
|| [A0]	MV	SP,FP
   [A0]	STDW	B13:B12,*SP[4]
|| [A0]	MVK	0x00404,B1
   [A0]	STDW	B11:B10,*SP[3]
|| [A0]	STDW	A13:A12,*FP[-3]
|| [A0]	MVKH	0x60000,B1
   [A0]	STDW	A11:A10,*SP[1]
|| [A0]	MVC	B1,AMR				; setup circular addressing
|| [A0]	ADD	B0,SP,SP			; alloca(128)
	.if	__TI_EABI__
   [A0]	AND	B0,SP,SP			; align stack at 128 bytes
|| [A0]	ADDKPC	__sha512_block,B1
|| [A0]	MVKL	\$PCR_OFFSET(K512,__sha512_block),$K512
   [A0]	MVKH	\$PCR_OFFSET(K512,__sha512_block),$K512
|| [A0]	SUBAW	SP,2,SP				; reserve two words above buffer
	.else
   [A0]	AND	B0,SP,SP			; align stack at 128 bytes
|| [A0]	ADDKPC	__sha512_block,B1
|| [A0]	MVKL	(K512-__sha512_block),$K512
   [A0]	MVKH	(K512-__sha512_block),$K512
|| [A0]	SUBAW	SP,2,SP				; reserve two words above buffer
	.endif
	ADDAW	SP,3,$Xilo
	ADDAW	SP,2,$Xihi

||	MV	$CTXA,$CTXB
	LDW	*${CTXA}[0^.LITTLE_ENDIAN],$Ahi	; load ctx
||	LDW	*${CTXB}[1^.LITTLE_ENDIAN],$Alo
||	ADD	B1,$K512,$K512
	LDW	*${CTXA}[2^.LITTLE_ENDIAN],$Bhi
||	LDW	*${CTXB}[3^.LITTLE_ENDIAN],$Blo
	LDW	*${CTXA}[4^.LITTLE_ENDIAN],$Chi
||	LDW	*${CTXB}[5^.LITTLE_ENDIAN],$Clo
	LDW	*${CTXA}[6^.LITTLE_ENDIAN],$Dhi
||	LDW	*${CTXB}[7^.LITTLE_ENDIAN],$Dlo
	LDW	*${CTXA}[8^.LITTLE_ENDIAN],$Ehi
||	LDW	*${CTXB}[9^.LITTLE_ENDIAN],$Elo
	LDW	*${CTXA}[10^.LITTLE_ENDIAN],$Fhi
||	LDW	*${CTXB}[11^.LITTLE_ENDIAN],$Flo
	LDW	*${CTXA}[12^.LITTLE_ENDIAN],$Ghi
||	LDW	*${CTXB}[13^.LITTLE_ENDIAN],$Glo
	LDW	*${CTXA}[14^.LITTLE_ENDIAN],$Hhi
||	LDW	*${CTXB}[15^.LITTLE_ENDIAN],$Hlo

	LDNDW	*$INP++,B11:B10			; pre-fetch input
	LDDW	*$K512++,$Khi:$Klo		; pre-fetch K512[0]
outerloop?:
	MVK	15,B0				; loop counters
||	MVK	64,B1
||	SUB	A0,1,A0
	MV	$Ahi,$Actxhi
||	MV	$Alo,$Actxlo
||	MV	$Bhi,$Bctxhi
||	MV	$Blo,$Bctxlo
||	MV	$Chi,$Cctxhi
||	MV	$Clo,$Cctxlo
||	MVD	$Dhi,$Dctxhi
||	MVD	$Dlo,$Dctxlo
	MV	$Ehi,$Ectxhi
||	MV	$Elo,$Ectxlo
||	MV	$Fhi,$Fctxhi
||	MV	$Flo,$Fctxlo
||	MV	$Ghi,$Gctxhi
||	MV	$Glo,$Gctxlo
||	MVD	$Hhi,$Hctxhi
||	MVD	$Hlo,$Hctxlo
loop0_15?:
	.if	.BIG_ENDIAN
	MV	B11,$T1hi
||	MV	B10,$T1lo
	.else
	SWAP4	B10,$T1hi
||	SWAP4	B11,$T1lo
	SWAP2	$T1hi,$T1hi
||	SWAP2	$T1lo,$T1lo
	.endif
loop16_79?:
	STW	$T1hi,*$Xihi++[2]
||	STW	$T1lo,*$Xilo++[2]			; X[i] = T1
||	ADD	$Hhi,$T1hi,$T1hi
||	ADDU	$Hlo,$T1lo,$T1carry:$T1lo		; T1 += h
||	SHRU	$Ehi,14,$S1hi
||	SHL	$Ehi,32-14,$S1lo
	XOR	$Fhi,$Ghi,$CHhi
||	XOR	$Flo,$Glo,$CHlo
||	ADD	KHI,$T1hi,$T1hi
||	ADDU	KLO,$T1carry:$T1lo,$T1carry:$T1lo	; T1 += K512[i]
||	SHRU	$Elo,14,$t0lo
||	SHL	$Elo,32-14,$t0hi
	XOR	$t0hi,$S1hi,$S1hi
||	XOR	$t0lo,$S1lo,$S1lo
||	AND	$Ehi,$CHhi,$CHhi
||	AND	$Elo,$CHlo,$CHlo
||	ROTL	$Ghi,0,$Hhi
||	ROTL	$Glo,0,$Hlo				; h = g
||	SHRU	$Ehi,18,$t0hi
||	SHL	$Ehi,32-18,$t0lo
	XOR	$t0hi,$S1hi,$S1hi
||	XOR	$t0lo,$S1lo,$S1lo
||	XOR	$Ghi,$CHhi,$CHhi
||	XOR	$Glo,$CHlo,$CHlo			; Ch(e,f,g) = ((f^g)&e)^g
||	ROTL	$Fhi,0,$Ghi
||	ROTL	$Flo,0,$Glo				; g = f
||	SHRU	$Elo,18,$t0lo
||	SHL	$Elo,32-18,$t0hi
	XOR	$t0hi,$S1hi,$S1hi
||	XOR	$t0lo,$S1lo,$S1lo
||	OR	$Ahi,$Bhi,$MAJhi
||	OR	$Alo,$Blo,$MAJlo
||	ROTL	$Ehi,0,$Fhi
||	ROTL	$Elo,0,$Flo				; f = e
||	SHRU	$Ehi,41-32,$t0lo
||	SHL	$Ehi,64-41,$t0hi
	XOR	$t0hi,$S1hi,$S1hi
||	XOR	$t0lo,$S1lo,$S1lo
||	AND	$Chi,$MAJhi,$MAJhi
||	AND	$Clo,$MAJlo,$MAJlo
||	ROTL	$Dhi,0,$Ehi
||	ROTL	$Dlo,0,$Elo				; e = d
||	SHRU	$Elo,41-32,$t0hi
||	SHL	$Elo,64-41,$t0lo
	XOR	$t0hi,$S1hi,$S1hi
||	XOR	$t0lo,$S1lo,$S1lo			; Sigma1(e)
||	AND	$Ahi,$Bhi,$t1hi
||	AND	$Alo,$Blo,$t1lo
||	ROTL	$Chi,0,$Dhi
||	ROTL	$Clo,0,$Dlo				; d = c
||	SHRU	$Ahi,28,$S0hi
||	SHL	$Ahi,32-28,$S0lo
	OR	$t1hi,$MAJhi,$MAJhi
||	OR	$t1lo,$MAJlo,$MAJlo			; Maj(a,b,c) = ((a|b)&c)|(a&b)
||	ADD	$CHhi,$T1hi,$T1hi
||	ADDU	$CHlo,$T1carry:$T1lo,$T1carry:$T1lo	; T1 += Ch(e,f,g)
||	ROTL	$Bhi,0,$Chi
||	ROTL	$Blo,0,$Clo				; c = b
||	SHRU	$Alo,28,$t0lo
||	SHL	$Alo,32-28,$t0hi
	XOR	$t0hi,$S0hi,$S0hi
||	XOR	$t0lo,$S0lo,$S0lo
||	ADD	$S1hi,$T1hi,$T1hi
||	ADDU	$S1lo,$T1carry:$T1lo,$T1carry:$T1lo	; T1 += Sigma1(e)
||	ROTL	$Ahi,0,$Bhi
||	ROTL	$Alo,0,$Blo				; b = a
||	SHRU	$Ahi,34-32,$t0lo
||	SHL	$Ahi,64-34,$t0hi
	XOR	$t0hi,$S0hi,$S0hi
||	XOR	$t0lo,$S0lo,$S0lo
||	ADD	$MAJhi,$T1hi,$T2hi
||	ADDU	$MAJlo,$T1carry:$T1lo,$T2carry:$T2lo	; T2 = T1+Maj(a,b,c)
||	SHRU	$Alo,34-32,$t0hi
||	SHL	$Alo,64-34,$t0lo
	XOR	$t0hi,$S0hi,$S0hi
||	XOR	$t0lo,$S0lo,$S0lo
||	ADD	$Ehi,$T1hi,$T1hi
||	ADDU	$Elo,$T1carry:$T1lo,$T1carry:$T1lo	; T1 += e
|| [B0]	BNOP	loop0_15?
||	SHRU	$Ahi,39-32,$t0lo
||	SHL	$Ahi,64-39,$t0hi
	XOR	$t0hi,$S0hi,$S0hi
||	XOR	$t0lo,$S0lo,$S0lo
|| [B0]	LDNDW	*$INP++,B11:B10				; pre-fetch input
||[!B1]	BNOP	break?
||	SHRU	$Alo,39-32,$t0hi
||	SHL	$Alo,64-39,$t0lo
	XOR	$t0hi,$S0hi,$S0hi
||	XOR	$t0lo,$S0lo,$S0lo			; Sigma0(a)
||	ADD	$T1carry,$T1hi,$Ehi
||	MV	$T1lo,$Elo				; e = T1
||[!B0]	LDW	*${Xihi}[28],$T1hi
||[!B0]	LDW	*${Xilo}[28],$T1lo			; X[i+14]
	ADD	$S0hi,$T2hi,$T2hi
||	ADDU	$S0lo,$T2carry:$T2lo,$T2carry:$T2lo	; T2 += Sigma0(a)
|| [B1]	LDDW	*$K512++,$Khi:$Klo			; pre-fetch K512[i]
	NOP						; avoid cross-path stall
	ADD	$T2carry,$T2hi,$Ahi
||	MV	$T2lo,$Alo				; a = T2
|| [B0]	SUB	B0,1,B0
;;===== branch to loop0_15? is taken here
	NOP
;;===== branch to break? is taken here
	LDW	*${Xihi}[2],$T2hi
||	LDW	*${Xilo}[2],$T2lo			; X[i+1]
||	SHRU	$T1hi,19,$S1hi
||	SHL	$T1hi,32-19,$S1lo
	SHRU	$T1lo,19,$t0lo
||	SHL	$T1lo,32-19,$t0hi
	XOR	$t0hi,$S1hi,$S1hi
||	XOR	$t0lo,$S1lo,$S1lo
||	SHRU	$T1hi,61-32,$t0lo
||	SHL	$T1hi,64-61,$t0hi
	XOR	$t0hi,$S1hi,$S1hi
||	XOR	$t0lo,$S1lo,$S1lo
||	SHRU	$T1lo,61-32,$t0hi
||	SHL	$T1lo,64-61,$t0lo
	XOR	$t0hi,$S1hi,$S1hi
||	XOR	$t0lo,$S1lo,$S1lo
||	SHRU	$T1hi,6,$t0hi
||	SHL	$T1hi,32-6,$t0lo
	XOR	$t0hi,$S1hi,$S1hi
||	XOR	$t0lo,$S1lo,$S1lo
||	SHRU	$T1lo,6,$t0lo
||	LDW	*${Xihi}[18],$T1hi
||	LDW	*${Xilo}[18],$T1lo			; X[i+9]
	XOR	$t0lo,$S1lo,$S1lo			; sigma1(Xi[i+14])

||	LDW	*${Xihi}[0],$CHhi
||	LDW	*${Xilo}[0],$CHlo			; X[i]
||	SHRU	$T2hi,1,$S0hi
||	SHL	$T2hi,32-1,$S0lo
	SHRU	$T2lo,1,$t0lo
||	SHL	$T2lo,32-1,$t0hi
	XOR	$t0hi,$S0hi,$S0hi
||	XOR	$t0lo,$S0lo,$S0lo
||	SHRU	$T2hi,8,$t0hi
||	SHL	$T2hi,32-8,$t0lo
	XOR	$t0hi,$S0hi,$S0hi
||	XOR	$t0lo,$S0lo,$S0lo
||	SHRU	$T2lo,8,$t0lo
||	SHL	$T2lo,32-8,$t0hi
	XOR	$t0hi,$S0hi,$S0hi
||	XOR	$t0lo,$S0lo,$S0lo
||	ADD	$S1hi,$T1hi,$T1hi
||	ADDU	$S1lo,$T1lo,$T1carry:$T1lo		; T1 = X[i+9]+sigma1()
|| [B1]	BNOP	loop16_79?
||	SHRU	$T2hi,7,$t0hi
||	SHL	$T2hi,32-7,$t0lo
	XOR	$t0hi,$S0hi,$S0hi
||	XOR	$t0lo,$S0lo,$S0lo
||	ADD	$CHhi,$T1hi,$T1hi
||	ADDU	$CHlo,$T1carry:$T1lo,$T1carry:$T1lo	; T1 += X[i]
||	SHRU	$T2lo,7,$t0lo
	XOR	$t0lo,$S0lo,$S0lo			; sigma0(Xi[i+1])

	ADD	$S0hi,$T1hi,$T1hi
||	ADDU	$S0lo,$T1carry:$T1lo,$T1carry:$T1lo	; T1 += sigma0()
|| [B1]	SUB	B1,1,B1
	NOP						; avoid cross-path stall
	ADD	$T1carry,$T1hi,$T1hi
;;===== branch to loop16_79? is taken here

break?:
	ADD	$Ahi,$Actxhi,$Ahi		; accumulate ctx
||	ADDU	$Alo,$Actxlo,$Actxlo:$Alo
|| [A0]	LDNDW	*$INP++,B11:B10			; pre-fetch input
|| [A0]	ADDK	-640,$K512			; rewind pointer to K512
	ADD	$Bhi,$Bctxhi,$Bhi
||	ADDU	$Blo,$Bctxlo,$Bctxlo:$Blo
|| [A0]	LDDW	*$K512++,$Khi:$Klo		; pre-fetch K512[0]
	ADD	$Chi,$Cctxhi,$Chi
||	ADDU	$Clo,$Cctxlo,$Cctxlo:$Clo
||	ADD	$Actxlo,$Ahi,$Ahi
||[!A0]	MV	$CTXA,$CTXB
	ADD	$Dhi,$Dctxhi,$Dhi
||	ADDU	$Dlo,$Dctxlo,$Dctxlo:$Dlo
||	ADD	$Bctxlo,$Bhi,$Bhi
||[!A0]	STW	$Ahi,*${CTXA}[0^.LITTLE_ENDIAN]	; save ctx
||[!A0]	STW	$Alo,*${CTXB}[1^.LITTLE_ENDIAN]
	ADD	$Ehi,$Ectxhi,$Ehi
||	ADDU	$Elo,$Ectxlo,$Ectxlo:$Elo
||	ADD	$Cctxlo,$Chi,$Chi
|| [A0]	BNOP	outerloop?
||[!A0]	STW	$Bhi,*${CTXA}[2^.LITTLE_ENDIAN]
||[!A0]	STW	$Blo,*${CTXB}[3^.LITTLE_ENDIAN]
	ADD	$Fhi,$Fctxhi,$Fhi
||	ADDU	$Flo,$Fctxlo,$Fctxlo:$Flo
||	ADD	$Dctxlo,$Dhi,$Dhi
||[!A0]	STW	$Chi,*${CTXA}[4^.LITTLE_ENDIAN]
||[!A0]	STW	$Clo,*${CTXB}[5^.LITTLE_ENDIAN]
	ADD	$Ghi,$Gctxhi,$Ghi
||	ADDU	$Glo,$Gctxlo,$Gctxlo:$Glo
||	ADD	$Ectxlo,$Ehi,$Ehi
||[!A0]	STW	$Dhi,*${CTXA}[6^.LITTLE_ENDIAN]
||[!A0]	STW	$Dlo,*${CTXB}[7^.LITTLE_ENDIAN]
	ADD	$Hhi,$Hctxhi,$Hhi
||	ADDU	$Hlo,$Hctxlo,$Hctxlo:$Hlo
||	ADD	$Fctxlo,$Fhi,$Fhi
||[!A0]	STW	$Ehi,*${CTXA}[8^.LITTLE_ENDIAN]
||[!A0]	STW	$Elo,*${CTXB}[9^.LITTLE_ENDIAN]
	ADD	$Gctxlo,$Ghi,$Ghi
||[!A0]	STW	$Fhi,*${CTXA}[10^.LITTLE_ENDIAN]
||[!A0]	STW	$Flo,*${CTXB}[11^.LITTLE_ENDIAN]
	ADD	$Hctxlo,$Hhi,$Hhi
||[!A0]	STW	$Ghi,*${CTXA}[12^.LITTLE_ENDIAN]
||[!A0]	STW	$Glo,*${CTXB}[13^.LITTLE_ENDIAN]
;;===== branch to outerloop? is taken here

	STW	$Hhi,*${CTXA}[14^.LITTLE_ENDIAN]
||	STW	$Hlo,*${CTXB}[15^.LITTLE_ENDIAN]
||	MVK	-40,B0
	ADD	FP,B0,SP			; destroy circular buffer
||	LDDW	*FP[-4],A11:A10
	LDDW	*SP[2],A13:A12
||	LDDW	*FP[-2],B11:B10
	LDDW	*SP[4],B13:B12
||	BNOP	RA
	LDW	*++SP(40),FP			; restore frame pointer
	MVK	0,B0
	MVC	B0,AMR				; clear AMR
	NOP	2				; wait till FP is committed
	.endasmfunc

	.if	__TI_EABI__
	.sect	".text:sha_asm.const"
	.else
	.sect	".const:sha_asm"
	.endif
	.align	128
K512:
	.uword	0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd
	.uword	0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc
	.uword	0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019
	.uword	0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118
	.uword	0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe
	.uword	0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2
	.uword	0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1
	.uword	0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694
	.uword	0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3
	.uword	0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65
	.uword	0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483
	.uword	0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5
	.uword	0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210
	.uword	0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4
	.uword	0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725
	.uword	0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70
	.uword	0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926
	.uword	0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df
	.uword	0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8
	.uword	0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b
	.uword	0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001
	.uword	0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30
	.uword	0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910
	.uword	0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8
	.uword	0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53
	.uword	0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8
	.uword	0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb
	.uword	0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3
	.uword	0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60
	.uword	0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec
	.uword	0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9
	.uword	0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b
	.uword	0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207
	.uword	0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178
	.uword	0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6
	.uword	0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b
	.uword	0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493
	.uword	0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c
	.uword	0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a
	.uword	0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817
	.cstring "SHA512 block transform for C64x+, CRYPTOGAMS by <appro\@openssl.org>"
	.align	4
___

# Emit the generated assembly and make sure it was actually flushed.
print $code;
close STDOUT or die "error closing STDOUT: $!";