#include <machine/asm.h>

/*
 * sha256_block_data_order_zvkb_zvknha_or_zvknhb
 *
 * SHA-256 compression of consecutive 64-byte message blocks using the
 * RISC-V vector extension together with Zvkb (vrev8) and Zvknha/Zvknhb
 * (vsha2ms / vsha2ch / vsha2cl).
 *
 * Presumed C prototype (not visible in this file, confirm against the
 * caller):
 *     void sha256_block_data_order_zvkb_zvknha_or_zvknhb(
 *             uint32_t state[8], const void *in, size_t num_blocks);
 *
 * Arguments, as used by the code below:
 *     a0  base of the eight 32-bit state words a..h; read once before
 *         the loop and written once after it.
 *     a1  message pointer; 64 bytes are consumed per iteration.
 *     a2  number of 64-byte blocks.  A count of zero returns at once
 *         and leaves the state untouched.
 *
 * Registers written: a1, a2, a3, t3, t4, v0-v7, v10-v26, v30, v31 and
 * the vl/vtype configuration.  No stack is used.
 *
 * Vector register roles:
 *     v0       mask with only bit 0 set (used by vmerge)
 *     v1-v4    current 16 message-schedule words, 4 per register
 *     v5       scratch (W+K sum, then merged schedule input)
 *     v6, v7   working state as {f,e,b,a} and {h,g,d,c}
 *     v10-v25  the 64 round constants, 4 per register
 *     v26      byte offsets {20,16,4,0} for the indexed load/store
 *     v30,v31  copy of v6/v7 taken at the top of each block
 *
 * The vector instructions are emitted as pre-encoded .word values,
 * presumably so that the file still assembles with toolchains that do
 * not know these extensions.  The mnemonic in each trailing comment was
 * decoded from the encoding; the .word value is what is authoritative.
 */

    .text
    .p2align 2
    .globl sha256_block_data_order_zvkb_zvknha_or_zvknhb
    .type sha256_block_data_order_zvkb_zvknha_or_zvknhb,@function
sha256_block_data_order_zvkb_zvknha_or_zvknhb:
    # Nothing to do for an empty input.  Without this check the
    # decrement at the top of the loop would wrap a zero count around.
    beqz a2, .Lno_blocks

    .word 3439489111            # vsetivli zero, 4, e32, m1, ta, ma

    # Load the 64 round constants into v10-v25, 16 bytes at a time.
    la a3, K256                 # a3 walks the K256 table
    .word 34006279              # vle32.v v10, (a3)
    addi a3, a3, 16
    .word 34006407              # vle32.v v11, (a3)
    addi a3, a3, 16
    .word 34006535              # vle32.v v12, (a3)
    addi a3, a3, 16
    .word 34006663              # vle32.v v13, (a3)
    addi a3, a3, 16
    .word 34006791              # vle32.v v14, (a3)
    addi a3, a3, 16
    .word 34006919              # vle32.v v15, (a3)
    addi a3, a3, 16
    .word 34007047              # vle32.v v16, (a3)
    addi a3, a3, 16
    .word 34007175              # vle32.v v17, (a3)
    addi a3, a3, 16
    .word 34007303              # vle32.v v18, (a3)
    addi a3, a3, 16
    .word 34007431              # vle32.v v19, (a3)
    addi a3, a3, 16
    .word 34007559              # vle32.v v20, (a3)
    addi a3, a3, 16
    .word 34007687              # vle32.v v21, (a3)
    addi a3, a3, 16
    .word 34007815              # vle32.v v22, (a3)
    addi a3, a3, 16
    .word 34007943              # vle32.v v23, (a3)
    addi a3, a3, 16
    .word 34008071              # vle32.v v24, (a3)
    addi a3, a3, 16
    .word 34008199              # vle32.v v25, (a3)

    # H is stored as {a,b,c,d},{e,f,g,h}, but we need {f,e,b,a},{h,g,d,c}.
    # The dst vtype is e32m1 and the index vtype is e8mf4.
    # An indexed load with these four byte offsets, held in v26, does
    # the permutation:
    #     20, 16, 4, 0
    # Rather than writing four i8 elements, one little-endian 32-bit
    # value supplies all four index bytes at once:
    #     0x 00 04 10 14
    li t4, 0x00041014
    .word 3439390807            # vsetivli zero, 1, e32, m1, ta, ma
    .word 1578028375            # vmv.v.x v26, t4

    addi t3, a0, 8              # t3 = &state[2], base for the {h,g,d,c} half

    # Use index-load to get {f,e,b,a},{h,g,d,c}.
    .word 3439489111            # vsetivli zero, 4, e32, m1, ta, ma
    .word 128254727             # vluxei8.v v6, (a0), v26
    .word 128844679             # vluxei8.v v7, (t3), v26

    # Setup v0 mask for the vmerge to replace the first word (idx==0) in
    # key-scheduling.  The AVL is 4 in SHA, so a single e8 element
    # (8 mask bits) is enough to hold the mask.
    .word 3422613591            # vsetivli zero, 1, e8, m1, ta, ma
    .word 1577103447            # vmv.v.i v0, 1

    .word 3439489111            # vsetivli zero, 4, e32, m1, ta, ma

.Lround_loop:
    # One block is consumed per iteration; a2 = blocks left after this one.
    addi a2, a2, -1

    # Keep the current state as we need it later: H' = H+{a',b',c',...,h'}.
    .word 1577258839            # vmv.v.v v30, v6
    .word 1577291735            # vmv.v.v v31, v7

    # Load the 512 bits of the message block into v1-v4 and byte-swap
    # every 32-bit element.
    .word 33939591              # vle32.v v1, (a1)
    .word 1242865879            # vrev8.v v1, v1
    addi a1, a1, 16
    .word 33939719              # vle32.v v2, (a1)
    .word 1243914583            # vrev8.v v2, v2
    addi a1, a1, 16
    .word 33939847              # vle32.v v3, (a1)
    .word 1244963287            # vrev8.v v3, v3
    addi a1, a1, 16
    .word 33939975              # vle32.v v4, (a1)
    .word 1246011991            # vrev8.v v4, v4
    addi a1, a1, 16

    # Quad-round 0 (+0, Wt from oldest to newest in v1->v2->v3->v4)
    .word 44073687              # vadd.vv v5, v10, v1
    .word 3194135543            # vsha2cl.vv v7, v6, v5
    .word 3128075127            # vsha2ch.vv v6, v7, v5
    .word 1546715863            # vmerge.vvm v5, v3, v2, v0
    .word 3058835703            # vsha2ms.vv v1, v5, v4   Generate W[19:16]

    # Quad-round 1 (+1, v2->v3->v4->v1)
    .word 45155031              # vadd.vv v5, v11, v2
    .word 3194135543            # vsha2cl.vv v7, v6, v5
    .word 3128075127            # vsha2ch.vv v6, v7, v5
    .word 1547797207            # vmerge.vvm v5, v4, v3, v0
    .word 3058737527            # vsha2ms.vv v2, v5, v1   Generate W[23:20]

    # Quad-round 2 (+2, v3->v4->v1->v2)
    .word 46236375              # vadd.vv v5, v12, v3
    .word 3194135543            # vsha2cl.vv v7, v6, v5
    .word 3128075127            # vsha2ch.vv v6, v7, v5
    .word 1544684247            # vmerge.vvm v5, v1, v4, v0
    .word 3058770423            # vsha2ms.vv v3, v5, v2   Generate W[27:24]

    # Quad-round 3 (+3, v4->v1->v2->v3)
    .word 47317719              # vadd.vv v5, v13, v4
    .word 3194135543            # vsha2cl.vv v7, v6, v5
    .word 3128075127            # vsha2ch.vv v6, v7, v5
    .word 1545634519            # vmerge.vvm v5, v2, v1, v0
    .word 3058803319            # vsha2ms.vv v4, v5, v3   Generate W[31:28]

    # Quad-round 4 (+0, v1->v2->v3->v4)
    .word 48267991              # vadd.vv v5, v14, v1
    .word 3194135543            # vsha2cl.vv v7, v6, v5
    .word 3128075127            # vsha2ch.vv v6, v7, v5
    .word 1546715863            # vmerge.vvm v5, v3, v2, v0
    .word 3058835703            # vsha2ms.vv v1, v5, v4   Generate W[35:32]

    # Quad-round 5 (+1, v2->v3->v4->v1)
    .word 49349335              # vadd.vv v5, v15, v2
    .word 3194135543            # vsha2cl.vv v7, v6, v5
    .word 3128075127            # vsha2ch.vv v6, v7, v5
    .word 1547797207            # vmerge.vvm v5, v4, v3, v0
    .word 3058737527            # vsha2ms.vv v2, v5, v1   Generate W[39:36]

    # Quad-round 6 (+2, v3->v4->v1->v2)
    .word 50430679              # vadd.vv v5, v16, v3
    .word 3194135543            # vsha2cl.vv v7, v6, v5
    .word 3128075127            # vsha2ch.vv v6, v7, v5
    .word 1544684247            # vmerge.vvm v5, v1, v4, v0
    .word 3058770423            # vsha2ms.vv v3, v5, v2   Generate W[43:40]

    # Quad-round 7 (+3, v4->v1->v2->v3)
    .word 51512023              # vadd.vv v5, v17, v4
    .word 3194135543            # vsha2cl.vv v7, v6, v5
    .word 3128075127            # vsha2ch.vv v6, v7, v5
    .word 1545634519            # vmerge.vvm v5, v2, v1, v0
    .word 3058803319            # vsha2ms.vv v4, v5, v3   Generate W[47:44]

    # Quad-round 8 (+0, v1->v2->v3->v4)
    .word 52462295              # vadd.vv v5, v18, v1
    .word 3194135543            # vsha2cl.vv v7, v6, v5
    .word 3128075127            # vsha2ch.vv v6, v7, v5
    .word 1546715863            # vmerge.vvm v5, v3, v2, v0
    .word 3058835703            # vsha2ms.vv v1, v5, v4   Generate W[51:48]

    # Quad-round 9 (+1, v2->v3->v4->v1)
    .word 53543639              # vadd.vv v5, v19, v2
    .word 3194135543            # vsha2cl.vv v7, v6, v5
    .word 3128075127            # vsha2ch.vv v6, v7, v5
    .word 1547797207            # vmerge.vvm v5, v4, v3, v0
    .word 3058737527            # vsha2ms.vv v2, v5, v1   Generate W[55:52]

    # Quad-round 10 (+2, v3->v4->v1->v2)
    .word 54624983              # vadd.vv v5, v20, v3
    .word 3194135543            # vsha2cl.vv v7, v6, v5
    .word 3128075127            # vsha2ch.vv v6, v7, v5
    .word 1544684247            # vmerge.vvm v5, v1, v4, v0
    .word 3058770423            # vsha2ms.vv v3, v5, v2   Generate W[59:56]

    # Quad-round 11 (+3, v4->v1->v2->v3)
    .word 55706327              # vadd.vv v5, v21, v4
    .word 3194135543            # vsha2cl.vv v7, v6, v5
    .word 3128075127            # vsha2ch.vv v6, v7, v5
    .word 1545634519            # vmerge.vvm v5, v2, v1, v0
    .word 3058803319            # vsha2ms.vv v4, v5, v3   Generate W[63:60]

    # Quad-round 12 (+0, v1->v2->v3->v4)
    # From here on no new message-schedule words are generated: every
    # word that the remaining rounds consume (up to W[63:60]) already
    # sits in v1-v4.
    .word 56656599              # vadd.vv v5, v22, v1
    .word 3194135543            # vsha2cl.vv v7, v6, v5
    .word 3128075127            # vsha2ch.vv v6, v7, v5

    # Quad-round 13 (+1, v2->v3->v4->v1)
    .word 57737943              # vadd.vv v5, v23, v2
    .word 3194135543            # vsha2cl.vv v7, v6, v5
    .word 3128075127            # vsha2ch.vv v6, v7, v5

    # Quad-round 14 (+2, v3->v4->v1->v2)
    .word 58819287              # vadd.vv v5, v24, v3
    .word 3194135543            # vsha2cl.vv v7, v6, v5
    .word 3128075127            # vsha2ch.vv v6, v7, v5

    # Quad-round 15 (+3, v4->v1->v2->v3)
    .word 59900631              # vadd.vv v5, v25, v4
    .word 3194135543            # vsha2cl.vv v7, v6, v5
    .word 3128075127            # vsha2ch.vv v6, v7, v5

    # H' = H+{a',b',c',...,h'}
    .word 65209175              # vadd.vv v6, v30, v6
    .word 66290647              # vadd.vv v7, v31, v7
    bnez a2, .Lround_loop

    # Store {f,e,b,a},{h,g,d,c} back to {a,b,c,d},{e,f,g,h}.
    .word 128254759             # vsuxei8.v v6, (a0), v26
    .word 128844711             # vsuxei8.v v7, (t3), v26

.Lno_blocks:
    ret
    .size sha256_block_data_order_zvkb_zvknha_or_zvknhb,.-sha256_block_data_order_zvkb_zvknha_or_zvknhb

    # SHA-256 round constants K[0..63].  They are only ever read with
    # data loads, so they live in .rodata rather than among the
    # instructions in .text.
    .section .rodata
    .p2align 2
    .type K256,@object
K256:
    .word 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
    .word 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
    .word 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3
    .word 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
    .word 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc
    .word 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
    .word 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7
    .word 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
    .word 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13
    .word 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
    .word 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3
    .word 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
    .word 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5
    .word 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
    .word 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208
    .word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
    .size K256,.-K256

    # Return to the code section for anything that follows.
    .text