/*	$NetBSD: aes_neon_32.S,v 1.11 2020/09/10 11:31:03 riastradh Exp $	*/

/*-
 * Copyright (c) 2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <arm/asm.h>

RCSID("$NetBSD: aes_neon_32.S,v 1.11 2020/09/10 11:31:03 riastradh Exp $")

	.fpu	neon

	/*
	 * PC-relative offset of the constant tables.  Both routines load
	 * this word and add its own address (obtained with adr) to form
	 * the address of .Lconstants without an absolute relocation.
	 */
	.text
	.p2align 2
.Lconstants_addr:
	.long	.Lconstants - .

	/*
	 * Constant tables.
	 *
	 * The section is aligned to 32 bytes and every table below is a
	 * multiple of 32 bytes long (inv and inva together make 32), so
	 * each table starts on a 32-byte boundary, which the `:256'
	 * alignment hints on the vld1.8 loads below rely on.  Keep that
	 * property when adding or resizing tables.
	 */
	.section .rodata
	.p2align 5
.Lconstants:

.Linv_inva:	/* inv and inva must be consecutive */
	.type	inv,_ASM_TYPE_OBJECT
inv:
	.byte	0x80,0x01,0x08,0x0D,0x0F,0x06,0x05,0x0E
	.byte	0x02,0x0C,0x0B,0x0A,0x09,0x03,0x07,0x04
END(inv)

	.type	inva,_ASM_TYPE_OBJECT
inva:
	.byte	0x80,0x07,0x0B,0x0F,0x06,0x0A,0x04,0x01
	.byte	0x09,0x08,0x05,0x02,0x0C,0x0E,0x0D,0x03
END(inva)

	/*
	 * mc[i] is a 32-byte pair of byte permutations: 16 bytes
	 * `forward' followed by 16 bytes `backward'.  aes_neon_enc1
	 * indexes it as mc + 32*rmod4; aes_neon_dec1 starts from
	 * .Lmc_forward_3 only.
	 */
	.type	mc,_ASM_TYPE_OBJECT
mc:
	.byte	0x01,0x02,0x03,0x00,0x05,0x06,0x07,0x04	/* 0 forward */
	.byte	0x09,0x0A,0x0B,0x08,0x0D,0x0E,0x0F,0x0C
	.byte	0x03,0x00,0x01,0x02,0x07,0x04,0x05,0x06	/* 0 backward */
	.byte	0x0B,0x08,0x09,0x0A,0x0F,0x0C,0x0D,0x0E
	.byte	0x05,0x06,0x07,0x04,0x09,0x0A,0x0B,0x08	/* 1 forward */
	.byte	0x0D,0x0E,0x0F,0x0C,0x01,0x02,0x03,0x00
	.byte	0x0F,0x0C,0x0D,0x0E,0x03,0x00,0x01,0x02	/* 1 backward */
	.byte	0x07,0x04,0x05,0x06,0x0B,0x08,0x09,0x0A
	.byte	0x09,0x0A,0x0B,0x08,0x0D,0x0E,0x0F,0x0C	/* 2 forward */
	.byte	0x01,0x02,0x03,0x00,0x05,0x06,0x07,0x04
	.byte	0x0B,0x08,0x09,0x0A,0x0F,0x0C,0x0D,0x0E	/* 2 backward */
	.byte	0x03,0x00,0x01,0x02,0x07,0x04,0x05,0x06
.Lmc_forward_3:
	.byte	0x0D,0x0E,0x0F,0x0C,0x01,0x02,0x03,0x00	/* 3 forward */
	.byte	0x05,0x06,0x07,0x04,0x09,0x0A,0x0B,0x08
	.byte	0x07,0x04,0x05,0x06,0x0B,0x08,0x09,0x0A	/* 3 backward */
	.byte	0x0F,0x0C,0x0D,0x0E,0x03,0x00,0x01,0x02
END(mc)

	/* sr[i] is a 16-byte permutation; indexed as sr + 16*i. */
	.type	sr,_ASM_TYPE_OBJECT
sr:
	.byte	0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07	/* 0 */
	.byte	0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F

	.byte	0x00,0x05,0x0A,0x0F,0x04,0x09,0x0E,0x03	/* 1 */
	.byte	0x08,0x0D,0x02,0x07,0x0C,0x01,0x06,0x0B

	.byte	0x00,0x09,0x02,0x0B,0x04,0x0D,0x06,0x0F	/* 2 */
	.byte	0x08,0x01,0x0A,0x03,0x0C,0x05,0x0E,0x07

	.byte	0x00,0x0D,0x0A,0x07,0x04,0x01,0x0E,0x0B	/* 3 */
	.byte	0x08,0x05,0x02,0x0F,0x0C,0x09,0x06,0x03
END(sr)

	/*
	 * The remaining tables are pairs of 16-entry byte tables; the
	 * code looks up the first half with one 4-bit index vector and
	 * the second half with another, then XORs the results.
	 */
	.type	ipt,_ASM_TYPE_OBJECT
ipt:
	.byte	0x00,0x70,0x2A,0x5A,0x98,0xE8,0xB2,0xC2	/* lo */
	.byte	0x08,0x78,0x22,0x52,0x90,0xE0,0xBA,0xCA
	.byte	0x00,0x4D,0x7C,0x31,0x7D,0x30,0x01,0x4C	/* hi */
	.byte	0x81,0xCC,0xFD,0xB0,0xFC,0xB1,0x80,0xCD
END(ipt)

	.type	sb1,_ASM_TYPE_OBJECT
sb1:
	.byte	0x00,0x3E,0x50,0xCB,0x8F,0xE1,0x9B,0xB1	/* 0 */
	.byte	0x44,0xF5,0x2A,0x14,0x6E,0x7A,0xDF,0xA5
	.byte	0x00,0x23,0xE2,0xFA,0x15,0xD4,0x18,0x36	/* 1 */
	.byte	0xEF,0xD9,0x2E,0x0D,0xC1,0xCC,0xF7,0x3B
END(sb1)

	.type	sb2,_ASM_TYPE_OBJECT
sb2:
	.byte	0x00,0x24,0x71,0x0B,0xC6,0x93,0x7A,0xE2	/* 0 */
	.byte	0xCD,0x2F,0x98,0xBC,0x55,0xE9,0xB7,0x5E
	.byte	0x00,0x29,0xE1,0x0A,0x40,0x88,0xEB,0x69	/* 1 */
	.byte	0x4A,0x23,0x82,0xAB,0xC8,0x63,0xA1,0xC2
END(sb2)

	.type	sbo,_ASM_TYPE_OBJECT
sbo:
	.byte	0x00,0xC7,0xBD,0x6F,0x17,0x6D,0xD2,0xD0	/* 0 */
	.byte	0x78,0xA8,0x02,0xC5,0x7A,0xBF,0xAA,0x15
	.byte	0x00,0x6A,0xBB,0x5F,0xA5,0x74,0xE4,0xCF	/* 1 */
	.byte	0xFA,0x35,0x2B,0x41,0xD1,0x90,0x1E,0x8E
END(sbo)

	.type	dipt,_ASM_TYPE_OBJECT
dipt:
	.byte	0x00,0x5F,0x54,0x0B,0x04,0x5B,0x50,0x0F	/* lo */
	.byte	0x1A,0x45,0x4E,0x11,0x1E,0x41,0x4A,0x15
	.byte	0x00,0x65,0x05,0x60,0xE6,0x83,0xE3,0x86	/* hi */
	.byte	0x94,0xF1,0x91,0xF4,0x72,0x17,0x77,0x12
END(dipt)

	.type	dsb9,_ASM_TYPE_OBJECT
dsb9:
	.byte	0x00,0xD6,0x86,0x9A,0x53,0x03,0x1C,0x85	/* 0 */
	.byte	0xC9,0x4C,0x99,0x4F,0x50,0x1F,0xD5,0xCA
	.byte	0x00,0x49,0xD7,0xEC,0x89,0x17,0x3B,0xC0	/* 1 */
	.byte	0x65,0xA5,0xFB,0xB2,0x9E,0x2C,0x5E,0x72
END(dsb9)

	.type	dsbd,_ASM_TYPE_OBJECT
dsbd:
	.byte	0x00,0xA2,0xB1,0xE6,0xDF,0xCC,0x57,0x7D	/* 0 */
	.byte	0x39,0x44,0x2A,0x88,0x13,0x9B,0x6E,0xF5
	.byte	0x00,0xCB,0xC6,0x24,0xF7,0xFA,0xE2,0x3C	/* 1 */
	.byte	0xD3,0xEF,0xDE,0x15,0x0D,0x18,0x31,0x29
END(dsbd)

	.type	dsbb,_ASM_TYPE_OBJECT
dsbb:
	.byte	0x00,0x42,0xB4,0x96,0x92,0x64,0x22,0xD0	/* 0 */
	.byte	0x04,0xD4,0xF2,0xB0,0xF6,0x46,0x26,0x60
	.byte	0x00,0x67,0x59,0xCD,0xA6,0x98,0x94,0xC1	/* 1 */
	.byte	0x6B,0xAA,0x55,0x32,0x3E,0x0C,0xFF,0xF3
END(dsbb)

	.type	dsbe,_ASM_TYPE_OBJECT
dsbe:
	.byte	0x00,0xD0,0xD4,0x26,0x96,0x92,0xF2,0x46	/* 0 */
	.byte	0xB0,0xF6,0xB4,0x64,0x04,0x60,0x42,0x22
	.byte	0x00,0xC1,0xAA,0xFF,0xCD,0xA6,0x55,0x0C	/* 1 */
	.byte	0x32,0x3E,0x59,0x98,0x6B,0xF3,0x67,0x94
END(dsbe)

	.type	dsbo,_ASM_TYPE_OBJECT
dsbo:
	.byte	0x00,0x40,0xF9,0x7E,0x53,0xEA,0x87,0x13	/* 0 */
	.byte	0x2D,0x3E,0x94,0xD4,0xB9,0x6D,0xAA,0xC7
	.byte	0x00,0x1D,0x44,0x93,0x0F,0x56,0xD7,0x12	/* 1 */
	.byte	0x9C,0x8E,0xC5,0xD8,0x59,0x81,0x4B,0xCA
END(dsbo)

/*
 * aes_neon_enc1(enc, x, nrounds)
 *
 *	Encrypt the single 16-byte block x with the round keys at enc
 *	and return the result.  nrounds + 1 round keys of 16 bytes each
 *	are read from enc, which must be 16-byte aligned (`:128' loads).
 *	nrounds must be nonzero: the round counter is decremented
 *	before it is tested.
 *
 *	With -mfloat-abi=hard:
 *
 * uint8x16_t@q0
 * aes_neon_enc1(const struct aesenc *enc@r0, uint8x16_t x@q0,
 *     unsigned nrounds@r1)
 *
 *	With -mfloat-abi=soft(fp) (i.e., __SOFTFP__):
 *
 * uint8x16_t@(r0,r1,r2,r3)
 * aes_neon_enc1(const struct aesenc *enc@r0,
 *     uint8x16_t x@(r2,r3,sp[0],sp[4]), nrounds@sp[8])
 */
ENTRY(aes_neon_enc1)
#ifdef __SOFTFP__
	/* Fetch the stack arguments before the pushes below move sp. */
#ifdef __ARM_BIG_ENDIAN
	vmov	d0, r3, r2		/* d0 := x lo */
#else
	vmov	d0, r2, r3		/* d0 := x lo */
#endif
	vldr	d1, [sp]		/* d1 := x hi */
	ldr	r1, [sp, #8]		/* r1 := nrounds */
#endif
	/*
	 * NOTE(review): r5 is saved and restored but not otherwise
	 * used in this routine; presumably it keeps the number of
	 * words pushed even -- confirm before removing.
	 */
	push	{r4, r5, r6, r8, r10, lr}
	vpush	{d8-d15}		/* q4-q7 are overwritten below */

	/*
	 * r0: rk, post-incremented by each round-key load
	 * r1: rounds remaining
	 * r3: rmod4
	 * r4: sb1 address during setup, then mc
	 * ip: .Lconstants (once computed)
	 * r6,r8,r10: temporaries
	 * q0={d0-d1}: x/ak/A
	 * q1={d2-d3}: 0x0f0f...
	 * q2={d4-d5}: lo/k/j/io
	 * q3={d6-d7}: hi/i/jo
	 * q4={d8-d9}: iptlo
	 * q5={d10-d11}: ipthi
	 * q6={d12-d13}: sb1[0]/sbo[0]
	 * q7={d14-d15}: sb1[1]/sbo[1]
	 * q8={d16-d17}: sb2[0]
	 * q9={d18-d19}: sb2[1]
	 * q10={d20-d21}: inv
	 * q11={d22-d23}: inva
	 * q12={d24-d25}: ir/iak/iakr/sb1_0(io)/mc[rmod4].forward
	 * q13={d26-d27}: jr/jak/jakr/sb1_1(jo)/mc[rmod4].backward
	 * q14={d28-d29}: rk/A2/A2_B_D
	 * q15={d30-d31}: A2_B/sr[rmod4]
	 */

	/* ip := .Lconstants - .Lconstants_addr, r10 := .Lconstants_addr */
	ldr	ip, .Lconstants_addr
	adr	r10, .Lconstants_addr

	vld1.8	{q14}, [r0 :128]!	/* q14 = *rk++ */
	movw	r3, #0			/* rmod4 := 0 */
	vmov.i8	q1, #0x0f

	/* ip := .Lconstants */
	add	ip, ip, r10

	/* (q4, q5) := (iptlo, ipthi) */
	add	r6, ip, #(ipt - .Lconstants)
	vld1.8	{q4-q5}, [r6 :256]

	/* load the rest of the constants */
	add	r4, ip, #(sb1 - .Lconstants)
	add	r6, ip, #(sb2 - .Lconstants)
	add	r8, ip, #(.Linv_inva - .Lconstants)
	vld1.8	{q6-q7}, [r4 :256]	/* q6 = sb1[0], q7 = sb1[1] */
	vld1.8	{q8-q9}, [r6 :256]	/* q8 = sb2[0], q9 = sb2[1] */
	vld1.8	{q10-q11}, [r8 :256]	/* q10 = inv, q11 = inva */

	/* r4 := mc */
	add	r4, ip, #(mc - .Lconstants)

	/* (q2, q3) := (lo, hi) */
	vshr.u8	q3, q0, #4
	vand	q2, q0, q1		/* q2 := x & 0x0f0f... */
	vand	q3, q3, q1		/* q3 := (x >> 4) & 0x0f0f... */

	/* (q2, q3) := (iptlo(lo), ipthi(hi)) */
	vtbl.8	d4, {q4}, d4
	vtbl.8	d5, {q4}, d5
	vtbl.8	d6, {q5}, d6
	vtbl.8	d7, {q5}, d7

	/* q0 := rk[0] + iptlo(lo) + ipthi(hi) */
	veor	q0, q14, q2
	veor	q0, q0, q3

	/* Enter the loop at SubBytes; the first pass skips the mixing at 1. */
	b	2f

	_ALIGN_TEXT
1:	vld1.8	{q14}, [r0 :128]!	/* q14 = *rk++ */

	/* q0 := A = rk[i] + sb1_0(io) + sb1_1(jo) */
	vtbl.8	d24, {q6}, d4
	vtbl.8	d25, {q6}, d5
	vtbl.8	d26, {q7}, d6
	vtbl.8	d27, {q7}, d7
	veor	q0, q14, q12
	veor	q0, q0, q13

	/* q14 := A2 = sb2_0[io] + sb2_1[jo] */
	vtbl.8	d24, {q8}, d4
	vtbl.8	d25, {q8}, d5
	vtbl.8	d26, {q9}, d6
	vtbl.8	d27, {q9}, d7
	add	r6, r4, r3, lsl #5	/* r6 := &mc[rmod4] */
	veor	q14, q12, q13

	/* (q12, q13) := (mc[rmod4].forward, mc[rmod4].backward) */
	vld1.8	{q12-q13}, [r6 :256]

	/* q15 := A2_B = A2 + A(mcf) */
	vtbl.8	d30, {q0}, d24
	vtbl.8	d31, {q0}, d25
	veor	q15, q15, q14

	/* q14 := A2_B_D = A2_B + A(mcb) */
	vtbl.8	d28, {q0}, d26
	vtbl.8	d29, {q0}, d27
	veor	q14, q14, q15

	/* q0 := x = A2_B_D + A2_B(mcf) */
	vtbl.8	d0, {q15}, d24
	vtbl.8	d1, {q15}, d25
	veor	q0, q0, q14

2:	/*
	 * SubBytes
	 */

	/* (q2, q3) := (k, i) */
	vshr.u8	q3, q0, #4
	vand	q2, q0, q1		/* q2 := x & 0x0f0f... */
	vand	q3, q3, q1		/* q3 := (x >> 4) & 0x0f0f... */

	/* q0 := a/k */
	vtbl.8	d0, {q11}, d4
	vtbl.8	d1, {q11}, d5

	/* q2 := j = i + k */
	veor	q2, q3, q2

	/* q12 := ir = 1/i */
	vtbl.8	d24, {q10}, d6
	vtbl.8	d25, {q10}, d7

	/* q13 := jr = 1/j */
	vtbl.8	d26, {q10}, d4
	vtbl.8	d27, {q10}, d5

	/* q12 := iak = 1/i + a/k */
	veor	q12, q12, q0

	/* q13 := jak = 1/j + a/k */
	veor	q13, q13, q0

	/* q12 := iakr = 1/(1/i + a/k) */
	vtbl.8	d24, {q10}, d24
	vtbl.8	d25, {q10}, d25

	/* q13 := jakr = 1/(1/j + a/k) */
	vtbl.8	d26, {q10}, d26
	vtbl.8	d27, {q10}, d27

	/* q2 := io = j + 1/(1/i + a/k) */
	veor	q2, q2, q12

	/* q3 := jo = i + 1/(1/j + a/k) */
	veor	q3, q3, q13

	/*
	 * advance round: rmod4 := (rmod4 + 1) & 3, nrounds--.  The
	 * `and' does not set flags, so bne tests the result of subs.
	 */
	add	r3, r3, #1
	subs	r1, r1, #1
	and	r3, r3, #3
	bne	1b

	/* (q6, q7, q15) := (sbo[0], sbo[1], sr[rmod4]) */
	add	r8, ip, #(sr - .Lconstants)
	add	r6, ip, #(sbo - .Lconstants)
	add	r8, r8, r3, lsl #4	/* r8 := &sr[rmod4] */
	vld1.8	{q6-q7}, [r6 :256]
	vld1.8	{q15}, [r8 :128]

	vld1.8	{q14}, [r0 :128]!	/* q14 = *rk++ */

	/* (q2, q3) := (sbo_0(io), sbo_1(jo)) */
	vtbl.8	d4, {q6}, d4
	vtbl.8	d5, {q6}, d5
	vtbl.8	d6, {q7}, d6
	vtbl.8	d7, {q7}, d7

	/* q2 := x = rk[nr] + sbo_0(io) + sbo_1(jo) */
	veor	q2, q2, q14
	veor	q2, q2, q3

	/* q0 := x(sr[rmod4]) */
	vtbl.8	d0, {q2}, d30
	vtbl.8	d1, {q2}, d31

	vpop	{d8-d15}
	pop	{r4, r5, r6, r8, r10, lr}
#ifdef __SOFTFP__
	/* Return the result in (r0,r1,r2,r3) for the soft-float ABI. */
#ifdef __ARM_BIG_ENDIAN
	vmov	r1, r0, d0
	vmov	r3, r2, d1
#else
	vmov	r0, r1, d0
	vmov	r2, r3, d1
#endif
#endif
	bx	lr
END(aes_neon_enc1)

/*
 * aes_neon_dec1(dec, x, nrounds)
 *
 *	Decrypt the single 16-byte block x with the round keys at dec
 *	and return the result.  nrounds + 1 round keys of 16 bytes each
 *	are read from dec, which must be 16-byte aligned (`:128' loads).
 *	nrounds must be nonzero: the round counter is decremented
 *	before it is tested.
 *
 *	With -mfloat-abi=hard:
 *
 * uint8x16_t@q0
 * aes_neon_dec1(const struct aesdec *dec@r0, uint8x16_t x@q0,
 *     unsigned nrounds@r1)
 *
 *	With -mfloat-abi=soft(fp) (i.e., __SOFTFP__):
 *
 * uint8x16_t@(r0,r1,r2,r3)
 * aes_neon_dec1(const struct aesdec *dec@r0,
 *     uint8x16_t x@(r2,r3,sp[0],sp[4]), nrounds@sp[8])
 */
ENTRY(aes_neon_dec1)
#ifdef __SOFTFP__
	/* Fetch the stack arguments before the pushes below move sp. */
#ifdef __ARM_BIG_ENDIAN
	vmov	d0, r3, r2		/* d0 := x lo */
#else
	vmov	d0, r2, r3		/* d0 := x lo */
#endif
	vldr	d1, [sp]		/* d1 := x hi */
	ldr	r1, [sp, #8]		/* r1 := nrounds */
#endif
	push	{r4, r5, r6, r8, r10, lr}
	vpush	{d8-d15}		/* q4-q7 are overwritten below */

	/*
	 * r0: rk, post-incremented by each round-key load
	 * r1: rounds remaining
	 * r3: 3 & ~(nrounds - 1)
	 * r4: dsbb and dsb9 addresses during setup, then dsbd
	 * r5: dsbe
	 * ip: .Lconstants (once computed)
	 * r6,r8,r10: temporaries
	 * q0={d0-d1}: x/ak
	 * q1={d2-d3}: 0x0f0f...
	 * q2={d4-d5}: lo/k/j/io
	 * q3={d6-d7}: hi/i/jo
	 * q4={d8-d9}: diptlo/dsb9[0]
	 * q5={d10-d11}: dipthi/dsb9[1]
	 * q6={d12-d13}: dsbb[0]/dsbo[0]
	 * q7={d14-d15}: dsbb[1]/dsbo[1]
	 * q8={d16-d17}: dsbd[0]/dsbe[0]
	 * q9={d18-d19}: dsbd[1]/dsbe[1]
	 * q10={d20-d21}: inv
	 * q11={d22-d23}: inva
	 * q12={d24-d25}: ir/iak/iakr/dsbX_0(io)
	 * q13={d26-d27}: jr/jak/jakr/dsbX_1(jo)
	 * q14={d28-d29}: rk/xmc
	 * q15={d30-d31}: mc/sr[3 & ~(nrounds - 1)]
	 */

	/* ip := .Lconstants - .Lconstants_addr, r10 := .Lconstants_addr */
	ldr	ip, .Lconstants_addr
	adr	r10, .Lconstants_addr

	vld1.8	{q14}, [r0 :128]!	/* q14 = *rk++ */
	rsb	r3, r1, #0		/* r3 := -nrounds = ~(nrounds - 1) */
	vmov.i8	q1, #0x0f
	and	r3, r3, #3		/* r3 := 3 & ~(nrounds - 1) */

	/* ip := .Lconstants */
	add	ip, ip, r10

	/* (q4, q5) := (diptlo, dipthi) */
	add	r6, ip, #(dipt - .Lconstants)
	vld1.8	{q4-q5}, [r6 :256]

	/* load the rest of the constants */
	add	r4, ip, #(dsbb - .Lconstants)
	add	r6, ip, #(.Linv_inva - .Lconstants)
	add	r8, ip, #(.Lmc_forward_3 - .Lconstants)
	vld1.8	{q6-q7}, [r4 :256]	/* q6 := dsbb[0], q7 := dsbb[1] */
	vld1.8	{q10-q11}, [r6 :256]	/* q10 := inv, q11 := inva */
	vld1.8	{q15}, [r8 :128]	/* q15 := mc[3].forward */

	/* (q2, q3) := (lo, hi) */
	vshr.u8	q3, q0, #4
	vand	q2, q0, q1		/* q2 := x & 0x0f0f... */
	vand	q3, q3, q1		/* q3 := (x >> 4) & 0x0f0f... */

	/* (q2, q3) := (diptlo(lo), dipthi(hi)) */
	vtbl.8	d4, {q4}, d4
	vtbl.8	d5, {q4}, d5
	vtbl.8	d6, {q5}, d6
	vtbl.8	d7, {q5}, d7

	/* load dsb9; dipt in q4/q5 is no longer needed */
	add	r4, ip, #(dsb9 - .Lconstants)
	vld1.8	{q4-q5}, [r4 :256]	/* q4 := dsb9[0], q5 := dsb9[1] */

	/* r4 := dsbd, r5 := dsbe */
	add	r4, ip, #(dsbd - .Lconstants)
	add	r5, ip, #(dsbe - .Lconstants)

	/* q0 := rk[0] + diptlo(lo) + dipthi(hi) */
	veor	q0, q14, q2
	veor	q0, q0, q3

	/* Enter the loop at SubBytes; the first pass skips the mixing at 1. */
	b	2f

	_ALIGN_TEXT
1:	/* load dsbd; q8/q9 alternate between dsbd and dsbe every round */
	vld1.8	{q8-q9}, [r4 :256]	/* q8 := dsbd[0], q9 := dsbd[1] */

	vld1.8	{q14}, [r0 :128]!	/* q14 = *rk++ */

	/* q0 := rk[i] + dsb9_0(io) + dsb9_1(jo) */
	vtbl.8	d24, {q4}, d4
	vtbl.8	d25, {q4}, d5
	vtbl.8	d26, {q5}, d6
	vtbl.8	d27, {q5}, d7
	veor	q0, q14, q12
	veor	q0, q0, q13

	/* q14 := x(mc) */
	vtbl.8	d28, {q0}, d30
	vtbl.8	d29, {q0}, d31

	/* q0 := x(mc) + dsbd_0(io) + dsbd_1(jo) */
	vtbl.8	d24, {q8}, d4
	vtbl.8	d25, {q8}, d5
	vtbl.8	d26, {q9}, d6
	vtbl.8	d27, {q9}, d7
	veor	q0, q14, q12
	veor	q0, q0, q13

	/* load dsbe */
	vld1.8	{q8-q9}, [r5 :256]	/* q8 := dsbe[0], q9 := dsbe[1] */

	/* q0 := x(mc) + dsbb_0(io) + dsbb_1(jo) */
	vtbl.8	d28, {q0}, d30
	vtbl.8	d29, {q0}, d31
	vtbl.8	d24, {q6}, d4
	vtbl.8	d25, {q6}, d5
	vtbl.8	d26, {q7}, d6
	vtbl.8	d27, {q7}, d7
	veor	q0, q14, q12
	veor	q0, q0, q13

	/* q0 := x(mc) + dsbe_0(io) + dsbe_1(jo) */
	vtbl.8	d28, {q0}, d30
	vtbl.8	d29, {q0}, d31
	vtbl.8	d24, {q8}, d4
	vtbl.8	d25, {q8}, d5
	vtbl.8	d26, {q9}, d6
	vtbl.8	d27, {q9}, d7
	veor	q0, q14, q12
	veor	q0, q0, q13

	/* q15 := mc := mc <<< 12*8 */
	vext.8	q15, q15, q15, #12

2:	/*
	 * SubBytes
	 */

	/* (q2, q3) := (k, i) */
	vshr.u8	q3, q0, #4
	vand	q2, q0, q1		/* q2 := x & 0x0f0f... */
	vand	q3, q3, q1		/* q3 := (x >> 4) & 0x0f0f... */

	/* q0 := a/k */
	vtbl.8	d0, {q11}, d4
	vtbl.8	d1, {q11}, d5

	/* q2 := j = i + k */
	veor	q2, q3, q2

	/* q12 := ir = 1/i */
	vtbl.8	d24, {q10}, d6
	vtbl.8	d25, {q10}, d7

	/* q13 := jr = 1/j */
	vtbl.8	d26, {q10}, d4
	vtbl.8	d27, {q10}, d5

	/* q12 := iak = 1/i + a/k */
	veor	q12, q12, q0

	/* q13 := jak = 1/j + a/k */
	veor	q13, q13, q0

	/* q12 := iakr = 1/(1/i + a/k) */
	vtbl.8	d24, {q10}, d24
	vtbl.8	d25, {q10}, d25

	/* q13 := jakr = 1/(1/j + a/k) */
	vtbl.8	d26, {q10}, d26
	vtbl.8	d27, {q10}, d27

	/* q2 := io = j + 1/(1/i + a/k) */
	veor	q2, q2, q12

	/* q3 := jo = i + 1/(1/j + a/k) */
	veor	q3, q3, q13

	/* advance round */
	subs	r1, r1, #1
	bne	1b

	/* (q6, q7, q15) := (dsbo[0], dsbo[1], sr[i]), i = r3 */
	add	r8, ip, #(sr - .Lconstants)
	add	r6, ip, #(dsbo - .Lconstants)
	add	r8, r8, r3, lsl #4	/* r8 := &sr[i] */
	vld1.8	{q6-q7}, [r6 :256]
	vld1.8	{q15}, [r8 :128]

	vld1.8	{q14}, [r0 :128]!	/* q14 = *rk++ */

	/* (q2, q3) := (dsbo_0(io), dsbo_1(jo)) */
	vtbl.8	d4, {q6}, d4
	vtbl.8	d5, {q6}, d5
	vtbl.8	d6, {q7}, d6
	vtbl.8	d7, {q7}, d7

	/* q2 := x = rk[nr] + dsbo_0(io) + dsbo_1(jo) */
	veor	q2, q2, q14
	veor	q2, q2, q3

	/* q0 := x(sr[i]) */
	vtbl.8	d0, {q2}, d30
	vtbl.8	d1, {q2}, d31

	vpop	{d8-d15}
	pop	{r4, r5, r6, r8, r10, lr}
#ifdef __SOFTFP__
	/* Return the result in (r0,r1,r2,r3) for the soft-float ABI. */
#ifdef __ARM_BIG_ENDIAN
	vmov	r1, r0, d0
	vmov	r3, r2, d1
#else
	vmov	r0, r1, d0
	vmov	r2, r3, d1
#endif
#endif
	bx	lr
END(aes_neon_dec1)