1 ;; Machine description for AArch64 SVE2. 2 ;; Copyright (C) 2019-2022 Free Software Foundation, Inc. 3 ;; Contributed by ARM Ltd. 4 ;; 5 ;; This file is part of GCC. 6 ;; 7 ;; GCC is free software; you can redistribute it and/or modify it 8 ;; under the terms of the GNU General Public License as published by 9 ;; the Free Software Foundation; either version 3, or (at your option) 10 ;; any later version. 11 ;; 12 ;; GCC is distributed in the hope that it will be useful, but 13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of 14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 ;; General Public License for more details. 16 ;; 17 ;; You should have received a copy of the GNU General Public License 18 ;; along with GCC; see the file COPYING3. If not see 19 ;; <http://www.gnu.org/licenses/>. 20 21 ;; The file is organised into the following sections (search for the full 22 ;; line): 23 ;; 24 ;; == Moves 25 ;; ---- Non-temporal gather loads 26 ;; ---- Non-temporal scatter stores 27 ;; 28 ;; == Uniform binary arithmnetic 29 ;; ---- [INT] Multiplication 30 ;; ---- [INT] Scaled high-part multiplication 31 ;; ---- [INT] General binary arithmetic that maps to unspecs 32 ;; ---- [INT] Saturating binary arithmetic 33 ;; ---- [INT] Saturating left shifts 34 ;; 35 ;; == Uniform ternary arithmnetic 36 ;; ---- [INT] General ternary arithmetic that maps to unspecs 37 ;; ---- [INT] Multiply-and-accumulate operations 38 ;; ---- [INT] Binary logic operations with rotation 39 ;; ---- [INT] Ternary logic operations 40 ;; ---- [INT] Shift-and-accumulate operations 41 ;; ---- [INT] Shift-and-insert operations 42 ;; ---- [INT] Sum of absolute differences 43 ;; 44 ;; == Extending arithmetic 45 ;; ---- [INT] Wide binary arithmetic 46 ;; ---- [INT] Long binary arithmetic 47 ;; ---- [INT] Long left shifts 48 ;; ---- [INT] Long binary arithmetic with accumulation 49 ;; ---- [FP] Long multiplication with accumulation 50 ;; 51 ;; == Narrowing arithnetic 52 ;; 
---- [INT] Narrowing unary arithmetic 53 ;; ---- [INT] Narrowing binary arithmetic 54 ;; ---- [INT] Narrowing right shifts 55 ;; 56 ;; == Pairwise arithmetic 57 ;; ---- [INT] Pairwise arithmetic 58 ;; ---- [FP] Pairwise arithmetic 59 ;; ---- [INT] Pairwise arithmetic with accumulation 60 ;; 61 ;; == Complex arithmetic 62 ;; ---- [INT] Complex binary operations 63 ;; ---- [INT] Complex ternary operations 64 ;; ---- [INT] Complex dot product 65 ;; 66 ;; == Conversions 67 ;; ---- [FP<-FP] Widening conversions 68 ;; ---- [FP<-FP] Narrowing conversions 69 ;; 70 ;; == Other arithmetic 71 ;; ---- [INT] Reciprocal approximation 72 ;; ---- [INT<-FP] Base-2 logarithm 73 ;; ---- [INT] Polynomial multiplication 74 ;; 75 ;; == Permutation 76 ;; ---- [INT,FP] General permutes 77 ;; ---- [INT] Optional bit-permute extensions 78 ;; 79 ;; == General 80 ;; ---- Check for aliases between pointers 81 ;; ---- Histogram processing 82 ;; ---- String matching 83 ;; 84 ;; == Crypotographic extensions 85 ;; ---- Optional AES extensions 86 ;; ---- Optional SHA-3 extensions 87 ;; ---- Optional SM4 extensions 88 89 ;; ========================================================================= 90 ;; == Moves 91 ;; ========================================================================= 92 93 ;; ------------------------------------------------------------------------- 94 ;; ---- Non-temporal gather loads 95 ;; ------------------------------------------------------------------------- 96 ;; Includes gather forms of: 97 ;; - LDNT1B 98 ;; - LDNT1D 99 ;; - LDNT1H 100 ;; - LDNT1W 101 ;; ------------------------------------------------------------------------- 102 103 ;; Non-extending loads. 
;; Operand 1 is the governing predicate (printed as %1/z), operand 3 is
;; the vector register printed inside the address and operand 2 is the
;; scalar offset.  The first alternative ("Z" for operand 2) prints the
;; address without an offset; the second ("r") appends the offset register.
(define_insn "@aarch64_gather_ldnt<mode>"
  [(set (match_operand:SVE_FULL_SD 0 "register_operand" "=w, w")
        (unspec:SVE_FULL_SD
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (match_operand:DI 2 "aarch64_reg_or_zero" "Z, r")
           (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w")
           (mem:BLK (scratch))]
          UNSPEC_LDNT1_GATHER))]
  "TARGET_SVE2"
  "@
   ldnt1<Vesize>\t%0.<Vetype>, %1/z, [%3.<Vetype>]
   ldnt1<Vesize>\t%0.<Vetype>, %1/z, [%3.<Vetype>, %2]"
)

;; Extending loads.
;;
;; The inner unspec loads SVE_PARTIAL_I elements using the same operand
;; roles as the non-extending pattern; the result is then extended to
;; SVE_FULL_SDI by ANY_EXTEND.  Operand 4 is the predicate of the outer
;; UNSPEC_PRED_X wrapper.  It is not referenced by the output templates,
;; and the rewrite step replaces any non-constant value with an all-ones
;; constant.  The insn condition restricts the mode combinations through
;; the narrower_mask/self_mask attributes.
(define_insn_and_rewrite "@aarch64_gather_ldnt_<ANY_EXTEND:optab><SVE_FULL_SDI:mode><SVE_PARTIAL_I:mode>"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, w")
        (unspec:SVE_FULL_SDI
          [(match_operand:<SVE_FULL_SDI:VPRED> 4 "general_operand" "UplDnm, UplDnm")
           (ANY_EXTEND:SVE_FULL_SDI
             (unspec:SVE_PARTIAL_I
               [(match_operand:<SVE_FULL_SDI:VPRED> 1 "register_operand" "Upl, Upl")
                (match_operand:DI 2 "aarch64_reg_or_zero" "Z, r")
                (match_operand:<SVE_FULL_SDI:V_INT_EQUIV> 3 "register_operand" "w, w")
                (mem:BLK (scratch))]
               UNSPEC_LDNT1_GATHER))]
          UNSPEC_PRED_X))]
  "TARGET_SVE2
   && (~<SVE_FULL_SDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
  "@
   ldnt1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_SDI:Vetype>, %1/z, [%3.<SVE_FULL_SDI:Vetype>]
   ldnt1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_SDI:Vetype>, %1/z, [%3.<SVE_FULL_SDI:Vetype>, %2]"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<SVE_FULL_SDI:VPRED>mode);
  }
)

;; -------------------------------------------------------------------------
;; ---- Non-temporal scatter stores
;; -------------------------------------------------------------------------
;; Includes scatter forms of:
;; - STNT1B
;; - STNT1D
;; - STNT1H
;; - STNT1W
;; -------------------------------------------------------------------------

;; Non-truncating stores.
;; Operand 0 is the governing predicate, operand 2 is the vector register
;; printed inside the address, operand 1 is the scalar offset and operand 3
;; is the data being stored.  The first alternative ("Z" for operand 1)
;; prints the address without an offset; the second ("r") appends the
;; offset register.
;;
;; The pattern is gated on TARGET_SVE2, matching the truncating form below
;; and the gather loads above: the vector-base STNT1 form emitted here must
;; not be generated for plain SVE targets.
(define_insn "@aarch64_scatter_stnt<mode>"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_operand:<VPRED> 0 "register_operand" "Upl, Upl")
           (match_operand:DI 1 "aarch64_reg_or_zero" "Z, r")
           (match_operand:<V_INT_EQUIV> 2 "register_operand" "w, w")
           (match_operand:SVE_FULL_SD 3 "register_operand" "w, w")]

          UNSPEC_STNT1_SCATTER))]
  "TARGET_SVE2"
  "@
   stnt1<Vesize>\t%3.<Vetype>, %0, [%2.<Vetype>]
   stnt1<Vesize>\t%3.<Vetype>, %0, [%2.<Vetype>, %1]"
)

;; Truncating stores.
;;
;; Same operand roles as the non-truncating pattern, except that the
;; stored value is operand 3 truncated from SVE_FULL_SDI to SVE_PARTIAL_I.
;; The insn condition restricts the mode combinations through the
;; narrower_mask/self_mask attributes.
(define_insn "@aarch64_scatter_stnt_<SVE_FULL_SDI:mode><SVE_PARTIAL_I:mode>"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_operand:<SVE_FULL_SDI:VPRED> 0 "register_operand" "Upl, Upl")
           (match_operand:DI 1 "aarch64_reg_or_zero" "Z, r")
           (match_operand:<SVE_FULL_SDI:V_INT_EQUIV> 2 "register_operand" "w, w")
           (truncate:SVE_PARTIAL_I
             (match_operand:SVE_FULL_SDI 3 "register_operand" "w, w"))]
          UNSPEC_STNT1_SCATTER))]
  "TARGET_SVE2
   && (~<SVE_FULL_SDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
  "@
   stnt1<SVE_PARTIAL_I:Vesize>\t%3.<SVE_FULL_SDI:Vetype>, %0, [%2.<SVE_FULL_SDI:Vetype>]
   stnt1<SVE_PARTIAL_I:Vesize>\t%3.<SVE_FULL_SDI:Vetype>, %0, [%2.<SVE_FULL_SDI:Vetype>, %1]"
)

;; =========================================================================
;; == Uniform binary arithmnetic
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [INT] Multiplication
;; -------------------------------------------------------------------------
;; Includes the lane forms of:
;; - MUL
;; -------------------------------------------------------------------------

;; Operand 1 is the full-vector multiplicand.  Operand 2 is the vector from
;; which lane operand 3 is selected; it is restricted to the register class
;; given by <sve_lane_con>.
(define_insn "@aarch64_mul_lane_<mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
        (mult:SVE_FULL_HSDI
          (unspec:SVE_FULL_HSDI
            [(match_operand:SVE_FULL_HSDI 2 "register_operand" "<sve_lane_con>")
             (match_operand:SI 3 "const_int_operand")]
            UNSPEC_SVE_LANE_SELECT)
          (match_operand:SVE_FULL_HSDI 1 "register_operand" "w")))]
  "TARGET_SVE2"
  "mul\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]"
)

;; -------------------------------------------------------------------------
;; ---- [INT] Scaled high-part multiplication
;; -------------------------------------------------------------------------
;; The patterns in this section are synthetic.
;; -------------------------------------------------------------------------

;; Unpredicated integer multiply-high-with-(round-and-)scale.
;;
;; The preparation code emits the whole sequence itself and finishes with
;; DONE, so the RTL template above it only documents the operation.
(define_expand "<su>mulh<r>s<mode>3"
  [(set (match_operand:SVE_FULL_BHSI 0 "register_operand")
        (unspec:SVE_FULL_BHSI
          [(match_dup 3)
           (unspec:SVE_FULL_BHSI
             [(match_operand:SVE_FULL_BHSI 1 "register_operand")
              (match_operand:SVE_FULL_BHSI 2 "register_operand")]
             MULHRS)]
          UNSPEC_PRED_X))]
  "TARGET_SVE2"
  {
    operands[3] = aarch64_ptrue_reg (<VPRED>mode);

    /* Widening multiplies: one <VWIDE> product from the MULLB pattern and
       one from the MULLT pattern.  */
    rtx prod_b = gen_reg_rtx (<VWIDE>mode);
    rtx prod_t = gen_reg_rtx (<VWIDE>mode);
    emit_insn (gen_aarch64_sve_<su>mullb<Vwide> (prod_b, operands[1],
                                                 operands[2]));
    emit_insn (gen_aarch64_sve_<su>mullt<Vwide> (prod_t, operands[1],
                                                 operands[2]));

    /* Narrow both products back into operand 0, shifting right by one
       less than the element width.  The SHRNT step takes operand 0 as
       an input as well as writing it.  */
    rtx shift = GEN_INT (GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1);
    emit_insn (gen_aarch64_sve_<r>shrnb<Vwide> (operands[0], prod_b, shift));
    emit_insn (gen_aarch64_sve_<r>shrnt<Vwide> (operands[0], operands[0],
                                                prod_t, shift));

    DONE;
  }
)

;; -------------------------------------------------------------------------
;; ---- [INT] General binary arithmetic that maps to unspecs
;; -------------------------------------------------------------------------
;; Includes:
;; - SHADD
;; - SHSUB
;; - SHSUBR
;; - SQRSHL
;; - SQRSHLR
;; - SRHADD
;; - SRSHL
;; - SRSHLR
;; - SUQADD
;; - UHADD
;; - UHSUB
;; - UHSUBR
;; - UQRSHL
;; - UQRSHLR
;; - URHADD
;; - URSHL
;; - URSHLR
;; - USQADD
;; -------------------------------------------------------------------------

;; Integer average (floor).
;; Operand 3 is an all-ones predicate forced into a register.
(define_expand "<u>avg<mode>3_floor"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (unspec:SVE_FULL_I
          [(match_dup 3)
           (unspec:SVE_FULL_I
             [(match_operand:SVE_FULL_I 1 "register_operand")
              (match_operand:SVE_FULL_I 2 "register_operand")]
             HADD)]
          UNSPEC_PRED_X))]
  "TARGET_SVE2"
  {
    operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; Integer average (rounding).
;; Operand 3 is an all-ones predicate forced into a register.
(define_expand "<u>avg<mode>3_ceil"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (unspec:SVE_FULL_I
          [(match_dup 3)
           (unspec:SVE_FULL_I
             [(match_operand:SVE_FULL_I 1 "register_operand")
              (match_operand:SVE_FULL_I 2 "register_operand")]
             RHADD)]
          UNSPEC_PRED_X))]
  "TARGET_SVE2"
  {
    operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; The immediate form of SQADD acts as an immediate form of SUQADD
;; over its full range.  In contrast to the ss_plus pattern, we do
;; not need to treat byte immediates specially.  E.g.:
;;
;;	SQADD	Z0.B, Z0.B, #128
;;
;; is equivalent to:
;;
;;	MOV	Z1.B, #128
;;	SUQADD	Z0.B, P0/M, Z0.B, Z1.B
;;
;; even though it's not equivalent to:
;;
;;	MOV	Z1.B, #128
;;	SQADD	Z0.B, P0/M, Z0.B, Z1.B	// Saturating subtraction of 128
;;
;; The second alternative copies operand 1 into the destination with
;; MOVPRFX when the two are not tied.
(define_insn "@aarch64_sve_suqadd<mode>_const"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_I
          [(match_operand:SVE_FULL_I 1 "register_operand" "0, w")
           (match_operand:SVE_FULL_I 2 "aarch64_sve_arith_immediate")]
          UNSPEC_SUQADD))]
  "TARGET_SVE2"
  "@
   sqadd\t%0.<Vetype>, %0.<Vetype>, #%D2
   movprfx\t%0, %1\;sqadd\t%0.<Vetype>, %0.<Vetype>, #%D2"
  [(set_attr "movprfx" "*,yes")]
)

;; General predicated binary arithmetic.  All operations handled here
;; are commutative or have a reversed form.
;;
;; Alternative 1 ties the destination to operand 2, alternative 2 ties it
;; to operand 3 and uses the reversed mnemonic, and alternative 3 copies
;; operand 2 into an untied destination with MOVPRFX.
(define_insn "@aarch64_pred_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w")
        (unspec:SVE_FULL_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
           (unspec:SVE_FULL_I
             [(match_operand:SVE_FULL_I 2 "register_operand" "0, w, w")
              (match_operand:SVE_FULL_I 3 "register_operand" "w, 0, w")]
             SVE2_COND_INT_BINARY_REV)]
          UNSPEC_PRED_X))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,*,yes")]
)

;; Predicated binary arithmetic with merging.
;; Operand 1 selects between the result of the operation (operands 2 and 3)
;; and the fallback value in operand 4.  Operand 5 is the governing
;; predicate of the inner UNSPEC_PRED_X, so it must be an all-ones constant
;; in the predicate mode <VPRED>, not in the data mode.  This matches what
;; the *cond_* rewrite steps install for the same operand.
(define_expand "@cond_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (unspec:SVE_FULL_I
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_I
             [(match_dup 5)
              (unspec:SVE_FULL_I
                [(match_operand:SVE_FULL_I 2 "register_operand")
                 (match_operand:SVE_FULL_I 3 "register_operand")]
                SVE2_COND_INT_BINARY)]
             UNSPEC_PRED_X)
           (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE2"
  {
    operands[5] = CONSTM1_RTX (<VPRED>mode);
  }
)

;; Predicated binary arithmetic, merging with the first input.
;;
;; The fallback value is operand 2 itself (match_dup 2).  Operand 4 is the
;; inner UNSPEC_PRED_X predicate; it is not used in the output, and the
;; rewrite step replaces any non-constant value with an all-ones constant.
;; The second alternative copies operand 2 into an untied destination with
;; MOVPRFX.
(define_insn_and_rewrite "*cond_<sve_int_op><mode>_2"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (unspec:SVE_FULL_I
             [(match_operand 4)
              (unspec:SVE_FULL_I
                [(match_operand:SVE_FULL_I 2 "register_operand" "0, w")
                 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
                SVE2_COND_INT_BINARY)]
             UNSPEC_PRED_X)
           (match_dup 2)]
          UNSPEC_SEL))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated binary arithmetic, merging with the second input.
;; The fallback value is operand 3 itself (match_dup 3), so the reversed
;; mnemonic is used with the destination tied to operand 3.  Operand 4 is
;; the inner UNSPEC_PRED_X predicate; the rewrite step replaces any
;; non-constant value with an all-ones constant.
(define_insn_and_rewrite "*cond_<sve_int_op><mode>_3"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (unspec:SVE_FULL_I
             [(match_operand 4)
              (unspec:SVE_FULL_I
                [(match_operand:SVE_FULL_I 2 "register_operand" "w, w")
                 (match_operand:SVE_FULL_I 3 "register_operand" "0, w")]
                SVE2_COND_INT_BINARY_REV)]
             UNSPEC_PRED_X)
           (match_dup 3)]
          UNSPEC_SEL))]
  "TARGET_SVE2"
  "@
   <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated binary operations, merging with an independent value.
;;
;; The insn condition rejects cases in which operand 4 equals operand 2 or
;; operand 3.  Alternatives 1-3 merge with zero (Dz) using a zeroing
;; MOVPRFX, alternative 4 has the destination tied to operand 4 and uses a
;; merging MOVPRFX, and alternative 5 ("#") has no direct output and is
;; handled by the rewrite code.
(define_insn_and_rewrite "*cond_<sve_int_op><mode>_any"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, ?&w")
        (unspec:SVE_FULL_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
           (unspec:SVE_FULL_I
             [(match_operand 5)
              (unspec:SVE_FULL_I
                [(match_operand:SVE_FULL_I 2 "register_operand" "0, w, w, w, w")
                 (match_operand:SVE_FULL_I 3 "register_operand" "w, 0, w, w, w")]
                SVE2_COND_INT_BINARY_REV)]
             UNSPEC_PRED_X)
           (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
          UNSPEC_SEL))]
  "TARGET_SVE2
   && !rtx_equal_p (operands[2], operands[4])
   && !rtx_equal_p (operands[3], operands[4])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   #"
  "&& 1"
  {
    /* After reload, if the fallback is a register other than the
       destination, emit a vcond_mask that writes operand 0 from operands
       2 and 4 under predicate operand 1, then make operands 2 and 4 refer
       to operand 0.  */
    if (reload_completed
        && register_operand (operands[4], <MODE>mode)
        && !rtx_equal_p (operands[0], operands[4]))
      {
        emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
                                                 operands[4], operands[1]));
        operands[4] = operands[2] = operands[0];
      }
    /* Otherwise only canonicalize the inner predicate to all-ones.  */
    else if (!CONSTANT_P (operands[5]))
      operands[5] = CONSTM1_RTX (<VPRED>mode);
    /* Nothing to rewrite.  */
    else
      FAIL;
  }
  [(set_attr "movprfx" "yes")]
)

;; Predicated binary operations with no reverse form, merging with zero.
;; At present we don't generate these patterns via a cond_* optab,
;; so there's no correctness requirement to handle merging with an
;; independent value.
;;
;; Both alternatives use a zeroing MOVPRFX: from the destination itself
;; when it is tied to operand 2, otherwise from operand 2.
(define_insn_and_rewrite "*cond_<sve_int_op><mode>_z"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w")
        (unspec:SVE_FULL_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (unspec:SVE_FULL_I
             [(match_operand 5)
              (unspec:SVE_FULL_I
                [(match_operand:SVE_FULL_I 2 "register_operand" "0, w")
                 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
                SVE2_COND_INT_BINARY_NOREV)]
             UNSPEC_PRED_X)
           (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE2"
  "@
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  "&& !CONSTANT_P (operands[5])"
  {
    operands[5] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "yes")]
)

;; -------------------------------------------------------------------------
;; ---- [INT] Saturating binary arithmetic
;; -------------------------------------------------------------------------
;; Includes:
;; - SQDMULH
;; - SQRDMULH
;; -------------------------------------------------------------------------

;; Unpredicated three-register form: operand 0 = op (operand 1, operand 2).
(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
        (unspec:SVE_FULL_I
          [(match_operand:SVE_FULL_I 1 "register_operand" "w")
           (match_operand:SVE_FULL_I 2 "register_operand" "w")]
          SVE2_INT_BINARY))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)

;; Lane form: the second input is lane operand 3 of operand 2, which is
;; restricted to the register class given by <sve_lane_con>.
(define_insn "@aarch64_sve_<sve_int_op>_lane_<mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
        (unspec:SVE_FULL_HSDI
          [(match_operand:SVE_FULL_HSDI 1 "register_operand" "w")
           (unspec:SVE_FULL_HSDI
             [(match_operand:SVE_FULL_HSDI 2 "register_operand" "<sve_lane_con>")
              (match_operand:SI 3 "const_int_operand")]
             UNSPEC_SVE_LANE_SELECT)]
          SVE2_INT_BINARY_LANE))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]"
)

;; -------------------------------------------------------------------------
;; ---- [INT] Saturating left shifts
;; -------------------------------------------------------------------------
;; Includes:
;; - SQSHL
;; - SQSHLR
;; - UQSHL
;; - UQSHLR
;; -------------------------------------------------------------------------

;; Predicated left shifts.
;; Operand 3 is either an immediate (D<lr>) or a register.  Alternatives:
;;   1. destination tied to operand 2, immediate shift
;;   2. destination tied to operand 2, register shift
;;   3. destination tied to operand 3, reversed ("r") mnemonic
;;   4. MOVPRFX from operand 2, immediate shift
;;   5. MOVPRFX from operand 2, register shift
(define_insn "@aarch64_pred_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, w, ?&w, ?&w")
        (unspec:SVE_FULL_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
           (unspec:SVE_FULL_I
             [(match_operand:SVE_FULL_I 2 "register_operand" "0, 0, w, w, w")
              (match_operand:SVE_FULL_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, 0, D<lr>, w")]
             SVE2_COND_INT_SHIFT)]
          UNSPEC_PRED_X))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   <sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,*,*,yes,yes")]
)

;; Predicated left shifts with merging.
;;
;; Operand 1 selects between the shift result and the fallback value in
;; operand 4.  Operand 5, the predicate of the inner UNSPEC_PRED_X, is set
;; to an all-ones constant in the predicate mode.
(define_expand "@cond_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (unspec:SVE_FULL_I
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_I
             [(match_dup 5)
              (unspec:SVE_FULL_I
                [(match_operand:SVE_FULL_I 2 "register_operand")
                 (match_operand:SVE_FULL_I 3 "aarch64_sve_<lr>shift_operand")]
                SVE2_COND_INT_SHIFT)]
             UNSPEC_PRED_X)
           (match_operand:SVE_FULL_I 4 "register_operand")]
          UNSPEC_SEL))]
  "TARGET_SVE2"
  {
    operands[5] = CONSTM1_RTX (<VPRED>mode);
  }
)

;; Predicated left shifts, merging with the first input.
;; The fallback value is operand 2 itself (match_dup 2).  Alternatives 1
;; and 2 tie the destination to operand 2 (immediate and register shift);
;; alternatives 3 and 4 copy operand 2 into an untied destination with
;; MOVPRFX.  Operand 4 is the inner UNSPEC_PRED_X predicate; the rewrite
;; step replaces any non-constant value with an all-ones constant.
(define_insn_and_rewrite "*cond_<sve_int_op><mode>_2"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w, ?&w")
        (unspec:SVE_FULL_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
           (unspec:SVE_FULL_I
             [(match_operand 4)
              (unspec:SVE_FULL_I
                [(match_operand:SVE_FULL_I 2 "register_operand" "0, 0, w, w")
                 (match_operand:SVE_FULL_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, D<lr>, w")]
                SVE2_COND_INT_SHIFT)]
             UNSPEC_PRED_X)
           (match_dup 2)]
          UNSPEC_SEL))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,*,yes,yes")]
)

;; Predicated left shifts, merging with the second input.
;;
;; The fallback value is operand 3 itself (match_dup 3), which must be a
;; register here.  Both alternatives use the reversed ("r") mnemonic with
;; the destination holding operand 3, copied in by MOVPRFX when untied.
(define_insn_and_rewrite "*cond_<sve_int_op><mode>_3"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (unspec:SVE_FULL_I
             [(match_operand 4)
              (unspec:SVE_FULL_I
                [(match_operand:SVE_FULL_I 2 "register_operand" "w, w")
                 (match_operand:SVE_FULL_I 3 "register_operand" "0, w")]
                SVE2_COND_INT_SHIFT)]
             UNSPEC_PRED_X)
           (match_dup 3)]
          UNSPEC_SEL))]
  "TARGET_SVE2"
  "@
   <sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0, %3\;<sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated left shifts, merging with an independent value.
;; The insn condition rejects cases in which operand 4 equals operand 2,
;; or in which a non-constant operand 4 equals operand 3.  Alternatives
;; 1-5 merge with zero (Dz) using a zeroing MOVPRFX, alternatives 6-7 have
;; the destination tied to operand 4 and use a merging MOVPRFX, and
;; alternatives 8-9 ("#") have no direct output and are handled by the
;; rewrite code.
(define_insn_and_rewrite "*cond_<sve_int_op><mode>_any"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, &w, &w, &w, ?&w, ?&w")
        (unspec:SVE_FULL_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl, Upl, Upl, Upl")
           (unspec:SVE_FULL_I
             [(match_operand 5)
              (unspec:SVE_FULL_I
                [(match_operand:SVE_FULL_I 2 "register_operand" "0, 0, w, w, w, w, w, w, w")
                 (match_operand:SVE_FULL_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, 0, D<lr>, w, D<lr>, w, D<lr>, w")]
                SVE2_COND_INT_SHIFT)]
             UNSPEC_PRED_X)
           (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, Dz, 0, 0, w, w")]
          UNSPEC_SEL))]
  "TARGET_SVE2
   && !rtx_equal_p (operands[2], operands[4])
   && (CONSTANT_P (operands[4]) || !rtx_equal_p (operands[3], operands[4]))"
  "@
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   #
   #"
  "&& 1"
  {
    /* After reload, if the fallback is a register other than the
       destination, emit a vcond_mask that writes operand 0 from operands
       2 and 4 under predicate operand 1, then make operands 2 and 4 refer
       to operand 0.  */
    if (reload_completed
        && register_operand (operands[4], <MODE>mode)
        && !rtx_equal_p (operands[0], operands[4]))
      {
        emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
                                                 operands[4], operands[1]));
        operands[4] = operands[2] = operands[0];
      }
    /* Otherwise only canonicalize the inner predicate to all-ones.  */
    else if (!CONSTANT_P (operands[5]))
      operands[5] = CONSTM1_RTX (<VPRED>mode);
    /* Nothing to rewrite.  */
    else
      FAIL;
  }
  [(set_attr "movprfx" "yes")]
)

;; =========================================================================
;; == Uniform ternary arithmnetic
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [INT] General ternary arithmetic that maps to unspecs
;; -------------------------------------------------------------------------
;; Includes:
;; - ADCLB
;; - ADCLT
;; - EORBT
;; - EORTB
;; - SBCLB
;; - SBCLT
;; - SQRDMLAH
;; - SQRDMLSH
;; -------------------------------------------------------------------------

;; Operand 1 is listed last in the unspec but is the input that shares the
;; destination register: it is tied in the first alternative and copied in
;; with MOVPRFX in the second.
(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_I
          [(match_operand:SVE_FULL_I 2 "register_operand" "w, w")
           (match_operand:SVE_FULL_I 3 "register_operand" "w, w")
           (match_operand:SVE_FULL_I 1 "register_operand" "0, w")]
          SVE2_INT_TERNARY))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
   movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Lane form of the pattern above: the second input is lane operand 4 of
;; operand 3, which is restricted to the <sve_lane_con> register class.
(define_insn "@aarch64_sve_<sve_int_op>_lane_<mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_HSDI
          [(match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w")
           (unspec:SVE_FULL_HSDI
             [(match_operand:SVE_FULL_HSDI 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
              (match_operand:SI 4 "const_int_operand")]
             UNSPEC_SVE_LANE_SELECT)
           (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")]
          SVE2_INT_TERNARY_LANE))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]
   movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]"
  [(set_attr "movprfx" "*,yes")]
)

;; -------------------------------------------------------------------------
;; ---- [INT] Multiply-and-accumulate operations
;; -------------------------------------------------------------------------
;; Includes the lane forms of:
;; - MLA
;; - MLS
;; -------------------------------------------------------------------------

;; operand 0 = operand 1 + operand 2 * (lane operand 4 of operand 3).
;; The accumulator (operand 1) is tied to the destination in the first
;; alternative and copied in with MOVPRFX in the second.
(define_insn "@aarch64_sve_add_mul_lane_<mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
        (plus:SVE_FULL_HSDI
          (mult:SVE_FULL_HSDI
            (unspec:SVE_FULL_HSDI
              [(match_operand:SVE_FULL_HSDI 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
               (match_operand:SI 4 "const_int_operand")]
              UNSPEC_SVE_LANE_SELECT)
            (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w"))
          (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")))]
  "TARGET_SVE2"
  "@
   mla\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]
   movprfx\t%0, %1\;mla\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]"
  [(set_attr "movprfx" "*,yes")]
)

;; operand 0 = operand 1 - operand 2 * (lane operand 4 of operand 3).
;; The accumulator (operand 1) is tied to the destination in the first
;; alternative and copied in with MOVPRFX in the second.
(define_insn "@aarch64_sve_sub_mul_lane_<mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
        (minus:SVE_FULL_HSDI
          (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")
          (mult:SVE_FULL_HSDI
            (unspec:SVE_FULL_HSDI
              [(match_operand:SVE_FULL_HSDI 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
               (match_operand:SI 4 "const_int_operand")]
              UNSPEC_SVE_LANE_SELECT)
            (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w"))))]
  "TARGET_SVE2"
  "@
   mls\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]
   movprfx\t%0, %1\;mls\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]"
  [(set_attr "movprfx" "*,yes")]
)

;; -------------------------------------------------------------------------
;; ---- [INT] Binary logic operations with rotation
;; -------------------------------------------------------------------------
;; Includes:
;; - XAR
;; -------------------------------------------------------------------------

;; operand 0 = (operand 1 ^ operand 2) rotated right by immediate operand 3.
;; The "%" on operand 1 marks operands 1 and 2 as commutative.
(define_insn "@aarch64_sve2_xar<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
        (rotatert:SVE_FULL_I
          (xor:SVE_FULL_I
            (match_operand:SVE_FULL_I 1 "register_operand" "%0, w")
            (match_operand:SVE_FULL_I 2 "register_operand" "w, w"))
          (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm")))]
  "TARGET_SVE2"
  "@
  xar\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3
  movprfx\t%0, %1\;xar\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3"
  [(set_attr "movprfx" "*,yes")]
)

;; -------------------------------------------------------------------------
;; ---- [INT] Ternary logic operations
;; -------------------------------------------------------------------------
;; Includes:
;; - BCAX
;; - BSL
;; - BSL1N
;; - BSL2N
;; - EOR3
;; - NBSL
;; -------------------------------------------------------------------------

;; Unpredicated exclusive OR of AND.
;; operand 0 = operand 1 ^ (operand 2 & ~operand 3).  The NOT is wrapped in
;; an UNSPEC_PRED_X whose predicate (operand 4) is set to an all-ones
;; constant.
(define_expand "@aarch64_sve2_bcax<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (xor:SVE_FULL_I
          (and:SVE_FULL_I
            (unspec:SVE_FULL_I
              [(match_dup 4)
               (not:SVE_FULL_I
                 (match_operand:SVE_FULL_I 3 "register_operand"))]
              UNSPEC_PRED_X)
            (match_operand:SVE_FULL_I 2 "register_operand"))
          (match_operand:SVE_FULL_I 1 "register_operand")))]
  "TARGET_SVE2"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
)

;; Matching insn for the expander above.  Operand 4 accepts any predicate
;; value; the rewrite step replaces a non-constant one with an all-ones
;; constant.  The instruction is always printed with .d element suffixes.
(define_insn_and_rewrite "*aarch64_sve2_bcax<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
        (xor:SVE_FULL_I
          (and:SVE_FULL_I
            (unspec:SVE_FULL_I
              [(match_operand 4)
               (not:SVE_FULL_I
                 (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))]
              UNSPEC_PRED_X)
            (match_operand:SVE_FULL_I 2 "register_operand" "w, w"))
          (match_operand:SVE_FULL_I 1 "register_operand" "0, w")))]
  "TARGET_SVE2"
  "@
  bcax\t%0.d, %0.d, %2.d, %3.d
  movprfx\t%0, %1\;bcax\t%0.d, %0.d, %2.d, %3.d"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Unpredicated 3-way exclusive OR.
;;
;; The first three alternatives tie the destination to operand 1, 2 or 3
;; respectively and print the other two inputs; the fourth copies operand 1
;; into an untied destination with MOVPRFX.
(define_insn "@aarch64_sve2_eor3<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, w, ?&w")
        (xor:SVE_FULL_I
          (xor:SVE_FULL_I
            (match_operand:SVE_FULL_I 1 "register_operand" "0, w, w, w")
            (match_operand:SVE_FULL_I 2 "register_operand" "w, 0, w, w"))
          (match_operand:SVE_FULL_I 3 "register_operand" "w, w, 0, w")))]
  "TARGET_SVE2"
  "@
  eor3\t%0.d, %0.d, %2.d, %3.d
  eor3\t%0.d, %0.d, %1.d, %3.d
  eor3\t%0.d, %0.d, %1.d, %2.d
  movprfx\t%0, %1\;eor3\t%0.d, %0.d, %2.d, %3.d"
  [(set_attr "movprfx" "*,*,*,yes")]
)

;; Use NBSL for vector NOR.
846 (define_insn_and_rewrite "*aarch64_sve2_nor<mode>" 847 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") 848 (unspec:SVE_FULL_I 849 [(match_operand 3) 850 (and:SVE_FULL_I 851 (not:SVE_FULL_I 852 (match_operand:SVE_FULL_I 1 "register_operand" "%0, w")) 853 (not:SVE_FULL_I 854 (match_operand:SVE_FULL_I 2 "register_operand" "w, w")))] 855 UNSPEC_PRED_X))] 856 "TARGET_SVE2" 857 "@ 858 nbsl\t%0.d, %0.d, %2.d, %0.d 859 movprfx\t%0, %1\;nbsl\t%0.d, %0.d, %2.d, %0.d" 860 "&& !CONSTANT_P (operands[3])" 861 { 862 operands[3] = CONSTM1_RTX (<VPRED>mode); 863 } 864 [(set_attr "movprfx" "*,yes")] 865 ) 866 867 ;; Use NBSL for vector NAND. 868 (define_insn_and_rewrite "*aarch64_sve2_nand<mode>" 869 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") 870 (unspec:SVE_FULL_I 871 [(match_operand 3) 872 (ior:SVE_FULL_I 873 (not:SVE_FULL_I 874 (match_operand:SVE_FULL_I 1 "register_operand" "%0, w")) 875 (not:SVE_FULL_I 876 (match_operand:SVE_FULL_I 2 "register_operand" "w, w")))] 877 UNSPEC_PRED_X))] 878 "TARGET_SVE2" 879 "@ 880 nbsl\t%0.d, %0.d, %2.d, %2.d 881 movprfx\t%0, %1\;nbsl\t%0.d, %0.d, %2.d, %2.d" 882 "&& !CONSTANT_P (operands[3])" 883 { 884 operands[3] = CONSTM1_RTX (<VPRED>mode); 885 } 886 [(set_attr "movprfx" "*,yes")] 887 ) 888 889 ;; Unpredicated bitwise select. 890 ;; (op3 ? 
;; (op3 ? bsl_mov : bsl_dup) == (((bsl_mov ^ bsl_dup) & op3) ^ bsl_dup)
(define_expand "@aarch64_sve2_bsl<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (xor:SVE_FULL_I
          (and:SVE_FULL_I
            (xor:SVE_FULL_I
              (match_operand:SVE_FULL_I 1 "register_operand")
              (match_operand:SVE_FULL_I 2 "register_operand"))
            (match_operand:SVE_FULL_I 3 "register_operand"))
          (match_dup 2)))]
  "TARGET_SVE2"
)

;; Instruction form of the select.  The BSL_DUP iterator picks which of
;; operands 1 and 2 is repeated as the final XOR input; the first
;; alternative's constraints come from <bsl_1st> and <bsl_2nd>, and the
;; second alternative copies <bsl_mov> into the destination with MOVPRFX.
(define_insn "*aarch64_sve2_bsl<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
        (xor:SVE_FULL_I
          (and:SVE_FULL_I
            (xor:SVE_FULL_I
              (match_operand:SVE_FULL_I 1 "register_operand" "<bsl_1st>, w")
              (match_operand:SVE_FULL_I 2 "register_operand" "<bsl_2nd>, w"))
            (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
          (match_dup BSL_DUP)))]
  "TARGET_SVE2"
  "@
  bsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d
  movprfx\t%0, %<bsl_mov>\;bsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d"
  [(set_attr "movprfx" "*,yes")]
)

;; Unpredicated bitwise inverted select.
;; (~(op3 ? bsl_mov : bsl_dup)) == (~(((bsl_mov ^ bsl_dup) & op3) ^ bsl_dup))
(define_expand "@aarch64_sve2_nbsl<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (unspec:SVE_FULL_I
          [(match_dup 4)
           (not:SVE_FULL_I
             (xor:SVE_FULL_I
               (and:SVE_FULL_I
                 (xor:SVE_FULL_I
                   (match_operand:SVE_FULL_I 1 "register_operand")
                   (match_operand:SVE_FULL_I 2 "register_operand"))
                 (match_operand:SVE_FULL_I 3 "register_operand"))
               (match_dup 2)))]
          UNSPEC_PRED_X))]
  "TARGET_SVE2"
  {
    /* Operand 4 is generated here rather than supplied by the caller.  */
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
)

;; Instruction form of the inverted select.  Operand 4 is matched in any
;; form and rewritten to the all-ones constant if it is not yet constant.
(define_insn_and_rewrite "*aarch64_sve2_nbsl<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_I
          [(match_operand 4)
           (not:SVE_FULL_I
             (xor:SVE_FULL_I
               (and:SVE_FULL_I
                 (xor:SVE_FULL_I
                   (match_operand:SVE_FULL_I 1 "register_operand" "<bsl_1st>, w")
                   (match_operand:SVE_FULL_I 2 "register_operand" "<bsl_2nd>, w"))
                 (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
               (match_dup BSL_DUP)))]
          UNSPEC_PRED_X))]
  "TARGET_SVE2"
  "@
  nbsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d
  movprfx\t%0, %<bsl_mov>\;nbsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Unpredicated bitwise select with inverted first operand.
;; (op3 ? ~bsl_mov : bsl_dup) == ((~(bsl_mov ^ bsl_dup) & op3) ^ bsl_dup)
(define_expand "@aarch64_sve2_bsl1n<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (xor:SVE_FULL_I
          (and:SVE_FULL_I
            (unspec:SVE_FULL_I
              [(match_dup 4)
               (not:SVE_FULL_I
                 (xor:SVE_FULL_I
                   (match_operand:SVE_FULL_I 1 "register_operand")
                   (match_operand:SVE_FULL_I 2 "register_operand")))]
              UNSPEC_PRED_X)
            (match_operand:SVE_FULL_I 3 "register_operand"))
          (match_dup 2)))]
  "TARGET_SVE2"
  {
    /* Operand 4 is generated here rather than supplied by the caller.  */
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
)

;; Instruction form of the above.  Operand 4 is matched in any form and
;; rewritten to the all-ones constant if it is not yet constant.
(define_insn_and_rewrite "*aarch64_sve2_bsl1n<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
        (xor:SVE_FULL_I
          (and:SVE_FULL_I
            (unspec:SVE_FULL_I
              [(match_operand 4)
               (not:SVE_FULL_I
                 (xor:SVE_FULL_I
                   (match_operand:SVE_FULL_I 1 "register_operand" "<bsl_1st>, w")
                   (match_operand:SVE_FULL_I 2 "register_operand" "<bsl_2nd>, w")))]
              UNSPEC_PRED_X)
            (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
          (match_dup BSL_DUP)))]
  "TARGET_SVE2"
  "@
  bsl1n\t%0.d, %0.d, %<bsl_dup>.d, %3.d
  movprfx\t%0, %<bsl_mov>\;bsl1n\t%0.d, %0.d, %<bsl_dup>.d, %3.d"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Unpredicated bitwise select with inverted second operand.
;; (bsl_dup ? bsl_mov : ~op3) == ((bsl_dup & bsl_mov) | (~op3 & ~bsl_dup))
(define_expand "@aarch64_sve2_bsl2n<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (ior:SVE_FULL_I
          (and:SVE_FULL_I
            (match_operand:SVE_FULL_I 1 "register_operand")
            (match_operand:SVE_FULL_I 3 "register_operand"))
          (unspec:SVE_FULL_I
            [(match_dup 4)
             (and:SVE_FULL_I
               (not:SVE_FULL_I
                 (match_operand:SVE_FULL_I 2 "register_operand"))
               (not:SVE_FULL_I
                 (match_dup 3)))]
            UNSPEC_PRED_X)))]
  "TARGET_SVE2"
  {
    /* Operand 4 is generated here rather than supplied by the caller.  */
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
)

;; Instruction form with the inverted operand 3 listed first inside the
;; AND.  Operand 4 is matched in any form and rewritten to the all-ones
;; constant if it is not yet constant.
(define_insn_and_rewrite "*aarch64_sve2_bsl2n<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
        (ior:SVE_FULL_I
          (and:SVE_FULL_I
            (match_operand:SVE_FULL_I 1 "register_operand" "<bsl_1st>, w")
            (match_operand:SVE_FULL_I 2 "register_operand" "<bsl_2nd>, w"))
          (unspec:SVE_FULL_I
            [(match_operand 4)
             (and:SVE_FULL_I
               (not:SVE_FULL_I
                 (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
               (not:SVE_FULL_I
                 (match_dup BSL_DUP)))]
            UNSPEC_PRED_X)))]
  "TARGET_SVE2"
  "@
  bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d
  movprfx\t%0, %<bsl_mov>\;bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Unpredicated bitwise select with inverted second operand, alternative form.
;; (bsl_dup ? bsl_mov : ~op3) == ((bsl_dup & bsl_mov) | (~bsl_dup & ~op3))
;; Same instruction as the previous BSL2N pattern, but with the two
;; inverted inputs of the AND in the opposite order.
(define_insn_and_rewrite "*aarch64_sve2_bsl2n<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
        (ior:SVE_FULL_I
          (and:SVE_FULL_I
            (match_operand:SVE_FULL_I 1 "register_operand" "<bsl_1st>, w")
            (match_operand:SVE_FULL_I 2 "register_operand" "<bsl_2nd>, w"))
          (unspec:SVE_FULL_I
            [(match_operand 4)
             (and:SVE_FULL_I
               (not:SVE_FULL_I
                 (match_dup BSL_DUP))
               (not:SVE_FULL_I
                 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")))]
            UNSPEC_PRED_X)))]
  "TARGET_SVE2"
  "@
  bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d
  movprfx\t%0, %<bsl_mov>\;bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; -------------------------------------------------------------------------
;; ---- [INT] Shift-and-accumulate operations
;; -------------------------------------------------------------------------
;; Includes:
;; - SRSRA
;; - SSRA
;; - URSRA
;; - USRA
;; -------------------------------------------------------------------------

;; Provide the natural unpredicated interface for SSRA and USRA.
;; Callers pass operands 0-3; operand 4 is generated by the expander.
(define_expand "@aarch64_sve_add_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (plus:SVE_FULL_I
          (unspec:SVE_FULL_I
            [(match_dup 4)
             (SHIFTRT:SVE_FULL_I
               (match_operand:SVE_FULL_I 2 "register_operand")
               (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm"))]
            UNSPEC_PRED_X)
          (match_operand:SVE_FULL_I 1 "register_operand")))]
  "TARGET_SVE2"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
)

;; Pattern-match SSRA and USRA as a predicated operation whose predicate
;; isn't needed.
(define_insn_and_rewrite "*aarch64_sve2_sra<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
        (plus:SVE_FULL_I
          (unspec:SVE_FULL_I
            [(match_operand 4)
             (SHIFTRT:SVE_FULL_I
               (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
               (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm"))]
            UNSPEC_PRED_X)
          (match_operand:SVE_FULL_I 1 "register_operand" "0, w")))]
  "TARGET_SVE2"
  "@
  <sra_op>sra\t%0.<Vetype>, %2.<Vetype>, #%3
  movprfx\t%0, %1\;<sra_op>sra\t%0.<Vetype>, %2.<Vetype>, #%3"
  ;; Canonicalise a non-constant operand 4 to the all-ones constant.
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; SRSRA and URSRA.  The shift is a VRSHR_N unspec, so no operand needs
;; rewriting; the accumulator is operand 1.
(define_insn "@aarch64_sve_add_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
        (plus:SVE_FULL_I
          (unspec:SVE_FULL_I
            [(match_operand:SVE_FULL_I 2 "register_operand" "w, w")
             (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm")]
            VRSHR_N)
          (match_operand:SVE_FULL_I 1 "register_operand" "0, w")))]
  "TARGET_SVE2"
  "@
  <sur>sra\t%0.<Vetype>, %2.<Vetype>, #%3
  movprfx\t%0, %1\;<sur>sra\t%0.<Vetype>, %2.<Vetype>, #%3"
  [(set_attr "movprfx" "*,yes")]
)

;; -------------------------------------------------------------------------
;; ---- [INT] Shift-and-insert operations
;; -------------------------------------------------------------------------
;; Includes:
;; - SLI
;; - SRI
;; -------------------------------------------------------------------------

;; These instructions do not take MOVPRFX.
;; Single alternative only: operand 1 is tied to the destination.
(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
        (unspec:SVE_FULL_I
          [(match_operand:SVE_FULL_I 1 "register_operand" "0")
           (match_operand:SVE_FULL_I 2 "register_operand" "w")
           (match_operand:SVE_FULL_I 3 "aarch64_simd_<lr>shift_imm")]
          SVE2_INT_SHIFT_INSERT))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Vetype>, %2.<Vetype>, #%3"
)

;; -------------------------------------------------------------------------
;; ---- [INT] Sum of absolute differences
;; -------------------------------------------------------------------------
;; Includes:
;; - SABA
;; - UABA
;; -------------------------------------------------------------------------

;; Provide the natural unpredicated interface for SABA and UABA.
;; The absolute difference is expressed as (max - min) of operands 2 and 3
;; and is added to operand 1.  Callers pass operands 0-3; operand 4 is
;; generated by the expander and shared by both halves of the subtraction.
;; As an expander, this pattern carries predicates only, no constraints.
(define_expand "@aarch64_sve2_<su>aba<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (plus:SVE_FULL_I
          (minus:SVE_FULL_I
            (unspec:SVE_FULL_I
              [(match_dup 4)
               (USMAX:SVE_FULL_I
                 (match_operand:SVE_FULL_I 2 "register_operand")
                 (match_operand:SVE_FULL_I 3 "register_operand"))]
              UNSPEC_PRED_X)
            (unspec:SVE_FULL_I
              [(match_dup 4)
               (<max_opp>:SVE_FULL_I
                 (match_dup 2)
                 (match_dup 3))]
              UNSPEC_PRED_X))
          (match_operand:SVE_FULL_I 1 "register_operand")))]
  "TARGET_SVE2"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
)

;; Pattern-match SABA and UABA as an absolute-difference-and-accumulate
;; operation whose predicates aren't needed.
;; Operands 4 and 5 wrap the max and min halves respectively and are
;; matched in any form.
(define_insn "*aarch64_sve2_<su>aba<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
        (plus:SVE_FULL_I
          (minus:SVE_FULL_I
            (unspec:SVE_FULL_I
              [(match_operand 4)
               (USMAX:SVE_FULL_I
                 (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
                 (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))]
              UNSPEC_PRED_X)
            (unspec:SVE_FULL_I
              [(match_operand 5)
               (<max_opp>:SVE_FULL_I
                 (match_dup 2)
                 (match_dup 3))]
              UNSPEC_PRED_X))
          (match_operand:SVE_FULL_I 1 "register_operand" "0, w")))]
  "TARGET_SVE2"
  "@
  <su>aba\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
  movprfx\t%0, %1\;<su>aba\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; =========================================================================
;; == Extending arithmetic
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [INT] Wide binary arithmetic
;; -------------------------------------------------------------------------
;; Includes:
;; - SADDWB
;; - SADDWT
;; - SSUBWB
;; - SSUBWT
;; - UADDWB
;; - UADDWT
;; - USUBWB
;; - USUBWT
;; -------------------------------------------------------------------------

;; Operand 1 has the full result mode; operand 2 has the narrower
;; <VNARROW> mode.
(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
        (unspec:SVE_FULL_HSDI
          [(match_operand:SVE_FULL_HSDI 1 "register_operand" "w")
           (match_operand:<VNARROW> 2 "register_operand" "w")]
          SVE2_INT_BINARY_WIDE))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Ventype>"
)

;; -------------------------------------------------------------------------
;; ---- [INT] Long binary arithmetic
;; -------------------------------------------------------------------------
;; Includes:
;; - SABDLB
;; - SABDLT
;; - SADDLB
;; - SADDLBT
;; - SADDLT
;; - SMULLB
;; - SMULLT
;; - SQDMULLB
;; - SQDMULLT
;; - SSUBLB
;; - SSUBLBT
;; - SSUBLT
;; - SSUBLTB
;; - UABDLB
;; - UABDLT
;; - UADDLB
;; - UADDLT
;; - UMULLB
;; - UMULLT
;; - USUBLB
;; - USUBLT
;; -------------------------------------------------------------------------

;; Both inputs have the narrower <VNARROW> mode; the result has the full
;; mode.
(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
        (unspec:SVE_FULL_HSDI
          [(match_operand:<VNARROW> 1 "register_operand" "w")
           (match_operand:<VNARROW> 2 "register_operand" "w")]
          SVE2_INT_BINARY_LONG))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>"
)

;; Lane-selecting form: operand 3 is the constant lane index applied to
;; operand 2.
(define_insn "@aarch64_sve_<sve_int_op>_lane_<mode>"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w")
        (unspec:SVE_FULL_SDI
          [(match_operand:<VNARROW> 1 "register_operand" "w")
           (unspec:<VNARROW>
             [(match_operand:<VNARROW> 2 "register_operand" "<sve_lane_con>")
              (match_operand:SI 3 "const_int_operand")]
             UNSPEC_SVE_LANE_SELECT)]
          SVE2_INT_BINARY_LONG_LANE))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>[%3]"
)

;; -------------------------------------------------------------------------
;; ---- [INT] Long left shifts
;; -------------------------------------------------------------------------
;; Includes:
;; - SSHLLB
;; - SSHLLT
;; - USHLLB
;; - USHLLT
;; -------------------------------------------------------------------------

;; The immediate range is enforced before generating the instruction.
;; The predicate here only requires operand 2 to be a constant integer.
(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
        (unspec:SVE_FULL_HSDI
          [(match_operand:<VNARROW> 1 "register_operand" "w")
           (match_operand:DI 2 "const_int_operand")]
          SVE2_INT_SHIFT_IMM_LONG))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Vetype>, %1.<Ventype>, #%2"
)

;; -------------------------------------------------------------------------
;; ---- [INT] Long binary arithmetic with accumulation
;; -------------------------------------------------------------------------
;; Includes:
;; - SABALB
;; - SABALT
;; - SMLALB
;; - SMLALT
;; - SMLSLB
;; - SMLSLT
;; - SQDMLALB
;; - SQDMLALBT
;; - SQDMLALT
;; - SQDMLSLB
;; - SQDMLSLBT
;; - SQDMLSLT
;; - UABALB
;; - UABALT
;; - UMLALB
;; - UMLALT
;; - UMLSLB
;; - UMLSLT
;; -------------------------------------------------------------------------

;; Non-saturating MLA operations.  Operand 1 is the accumulator; operands
;; 2 and 3 are the narrow inputs.
(define_insn "@aarch64_sve_add_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
        (plus:SVE_FULL_HSDI
          (unspec:SVE_FULL_HSDI
            [(match_operand:<VNARROW> 2 "register_operand" "w, w")
             (match_operand:<VNARROW> 3 "register_operand" "w, w")]
            SVE2_INT_ADD_BINARY_LONG)
          (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")))]
  "TARGET_SVE2"
  "@
  <sve_int_add_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>
  movprfx\t%0, %1\;<sve_int_add_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Non-saturating MLA operations with lane select.
;; Operand 4 is the constant lane index applied to operand 3.
(define_insn "@aarch64_sve_add_<sve_int_op>_lane_<mode>"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
        (plus:SVE_FULL_SDI
          (unspec:SVE_FULL_SDI
            [(match_operand:<VNARROW> 2 "register_operand" "w, w")
             (unspec:<VNARROW>
               [(match_operand:<VNARROW> 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
                (match_operand:SI 4 "const_int_operand")]
               UNSPEC_SVE_LANE_SELECT)]
            SVE2_INT_ADD_BINARY_LONG_LANE)
          (match_operand:SVE_FULL_SDI 1 "register_operand" "0, w")))]
  "TARGET_SVE2"
  "@
  <sve_int_add_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]
  movprfx\t%0, %1\;<sve_int_add_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]"
  [(set_attr "movprfx" "*,yes")]
)

;; Saturating MLA operations.  The accumulation into operand 1 uses
;; ss_plus rather than plus.
(define_insn "@aarch64_sve_qadd_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
        (ss_plus:SVE_FULL_HSDI
          (unspec:SVE_FULL_HSDI
            [(match_operand:<VNARROW> 2 "register_operand" "w, w")
             (match_operand:<VNARROW> 3 "register_operand" "w, w")]
            SVE2_INT_QADD_BINARY_LONG)
          (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")))]
  "TARGET_SVE2"
  "@
  <sve_int_qadd_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>
  movprfx\t%0, %1\;<sve_int_qadd_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Saturating MLA operations with lane select.
;; Operand 4 is the constant lane index applied to operand 3.
(define_insn "@aarch64_sve_qadd_<sve_int_op>_lane_<mode>"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
        (ss_plus:SVE_FULL_SDI
          (unspec:SVE_FULL_SDI
            [(match_operand:<VNARROW> 2 "register_operand" "w, w")
             (unspec:<VNARROW>
               [(match_operand:<VNARROW> 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
                (match_operand:SI 4 "const_int_operand")]
               UNSPEC_SVE_LANE_SELECT)]
            SVE2_INT_QADD_BINARY_LONG_LANE)
          (match_operand:SVE_FULL_SDI 1 "register_operand" "0, w")))]
  "TARGET_SVE2"
  "@
  <sve_int_qadd_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]
  movprfx\t%0, %1\;<sve_int_qadd_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]"
  [(set_attr "movprfx" "*,yes")]
)

;; Non-saturating MLS operations.  The long result of operands 2 and 3 is
;; subtracted from the accumulator in operand 1.
(define_insn "@aarch64_sve_sub_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
        (minus:SVE_FULL_HSDI
          (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")
          (unspec:SVE_FULL_HSDI
            [(match_operand:<VNARROW> 2 "register_operand" "w, w")
             (match_operand:<VNARROW> 3 "register_operand" "w, w")]
            SVE2_INT_SUB_BINARY_LONG)))]
  "TARGET_SVE2"
  "@
  <sve_int_sub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>
  movprfx\t%0, %1\;<sve_int_sub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Non-saturating MLS operations with lane select.
;; Operand 4 is the constant lane index applied to operand 3.
(define_insn "@aarch64_sve_sub_<sve_int_op>_lane_<mode>"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
        (minus:SVE_FULL_SDI
          (match_operand:SVE_FULL_SDI 1 "register_operand" "0, w")
          (unspec:SVE_FULL_SDI
            [(match_operand:<VNARROW> 2 "register_operand" "w, w")
             (unspec:<VNARROW>
               [(match_operand:<VNARROW> 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
                (match_operand:SI 4 "const_int_operand")]
               UNSPEC_SVE_LANE_SELECT)]
            SVE2_INT_SUB_BINARY_LONG_LANE)))]
  "TARGET_SVE2"
  "@
  <sve_int_sub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]
  movprfx\t%0, %1\;<sve_int_sub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]"
  [(set_attr "movprfx" "*,yes")]
)

;; Saturating MLS operations.  The subtraction from operand 1 uses
;; ss_minus rather than minus.
(define_insn "@aarch64_sve_qsub_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
        (ss_minus:SVE_FULL_HSDI
          (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")
          (unspec:SVE_FULL_HSDI
            [(match_operand:<VNARROW> 2 "register_operand" "w, w")
             (match_operand:<VNARROW> 3 "register_operand" "w, w")]
            SVE2_INT_QSUB_BINARY_LONG)))]
  "TARGET_SVE2"
  "@
  <sve_int_qsub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>
  movprfx\t%0, %1\;<sve_int_qsub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Saturating MLS operations with lane select.
;; Operand 4 is the constant lane index applied to operand 3.
(define_insn "@aarch64_sve_qsub_<sve_int_op>_lane_<mode>"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
        (ss_minus:SVE_FULL_SDI
          (match_operand:SVE_FULL_SDI 1 "register_operand" "0, w")
          (unspec:SVE_FULL_SDI
            [(match_operand:<VNARROW> 2 "register_operand" "w, w")
             (unspec:<VNARROW>
               [(match_operand:<VNARROW> 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
                (match_operand:SI 4 "const_int_operand")]
               UNSPEC_SVE_LANE_SELECT)]
            SVE2_INT_QSUB_BINARY_LONG_LANE)))]
  "TARGET_SVE2"
  "@
  <sve_int_qsub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]
  movprfx\t%0, %1\;<sve_int_qsub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]"
  [(set_attr "movprfx" "*,yes")]
)

;; -------------------------------------------------------------------------
;; ---- [FP] Long multiplication with accumulation
;; -------------------------------------------------------------------------
;; Includes:
;; - FMLALB
;; - FMLALT
;; - FMLSLB
;; - FMLSLT
;; -------------------------------------------------------------------------

;; Operands 1 and 2 are the narrow inputs; operand 3 is the accumulator
;; that is tied to, or copied into, the destination.
(define_insn "@aarch64_sve_<sve_fp_op><mode>"
  [(set (match_operand:VNx4SF_ONLY 0 "register_operand" "=w, ?&w")
        (unspec:VNx4SF_ONLY
          [(match_operand:<VNARROW> 1 "register_operand" "w, w")
           (match_operand:<VNARROW> 2 "register_operand" "w, w")
           (match_operand:VNx4SF_ONLY 3 "register_operand" "0, w")]
          SVE2_FP_TERNARY_LONG))]
  "TARGET_SVE2"
  "@
  <sve_fp_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>
  movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Lane-selecting form: operand 3 is the constant lane index applied to
;; operand 2, and the accumulator moves to operand 4.
(define_insn "@aarch64_<sve_fp_op>_lane_<mode>"
  [(set (match_operand:VNx4SF_ONLY 0 "register_operand" "=w, ?&w")
        (unspec:VNx4SF_ONLY
          [(match_operand:<VNARROW> 1 "register_operand" "w, w")
           (unspec:<VNARROW>
             [(match_operand:<VNARROW> 2 "register_operand" "<sve_lane_con>, <sve_lane_con>")
              (match_operand:SI 3 "const_int_operand")]
             UNSPEC_SVE_LANE_SELECT)
           (match_operand:VNx4SF_ONLY 4 "register_operand" "0, w")]
          SVE2_FP_TERNARY_LONG_LANE))]
  "TARGET_SVE2"
  "@
  <sve_fp_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>[%3]
  movprfx\t%0, %4\;<sve_fp_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>[%3]"
  [(set_attr "movprfx" "*,yes")]
)

;; =========================================================================
;; == Narrowing arithnetic
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [INT] Narrowing unary arithmetic
;; -------------------------------------------------------------------------
;; Includes:
;; - SQXTNB
;; - SQXTNT
;; - SQXTUNB
;; - SQXTUNT
;; - UQXTNB
;; - UQXTNT
;; -------------------------------------------------------------------------

;; The result has the narrower <VNARROW> mode; the single input has the
;; full mode.
(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
        (unspec:<VNARROW>
          [(match_operand:SVE_FULL_HSDI 1 "register_operand" "w")]
          SVE2_INT_UNARY_NARROWB))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Ventype>, %1.<Vetype>"
)

;; These instructions do not take MOVPRFX.
;; Operand 1 is the existing narrow vector and is tied to the destination.
(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
        (unspec:<VNARROW>
          [(match_operand:<VNARROW> 1 "register_operand" "0")
           (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")]
          SVE2_INT_UNARY_NARROWT))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Ventype>, %2.<Vetype>"
)

;; -------------------------------------------------------------------------
;; ---- [INT] Narrowing binary arithmetic
;; -------------------------------------------------------------------------
;; Includes:
;; - ADDHNB
;; - ADDHNT
;; - RADDHNB
;; - RADDHNT
;; - RSUBHNB
;; - RSUBHNT
;; - SUBHNB
;; - SUBHNT
;; -------------------------------------------------------------------------

;; Two full-mode inputs produce a result in the narrower <VNARROW> mode.
(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
        (unspec:<VNARROW>
          [(match_operand:SVE_FULL_HSDI 1 "register_operand" "w")
           (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")]
          SVE2_INT_BINARY_NARROWB))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Ventype>, %1.<Vetype>, %2.<Vetype>"
)

;; These instructions do not take MOVPRFX.
;; Operand 1 is the existing narrow vector and is tied to the destination.
(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
        (unspec:<VNARROW>
          [(match_operand:<VNARROW> 1 "register_operand" "0")
           (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")
           (match_operand:SVE_FULL_HSDI 3 "register_operand" "w")]
          SVE2_INT_BINARY_NARROWT))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Ventype>, %2.<Vetype>, %3.<Vetype>"
)

;; -------------------------------------------------------------------------
;; ---- [INT] Narrowing right shifts
;; -------------------------------------------------------------------------
;; Includes:
;; - RSHRNB
;; - RSHRNT
;; - SHRNB
;; - SHRNT
;; - SQRSHRNB
;; - SQRSHRNT
;; - SQRSHRUNB
;; - SQRSHRUNT
;; - SQSHRNB
;; - SQSHRNT
;; - SQSHRUNB
;; - SQSHRUNT
;; - UQRSHRNB
;; - UQRSHRNT
;; - UQSHRNB
;; - UQSHRNT
;; -------------------------------------------------------------------------

;; The immediate range is enforced before generating the instruction.
;; The predicate here only requires operand 2 to be a constant integer.
(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
        (unspec:<VNARROW>
          [(match_operand:SVE_FULL_HSDI 1 "register_operand" "w")
           (match_operand:DI 2 "const_int_operand")]
          SVE2_INT_SHIFT_IMM_NARROWB))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Ventype>, %1.<Vetype>, #%2"
)

;; The immediate range is enforced before generating the instruction.
;; These instructions do not take MOVPRFX.
;; Operand 1 is the existing narrow vector and is tied to the destination;
;; operand 3 is the constant shift amount.
(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
        (unspec:<VNARROW>
          [(match_operand:<VNARROW> 1 "register_operand" "0")
           (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")
           (match_operand:DI 3 "const_int_operand")]
          SVE2_INT_SHIFT_IMM_NARROWT))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Ventype>, %2.<Vetype>, #%3"
)

;; =========================================================================
;; == Pairwise arithmetic
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [INT] Pairwise arithmetic
;; -------------------------------------------------------------------------
;; Includes:
;; - ADDP
;; - SMAXP
;; - SMINP
;; - UMAXP
;; - UMINP
;; -------------------------------------------------------------------------

;; Predicated form: operand 1 is the predicate (printed as %1/m) and
;; operand 2 is tied to, or copied into, the destination.
(define_insn "@aarch64_pred_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
           (match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
          SVE2_INT_BINARY_PAIR))]
  "TARGET_SVE2"
  "@
  <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
  movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; -------------------------------------------------------------------------
;; ---- [FP] Pairwise arithmetic
;; -------------------------------------------------------------------------
;; Includes:
;; - FADDP
;; - FMAXP
;; - FMAXNMP
;; - FMINP
;; - FMINNMP
;; -------------------------------------------------------------------------

;; Floating-point counterpart of the integer pairwise pattern, with the
;; same operand layout.
(define_insn "@aarch64_pred_<sve_fp_op><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
           (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
          SVE2_FP_BINARY_PAIR))]
  "TARGET_SVE2"
  "@
  <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
  movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; -------------------------------------------------------------------------
;; ---- [INT] Pairwise arithmetic with accumulation
;; -------------------------------------------------------------------------
;; Includes:
;; - SADALP
;; - UADALP
;; -------------------------------------------------------------------------

;; Predicated pairwise add and accumulate long, with merging.
;; Operand 4 is the value selected by UNSPEC_SEL when the operation is not
;; taken; it must be either operand 2 itself or a constant.
(define_expand "@cond_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
        (unspec:SVE_FULL_HSDI
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_HSDI
             [(match_dup 1)
              (match_operand:SVE_FULL_HSDI 2 "register_operand")
              (match_operand:<VNARROW> 3 "register_operand")]
             SVE2_INT_BINARY_PAIR_LONG)
           (match_operand:SVE_FULL_HSDI 4 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE2"
{
  /* Only target code is aware of these operations, so we don't need
     to handle the fully-general case.  */
  gcc_assert (rtx_equal_p (operands[2], operands[4])
              || CONSTANT_P (operands[4]));
})

;; Predicated pairwise add and accumulate long, merging with the first
;; input.
;; Merge-with-first-input form of the predicated pairwise add and
;; accumulate long operation: the UNSPEC_SEL fallback is operand 2 itself.
;; The inner operand 4 is matched in any form and rewritten to the
;; all-ones constant if it is not yet constant.
(define_insn_and_rewrite "*cond_<sve_int_op><mode>_2"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_HSDI
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (unspec:SVE_FULL_HSDI
             [(match_operand 4)
              (match_operand:SVE_FULL_HSDI 2 "register_operand" "0, w")
              (match_operand:<VNARROW> 3 "register_operand" "w, w")]
             SVE2_INT_BINARY_PAIR_LONG)
           (match_dup 2)]
          UNSPEC_SEL))]
  "TARGET_SVE2"
  "@
  <sve_int_op>\t%0.<Vetype>, %1/m, %3.<Ventype>
  movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %3.<Ventype>"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated pairwise add and accumulate long, merging with zero.
;; Both alternatives start with a zeroing MOVPRFX (%1/z); the inner
;; operand 5 is rewritten to the all-ones constant if it is not yet
;; constant.
(define_insn_and_rewrite "*cond_<sve_int_op><mode>_z"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=&w, &w")
        (unspec:SVE_FULL_HSDI
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (unspec:SVE_FULL_HSDI
             [(match_operand 5)
              (match_operand:SVE_FULL_HSDI 2 "register_operand" "0, w")
              (match_operand:<VNARROW> 3 "register_operand" "w, w")]
             SVE2_INT_BINARY_PAIR_LONG)
           (match_operand:SVE_FULL_HSDI 4 "aarch64_simd_imm_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE2"
  "@
  movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %3.<Ventype>
  movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %3.<Ventype>"
  "&& !CONSTANT_P (operands[5])"
  {
    operands[5] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "yes")]
)

;; =========================================================================
;; == Complex arithmetic
;; =========================================================================

;; -------------------------------------------------------------------------
Complex binary operations 1783 ;; ------------------------------------------------------------------------- 1784 ;; Includes: 1785 ;; - CADD 1786 ;; - SQCADD 1787 ;; ------------------------------------------------------------------------- 1788 1789 (define_insn "@aarch64_sve_<optab><mode>" 1790 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") 1791 (unspec:SVE_FULL_I 1792 [(match_operand:SVE_FULL_I 1 "register_operand" "0, w") 1793 (match_operand:SVE_FULL_I 2 "register_operand" "w, w")] 1794 SVE2_INT_CADD))] 1795 "TARGET_SVE2" 1796 "@ 1797 <sve_int_op>\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #<rot> 1798 movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #<rot>" 1799 [(set_attr "movprfx" "*,yes")] 1800 ) 1801 1802 ;; unpredicated optab pattern for auto-vectorizer 1803 (define_expand "cadd<rot><mode>3" 1804 [(set (match_operand:SVE_FULL_I 0 "register_operand") 1805 (unspec:SVE_FULL_I 1806 [(match_operand:SVE_FULL_I 1 "register_operand") 1807 (match_operand:SVE_FULL_I 2 "register_operand")] 1808 SVE2_INT_CADD_OP))] 1809 "TARGET_SVE2" 1810 ) 1811 1812 ;; ------------------------------------------------------------------------- 1813 ;; ---- [INT] Complex ternary operations 1814 ;; ------------------------------------------------------------------------- 1815 ;; Includes: 1816 ;; - CMLA 1817 ;; - SQRDCMLA 1818 ;; ------------------------------------------------------------------------- 1819 1820 (define_insn "@aarch64_sve_<optab><mode>" 1821 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") 1822 (unspec:SVE_FULL_I 1823 [(match_operand:SVE_FULL_I 1 "register_operand" "0, w") 1824 (match_operand:SVE_FULL_I 2 "register_operand" "w, w") 1825 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")] 1826 SVE2_INT_CMLA))] 1827 "TARGET_SVE2" 1828 "@ 1829 <sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>, #<rot> 1830 movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>, #<rot>" 1831 [(set_attr "movprfx" "*,yes")] 
1832 ) 1833 1834 (define_insn "@aarch64_<optab>_lane_<mode>" 1835 [(set (match_operand:SVE_FULL_HSI 0 "register_operand" "=w, ?&w") 1836 (unspec:SVE_FULL_HSI 1837 [(match_operand:SVE_FULL_HSI 1 "register_operand" "0, w") 1838 (match_operand:SVE_FULL_HSI 2 "register_operand" "w, w") 1839 (unspec:SVE_FULL_HSI 1840 [(match_operand:SVE_FULL_HSI 3 "register_operand" "<sve_lane_con>, <sve_lane_con>") 1841 (match_operand:SI 4 "const_int_operand")] 1842 UNSPEC_SVE_LANE_SELECT)] 1843 SVE2_INT_CMLA))] 1844 "TARGET_SVE2" 1845 "@ 1846 <sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4], #<rot> 1847 movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4], #<rot>" 1848 [(set_attr "movprfx" "*,yes")] 1849 ) 1850 1851 ;; unpredicated optab pattern for auto-vectorizer 1852 ;; The complex mla/mls operations always need to expand to two instructions. 1853 ;; The first operation does half the computation and the second does the 1854 ;; remainder. Because of this, expand early. 1855 (define_expand "cml<fcmac1><conj_op><mode>4" 1856 [(set (match_operand:SVE_FULL_I 0 "register_operand") 1857 (plus:SVE_FULL_I (match_operand:SVE_FULL_I 1 "register_operand") 1858 (unspec:SVE_FULL_I 1859 [(match_operand:SVE_FULL_I 2 "register_operand") 1860 (match_operand:SVE_FULL_I 3 "register_operand")] 1861 SVE2_INT_CMLA_OP)))] 1862 "TARGET_SVE2" 1863 { 1864 rtx tmp = gen_reg_rtx (<MODE>mode); 1865 emit_insn (gen_aarch64_sve_cmla<sve_rot1><mode> (tmp, operands[1], 1866 operands[3], operands[2])); 1867 emit_insn (gen_aarch64_sve_cmla<sve_rot2><mode> (operands[0], tmp, 1868 operands[3], operands[2])); 1869 DONE; 1870 }) 1871 1872 ;; unpredicated optab pattern for auto-vectorizer 1873 ;; The complex mul operations always need to expand to two instructions. 1874 ;; The first operation does half the computation and the second does the 1875 ;; remainder. Because of this, expand early. 
(define_expand "cmul<conj_op><mode>3"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (unspec:SVE_FULL_I
          [(match_operand:SVE_FULL_I 1 "register_operand")
           (match_operand:SVE_FULL_I 2 "register_operand")]
          SVE2_INT_CMUL_OP))]
  "TARGET_SVE2"
{
  /* Start from an all-zero accumulator held in a register, so that the
     two multiply-accumulate steps below produce just the product.  */
  rtx accum = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));
  rtx tmp = gen_reg_rtx (<MODE>mode);
  /* First step (<sve_rot1>) into a temporary, second step (<sve_rot2>)
     into operand 0.  Operands 2 and 1 are passed in that (swapped)
     order to both steps.  */
  emit_insn (gen_aarch64_sve_cmla<sve_rot1><mode> (tmp, accum,
                                                   operands[2], operands[1]));
  emit_insn (gen_aarch64_sve_cmla<sve_rot2><mode> (operands[0], tmp,
                                                   operands[2], operands[1]));
  DONE;
})

;; -------------------------------------------------------------------------
;; ---- [INT] Complex dot product
;; -------------------------------------------------------------------------
;; Includes:
;; - CDOT
;; -------------------------------------------------------------------------

;; Unpredicated complex dot product.  Operand 1 is the accumulator (tied to
;; the destination, or copied into it with MOVPRFX); operands 2 and 3 have
;; mode <VSI2QI> and are printed with the <Vetype_fourth> element suffix.
(define_insn "@aarch64_sve_<optab><mode>"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_SDI
          [(match_operand:SVE_FULL_SDI 1 "register_operand" "0, w")
           (match_operand:<VSI2QI> 2 "register_operand" "w, w")
           (match_operand:<VSI2QI> 3 "register_operand" "w, w")]
          SVE2_INT_CDOT))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %2.<Vetype_fourth>, %3.<Vetype_fourth>, #<rot>
   movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype_fourth>, %3.<Vetype_fourth>, #<rot>"
  [(set_attr "movprfx" "*,yes")]
)

;; Indexed form of the pattern above.  Operand 4 is a constant lane index,
;; printed as [%4] after operand 3, and operand 3 uses the register
;; constraint given by <sve_lane_con>.
(define_insn "@aarch64_<optab>_lane_<mode>"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_SDI
          [(match_operand:SVE_FULL_SDI 1 "register_operand" "0, w")
           (match_operand:<VSI2QI> 2 "register_operand" "w, w")
           (unspec:<VSI2QI>
             [(match_operand:<VSI2QI> 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
              (match_operand:SI 4 "const_int_operand")]
             UNSPEC_SVE_LANE_SELECT)]
          SVE2_INT_CDOT))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %2.<Vetype_fourth>, %3.<Vetype_fourth>[%4], #<rot>
   movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype_fourth>, %3.<Vetype_fourth>[%4], #<rot>"
  [(set_attr "movprfx" "*,yes")]
)

;; =========================================================================
;; == Conversions
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [FP<-FP] Widening conversions
;; -------------------------------------------------------------------------
;; Includes:
;; - FCVTLT
;; -------------------------------------------------------------------------

;; Predicated convert long top.
;;
;; The narrow input (operand 2) is tied to the destination, which is why
;; the template prints register %0 for the source as well, with the narrow
;; element suffix.  Operand 3 is the GP strictness flag; it does not appear
;; in the output template.
(define_insn "@aarch64_pred_<sve_fp_op><mode>"
  [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w")
        (unspec:SVE_FULL_SDF
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (match_operand:SI 3 "aarch64_sve_gp_strictness")
           (match_operand:<VNARROW> 2 "register_operand" "0")]
          SVE2_COND_FP_UNARY_LONG))]
  "TARGET_SVE2"
  "<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Ventype>"
)

;; Predicated convert long top with merging.
;;
;; The inner operation reuses the governing predicate (match_dup 1) and is
;; created with SVE_STRICT_GP.  Operand 3 is the value merged with via
;; UNSPEC_SEL.  There are no preparation statements.
(define_expand "@cond_<sve_fp_op><mode>"
  [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
        (unspec:SVE_FULL_SDF
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_SDF
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
              (match_operand:<VNARROW> 2 "register_operand")]
             SVE2_COND_FP_UNARY_LONG)
           (match_operand:SVE_FULL_SDF 3 "register_operand")]
          UNSPEC_SEL))]
  "TARGET_SVE2"
)

;; These instructions do not take MOVPRFX.
;; SVE_RELAXED_GP form: the inner predicate (operand 4) is matched without
;; a mode or predicate.  If it is not already equal to the governing
;; predicate, the rewrite step replaces it with a copy of operand 1.
;; The merge value (operand 3) is tied to the destination.
(define_insn_and_rewrite "*cond_<sve_fp_op><mode>_relaxed"
  [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w")
        (unspec:SVE_FULL_SDF
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (unspec:SVE_FULL_SDF
             [(match_operand 4)
              (const_int SVE_RELAXED_GP)
              (match_operand:<VNARROW> 2 "register_operand" "w")]
             SVE2_COND_FP_UNARY_LONG)
           (match_operand:SVE_FULL_SDF 3 "register_operand" "0")]
          UNSPEC_SEL))]
  "TARGET_SVE2"
  "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Ventype>"
  "&& !rtx_equal_p (operands[1], operands[4])"
  {
    operands[4] = copy_rtx (operands[1]);
  }
)

;; SVE_STRICT_GP form: the inner predicate must be the governing predicate
;; itself (match_dup 1), so no rewrite is needed.  The merge value
;; (operand 3) is tied to the destination.
(define_insn "*cond_<sve_fp_op><mode>_strict"
  [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w")
        (unspec:SVE_FULL_SDF
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (unspec:SVE_FULL_SDF
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
              (match_operand:<VNARROW> 2 "register_operand" "w")]
             SVE2_COND_FP_UNARY_LONG)
           (match_operand:SVE_FULL_SDF 3 "register_operand" "0")]
          UNSPEC_SEL))]
  "TARGET_SVE2"
  "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Ventype>"
)

;; -------------------------------------------------------------------------
;; ---- [FP<-FP] Narrowing conversions
;; -------------------------------------------------------------------------
;; Includes:
;; - FCVTNT
;; - FCVTX
;; - FCVTXNT
;; -------------------------------------------------------------------------

;; Predicated FCVTNT.  This doesn't give a natural aarch64_pred_*/cond_*
;; pair because the even elements always have to be supplied for active
;; elements, even if the inactive elements don't matter.
;;
;; These instructions do not take MOVPRFX.
;; Operand 1 (same mode as the result) is tied to the destination and is
;; not printed separately.  Operand 2 is the predicate, in <VWIDE_PRED>
;; mode, and operand 3 is the wide input.
(define_insn "@aarch64_sve_cvtnt<mode>"
  [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w")
        (unspec:SVE_FULL_HSF
          [(match_operand:<VWIDE_PRED> 2 "register_operand" "Upl")
           (const_int SVE_STRICT_GP)
           (match_operand:SVE_FULL_HSF 1 "register_operand" "0")
           (match_operand:<VWIDE> 3 "register_operand" "w")]
          UNSPEC_COND_FCVTNT))]
  "TARGET_SVE2"
  "fcvtnt\t%0.<Vetype>, %2/m, %3.<Vewtype>"
)

;; Predicated FCVTX (equivalent to what would be FCVTXNB, except that
;; it supports MOVPRFX).
;;
;; Alternative 0 ties the wide input (operand 2) to the destination.
;; Alternative 1 copies operand 2 into the destination with MOVPRFX before
;; converting.  Operand 3 is the GP strictness flag and is not printed.
(define_insn "@aarch64_pred_<sve_fp_op><mode>"
  [(set (match_operand:VNx4SF_ONLY 0 "register_operand" "=w, ?&w")
        (unspec:VNx4SF_ONLY
          [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl, Upl")
           (match_operand:SI 3 "aarch64_sve_gp_strictness")
           (match_operand:<VWIDE> 2 "register_operand" "0, w")]
          SVE2_COND_FP_UNARY_NARROWB))]
  "TARGET_SVE2"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>
   movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated FCVTX with merging.
;; The inner operation reuses the governing predicate (match_dup 1) and is
;; created with SVE_STRICT_GP.  Operand 3, the merge value, may be a
;; register or zero.  There are no preparation statements.
(define_expand "@cond_<sve_fp_op><mode>"
  [(set (match_operand:VNx4SF_ONLY 0 "register_operand")
        (unspec:VNx4SF_ONLY
          [(match_operand:<VWIDE_PRED> 1 "register_operand")
           (unspec:VNx4SF_ONLY
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
              (match_operand:<VWIDE> 2 "register_operand")]
             SVE2_COND_FP_UNARY_NARROWB)
           (match_operand:VNx4SF_ONLY 3 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE2"
)

;; SVE_RELAXED_GP form.  The three alternatives differ in the merge value
;; (operand 3):
;;   0: tied to the destination -- a single merging instruction;
;;   1: zero (Dz) -- zeroing MOVPRFX from operand 2, then the instruction;
;;   2: another register -- MOVPRFX from operand 3, then the instruction.
;; The insn condition rejects the case where operands 2 and 3 are the same
;; rtx.  The rewrite step replaces the inner predicate (operand 4) with a
;; copy of operand 1 when the two are not already equal.
(define_insn_and_rewrite "*cond_<sve_fp_op><mode>_any_relaxed"
  [(set (match_operand:VNx4SF_ONLY 0 "register_operand" "=&w, &w, &w")
        (unspec:VNx4SF_ONLY
          [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl, Upl, Upl")
           (unspec:VNx4SF_ONLY
             [(match_operand 4)
              (const_int SVE_RELAXED_GP)
              (match_operand:<VWIDE> 2 "register_operand" "w, w, w")]
             SVE2_COND_FP_UNARY_NARROWB)
           (match_operand:VNx4SF_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
          UNSPEC_SEL))]
  "TARGET_SVE2 && !rtx_equal_p (operands[2], operands[3])"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>
   movprfx\t%0.<Vewtype>, %1/z, %2.<Vewtype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>
   movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>"
  "&& !rtx_equal_p (operands[1], operands[4])"
  {
    operands[4] = copy_rtx (operands[1]);
  }
  [(set_attr "movprfx" "*,yes,yes")]
)

;; SVE_STRICT_GP form of the pattern above: the inner predicate must be the
;; governing predicate itself (match_dup 1), so no rewrite is needed.  The
;; alternatives and the insn condition are the same as for the relaxed form.
(define_insn "*cond_<sve_fp_op><mode>_any_strict"
  [(set (match_operand:VNx4SF_ONLY 0 "register_operand" "=&w, &w, &w")
        (unspec:VNx4SF_ONLY
          [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl, Upl, Upl")
           (unspec:VNx4SF_ONLY
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
              (match_operand:<VWIDE> 2 "register_operand" "w, w, w")]
             SVE2_COND_FP_UNARY_NARROWB)
           (match_operand:VNx4SF_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
          UNSPEC_SEL))]
  "TARGET_SVE2 && !rtx_equal_p (operands[2], operands[3])"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>
   movprfx\t%0.<Vewtype>, %1/z, %2.<Vewtype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>
   movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>"
  [(set_attr "movprfx" "*,yes,yes")]
)

;; Predicated FCVTXNT.  This doesn't give a natural aarch64_pred_*/cond_*
;; pair because the even elements always have to be supplied for active
;; elements, even if the inactive elements don't matter.
;;
;; These instructions do not take MOVPRFX.
;;
;; Here the mode iterator is on the wide input (operand 3), so the result
;; and the tied operand 1 have mode <VNARROW> and are printed with the
;; <Ventype> suffix.
(define_insn "@aarch64_sve2_cvtxnt<mode>"
  [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
        (unspec:<VNARROW>
          [(match_operand:<VPRED> 2 "register_operand" "Upl")
           (const_int SVE_STRICT_GP)
           (match_operand:<VNARROW> 1 "register_operand" "0")
           (match_operand:VNx2DF_ONLY 3 "register_operand" "w")]
          UNSPEC_COND_FCVTXNT))]
  "TARGET_SVE2"
  "fcvtxnt\t%0.<Ventype>, %2/m, %3.<Vetype>"
)

;; =========================================================================
;; == Other arithmetic
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [INT] Reciprocal approximation
;; -------------------------------------------------------------------------
;; Includes:
;; - URECPE
;; - URSQRTE
;; -------------------------------------------------------------------------

;; Predicated integer unary operations.
;; Alternative 0 ties the input (operand 2) to the destination.
;; Alternative 1 copies operand 2 into the destination with MOVPRFX
;; before applying the operation.
(define_insn "@aarch64_pred_<sve_int_op><mode>"
  [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w")
        (unspec:VNx4SI_ONLY
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (unspec:VNx4SI_ONLY
             [(match_operand:VNx4SI_ONLY 2 "register_operand" "0, w")]
             SVE2_U32_UNARY)]
          UNSPEC_PRED_X))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated integer unary operations with merging.
;;
;; Operand 1 is the governing predicate of the UNSPEC_SEL and operand 3 is
;; the merge value (a register or zero).  Operand 4 is not supplied by the
;; caller: it is the predicate slot of the inner UNSPEC_PRED_X and is
;; filled in by the preparation statement below.
(define_expand "@cond_<sve_int_op><mode>"
  [(set (match_operand:VNx4SI_ONLY 0 "register_operand")
        (unspec:VNx4SI_ONLY
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:VNx4SI_ONLY
             [(match_dup 4)
              (unspec:VNx4SI_ONLY
                [(match_operand:VNx4SI_ONLY 2 "register_operand")]
                SVE2_U32_UNARY)]
             UNSPEC_PRED_X)
           (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE2"
  {
    /* Operand 4 occupies a predicate slot, so build the all-ones
       placeholder in the predicate mode rather than the data mode.
       This is also the form that the rewrite in the *cond pattern
       below canonicalizes to, so both routes give identical RTL.  */
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
)

;; Insn form of the expander above.  The three alternatives differ in the
;; merge value (operand 3):
;;   0: tied to the destination -- a single merging instruction;
;;   1: zero (Dz) -- zeroing MOVPRFX from operand 2, then the instruction;
;;   2: another register -- MOVPRFX from operand 3, then the instruction.
;; Operand 4 is matched without a mode or predicate; if it is not already
;; a constant, the rewrite step replaces it with an all-ones <VPRED>mode
;; constant.
(define_insn_and_rewrite "*cond_<sve_int_op><mode>"
  [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w, ?&w")
        (unspec:VNx4SI_ONLY
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
           (unspec:VNx4SI_ONLY
             [(match_operand 4)
              (unspec:VNx4SI_ONLY
                [(match_operand:VNx4SI_ONLY 2 "register_operand" "w, w, w")]
                SVE2_U32_UNARY)]
             UNSPEC_PRED_X)
           (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
          UNSPEC_SEL))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0, %3\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes,yes")]
)

;; -------------------------------------------------------------------------
;; ---- [INT<-FP] Base-2 logarithm
;; -------------------------------------------------------------------------
;; Includes:
;; - FLOGB
;; -------------------------------------------------------------------------

;; Predicated FLOGB.
;;
;; The mode iterator is on the floating-point input (operand 2); the result
;; has the corresponding integer mode <V_INT_EQUIV>.  Alternative 0 ties
;; the input to the destination and alternative 1 copies it in with
;; MOVPRFX.  Operand 3 is the GP strictness flag and is not printed.
(define_insn "@aarch64_pred_<sve_fp_op><mode>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w, ?&w")
        (unspec:<V_INT_EQUIV>
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (match_operand:SI 3 "aarch64_sve_gp_strictness")
           (match_operand:SVE_FULL_F 2 "register_operand" "0, w")]
          SVE2_COND_INT_UNARY_FP))]
  "TARGET_SVE2"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated FLOGB with merging.
;;
;; The inner operation reuses the governing predicate (match_dup 1) and is
;; created with SVE_STRICT_GP.  Operand 3, the merge value, may be a
;; register or zero.  There are no preparation statements.
(define_expand "@cond_<sve_fp_op><mode>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
        (unspec:<V_INT_EQUIV>
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:<V_INT_EQUIV>
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
              (match_operand:SVE_FULL_F 2 "register_operand")]
             SVE2_COND_INT_UNARY_FP)
           (match_operand:<V_INT_EQUIV> 3 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE2"
)

;; SVE_RELAXED_GP form.  The three alternatives differ in the merge value
;; (operand 3):
;;   0: tied to the destination -- a single merging instruction;
;;   1: zero (Dz) -- zeroing MOVPRFX from operand 2, then the instruction;
;;   2: another register -- MOVPRFX from operand 3, then the instruction.
;; The insn condition rejects the case where operands 2 and 3 are the same
;; rtx.  The rewrite step replaces the inner predicate (operand 4) with a
;; copy of operand 1 when the two are not already equal.
(define_insn_and_rewrite "*cond_<sve_fp_op><mode>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=&w, ?&w, ?&w")
        (unspec:<V_INT_EQUIV>
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
           (unspec:<V_INT_EQUIV>
             [(match_operand 4)
              (const_int SVE_RELAXED_GP)
              (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")]
             SVE2_COND_INT_UNARY_FP)
           (match_operand:<V_INT_EQUIV> 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
          UNSPEC_SEL))]
  "TARGET_SVE2 && !rtx_equal_p (operands[2], operands[3])"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
  "&& !rtx_equal_p (operands[1], operands[4])"
  {
    operands[4] = copy_rtx (operands[1]);
  }
  [(set_attr "movprfx" "*,yes,yes")]
)

;; SVE_STRICT_GP form of the pattern above: the inner predicate must be the
;; governing predicate itself (match_dup 1), so no rewrite is needed.  The
;; alternatives and the insn condition are the same as for the relaxed form.
(define_insn "*cond_<sve_fp_op><mode>_strict"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=&w, ?&w, ?&w")
        (unspec:<V_INT_EQUIV>
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
           (unspec:<V_INT_EQUIV>
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
              (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")]
             SVE2_COND_INT_UNARY_FP)
           (match_operand:<V_INT_EQUIV> 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
          UNSPEC_SEL))]
  "TARGET_SVE2 && !rtx_equal_p (operands[2], operands[3])"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
  [(set_attr "movprfx" "*,yes,yes")]
)

;; -------------------------------------------------------------------------
;; ---- [INT] Polynomial multiplication
;; -------------------------------------------------------------------------
;; Includes:
;; - PMUL
;; - PMULLB
;; - PMULLT
;; -------------------------------------------------------------------------

;; Uniform PMUL.
;; Plain three-register form: neither input is tied to the destination and
;; there is no MOVPRFX alternative.
(define_insn "@aarch64_sve2_pmul<mode>"
  [(set (match_operand:VNx16QI_ONLY 0 "register_operand" "=w")
        (unspec:VNx16QI_ONLY
          [(match_operand:VNx16QI_ONLY 1 "register_operand" "w")
           (match_operand:VNx16QI_ONLY 2 "register_operand" "w")]
          UNSPEC_PMUL))]
  "TARGET_SVE2"
  "pmul\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)

;; Extending PMUL, with the results modeled as wider vectors.
;; This representation is only possible for .H and .D, not .Q.
;; The inputs have mode <VNARROW> and are printed with the <Ventype>
;; suffix; the result uses the iterator mode and the <Vetype> suffix.
(define_insn "@aarch64_sve_<optab><mode>"
  [(set (match_operand:SVE_FULL_HDI 0 "register_operand" "=w")
        (unspec:SVE_FULL_HDI
          [(match_operand:<VNARROW> 1 "register_operand" "w")
           (match_operand:<VNARROW> 2 "register_operand" "w")]
          SVE2_PMULL))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>"
)

;; Extending PMUL, with the results modeled as pairs of values.
;; This representation works for .H, .D and .Q, with .Q requiring
;; the AES extension.  (This is enforced by the mode iterator.)
;; Here all three operands share the iterator mode; only the destination
;; is printed with the wide <Vewtype> suffix.
(define_insn "@aarch64_sve_<optab><mode>"
  [(set (match_operand:SVE2_PMULL_PAIR_I 0 "register_operand" "=w")
        (unspec:SVE2_PMULL_PAIR_I
          [(match_operand:SVE2_PMULL_PAIR_I 1 "register_operand" "w")
           (match_operand:SVE2_PMULL_PAIR_I 2 "register_operand" "w")]
          SVE2_PMULL_PAIR))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Vewtype>, %1.<Vetype>, %2.<Vetype>"
)

;; =========================================================================
;; == Permutation
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [INT,FP] General permutes
;; -------------------------------------------------------------------------
;; Includes:
;; - TBL (vector pair form)
;; - TBX
;; -------------------------------------------------------------------------

;; TBL on a pair of data vectors.
;; Operand 1 has mode <VDOUBLE> and is printed without an element suffix
;; (plain %1).  Operand 2, in mode <V_INT_EQUIV>, holds the indices.
(define_insn "@aarch64_sve2_tbl2<mode>"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
        (unspec:SVE_FULL
          [(match_operand:<VDOUBLE> 1 "register_operand" "w")
           (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")]
          UNSPEC_TBL2))]
  "TARGET_SVE2"
  "tbl\t%0.<Vetype>, %1, %2.<Vetype>"
)

;; TBX.  These instructions do not take MOVPRFX.
;; Operand 1 is tied to the destination and is not printed separately;
;; operands 2 and 3 are the remaining vector input and the
;; <V_INT_EQUIV>-mode index vector.
(define_insn "@aarch64_sve2_tbx<mode>"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
        (unspec:SVE_FULL
          [(match_operand:SVE_FULL 1 "register_operand" "0")
           (match_operand:SVE_FULL 2 "register_operand" "w")
           (match_operand:<V_INT_EQUIV> 3 "register_operand" "w")]
          UNSPEC_TBX))]
  "TARGET_SVE2"
  "tbx\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
)

;; -------------------------------------------------------------------------
;; ---- [INT] Optional bit-permute extensions
;; -------------------------------------------------------------------------
;; Includes:
;; - BDEP
;; - BEXT
;; - BGRP
;; -------------------------------------------------------------------------

;; Plain three-register form.  Unlike most patterns in this file, the insn
;; condition is TARGET_SVE2_BITPERM rather than TARGET_SVE2.
(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
        (unspec:SVE_FULL_I
          [(match_operand:SVE_FULL_I 1 "register_operand" "w")
           (match_operand:SVE_FULL_I 2 "register_operand" "w")]
          SVE2_INT_BITPERM))]
  "TARGET_SVE2_BITPERM"
  "<sve_int_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)

;; =========================================================================
;; == General
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- Check for aliases between pointers
;; -------------------------------------------------------------------------
;; The patterns in this section are synthetic: WHILERW and WHILEWR are
;; defined in aarch64-sve.md instead.
;; -------------------------------------------------------------------------

;; Use WHILERW and WHILEWR to accelerate alias checks.  This is only
;; possible if the accesses we're checking are exactly the same size
;; as an SVE vector.
;; Operands:
;;   0: GPI register that receives the result of the check;
;;   1, 2: the two pointers, passed on to the WHILE instruction;
;;   3: must satisfy aarch64_bytes_per_sve_vector_operand; it is not used
;;      by the expansion code itself;
;;   4: constant alignment, used (capped at 8) to choose the predicate mode.
(define_expand "check_<raw_war>_ptrs<mode>"
  [(match_operand:GPI 0 "register_operand")
   (unspec:VNx16BI
     [(match_operand:GPI 1 "register_operand")
      (match_operand:GPI 2 "register_operand")
      (match_operand:GPI 3 "aarch64_bytes_per_sve_vector_operand")
      (match_operand:GPI 4 "const_int_operand")]
     SVE2_WHILE_PTR)]
  "TARGET_SVE2"
{
  /* Use the widest predicate mode we can.  */
  unsigned int align = INTVAL (operands[4]);
  if (align > 8)
    align = 8;
  machine_mode pred_mode = aarch64_sve_pred_mode (align).require ();

  /* Emit a WHILERW or WHILEWR, setting the condition codes based on
     the result.  The predicate result itself goes to a scratch.  */
  emit_insn (gen_while_ptest
             (<SVE2_WHILE_PTR:unspec>, <MODE>mode, pred_mode,
              gen_rtx_SCRATCH (pred_mode), operands[1], operands[2],
              CONSTM1_RTX (VNx16BImode), CONSTM1_RTX (pred_mode)));

  /* Set operand 0 to true if the last bit of the predicate result is set,
     i.e. if all elements are free of dependencies.  */
  rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
  rtx cmp = gen_rtx_LTU (<MODE>mode, cc_reg, const0_rtx);
  emit_insn (gen_aarch64_cstore<mode> (operands[0], cmp, cc_reg));
  DONE;
})

;; -------------------------------------------------------------------------
;; ---- Histogram processing
;; -------------------------------------------------------------------------
;; Includes:
;; - HISTCNT
;; - HISTSEG
;; -------------------------------------------------------------------------

;; HISTCNT: zeroing-predicated (%1/z) three-register form for the
;; SVE_FULL_SDI modes.
(define_insn "@aarch64_sve2_histcnt<mode>"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w")
        (unspec:SVE_FULL_SDI
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (match_operand:SVE_FULL_SDI 2 "register_operand" "w")
           (match_operand:SVE_FULL_SDI 3 "register_operand" "w")]
          UNSPEC_HISTCNT))]
  "TARGET_SVE2"
  "histcnt\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)

;; HISTSEG: unpredicated three-register form, VNx16QI only.
(define_insn "@aarch64_sve2_histseg<mode>"
  [(set (match_operand:VNx16QI_ONLY 0 "register_operand" "=w")
        (unspec:VNx16QI_ONLY
          [(match_operand:VNx16QI_ONLY 1 "register_operand" "w")
           (match_operand:VNx16QI_ONLY 2 "register_operand" "w")]
          UNSPEC_HISTSEG))]
  "TARGET_SVE2"
  "histseg\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)

;; -------------------------------------------------------------------------
;; ---- String matching
;; -------------------------------------------------------------------------
;; Includes:
;; - MATCH
;; - NMATCH
;; -------------------------------------------------------------------------

;; Predicated string matching.
;; The result is a predicate (<VPRED>), written to a "Upa" register under
;; the zeroing governing predicate %1/z.  Operand 2 is a ptrue flag that is
;; not printed.  The pattern clobbers the condition-code register.
(define_insn "@aarch64_pred_<sve_int_op><mode>"
  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
        (unspec:<VPRED>
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (match_operand:SI 2 "aarch64_sve_ptrue_flag")
           (unspec:<VPRED>
             [(match_operand:SVE_FULL_BHI 3 "register_operand" "w")
              (match_operand:SVE_FULL_BHI 4 "register_operand" "w")]
             SVE2_MATCH)]
          UNSPEC_PRED_Z))
   (clobber (reg:CC_NZC CC_REGNUM))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>"
)

;; Predicated string matching in which both the flag and predicate results
;; are interesting.
;;
;; The first set is a PTEST of the match result under operand 1; the second
;; set is the match result itself.  The insn condition requires
;; aarch64_sve_same_pred_for_ptest_p to accept operands 4 and 6.  When the
;; two are not already equal, the rewrite step makes operand 6 a copy of
;; operand 4 and operand 7 the same as operand 5.  Note that the vector
;; inputs are operands 2 and 3 here, not 3 and 4 as in the pattern above.
(define_insn_and_rewrite "*aarch64_pred_<sve_int_op><mode>_cc"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand:VNx16BI 1 "register_operand" "Upl")
           (match_operand 4)
           (match_operand:SI 5 "aarch64_sve_ptrue_flag")
           (unspec:<VPRED>
             [(match_operand 6)
              (match_operand:SI 7 "aarch64_sve_ptrue_flag")
              (unspec:<VPRED>
                [(match_operand:SVE_FULL_BHI 2 "register_operand" "w")
                 (match_operand:SVE_FULL_BHI 3 "register_operand" "w")]
                SVE2_MATCH)]
             UNSPEC_PRED_Z)]
          UNSPEC_PTEST))
   (set (match_operand:<VPRED> 0 "register_operand" "=Upa")
        (unspec:<VPRED>
          [(match_dup 6)
           (match_dup 7)
           (unspec:<VPRED>
             [(match_dup 2)
              (match_dup 3)]
             SVE2_MATCH)]
          UNSPEC_PRED_Z))]
  "TARGET_SVE2
   && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
  "<sve_int_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
  "&& !rtx_equal_p (operands[4], operands[6])"
  {
    operands[6] = copy_rtx (operands[4]);
    operands[7] = operands[5];
  }
)

;; Predicated string matching in which only the flags result is interesting.
;; Same PTEST set as the _cc pattern, but the predicate result is only a
;; clobbered scratch (operand 0).  The insn condition and the rewrite of
;; operands 6 and 7 are the same as in the _cc pattern.
(define_insn_and_rewrite "*aarch64_pred_<sve_int_op><mode>_ptest"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand:VNx16BI 1 "register_operand" "Upl")
           (match_operand 4)
           (match_operand:SI 5 "aarch64_sve_ptrue_flag")
           (unspec:<VPRED>
             [(match_operand 6)
              (match_operand:SI 7 "aarch64_sve_ptrue_flag")
              (unspec:<VPRED>
                [(match_operand:SVE_FULL_BHI 2 "register_operand" "w")
                 (match_operand:SVE_FULL_BHI 3 "register_operand" "w")]
                SVE2_MATCH)]
             UNSPEC_PRED_Z)]
          UNSPEC_PTEST))
   (clobber (match_scratch:<VPRED> 0 "=Upa"))]
  "TARGET_SVE2
   && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
  "<sve_int_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
  "&& !rtx_equal_p (operands[4], operands[6])"
  {
    operands[6] = copy_rtx (operands[4]);
    operands[7] = operands[5];
  }
)

;; =========================================================================
;; == Crypotographic extensions
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- Optional AES extensions
;; -------------------------------------------------------------------------
;; Includes:
;; - AESD
;; - AESE
;; - AESIMC
;; - AESMC
;; -------------------------------------------------------------------------

;; AESD and AESE.
;; The XOR of the two inputs is modelled explicitly inside the unspec.
;; The "%" on operand 1 marks the two XOR inputs as commutative, and
;; operand 1 is tied to the destination, so the template prints %0 twice.
(define_insn "aarch64_sve2_aes<aes_op>"
  [(set (match_operand:VNx16QI 0 "register_operand" "=w")
        (unspec:VNx16QI
          [(xor:VNx16QI
             (match_operand:VNx16QI 1 "register_operand" "%0")
             (match_operand:VNx16QI 2 "register_operand" "w"))]
          CRYPTO_AES))]
  "TARGET_SVE2_AES"
  "aes<aes_op>\t%0.b, %0.b, %2.b"
  [(set_attr "type" "crypto_aese")]
)

;; AESMC and AESIMC.  These instructions do not take MOVPRFX.
;; The single input is tied to the destination, so the template prints
;; %0 for both operands.
(define_insn "aarch64_sve2_aes<aesmc_op>"
  [(set (match_operand:VNx16QI 0 "register_operand" "=w")
        (unspec:VNx16QI
          [(match_operand:VNx16QI 1 "register_operand" "0")]
          CRYPTO_AESMC))]
  "TARGET_SVE2_AES"
  "aes<aesmc_op>\t%0.b, %0.b"
  [(set_attr "type" "crypto_aesmc")]
)

;; When AESE/AESMC and AESD/AESIMC fusion is enabled, we really want
;; to keep the two together and enforce the register dependency without
;; scheduling or register allocation messing up the order or introducing
;; moves in between.  Mash the two together during combine.

;; AESE immediately followed by AESMC on the same register.  The pattern
;; is only available when AARCH64_FUSE_AES_AESMC fusion is enabled, and
;; its length attribute is 8 because the template emits two instructions.
(define_insn "*aarch64_sve2_aese_fused"
  [(set (match_operand:VNx16QI 0 "register_operand" "=w")
        (unspec:VNx16QI
          [(unspec:VNx16QI
             [(xor:VNx16QI
                (match_operand:VNx16QI 1 "register_operand" "%0")
                (match_operand:VNx16QI 2 "register_operand" "w"))]
             UNSPEC_AESE)]
          UNSPEC_AESMC))]
  "TARGET_SVE2_AES && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
  "aese\t%0.b, %0.b, %2.b\;aesmc\t%0.b, %0.b"
  [(set_attr "type" "crypto_aese")
   (set_attr "length" "8")]
)

;; AESD immediately followed by AESIMC on the same register.  It is gated
;; on the same AARCH64_FUSE_AES_AESMC fusion flag as the AESE/AESMC
;; pattern above and likewise has length 8.
(define_insn "*aarch64_sve2_aesd_fused"
  [(set (match_operand:VNx16QI 0 "register_operand" "=w")
        (unspec:VNx16QI
          [(unspec:VNx16QI
             [(xor:VNx16QI
                (match_operand:VNx16QI 1 "register_operand" "%0")
                (match_operand:VNx16QI 2 "register_operand" "w"))]
             UNSPEC_AESD)]
          UNSPEC_AESIMC))]
  "TARGET_SVE2_AES && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
  "aesd\t%0.b, %0.b, %2.b\;aesimc\t%0.b, %0.b"
  [(set_attr "type" "crypto_aese")
   (set_attr "length" "8")]
)

;; -------------------------------------------------------------------------
;; ---- Optional SHA-3 extensions
;; -------------------------------------------------------------------------
;; Includes:
;; - RAX1
;; -------------------------------------------------------------------------

;; RAX1 is described directly in RTL rather than as an unspec: the result
;; is operand 1 XORed with operand 2 rotated by 1.  Note that operand 2
;; appears first in the RTL but is printed last in the template.
(define_insn "aarch64_sve2_rax1"
  [(set (match_operand:VNx2DI 0 "register_operand" "=w")
        (xor:VNx2DI
          (rotate:VNx2DI
            (match_operand:VNx2DI 2 "register_operand" "w")
            (const_int 1))
          (match_operand:VNx2DI 1 "register_operand" "w")))]
  "TARGET_SVE2_SHA3"
  "rax1\t%0.d, %1.d, %2.d"
  [(set_attr "type" "crypto_sha3")]
)

;; -------------------------------------------------------------------------
;; ---- Optional SM4 extensions
;; -------------------------------------------------------------------------
;; Includes:
;; - SM4E
;; - SM4EKEY
;; -------------------------------------------------------------------------

;; These instructions do not take MOVPRFX.
;; SM4E: operand 1 is tied to the destination, so the template prints %0
;; for both the destination and the first source.
(define_insn "aarch64_sve2_sm4e"
  [(set (match_operand:VNx4SI 0 "register_operand" "=w")
        (unspec:VNx4SI
          [(match_operand:VNx4SI 1 "register_operand" "0")
           (match_operand:VNx4SI 2 "register_operand" "w")]
          UNSPEC_SM4E))]
  "TARGET_SVE2_SM4"
  "sm4e\t%0.s, %0.s, %2.s"
  [(set_attr "type" "crypto_sm4")]
)

;; SM4EKEY: plain three-register form; neither input is tied to the
;; destination.
(define_insn "aarch64_sve2_sm4ekey"
  [(set (match_operand:VNx4SI 0 "register_operand" "=w")
        (unspec:VNx4SI
          [(match_operand:VNx4SI 1 "register_operand" "w")
           (match_operand:VNx4SI 2 "register_operand" "w")]
          UNSPEC_SM4EKEY))]
  "TARGET_SVE2_SM4"
  "sm4ekey\t%0.s, %1.s, %2.s"
  [(set_attr "type" "crypto_sm4")]
)