;; sync.md revision 1.11
      1 ;; GCC machine description for i386 synchronization instructions.
      2 ;; Copyright (C) 2005-2020 Free Software Foundation, Inc.
      3 ;;
      4 ;; This file is part of GCC.
      5 ;;
      6 ;; GCC is free software; you can redistribute it and/or modify
      7 ;; it under the terms of the GNU General Public License as published by
      8 ;; the Free Software Foundation; either version 3, or (at your option)
      9 ;; any later version.
     10 ;;
     11 ;; GCC is distributed in the hope that it will be useful,
     12 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
     13 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     14 ;; GNU General Public License for more details.
     15 ;;
     16 ;; You should have received a copy of the GNU General Public License
     17 ;; along with GCC; see the file COPYING3.  If not see
     18 ;; <http://www.gnu.org/licenses/>.
     19 
;; Unspec codes used by the synchronization patterns below.  Plain
;; "unspec" codes may be moved/CSEd by the optimizers; the volatile
;; "unspecv" codes may not, and are used for the locked operations.
(define_c_enum "unspec" [
  UNSPEC_LFENCE
  UNSPEC_SFENCE
  UNSPEC_MFENCE

  ;; x87 fild/fistp pair used for atomic DImode access on 32-bit.
  UNSPEC_FILD_ATOMIC
  UNSPEC_FIST_ATOMIC

  ;; SSE load/store pair used for atomic DImode access on 32-bit.
  UNSPEC_LDX_ATOMIC
  UNSPEC_STX_ATOMIC

  ;; __atomic support
  UNSPEC_LDA
  UNSPEC_STA
])

(define_c_enum "unspecv" [
  UNSPECV_CMPXCHG
  UNSPECV_XCHG
  UNSPECV_LOCK
])
     41 
;; Load fence (LFENCE).  The expander substitutes a volatile BLKmode
;; scratch MEM so the fence is treated as a full memory use/clobber by
;; the optimizers.
(define_expand "sse2_lfence"
  [(set (match_dup 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
  "TARGET_SSE2"
{
  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
  MEM_VOLATILE_P (operands[0]) = 1;
})

(define_insn "*sse2_lfence"
  [(set (match_operand:BLK 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
  "TARGET_SSE2"
  "lfence"
  [(set_attr "type" "sse")
   (set_attr "length_address" "0")
   (set_attr "atom_sse_attr" "lfence")
   (set_attr "memory" "unknown")])
     60 
;; Store fence (SFENCE), available with SSE or the Athlon 3DNow!
;; extensions.  As above, a volatile BLKmode scratch MEM makes the
;; fence a full memory barrier for the optimizers.
(define_expand "sse_sfence"
  [(set (match_dup 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
  "TARGET_SSE || TARGET_3DNOW_A"
{
  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
  MEM_VOLATILE_P (operands[0]) = 1;
})

(define_insn "*sse_sfence"
  [(set (match_operand:BLK 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
  "TARGET_SSE || TARGET_3DNOW_A"
  "sfence"
  [(set_attr "type" "sse")
   (set_attr "length_address" "0")
   (set_attr "atom_sse_attr" "fence")
   (set_attr "memory" "unknown")])
     79 
;; Full memory fence.  Emitted as a real MFENCE when available
;; (mfence_sse2), otherwise as a locked read-modify-write of the top
;; of stack (mfence_nosse), which is also a full barrier.
(define_expand "sse2_mfence"
  [(set (match_dup 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
  "TARGET_SSE2"
{
  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
  MEM_VOLATILE_P (operands[0]) = 1;
})

;; Note the condition: all 64-bit targets have SSE2.
(define_insn "mfence_sse2"
  [(set (match_operand:BLK 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
  "TARGET_64BIT || TARGET_SSE2"
  "mfence"
  [(set_attr "type" "sse")
   (set_attr "length_address" "0")
   (set_attr "atom_sse_attr" "fence")
   (set_attr "memory" "unknown")])

;; No MFENCE available: "lock orl $0, (%esp)" leaves memory unchanged
;; but, being a locked instruction, acts as a full barrier.  It does
;; clobber the flags, hence the CC clobber.
(define_insn "mfence_nosse"
  [(set (match_operand:BLK 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))
   (clobber (reg:CC FLAGS_REG))]
  "!(TARGET_64BIT || TARGET_SSE2)"
  "lock{%;} or{l}\t{$0, (%%esp)|DWORD PTR [esp], 0}"
  [(set_attr "memory" "unknown")])
    106 
;; Expand __atomic_thread_fence.  Operand 0 is the memory model as a
;; constant integer.
(define_expand "mem_thread_fence"
  [(match_operand:SI 0 "const_int_operand")]		;; model
  ""
{
  enum memmodel model = memmodel_from_int (INTVAL (operands[0]));

  /* Unless this is a SEQ_CST fence, the i386 memory model is strong
     enough not to require barriers of any kind.  */
  if (is_mm_seq_cst (model))
    {
      rtx (*mfence_insn)(rtx);
      rtx mem;

      /* Prefer a real MFENCE when the target has one.  */
      if (TARGET_64BIT || TARGET_SSE2)
	mfence_insn = gen_mfence_sse2;
      else
	mfence_insn = gen_mfence_nosse;

      /* A volatile BLKmode scratch MEM makes the fence a full memory
	 use/clobber for the optimizers.  */
      mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
      MEM_VOLATILE_P (mem) = 1;

      emit_insn (mfence_insn (mem));
    }
  DONE;
})
    132 
    133 ;; ??? From volume 3 section 8.1.1 Guaranteed Atomic Operations,
    134 ;; Only beginning at Pentium family processors do we get any guarantee of
    135 ;; atomicity in aligned 64-bit quantities.  Beginning at P6, we get a
    136 ;; guarantee for 64-bit accesses that do not cross a cacheline boundary.
    137 ;;
    138 ;; Note that the TARGET_CMPXCHG8B test below is a stand-in for "Pentium".
    139 ;;
    140 ;; Importantly, *no* processor makes atomicity guarantees for larger
    141 ;; accesses.  In particular, there's no way to perform an atomic TImode
    142 ;; move, despite the apparent applicability of MOVDQA et al.
    143 
;; Modes for which we can perform a genuinely atomic load/store:
;; QI/HI/SI always; DI only on 64-bit, or on 32-bit "Pentium" class
;; hardware (see TARGET_CMPXCHG8B note above) with a 64-bit-capable
;; register file (x87 or SSE) to carry the value.
(define_mode_iterator ATOMIC
   [QI HI SI
    (DI "TARGET_64BIT || (TARGET_CMPXCHG8B && (TARGET_80387 || TARGET_SSE))")
   ])
    148 
;; Expand __atomic_load.  Operand 0 is the destination, operand 1 the
;; atomic memory location, operand 2 the memory model.  The model is
;; not examined here: an ordinary aligned load suffices on x86.
(define_expand "atomic_load<mode>"
  [(set (match_operand:ATOMIC 0 "nonimmediate_operand")
	(unspec:ATOMIC [(match_operand:ATOMIC 1 "memory_operand")
			(match_operand:SI 2 "const_int_operand")]
		       UNSPEC_LDA))]
  ""
{
  /* For DImode on 32-bit, we can use the FPU to perform the load.  */
  if (<MODE>mode == DImode && !TARGET_64BIT)
    emit_insn (gen_atomic_loaddi_fpu
	       (operands[0], operands[1],
	        assign_386_stack_local (DImode, SLOT_TEMP)));
  else
    {
      rtx dst = operands[0];

      /* Don't let a mem-to-mem move split the access into pieces;
	 go through a fresh register instead.  */
      if (MEM_P (dst))
	dst = gen_reg_rtx (<MODE>mode);

      emit_move_insn (dst, operands[1]);

      /* Fix up the destination if needed.  */
      if (dst != operands[0])
	emit_move_insn (operands[0], dst);
    }
  DONE;
})
    176 
;; 32-bit atomic DImode load performed through a 64-bit-wide register.
;; Operand 0 is the final destination (SSE reg, MEM, or GPR pair);
;; operand 1 the atomic source; operand 2 a DImode stack temporary,
;; used only when the destination is a GPR pair; operand 3 a DFmode
;; scratch (x87 or SSE reg) that carries the 64-bit value.
(define_insn_and_split "atomic_loaddi_fpu"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=x,m,?r")
	(unspec:DI [(match_operand:DI 1 "memory_operand" "m,m,m")]
		   UNSPEC_LDA))
   (clobber (match_operand:DI 2 "memory_operand" "=X,X,m"))
   (clobber (match_scratch:DF 3 "=X,xf,xf"))]
  "!TARGET_64BIT && (TARGET_80387 || TARGET_SSE)"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx dst = operands[0], src = operands[1];
  rtx mem = operands[2], tmp = operands[3];

  /* An SSE destination can take the 64-bit load directly.  */
  if (SSE_REG_P (dst))
    emit_move_insn (dst, src);
  else
    {
      /* A memory destination can be stored to directly, skipping the
	 stack temporary.  */
      if (MEM_P (dst))
	mem = dst;

      /* Bounce the value through the x87 or SSE scratch into MEM.  */
      if (STACK_REG_P (tmp))
        {
	  emit_insn (gen_loaddi_via_fpu (tmp, src));
	  emit_insn (gen_storedi_via_fpu (mem, tmp));
	}
      else
	{
	  emit_insn (gen_loaddi_via_sse (tmp, src));
	  emit_insn (gen_storedi_via_sse (mem, tmp));
	}

      /* GPR destination: finish with an ordinary (non-atomic) copy
	 from the private stack temporary.  */
      if (mem != dst)
	emit_move_insn (dst, mem);
    }
  DONE;
})
    214 
;; Expand __atomic_store.  Operand 0 is the atomic memory location,
;; operand 1 the value, operand 2 the memory model.  Only seq-cst
;; stores need extra ordering, supplied by XCHG or a trailing MFENCE.
(define_expand "atomic_store<mode>"
  [(set (match_operand:ATOMIC 0 "memory_operand")
	(unspec:ATOMIC [(match_operand:ATOMIC 1 "nonimmediate_operand")
			(match_operand:SI 2 "const_int_operand")]
		       UNSPEC_STA))]
  ""
{
  enum memmodel model = memmodel_from_int (INTVAL (operands[2]));

  if (<MODE>mode == DImode && !TARGET_64BIT)
    {
      /* For DImode on 32-bit, we can use the FPU to perform the store.  */
      /* Note that while we could perform a cmpxchg8b loop, that turns
	 out to be significantly larger than this plus a barrier.  */
      emit_insn (gen_atomic_storedi_fpu
		 (operands[0], operands[1],
	          assign_386_stack_local (DImode, SLOT_TEMP)));
    }
  else
    {
      operands[1] = force_reg (<MODE>mode, operands[1]);

      /* For seq-cst stores, use XCHG when we lack MFENCE.  */
      if (is_mm_seq_cst (model)
	  && (!(TARGET_64BIT || TARGET_SSE2)
	      || TARGET_AVOID_MFENCE))
	{
	  /* XCHG is implicitly locked and a full barrier, so no
	     fence is needed; the old value is discarded.  */
	  emit_insn (gen_atomic_exchange<mode> (gen_reg_rtx (<MODE>mode),
						operands[0], operands[1],
						operands[2]));
	  DONE;
	}

      /* Otherwise use a store.  */
      emit_insn (gen_atomic_store<mode>_1 (operands[0], operands[1],
					   operands[2]));
    }
  /* ... followed by an MFENCE, if required.  */
  if (is_mm_seq_cst (model))
    emit_insn (gen_mem_thread_fence (operands[2]));
  DONE;
})
    257 
;; Plain MOV used as an atomic store (sufficient for all models short
;; of seq-cst; the expander adds any needed fence).  %K2 lets the
;; output routine add a model-derived prefix (presumably the HLE
;; xrelease hint -- confirm against ix86_print_operand 'K').
(define_insn "atomic_store<mode>_1"
  [(set (match_operand:SWI 0 "memory_operand" "=m")
	(unspec:SWI [(match_operand:SWI 1 "<nonmemory_operand>" "<r><i>")
		     (match_operand:SI 2 "const_int_operand")]
		    UNSPEC_STA))]
  ""
  "%K2mov{<imodesuffix>}\t{%1, %0|%0, %1}")
    265 
;; 32-bit atomic DImode store, mirror image of atomic_loaddi_fpu.
;; Operand 0 is the atomic destination; operand 1 the source (SSE reg,
;; MEM, or GPR pair); operand 2 a DImode stack temporary used only for
;; a GPR source; operand 3 a DFmode scratch (x87 or SSE reg).
(define_insn_and_split "atomic_storedi_fpu"
  [(set (match_operand:DI 0 "memory_operand" "=m,m,m")
	(unspec:DI [(match_operand:DI 1 "nonimmediate_operand" "x,m,?r")]
		   UNSPEC_STA))
   (clobber (match_operand:DI 2 "memory_operand" "=X,X,m"))
   (clobber (match_scratch:DF 3 "=X,xf,xf"))]
  "!TARGET_64BIT && (TARGET_80387 || TARGET_SSE)"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx dst = operands[0], src = operands[1];
  rtx mem = operands[2], tmp = operands[3];

  /* An SSE source can be stored with a single 64-bit move.  */
  if (SSE_REG_P (src))
    emit_move_insn (dst, src);
  else
    {
      /* A GPR pair is first spilled (non-atomically) to the private
	 stack temporary, then loaded 64 bits wide from there.  */
      if (REG_P (src))
	{
	  emit_move_insn (mem, src);
	  src = mem;
	}

      /* Bounce the value through the x87 or SSE scratch into DST.  */
      if (STACK_REG_P (tmp))
	{
	  emit_insn (gen_loaddi_via_fpu (tmp, src));
	  emit_insn (gen_storedi_via_fpu (dst, tmp));
	}
      else
	{
	  emit_insn (gen_loaddi_via_sse (tmp, src));
	  emit_insn (gen_storedi_via_sse (dst, tmp));
	}
    }
  DONE;
})
    303 
    304 ;; ??? You'd think that we'd be able to perform this via FLOAT + FIX_TRUNC
    305 ;; operations.  But the fix_trunc patterns want way more setup than we want
    306 ;; to provide.  Note that the scratch is DFmode instead of XFmode in order
    307 ;; to make it easy to allocate a scratch in either SSE or FP_REGs above.
    308 
;; 64-bit transfer through an x87 register: fild loads the DImode
;; value into %st, fistp stores it back out.  The DF mode of the
;; register operand is a fiction (see comment above); the bit pattern
;; is what matters.
(define_insn "loaddi_via_fpu"
  [(set (match_operand:DF 0 "register_operand" "=f")
	(unspec:DF [(match_operand:DI 1 "memory_operand" "m")]
		   UNSPEC_FILD_ATOMIC))]
  "TARGET_80387"
  "fild%Z1\t%1"
  [(set_attr "type" "fmov")
   (set_attr "mode" "DF")
   (set_attr "fp_int_src" "true")])

(define_insn "storedi_via_fpu"
  [(set (match_operand:DI 0 "memory_operand" "=m")
	(unspec:DI [(match_operand:DF 1 "register_operand" "f")]
		   UNSPEC_FIST_ATOMIC))]
  "TARGET_80387"
{
  /* fistp pops the x87 stack; the value must die here or the reg-stack
     bookkeeping would be wrong.  */
  gcc_assert (find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != NULL_RTX);

  return "fistp%Z0\t%0";
}
  [(set_attr "type" "fmov")
   (set_attr "mode" "DI")])
    331 
;; 64-bit transfer through an SSE register: movq with SSE2, otherwise
;; movlps (SSE1 has no 64-bit integer move, but movlps moves the low
;; 64 bits).  As above, the DF register mode is only a fiction.
(define_insn "loaddi_via_sse"
  [(set (match_operand:DF 0 "register_operand" "=x")
	(unspec:DF [(match_operand:DI 1 "memory_operand" "m")]
		   UNSPEC_LDX_ATOMIC))]
  "TARGET_SSE"
{
  if (TARGET_SSE2)
    return "%vmovq\t{%1, %0|%0, %1}";
  return "movlps\t{%1, %0|%0, %1}";
}
  [(set_attr "type" "ssemov")
   (set_attr "mode" "DI")])

(define_insn "storedi_via_sse"
  [(set (match_operand:DI 0 "memory_operand" "=m")
	(unspec:DI [(match_operand:DF 1 "register_operand" "x")]
		   UNSPEC_STX_ATOMIC))]
  "TARGET_SSE"
{
  if (TARGET_SSE2)
    return "%vmovq\t{%1, %0|%0, %1}";
  return "movlps\t{%1, %0|%0, %1}";
}
  [(set_attr "type" "ssemov")
   (set_attr "mode" "DI")])
    357 
;; Expand __atomic_compare_exchange for QI/HI/SI.  Operands 5 (is_weak)
;; and 7 (failure model) are not examined: the single cmpxchg emitted
;; serves for both weak and strong, and all failure orderings.  The
;; bool result is derived from ZF set by cmpxchg.
(define_expand "atomic_compare_and_swap<mode>"
  [(match_operand:QI 0 "register_operand")	;; bool success output
   (match_operand:SWI124 1 "register_operand")	;; oldval output
   (match_operand:SWI124 2 "memory_operand")	;; memory
   (match_operand:SWI124 3 "register_operand")	;; expected input
   (match_operand:SWI124 4 "register_operand")	;; newval input
   (match_operand:SI 5 "const_int_operand")	;; is_weak
   (match_operand:SI 6 "const_int_operand")	;; success model
   (match_operand:SI 7 "const_int_operand")]	;; failure model
  "TARGET_CMPXCHG"
{
  emit_insn
   (gen_atomic_compare_and_swap<mode>_1
    (operands[1], operands[2], operands[3], operands[4], operands[6]));
  ix86_expand_setcc (operands[0], EQ, gen_rtx_REG (CCZmode, FLAGS_REG),
		     const0_rtx);
  DONE;
})
    376 
;; Widest compare-and-swap modes: DImode (cmpxchg8b on 32-bit, plain
;; cmpxchg on 64-bit) and TImode (cmpxchg16b, 64-bit only).
(define_mode_iterator CASMODE
  [(DI "TARGET_64BIT || TARGET_CMPXCHG8B")
   (TI "TARGET_64BIT && TARGET_CMPXCHG16B")])
;; Half-width mode of each CASMODE mode, for splitting newval.
(define_mode_attr CASHMODE [(DI "SI") (TI "DI")])
    381 
;; Expand __atomic_compare_exchange for the double-word modes.  As in
;; the SWI124 expander, operands 5 (is_weak) and 7 (failure model) are
;; not examined.  DImode on 64-bit uses the single-word cmpxchg;
;; otherwise newval is split into halves for cmpxchg8b/16b.
(define_expand "atomic_compare_and_swap<mode>"
  [(match_operand:QI 0 "register_operand")	;; bool success output
   (match_operand:CASMODE 1 "register_operand")	;; oldval output
   (match_operand:CASMODE 2 "memory_operand")	;; memory
   (match_operand:CASMODE 3 "register_operand")	;; expected input
   (match_operand:CASMODE 4 "register_operand")	;; newval input
   (match_operand:SI 5 "const_int_operand")	;; is_weak
   (match_operand:SI 6 "const_int_operand")	;; success model
   (match_operand:SI 7 "const_int_operand")]	;; failure model
  "TARGET_CMPXCHG"
{
  if (<MODE>mode == DImode && TARGET_64BIT)
    {
      emit_insn
       (gen_atomic_compare_and_swapdi_1
	(operands[1], operands[2], operands[3], operands[4], operands[6]));
    }
  else
    {
      machine_mode hmode = <CASHMODE>mode;

      /* Pass newval as separate low/high halves; the doubleword insn
	 constrains them to BX/CX (or RBX/RCX).  */
      emit_insn
       (gen_atomic_compare_and_swap<mode>_doubleword
        (operands[1], operands[2], operands[3],
	 gen_lowpart (hmode, operands[4]), gen_highpart (hmode, operands[4]),
	 operands[6]));
    }

  /* Success is ZF, set by cmpxchg.  */
  ix86_expand_setcc (operands[0], EQ, gen_rtx_REG (CCZmode, FLAGS_REG),
		     const0_rtx);
  DONE;
})
    414 
    415 ;; For double-word compare and swap, we are obliged to play tricks with
    416 ;; the input newval (op3:op4) because the Intel register numbering does
    417 ;; not match the gcc register numbering, so the pair must be CX:BX.
    418 
;; Suffix of the doubleword cmpxchg instruction, keyed on half-mode.
(define_mode_attr doublemodesuffix [(SI "8") (DI "16")])

;; cmpxchg8b/cmpxchg16b.  Operand 0/2 (old value out / expected in)
;; are tied to DX:AX via "=A"/"0"; newval is in CX:BX ("c"/"b"), per
;; the register-pairing comment above.  ZF reports success.
(define_insn "atomic_compare_and_swap<dwi>_doubleword"
  [(set (match_operand:<DWI> 0 "register_operand" "=A")
	(unspec_volatile:<DWI>
	  [(match_operand:<DWI> 1 "memory_operand" "+m")
	   (match_operand:<DWI> 2 "register_operand" "0")
	   (match_operand:DWIH 3 "register_operand" "b")
	   (match_operand:DWIH 4 "register_operand" "c")
	   (match_operand:SI 5 "const_int_operand")]
	  UNSPECV_CMPXCHG))
   (set (match_dup 1)
	(unspec_volatile:<DWI> [(const_int 0)] UNSPECV_CMPXCHG))
   (set (reg:CCZ FLAGS_REG)
        (unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG))]
  "TARGET_CMPXCHG<doublemodesuffix>B"
  "lock{%;} %K5cmpxchg<doublemodesuffix>b\t%1")
    436 
;; Single-word locked cmpxchg.  Old value out and expected value in
;; are both constrained to AX ("=a"/"0"), as the instruction requires;
;; ZF reports success.  Operand 4 is the memory model (%K4 prefix).
(define_insn "atomic_compare_and_swap<mode>_1"
  [(set (match_operand:SWI 0 "register_operand" "=a")
	(unspec_volatile:SWI
	  [(match_operand:SWI 1 "memory_operand" "+m")
	   (match_operand:SWI 2 "register_operand" "0")
	   (match_operand:SWI 3 "register_operand" "<r>")
	   (match_operand:SI 4 "const_int_operand")]
	  UNSPECV_CMPXCHG))
   (set (match_dup 1)
	(unspec_volatile:SWI [(const_int 0)] UNSPECV_CMPXCHG))
   (set (reg:CCZ FLAGS_REG)
        (unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG))]
  "TARGET_CMPXCHG"
  "lock{%;} %K4cmpxchg{<imodesuffix>}\t{%3, %1|%1, %3}")
    451 
    452 ;; For operand 2 nonmemory_operand predicate is used instead of
    453 ;; register_operand to allow combiner to better optimize atomic
    454 ;; additions of constants.
;; For operand 2 nonmemory_operand predicate is used instead of
;; register_operand to allow combiner to better optimize atomic
;; additions of constants.
;; Locked XADD: returns the old memory value in operand 0 and adds
;; operand 2 into memory.  Input and output share a register ("0").
(define_insn "atomic_fetch_add<mode>"
  [(set (match_operand:SWI 0 "register_operand" "=<r>")
	(unspec_volatile:SWI
	  [(match_operand:SWI 1 "memory_operand" "+m")
	   (match_operand:SI 3 "const_int_operand")]		;; model
	  UNSPECV_XCHG))
   (set (match_dup 1)
	(plus:SWI (match_dup 1)
		  (match_operand:SWI 2 "nonmemory_operand" "0")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_XADD"
  "lock{%;} %K3xadd{<imodesuffix>}\t{%0, %1|%1, %0}")
    467 
    468 ;; This peephole2 and following insn optimize
    469 ;; __sync_fetch_and_add (x, -N) == N into just lock {add,sub,inc,dec}
    470 ;; followed by testing of flags instead of lock xadd and comparisons.
;; Match: op0 = const N; op0 = xadd(mem, op0); compare op0 with -N.
;; When op0 dies after the compare and does not overlap the memory
;; address, rewrite to the combined *atomic_fetch_add_cmp pattern,
;; which emits lock add/sub/inc/dec and reads the flags directly.
(define_peephole2
  [(set (match_operand:SWI 0 "register_operand")
	(match_operand:SWI 2 "const_int_operand"))
   (parallel [(set (match_dup 0)
		   (unspec_volatile:SWI
		     [(match_operand:SWI 1 "memory_operand")
		      (match_operand:SI 4 "const_int_operand")]
		     UNSPECV_XCHG))
	      (set (match_dup 1)
		   (plus:SWI (match_dup 1)
			     (match_dup 0)))
	      (clobber (reg:CC FLAGS_REG))])
   (set (reg:CCZ FLAGS_REG)
	(compare:CCZ (match_dup 0)
		     (match_operand:SWI 3 "const_int_operand")))]
  "peep2_reg_dead_p (3, operands[0])
   && (unsigned HOST_WIDE_INT) INTVAL (operands[2])
      == -(unsigned HOST_WIDE_INT) INTVAL (operands[3])
   && !reg_overlap_mentioned_p (operands[0], operands[1])"
  [(parallel [(set (reg:CCZ FLAGS_REG)
		   (compare:CCZ
		     (unspec_volatile:SWI [(match_dup 1) (match_dup 4)]
					  UNSPECV_XCHG)
		     (match_dup 3)))
	      (set (match_dup 1)
		   (plus:SWI (match_dup 1)
			     (match_dup 2)))])])
    498 
    499 ;; Likewise, but for the -Os special case of *mov<mode>_or.
;; Same transformation as the peephole above, but matching the -Os
;; form where the constant -1 is materialized by *mov<mode>_or (which
;; clobbers the flags) instead of a plain move.
(define_peephole2
  [(parallel [(set (match_operand:SWI 0 "register_operand")
		   (match_operand:SWI 2 "constm1_operand"))
	      (clobber (reg:CC FLAGS_REG))])
   (parallel [(set (match_dup 0)
		   (unspec_volatile:SWI
		     [(match_operand:SWI 1 "memory_operand")
		      (match_operand:SI 4 "const_int_operand")]
		     UNSPECV_XCHG))
	      (set (match_dup 1)
		   (plus:SWI (match_dup 1)
			     (match_dup 0)))
	      (clobber (reg:CC FLAGS_REG))])
   (set (reg:CCZ FLAGS_REG)
	(compare:CCZ (match_dup 0)
		     (match_operand:SWI 3 "const_int_operand")))]
  "peep2_reg_dead_p (3, operands[0])
   && (unsigned HOST_WIDE_INT) INTVAL (operands[2])
      == -(unsigned HOST_WIDE_INT) INTVAL (operands[3])
   && !reg_overlap_mentioned_p (operands[0], operands[1])"
  [(parallel [(set (reg:CCZ FLAGS_REG)
		   (compare:CCZ
		     (unspec_volatile:SWI [(match_dup 1) (match_dup 4)]
					  UNSPECV_XCHG)
		     (match_dup 3)))
	      (set (match_dup 1)
		   (plus:SWI (match_dup 1)
			     (match_dup 2)))])])
    528 
;; Combined fetch-add-and-compare produced by the peepholes above:
;; lock inc/dec for +/-1, otherwise lock add (or sub after negating
;; the constant); ZF then reflects old-value == operand 2.  Valid
;; only when operand 1 == -operand 2, enforced by the condition.
(define_insn "*atomic_fetch_add_cmp<mode>"
  [(set (reg:CCZ FLAGS_REG)
	(compare:CCZ
	  (unspec_volatile:SWI
	    [(match_operand:SWI 0 "memory_operand" "+m")
	     (match_operand:SI 3 "const_int_operand")]		;; model
	    UNSPECV_XCHG)
	  (match_operand:SWI 2 "const_int_operand" "i")))
   (set (match_dup 0)
	(plus:SWI (match_dup 0)
		  (match_operand:SWI 1 "const_int_operand" "i")))]
  "(unsigned HOST_WIDE_INT) INTVAL (operands[1])
   == -(unsigned HOST_WIDE_INT) INTVAL (operands[2])"
{
  if (incdec_operand (operands[1], <MODE>mode))
    {
      if (operands[1] == const1_rtx)
	return "lock{%;} %K3inc{<imodesuffix>}\t%0";
      else
	{
	  gcc_assert (operands[1] == constm1_rtx);
	  return "lock{%;} %K3dec{<imodesuffix>}\t%0";
	}
    }

  /* Prefer SUB of the negated constant when that encodes smaller.  */
  if (x86_maybe_negate_const_int (&operands[1], <MODE>mode))
    return "lock{%;} %K3sub{<imodesuffix>}\t{%1, %0|%0, %1}";

  return "lock{%;} %K3add{<imodesuffix>}\t{%1, %0|%0, %1}";
})
    559 
    560 ;; Recall that xchg implicitly sets LOCK#, so adding it again wastes space.
    561 ;; In addition, it is always a full barrier, so we can ignore the memory model.
;; Recall that xchg implicitly sets LOCK#, so adding it again wastes space.
;; In addition, it is always a full barrier, so we can ignore the memory model.
(define_insn "atomic_exchange<mode>"
  [(set (match_operand:SWI 0 "register_operand" "=<r>")		;; output
	(unspec_volatile:SWI
	  [(match_operand:SWI 1 "memory_operand" "+m")		;; memory
	   (match_operand:SI 3 "const_int_operand")]		;; model
	  UNSPECV_XCHG))
   (set (match_dup 1)
	(match_operand:SWI 2 "register_operand" "0"))]		;; input
  ""
  "%K3xchg{<imodesuffix>}\t{%1, %0|%0, %1}")
    572 
;; Atomic add with the result unused: lock inc/dec for +/-1,
;; otherwise lock add (or lock sub of the negated constant when that
;; encodes smaller).  Operand 2 is the memory model (%K2 prefix).
(define_insn "atomic_add<mode>"
  [(set (match_operand:SWI 0 "memory_operand" "+m")
	(unspec_volatile:SWI
	  [(plus:SWI (match_dup 0)
		     (match_operand:SWI 1 "nonmemory_operand" "<r><i>"))
	   (match_operand:SI 2 "const_int_operand")]		;; model
	  UNSPECV_LOCK))
   (clobber (reg:CC FLAGS_REG))]
  ""
{
  if (incdec_operand (operands[1], <MODE>mode))
    {
      if (operands[1] == const1_rtx)
	return "lock{%;} %K2inc{<imodesuffix>}\t%0";
      else
	{
	  gcc_assert (operands[1] == constm1_rtx);
	  return "lock{%;} %K2dec{<imodesuffix>}\t%0";
	}
    }

  if (x86_maybe_negate_const_int (&operands[1], <MODE>mode))
    return "lock{%;} %K2sub{<imodesuffix>}\t{%1, %0|%0, %1}";

  return "lock{%;} %K2add{<imodesuffix>}\t{%1, %0|%0, %1}";
})
    599 
;; Atomic subtract with the result unused; mirror of atomic_add, with
;; the inc/dec and add/sub choices inverted.
(define_insn "atomic_sub<mode>"
  [(set (match_operand:SWI 0 "memory_operand" "+m")
	(unspec_volatile:SWI
	  [(minus:SWI (match_dup 0)
		      (match_operand:SWI 1 "nonmemory_operand" "<r><i>"))
	   (match_operand:SI 2 "const_int_operand")]		;; model
	  UNSPECV_LOCK))
   (clobber (reg:CC FLAGS_REG))]
  ""
{
  if (incdec_operand (operands[1], <MODE>mode))
    {
      if (operands[1] == const1_rtx)
	return "lock{%;} %K2dec{<imodesuffix>}\t%0";
      else
	{
	  gcc_assert (operands[1] == constm1_rtx);
	  return "lock{%;} %K2inc{<imodesuffix>}\t%0";
	}
    }

  if (x86_maybe_negate_const_int (&operands[1], <MODE>mode))
    return "lock{%;} %K2add{<imodesuffix>}\t{%1, %0|%0, %1}";

  return "lock{%;} %K2sub{<imodesuffix>}\t{%1, %0|%0, %1}";
})
    626 
;; Atomic and/or/xor with the result unused, as a single locked
;; read-modify-write instruction.
(define_insn "atomic_<logic><mode>"
  [(set (match_operand:SWI 0 "memory_operand" "+m")
	(unspec_volatile:SWI
	  [(any_logic:SWI (match_dup 0)
			  (match_operand:SWI 1 "nonmemory_operand" "<r><i>"))
	   (match_operand:SI 2 "const_int_operand")]		;; model
	  UNSPECV_LOCK))
   (clobber (reg:CC FLAGS_REG))]
  ""
  "lock{%;} %K2<logic>{<imodesuffix>}\t{%1, %0|%0, %1}")
    637 
;; Atomic bit-test-and-set via lock bts.  Operand 0 receives the old
;; bit; operand 1 is the memory word, operand 2 the bit position,
;; operand 3 the memory model.  When operand 4 is zero the old bit is
;; shifted back to its original position rather than returned as 0/1
;; (assumption from the ASHIFT below -- confirm against the optab docs).
(define_expand "atomic_bit_test_and_set<mode>"
  [(match_operand:SWI248 0 "register_operand")
   (match_operand:SWI248 1 "memory_operand")
   (match_operand:SWI248 2 "nonmemory_operand")
   (match_operand:SI 3 "const_int_operand") ;; model
   (match_operand:SI 4 "const_int_operand")]
  ""
{
  emit_insn (gen_atomic_bit_test_and_set<mode>_1 (operands[1], operands[2],
						  operands[3]));
  /* The old bit value lands in the carry flag (CCCmode).  */
  rtx tem = gen_reg_rtx (QImode);
  ix86_expand_setcc (tem, EQ, gen_rtx_REG (CCCmode, FLAGS_REG), const0_rtx);
  rtx result = convert_modes (<MODE>mode, QImode, tem, 1);
  if (operands[4] == const0_rtx)
    result = expand_simple_binop (<MODE>mode, ASHIFT, result,
				  operands[2], operands[0], 0, OPTAB_WIDEN);
  if (result != operands[0])
    emit_move_insn (operands[0], result);
  DONE;
})
    658 
;; lock bts: sets the selected bit in memory; the old bit value is
;; reflected in the carry flag (modelled as CCCmode compare with 0).
(define_insn "atomic_bit_test_and_set<mode>_1"
  [(set (reg:CCC FLAGS_REG)
	(compare:CCC
	  (unspec_volatile:SWI248
	    [(match_operand:SWI248 0 "memory_operand" "+m")
	     (match_operand:SI 2 "const_int_operand")]		;; model
	    UNSPECV_XCHG)
	  (const_int 0)))
   (set (zero_extract:SWI248 (match_dup 0)
			     (const_int 1)
			     (match_operand:SWI248 1 "nonmemory_operand" "rN"))
	(const_int 1))]
  ""
  "lock{%;} %K2bts{<imodesuffix>}\t{%1, %0|%0, %1}")
    673 
;; Atomic bit-test-and-complement via lock btc.  Operand layout and
;; result handling are identical to atomic_bit_test_and_set above.
(define_expand "atomic_bit_test_and_complement<mode>"
  [(match_operand:SWI248 0 "register_operand")
   (match_operand:SWI248 1 "memory_operand")
   (match_operand:SWI248 2 "nonmemory_operand")
   (match_operand:SI 3 "const_int_operand") ;; model
   (match_operand:SI 4 "const_int_operand")]
  ""
{
  emit_insn (gen_atomic_bit_test_and_complement<mode>_1 (operands[1],
							 operands[2],
							 operands[3]));
  /* The old bit value lands in the carry flag (CCCmode).  */
  rtx tem = gen_reg_rtx (QImode);
  ix86_expand_setcc (tem, EQ, gen_rtx_REG (CCCmode, FLAGS_REG), const0_rtx);
  rtx result = convert_modes (<MODE>mode, QImode, tem, 1);
  if (operands[4] == const0_rtx)
    result = expand_simple_binop (<MODE>mode, ASHIFT, result,
				  operands[2], operands[0], 0, OPTAB_WIDEN);
  if (result != operands[0])
    emit_move_insn (operands[0], result);
  DONE;
})
    695 
;; lock btc: complements the selected bit in memory; the old bit value
;; is reflected in the carry flag (modelled as CCCmode compare with 0).
(define_insn "atomic_bit_test_and_complement<mode>_1"
  [(set (reg:CCC FLAGS_REG)
	(compare:CCC
	  (unspec_volatile:SWI248
	    [(match_operand:SWI248 0 "memory_operand" "+m")
	     (match_operand:SI 2 "const_int_operand")]		;; model
	    UNSPECV_XCHG)
	  (const_int 0)))
   (set (zero_extract:SWI248 (match_dup 0)
			     (const_int 1)
			     (match_operand:SWI248 1 "nonmemory_operand" "rN"))
	(not:SWI248 (zero_extract:SWI248 (match_dup 0)
					 (const_int 1)
					 (match_dup 1))))]
  ""
  "lock{%;} %K2btc{<imodesuffix>}\t{%1, %0|%0, %1}")
    712 
;; Atomic bit-test-and-reset via lock btr.  Operand layout and result
;; handling are identical to atomic_bit_test_and_set above.
(define_expand "atomic_bit_test_and_reset<mode>"
  [(match_operand:SWI248 0 "register_operand")
   (match_operand:SWI248 1 "memory_operand")
   (match_operand:SWI248 2 "nonmemory_operand")
   (match_operand:SI 3 "const_int_operand") ;; model
   (match_operand:SI 4 "const_int_operand")]
  ""
{
  emit_insn (gen_atomic_bit_test_and_reset<mode>_1 (operands[1], operands[2],
						    operands[3]));
  /* The old bit value lands in the carry flag (CCCmode).  */
  rtx tem = gen_reg_rtx (QImode);
  ix86_expand_setcc (tem, EQ, gen_rtx_REG (CCCmode, FLAGS_REG), const0_rtx);
  rtx result = convert_modes (<MODE>mode, QImode, tem, 1);
  if (operands[4] == const0_rtx)
    result = expand_simple_binop (<MODE>mode, ASHIFT, result,
				  operands[2], operands[0], 0, OPTAB_WIDEN);
  if (result != operands[0])
    emit_move_insn (operands[0], result);
  DONE;
})
    733 
;; lock btr: clears the selected bit in memory; the old bit value is
;; reflected in the carry flag (modelled as CCCmode compare with 0).
(define_insn "atomic_bit_test_and_reset<mode>_1"
  [(set (reg:CCC FLAGS_REG)
	(compare:CCC
	  (unspec_volatile:SWI248
	    [(match_operand:SWI248 0 "memory_operand" "+m")
	     (match_operand:SI 2 "const_int_operand")]		;; model
	    UNSPECV_XCHG)
	  (const_int 0)))
   (set (zero_extract:SWI248 (match_dup 0)
			     (const_int 1)
			     (match_operand:SWI248 1 "nonmemory_operand" "rN"))
	(const_int 0))]
  ""
  "lock{%;} %K2btr{<imodesuffix>}\t{%1, %0|%0, %1}")
    748