;; GCC machine description for i386 synchronization instructions.
;; Copyright (C) 2005-2022 Free Software Foundation, Inc.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

;; Unspec codes used by the synchronization patterns below.
(define_c_enum "unspec" [
  UNSPEC_LFENCE
  UNSPEC_SFENCE
  UNSPEC_MFENCE

  ;; FP-stack (fild/fistp) based atomic DImode move on 32-bit targets.
  UNSPEC_FILD_ATOMIC
  UNSPEC_FIST_ATOMIC

  ;; SSE (movq/movlps) based atomic DImode move on 32-bit targets.
  UNSPEC_LDX_ATOMIC
  UNSPEC_STX_ATOMIC

  ;; __atomic support
  UNSPEC_LDA
  UNSPEC_STA
])

;; Volatile unspec codes; insns using these may not be deleted,
;; duplicated or reordered by the optimizers.
(define_c_enum "unspecv" [
  UNSPECV_CMPXCHG
  UNSPECV_XCHG
  UNSPECV_LOCK
])
41
;; Memory fences.  Every fence pattern operates on a dummy operand: a
;; volatile BLKmode MEM of a SCRATCH address.  Such a "wildcard" memory
;; reference may alias anything, so the scheduler and alias analysis
;; keep all other memory references on their own side of the fence.

;; Load fence; requires SSE2 for the lfence instruction.
(define_expand "sse2_lfence"
  [(set (match_dup 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
  "TARGET_SSE2"
{
  /* Build the wildcard volatile memory operand.  */
  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
  MEM_VOLATILE_P (operands[0]) = 1;
})

(define_insn "*sse2_lfence"
  [(set (match_operand:BLK 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
  "TARGET_SSE2"
  "lfence"
  [(set_attr "type" "sse")
   (set_attr "length_address" "0")
   (set_attr "atom_sse_attr" "lfence")
   (set_attr "memory" "unknown")])

;; Store fence; sfence is available with SSE or the Athlon 3DNow! extension.
(define_expand "sse_sfence"
  [(set (match_dup 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
  "TARGET_SSE || TARGET_3DNOW_A"
{
  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
  MEM_VOLATILE_P (operands[0]) = 1;
})

(define_insn "*sse_sfence"
  [(set (match_operand:BLK 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
  "TARGET_SSE || TARGET_3DNOW_A"
  "sfence"
  [(set_attr "type" "sse")
   (set_attr "length_address" "0")
   (set_attr "atom_sse_attr" "fence")
   (set_attr "memory" "unknown")])

;; Full fence via the mfence instruction.
(define_expand "sse2_mfence"
  [(set (match_dup 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
  "TARGET_SSE2"
{
  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
  MEM_VOLATILE_P (operands[0]) = 1;
})

;; mfence exists on all 64-bit parts, or with SSE2 on 32-bit.
(define_insn "mfence_sse2"
  [(set (match_operand:BLK 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
  "TARGET_64BIT || TARGET_SSE2"
  "mfence"
  [(set_attr "type" "sse")
   (set_attr "length_address" "0")
   (set_attr "atom_sse_attr" "fence")
   (set_attr "memory" "unknown")])

;; Full fence without mfence: a locked read-modify-write of the word at
;; the top of the stack ("lock or $0, (%esp)") acts as a full barrier.
;; Unconditionally available, but clobbers the flags.
(define_insn "mfence_nosse"
  [(set (match_operand:BLK 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))
   (clobber (reg:CC FLAGS_REG))]
  ""
{
  rtx mem = gen_rtx_MEM (word_mode, stack_pointer_rtx);

  output_asm_insn ("lock{%;} or%z0\t{$0, %0|%0, 0}", &mem);
  return "";
}
  [(set_attr "memory" "unknown")])
111
;; Expand a __atomic_thread_fence for the given memory model (operand 0).
;; Only a seq-cst fence needs any code at all; weaker fences are already
;; satisfied by the strength of the i386 memory model.
(define_expand "mem_thread_fence"
  [(match_operand:SI 0 "const_int_operand")]		;; model
  ""
{
  enum memmodel model = memmodel_from_int (INTVAL (operands[0]));

  /* Unless this is a SEQ_CST fence, the i386 memory model is strong
     enough not to require barriers of any kind.  */
  if (!is_mm_seq_cst (model))
    DONE;

  /* Prefer the real mfence when it exists, unless tuning says to avoid
     it (and we are not optimizing for size).  */
  bool use_mfence
    = ((TARGET_64BIT || TARGET_SSE2)
       && (optimize_function_for_size_p (cfun)
	   || !TARGET_AVOID_MFENCE));

  /* Wildcard volatile memory, as for the fence insns above.  */
  rtx fence_mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
  MEM_VOLATILE_P (fence_mem) = 1;

  emit_insn (use_mfence
	     ? gen_mfence_sse2 (fence_mem)
	     : gen_mfence_nosse (fence_mem));
  DONE;
})
139
140 ;; ??? From volume 3 section 8.1.1 Guaranteed Atomic Operations,
141 ;; Only beginning at Pentium family processors do we get any guarantee of
142 ;; atomicity in aligned 64-bit quantities. Beginning at P6, we get a
143 ;; guarantee for 64-bit accesses that do not cross a cacheline boundary.
144 ;;
145 ;; Note that the TARGET_CMPXCHG8B test below is a stand-in for "Pentium".
146 ;;
147 ;; Importantly, *no* processor makes atomicity guarantees for larger
148 ;; accesses. In particular, there's no way to perform an atomic TImode
149 ;; move, despite the apparent applicability of MOVDQA et al.
150
;; Modes for which a plain aligned load/store is atomic.  DImode is
;; included only when the hardware guarantees it: native 64-bit mode,
;; or a Pentium-class 32-bit part (TARGET_CMPXCHG8B, per the note
;; above) with either the FPU or SSE available to do a 64-bit move.
(define_mode_iterator ATOMIC
   [QI HI SI
    (DI "TARGET_64BIT || (TARGET_CMPXCHG8B && (TARGET_80387 || TARGET_SSE))")
   ])
155
;; Expand an atomic load.  Operand 1 is the memory source, operand 2 the
;; memory model (unused here: an aligned load of these modes is atomic,
;; and loads need no extra barrier on x86).
(define_expand "atomic_load<mode>"
  [(set (match_operand:ATOMIC 0 "nonimmediate_operand")
	(unspec:ATOMIC [(match_operand:ATOMIC 1 "memory_operand")
			(match_operand:SI 2 "const_int_operand")]
		       UNSPEC_LDA))]
  ""
{
  if (<MODE>mode == DImode && !TARGET_64BIT)
    /* For DImode on 32-bit, we can use the FPU to perform the load.  */
    emit_insn (gen_atomic_loaddi_fpu
	       (operands[0], operands[1],
		assign_386_stack_local (DImode, SLOT_TEMP)));
  else
    {
      /* A plain move suffices, but it must go memory -> register;
	 bounce through a fresh register when the destination is
	 itself memory.  */
      rtx out = operands[0];

      if (MEM_P (out))
	out = gen_reg_rtx (<MODE>mode);

      emit_move_insn (out, operands[1]);
      if (out != operands[0])
	emit_move_insn (operands[0], out);
    }
  DONE;
})
183
;; Atomic DImode load for 32-bit targets, moved through an FP-stack or
;; SSE register (operand 3).  Operand 2 is a stack temporary used when
;; the final destination is a general register, since the fild/fistp
;; and movq sequences can only deposit the value in memory.
(define_insn_and_split "atomic_loaddi_fpu"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=x,m,?r")
	(unspec:DI [(match_operand:DI 1 "memory_operand" "m,m,m")]
		   UNSPEC_LDA))
   (clobber (match_operand:DI 2 "memory_operand" "=X,X,m"))
   (clobber (match_scratch:DF 3 "=X,xf,xf"))]
  "!TARGET_64BIT && (TARGET_80387 || TARGET_SSE)"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx dst = operands[0], src = operands[1];
  rtx mem = operands[2], tmp = operands[3];

  /* An SSE destination register can take the 64-bit value directly.  */
  if (SSE_REG_P (dst))
    emit_move_insn (dst, src);
  else
    {
      /* Store straight into a memory destination; otherwise go via
	 the stack temporary.  */
      if (MEM_P (dst))
	mem = dst;

      /* The scratch was allocated either on the FP stack or in an
	 SSE register; use the matching load/store pair.  */
      if (STACK_REG_P (tmp))
	{
	  emit_insn (gen_loaddi_via_fpu (tmp, src));
	  emit_insn (gen_storedi_via_fpu (mem, tmp));
	}
      else
	{
	  emit_insn (gen_loaddi_via_sse (tmp, src));
	  emit_insn (gen_storedi_via_sse (mem, tmp));
	}

      /* Finally copy from the temporary to a register destination.  */
      if (mem != dst)
	emit_move_insn (dst, mem);
    }
  DONE;
})
221
;; Expand an atomic store.  An aligned plain store is atomic for the
;; modes in ATOMIC; the special cases are DImode on 32-bit (done via
;; the FPU/SSE) and seq-cst stores, which need a trailing full barrier.
(define_expand "atomic_store<mode>"
  [(set (match_operand:ATOMIC 0 "memory_operand")
	(unspec:ATOMIC [(match_operand:ATOMIC 1 "nonimmediate_operand")
			(match_operand:SI 2 "const_int_operand")]
		       UNSPEC_STA))]
  ""
{
  enum memmodel model = memmodel_from_int (INTVAL (operands[2]));

  if (<MODE>mode == DImode && !TARGET_64BIT)
    {
      /* For DImode on 32-bit, we can use the FPU to perform the store.  */
      /* Note that while we could perform a cmpxchg8b loop, that turns
	 out to be significantly larger than this plus a barrier.  */
      emit_insn (gen_atomic_storedi_fpu
		 (operands[0], operands[1],
		  assign_386_stack_local (DImode, SLOT_TEMP)));
    }
  else
    {
      operands[1] = force_reg (<MODE>mode, operands[1]);

      /* For seq-cst stores, use XCHG when we lack MFENCE.  XCHG is
	 itself a full barrier (see atomic_exchange below), so no
	 separate fence is emitted on this path.  */
      if (is_mm_seq_cst (model)
	  && (!(TARGET_64BIT || TARGET_SSE2)
	      || TARGET_AVOID_MFENCE))
	{
	  emit_insn (gen_atomic_exchange<mode> (gen_reg_rtx (<MODE>mode),
						operands[0], operands[1],
						operands[2]));
	  DONE;
	}

      /* Otherwise use a store.  */
      emit_insn (gen_atomic_store<mode>_1 (operands[0], operands[1],
					   operands[2]));
    }
  /* ... followed by an MFENCE, if required.  */
  if (is_mm_seq_cst (model))
    emit_insn (gen_mem_thread_fence (operands[2]));
  DONE;
})

;; Plain mov used for the store itself.  Operand 2 is the memory model,
;; consumed only by the %K output directive (a model-dependent prefix;
;; NOTE(review): presumably the HLE xacquire/xrelease prefixes -- see
;; the 'K' case in ix86_print_operand to confirm).
(define_insn "atomic_store<mode>_1"
  [(set (match_operand:SWI 0 "memory_operand" "=m")
	(unspec:SWI [(match_operand:SWI 1 "<nonmemory_operand>" "<r><i>")
		     (match_operand:SI 2 "const_int_operand")]
		    UNSPEC_STA))]
  ""
  "%K2mov{<imodesuffix>}\t{%1, %0|%0, %1}")
272
;; Atomic DImode store for 32-bit targets; mirror image of
;; atomic_loaddi_fpu above.  Operand 2 is a stack temporary used when
;; the source is a general register, since fild/movq can only pick the
;; value up from memory.
(define_insn_and_split "atomic_storedi_fpu"
  [(set (match_operand:DI 0 "memory_operand" "=m,m,m")
	(unspec:DI [(match_operand:DI 1 "nonimmediate_operand" "x,m,?r")]
		   UNSPEC_STA))
   (clobber (match_operand:DI 2 "memory_operand" "=X,X,m"))
   (clobber (match_scratch:DF 3 "=X,xf,xf"))]
  "!TARGET_64BIT && (TARGET_80387 || TARGET_SSE)"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx dst = operands[0], src = operands[1];
  rtx mem = operands[2], tmp = operands[3];

  /* An SSE source register can be stored directly.  */
  if (SSE_REG_P (src))
    emit_move_insn (dst, src);
  else
    {
      /* Spill a general-register source to the stack temporary so the
	 FPU/SSE load below can read it from memory.  */
      if (REG_P (src))
	{
	  emit_move_insn (mem, src);
	  src = mem;
	}

      if (STACK_REG_P (tmp))
	{
	  emit_insn (gen_loaddi_via_fpu (tmp, src));
	  emit_insn (gen_storedi_via_fpu (dst, tmp));
	}
      else
	{
	  emit_insn (gen_loaddi_via_sse (tmp, src));
	  emit_insn (gen_storedi_via_sse (dst, tmp));
	}
    }
  DONE;
})
310
311 ;; ??? You'd think that we'd be able to perform this via FLOAT + FIX_TRUNC
312 ;; operations. But the fix_trunc patterns want way more setup than we want
313 ;; to provide. Note that the scratch is DFmode instead of XFmode in order
314 ;; to make it easy to allocate a scratch in either SSE or FP_REGs above.
315
;; 64-bit load into an FP-stack register via fild.
(define_insn "loaddi_via_fpu"
  [(set (match_operand:DF 0 "register_operand" "=f")
	(unspec:DF [(match_operand:DI 1 "memory_operand" "m")]
		   UNSPEC_FILD_ATOMIC))]
  "TARGET_80387"
  "fild%Z1\t%1"
  [(set_attr "type" "fmov")
   (set_attr "mode" "DF")
   (set_attr "fp_int_src" "true")])

;; 64-bit store from an FP-stack register via fistp, which also pops
;; the FP stack -- hence the assertion that the register dies here.
(define_insn "storedi_via_fpu"
  [(set (match_operand:DI 0 "memory_operand" "=m")
	(unspec:DI [(match_operand:DF 1 "register_operand" "f")]
		   UNSPEC_FIST_ATOMIC))]
  "TARGET_80387"
{
  gcc_assert (find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != NULL_RTX);

  return "fistp%Z0\t%0";
}
  [(set_attr "type" "fmov")
   (set_attr "mode" "DI")])

;; 64-bit load into an SSE register; movq with SSE2, else the SSE1
;; movlps (which moves the same 64 bits into the low half).
(define_insn "loaddi_via_sse"
  [(set (match_operand:DF 0 "register_operand" "=x")
	(unspec:DF [(match_operand:DI 1 "memory_operand" "m")]
		   UNSPEC_LDX_ATOMIC))]
  "TARGET_SSE"
{
  if (TARGET_SSE2)
    return "%vmovq\t{%1, %0|%0, %1}";
  return "movlps\t{%1, %0|%0, %1}";
}
  [(set_attr "type" "ssemov")
   (set_attr "mode" "DI")])

;; 64-bit store from an SSE register; counterpart of loaddi_via_sse.
(define_insn "storedi_via_sse"
  [(set (match_operand:DI 0 "memory_operand" "=m")
	(unspec:DI [(match_operand:DF 1 "register_operand" "x")]
		   UNSPEC_STX_ATOMIC))]
  "TARGET_SSE"
{
  if (TARGET_SSE2)
    return "%vmovq\t{%1, %0|%0, %1}";
  return "movlps\t{%1, %0|%0, %1}";
}
  [(set_attr "type" "ssemov")
   (set_attr "mode" "DI")])
364
;; Compare-and-swap for QI/HI/SImode.  Either emit a single cmpxchg
;; insn, or, under -mrelax-cmpxchg-loop, let ix86_expand_cmpxchg_loop
;; build a relaxed loop (which also sets the bool result itself).
(define_expand "atomic_compare_and_swap<mode>"
  [(match_operand:QI 0 "register_operand")	;; bool success output
   (match_operand:SWI124 1 "register_operand")	;; oldval output
   (match_operand:SWI124 2 "memory_operand")	;; memory
   (match_operand:SWI124 3 "register_operand")	;; expected input
   (match_operand:SWI124 4 "register_operand")	;; newval input
   (match_operand:SI 5 "const_int_operand")	;; is_weak
   (match_operand:SI 6 "const_int_operand")	;; success model
   (match_operand:SI 7 "const_int_operand")]	;; failure model
  "TARGET_CMPXCHG"
{
  if (TARGET_RELAX_CMPXCHG_LOOP)
    {
      ix86_expand_cmpxchg_loop (&operands[0], operands[1], operands[2],
				operands[3], operands[4], operands[6],
				false, NULL);
    }
  else
    {
      emit_insn
	(gen_atomic_compare_and_swap<mode>_1
	 (operands[1], operands[2], operands[3], operands[4], operands[6]));
      /* cmpxchg sets ZF on success; materialize it in the bool output.  */
      ix86_expand_setcc (operands[0], EQ, gen_rtx_REG (CCZmode, FLAGS_REG),
			 const0_rtx);
    }
  DONE;
})

;; Modes handled by the double-word CAS path: DImode (single cmpxchg on
;; 64-bit, cmpxchg8b on 32-bit) and TImode (cmpxchg16b, 64-bit only).
(define_mode_iterator CASMODE
  [(DI "TARGET_64BIT || TARGET_CMPXCHG8B")
   (TI "TARGET_64BIT && TARGET_CMPXCHG16B")])
;; Half-width mode of each CASMODE, for splitting the newval pair.
(define_mode_attr CASHMODE [(DI "SI") (TI "DI")])
397
;; Compare-and-swap for DImode/TImode.  DImode on 64-bit is the plain
;; single-word cmpxchg; everything else is a double-word operation via
;; cmpxchg8b/cmpxchg16b with the new value split into a half-word pair.
(define_expand "atomic_compare_and_swap<mode>"
  [(match_operand:QI 0 "register_operand")	;; bool success output
   (match_operand:CASMODE 1 "register_operand")	;; oldval output
   (match_operand:CASMODE 2 "memory_operand")	;; memory
   (match_operand:CASMODE 3 "register_operand")	;; expected input
   (match_operand:CASMODE 4 "register_operand")	;; newval input
   (match_operand:SI 5 "const_int_operand")	;; is_weak
   (match_operand:SI 6 "const_int_operand")	;; success model
   (match_operand:SI 7 "const_int_operand")]	;; failure model
  "TARGET_CMPXCHG"
{
  /* Only DImode on a 64-bit target fits in one machine word.  */
  int doubleword = !(<MODE>mode == DImode && TARGET_64BIT);
  if (TARGET_RELAX_CMPXCHG_LOOP)
    {
      ix86_expand_cmpxchg_loop (&operands[0], operands[1], operands[2],
				operands[3], operands[4], operands[6],
				doubleword, NULL);
    }
  else
    {
      if (!doubleword)
	{
	  emit_insn
	    (gen_atomic_compare_and_swapdi_1
	     (operands[1], operands[2], operands[3], operands[4], operands[6]));
	}
      else
	{
	  machine_mode hmode = <CASHMODE>mode;

	  /* Pass newval as a lo/hi half pair; see the register-pairing
	     note before the _doubleword pattern below.  */
	  emit_insn
	    (gen_atomic_compare_and_swap<mode>_doubleword
	     (operands[1], operands[2], operands[3],
	      gen_lowpart (hmode, operands[4]), gen_highpart (hmode, operands[4]),
	      operands[6]));
	}

      /* cmpxchg sets ZF on success; materialize it in the bool output.  */
      ix86_expand_setcc (operands[0], EQ, gen_rtx_REG (CCZmode, FLAGS_REG),
			 const0_rtx);
    }
  DONE;
})
440
441 ;; For double-word compare and swap, we are obliged to play tricks with
442 ;; the input newval (op3:op4) because the Intel register numbering does
443 ;; not match the gcc register numbering, so the pair must be CX:BX.
444
;; "8" for cmpxchg8b (SImode halves), "16" for cmpxchg16b (DImode halves).
(define_mode_attr doublemodesuffix [(SI "8") (DI "16")])

;; Double-word CAS: oldval comes back in DX:AX (the "A" constraint),
;; newval is pinned to CX:BX as the instruction requires.  The extra
;; (set (match_dup 1) ...) and CCZ set record that the insn writes the
;; memory and the zero flag.
(define_insn "atomic_compare_and_swap<dwi>_doubleword"
  [(set (match_operand:<DWI> 0 "register_operand" "=A")
	(unspec_volatile:<DWI>
	  [(match_operand:<DWI> 1 "memory_operand" "+m")
	   (match_operand:<DWI> 2 "register_operand" "0")
	   (match_operand:DWIH 3 "register_operand" "b")
	   (match_operand:DWIH 4 "register_operand" "c")
	   (match_operand:SI 5 "const_int_operand")]
	  UNSPECV_CMPXCHG))
   (set (match_dup 1)
	(unspec_volatile:<DWI> [(const_int 0)] UNSPECV_CMPXCHG))
   (set (reg:CCZ FLAGS_REG)
	(unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG))]
  "TARGET_CMPXCHG<doublemodesuffix>B"
  "lock{%;} %K5cmpxchg<doublemodesuffix>b\t%1")
462
;; Single-word CAS.  The expected value goes in (and the old value
;; comes back in) the accumulator, as cmpxchg requires; ZF is set on
;; success.  Operand 4 is the memory model, used only by %K.
(define_insn "atomic_compare_and_swap<mode>_1"
  [(set (match_operand:SWI 0 "register_operand" "=a")
	(unspec_volatile:SWI
	  [(match_operand:SWI 1 "memory_operand" "+m")
	   (match_operand:SWI 2 "register_operand" "0")
	   (match_operand:SWI 3 "register_operand" "<r>")
	   (match_operand:SI 4 "const_int_operand")]
	  UNSPECV_CMPXCHG))
   (set (match_dup 1)
	(unspec_volatile:SWI [(const_int 0)] UNSPECV_CMPXCHG))
   (set (reg:CCZ FLAGS_REG)
	(unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG))]
  "TARGET_CMPXCHG"
  "lock{%;} %K4cmpxchg{<imodesuffix>}\t{%3, %1|%1, %3}")
477
;; Peephole: a cmpxchg followed by a compare of its output against the
;; expected value is redundant -- cmpxchg already left exactly that
;; result in ZF.  Drop the trailing compare, keeping the rest intact.
(define_peephole2
  [(set (match_operand:SWI 0 "register_operand")
	(match_operand:SWI 1 "general_operand"))
   (parallel [(set (match_dup 0)
		   (unspec_volatile:SWI
		     [(match_operand:SWI 2 "memory_operand")
		      (match_dup 0)
		      (match_operand:SWI 3 "register_operand")
		      (match_operand:SI 4 "const_int_operand")]
		     UNSPECV_CMPXCHG))
	      (set (match_dup 2)
		   (unspec_volatile:SWI [(const_int 0)] UNSPECV_CMPXCHG))
	      (set (reg:CCZ FLAGS_REG)
		   (unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG))])
   (set (reg:CCZ FLAGS_REG)
	(compare:CCZ (match_operand:SWI 5 "register_operand")
		     (match_operand:SWI 6 "general_operand")))]
  ;; The compare must be between the cmpxchg output and the expected
  ;; value, in either order.
  "(rtx_equal_p (operands[0], operands[5])
    && rtx_equal_p (operands[1], operands[6]))
   || (rtx_equal_p (operands[0], operands[6])
       && rtx_equal_p (operands[1], operands[5]))"
  [(set (match_dup 0)
	(match_dup 1))
   (parallel [(set (match_dup 0)
		   (unspec_volatile:SWI
		     [(match_dup 2)
		      (match_dup 0)
		      (match_dup 3)
		      (match_dup 4)]
		     UNSPECV_CMPXCHG))
	      (set (match_dup 2)
		   (unspec_volatile:SWI [(const_int 0)] UNSPECV_CMPXCHG))
	      (set (reg:CCZ FLAGS_REG)
		   (unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG))])])

;; Same idea, but the expected value was loaded by a flag-clobbering
;; constant move (e.g. xor-zeroing) and the compare is against that
;; constant; the constant load may be in a wider mode (SWI48) than the
;; cmpxchg itself.
(define_peephole2
  [(parallel [(set (match_operand:SWI48 0 "register_operand")
		   (match_operand:SWI48 1 "const_int_operand"))
	      (clobber (reg:CC FLAGS_REG))])
   (parallel [(set (match_operand:SWI 2 "register_operand")
		   (unspec_volatile:SWI
		     [(match_operand:SWI 3 "memory_operand")
		      (match_dup 2)
		      (match_operand:SWI 4 "register_operand")
		      (match_operand:SI 5 "const_int_operand")]
		     UNSPECV_CMPXCHG))
	      (set (match_dup 3)
		   (unspec_volatile:SWI [(const_int 0)] UNSPECV_CMPXCHG))
	      (set (reg:CCZ FLAGS_REG)
		   (unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG))])
   (set (reg:CCZ FLAGS_REG)
	(compare:CCZ (match_dup 2)
		     (match_dup 1)))]
  "REGNO (operands[0]) == REGNO (operands[2])"
  [(parallel [(set (match_dup 0)
		   (match_dup 1))
	      (clobber (reg:CC FLAGS_REG))])
   (parallel [(set (match_dup 2)
		   (unspec_volatile:SWI
		     [(match_dup 3)
		      (match_dup 2)
		      (match_dup 4)
		      (match_dup 5)]
		     UNSPECV_CMPXCHG))
	      (set (match_dup 3)
		   (unspec_volatile:SWI [(const_int 0)] UNSPECV_CMPXCHG))
	      (set (reg:CCZ FLAGS_REG)
		   (unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG))])])
546
;; __atomic fetch-op / op-fetch expanders enabled only under
;; -mrelax-cmpxchg-loop.  Each defers to ix86_expand_atomic_fetch_op_loop,
;; whose bool arguments are: after (false = return the old value,
;; true = return the new value) and doubleword.  NOT stands in for the
;; nand operation.  Without these expanders the middle end falls back to
;; its own compare-and-swap loop.

;; Fetch-then-<logic> for QI/HI/SImode.
(define_expand "atomic_fetch_<logic><mode>"
  [(match_operand:SWI124 0 "register_operand")
   (any_logic:SWI124
     (match_operand:SWI124 1 "memory_operand")
     (match_operand:SWI124 2 "register_operand"))
   (match_operand:SI 3 "const_int_operand")]
  "TARGET_CMPXCHG && TARGET_RELAX_CMPXCHG_LOOP"
{
  ix86_expand_atomic_fetch_op_loop (operands[0], operands[1],
				    operands[2], <CODE>, false,
				    false);
  DONE;
})

;; <logic>-then-fetch for QI/HI/SImode.
(define_expand "atomic_<logic>_fetch<mode>"
  [(match_operand:SWI124 0 "register_operand")
   (any_logic:SWI124
     (match_operand:SWI124 1 "memory_operand")
     (match_operand:SWI124 2 "register_operand"))
   (match_operand:SI 3 "const_int_operand")]
  "TARGET_CMPXCHG && TARGET_RELAX_CMPXCHG_LOOP"
{
  ix86_expand_atomic_fetch_op_loop (operands[0], operands[1],
				    operands[2], <CODE>, true,
				    false);
  DONE;
})

;; Fetch-then-nand for QI/HI/SImode.
(define_expand "atomic_fetch_nand<mode>"
  [(match_operand:SWI124 0 "register_operand")
   (match_operand:SWI124 1 "memory_operand")
   (match_operand:SWI124 2 "register_operand")
   (match_operand:SI 3 "const_int_operand")]
  "TARGET_CMPXCHG && TARGET_RELAX_CMPXCHG_LOOP"
{
  ix86_expand_atomic_fetch_op_loop (operands[0], operands[1],
				    operands[2], NOT, false,
				    false);
  DONE;
})

;; Nand-then-fetch for QI/HI/SImode.
(define_expand "atomic_nand_fetch<mode>"
  [(match_operand:SWI124 0 "register_operand")
   (match_operand:SWI124 1 "memory_operand")
   (match_operand:SWI124 2 "register_operand")
   (match_operand:SI 3 "const_int_operand")]
  "TARGET_CMPXCHG && TARGET_RELAX_CMPXCHG_LOOP"
{
  ix86_expand_atomic_fetch_op_loop (operands[0], operands[1],
				    operands[2], NOT, true,
				    false);
  DONE;
})

;; Fetch-then-<logic> for DI/TImode; doubleword except DImode on 64-bit.
(define_expand "atomic_fetch_<logic><mode>"
  [(match_operand:CASMODE 0 "register_operand")
   (any_logic:CASMODE
     (match_operand:CASMODE 1 "memory_operand")
     (match_operand:CASMODE 2 "register_operand"))
   (match_operand:SI 3 "const_int_operand")]
  "TARGET_CMPXCHG && TARGET_RELAX_CMPXCHG_LOOP"
{
  bool doubleword = (<MODE>mode == DImode && !TARGET_64BIT)
		    || (<MODE>mode == TImode);
  ix86_expand_atomic_fetch_op_loop (operands[0], operands[1],
				    operands[2], <CODE>, false,
				    doubleword);
  DONE;
})

;; <logic>-then-fetch for DI/TImode.
(define_expand "atomic_<logic>_fetch<mode>"
  [(match_operand:CASMODE 0 "register_operand")
   (any_logic:CASMODE
     (match_operand:CASMODE 1 "memory_operand")
     (match_operand:CASMODE 2 "register_operand"))
   (match_operand:SI 3 "const_int_operand")]
  "TARGET_CMPXCHG && TARGET_RELAX_CMPXCHG_LOOP"
{
  bool doubleword = (<MODE>mode == DImode && !TARGET_64BIT)
		    || (<MODE>mode == TImode);
  ix86_expand_atomic_fetch_op_loop (operands[0], operands[1],
				    operands[2], <CODE>, true,
				    doubleword);
  DONE;
})

;; Fetch-then-nand for DI/TImode.
(define_expand "atomic_fetch_nand<mode>"
  [(match_operand:CASMODE 0 "register_operand")
   (match_operand:CASMODE 1 "memory_operand")
   (match_operand:CASMODE 2 "register_operand")
   (match_operand:SI 3 "const_int_operand")]
  "TARGET_CMPXCHG && TARGET_RELAX_CMPXCHG_LOOP"
{
  bool doubleword = (<MODE>mode == DImode && !TARGET_64BIT)
		    || (<MODE>mode == TImode);
  ix86_expand_atomic_fetch_op_loop (operands[0], operands[1],
				    operands[2], NOT, false,
				    doubleword);
  DONE;
})

;; Nand-then-fetch for DI/TImode.
(define_expand "atomic_nand_fetch<mode>"
  [(match_operand:CASMODE 0 "register_operand")
   (match_operand:CASMODE 1 "memory_operand")
   (match_operand:CASMODE 2 "register_operand")
   (match_operand:SI 3 "const_int_operand")]
  "TARGET_CMPXCHG && TARGET_RELAX_CMPXCHG_LOOP"
{
  bool doubleword = (<MODE>mode == DImode && !TARGET_64BIT)
		    || (<MODE>mode == TImode);
  ix86_expand_atomic_fetch_op_loop (operands[0], operands[1],
				    operands[2], NOT, true,
				    doubleword);
  DONE;
})
662
663
664 ;; For operand 2 nonmemory_operand predicate is used instead of
665 ;; register_operand to allow combiner to better optimize atomic
666 ;; additions of constants.
;; Fetch-and-add via lock xadd: the old memory value lands in operand 0
;; while memory gains operand 2.  The "0" constraint ties the addend to
;; the output register, matching xadd's exchange behavior.
(define_insn "atomic_fetch_add<mode>"
  [(set (match_operand:SWI 0 "register_operand" "=<r>")
	(unspec_volatile:SWI
	  [(match_operand:SWI 1 "memory_operand" "+m")
	   (match_operand:SI 3 "const_int_operand")]		;; model
	  UNSPECV_XCHG))
   (set (match_dup 1)
	(plus:SWI (match_dup 1)
		  (match_operand:SWI 2 "nonmemory_operand" "0")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_XADD"
  "lock{%;} %K3xadd{<imodesuffix>}\t{%0, %1|%1, %0}")
679
680 ;; This peephole2 and following insn optimize
681 ;; __sync_fetch_and_add (x, -N) == N into just lock {add,sub,inc,dec}
682 ;; followed by testing of flags instead of lock xadd and comparisons.
;; This peephole2 and following insn optimize
;; __sync_fetch_and_add (x, -N) == N into just lock {add,sub,inc,dec}
;; followed by testing of flags instead of lock xadd and comparisons.
;; Applies only when the fetched value dies at the compare and the
;; constants are exact negations of each other.
(define_peephole2
  [(set (match_operand:SWI 0 "register_operand")
	(match_operand:SWI 2 "const_int_operand"))
   (parallel [(set (match_dup 0)
		   (unspec_volatile:SWI
		     [(match_operand:SWI 1 "memory_operand")
		      (match_operand:SI 4 "const_int_operand")]
		     UNSPECV_XCHG))
	      (set (match_dup 1)
		   (plus:SWI (match_dup 1)
			     (match_dup 0)))
	      (clobber (reg:CC FLAGS_REG))])
   (set (reg:CCZ FLAGS_REG)
	(compare:CCZ (match_dup 0)
		     (match_operand:SWI 3 "const_int_operand")))]
  "peep2_reg_dead_p (3, operands[0])
   && (unsigned HOST_WIDE_INT) INTVAL (operands[2])
      == -(unsigned HOST_WIDE_INT) INTVAL (operands[3])
   && !reg_overlap_mentioned_p (operands[0], operands[1])"
  [(parallel [(set (reg:CCZ FLAGS_REG)
		   (compare:CCZ
		     (unspec_volatile:SWI [(match_dup 1) (match_dup 4)]
					  UNSPECV_XCHG)
		     (match_dup 3)))
	      (set (match_dup 1)
		   (plus:SWI (match_dup 1)
			     (match_dup 2)))])])

;; Likewise, but for the -Os special case of *mov<mode>_or, where the
;; constant -1 was materialized by a flag-clobbering move.
(define_peephole2
  [(parallel [(set (match_operand:SWI 0 "register_operand")
		   (match_operand:SWI 2 "constm1_operand"))
	      (clobber (reg:CC FLAGS_REG))])
   (parallel [(set (match_dup 0)
		   (unspec_volatile:SWI
		     [(match_operand:SWI 1 "memory_operand")
		      (match_operand:SI 4 "const_int_operand")]
		     UNSPECV_XCHG))
	      (set (match_dup 1)
		   (plus:SWI (match_dup 1)
			     (match_dup 0)))
	      (clobber (reg:CC FLAGS_REG))])
   (set (reg:CCZ FLAGS_REG)
	(compare:CCZ (match_dup 0)
		     (match_operand:SWI 3 "const_int_operand")))]
  "peep2_reg_dead_p (3, operands[0])
   && (unsigned HOST_WIDE_INT) INTVAL (operands[2])
      == -(unsigned HOST_WIDE_INT) INTVAL (operands[3])
   && !reg_overlap_mentioned_p (operands[0], operands[1])"
  [(parallel [(set (reg:CCZ FLAGS_REG)
		   (compare:CCZ
		     (unspec_volatile:SWI [(match_dup 1) (match_dup 4)]
					  UNSPECV_XCHG)
		     (match_dup 3)))
	      (set (match_dup 1)
		   (plus:SWI (match_dup 1)
			     (match_dup 2)))])])
740
;; Insn produced by the two peepholes above: a lock add/sub/inc/dec
;; whose ZF result stands in for comparing the fetched value with
;; operand 2 (the negation of the addend, per the insn condition).
(define_insn "*atomic_fetch_add_cmp<mode>"
  [(set (reg:CCZ FLAGS_REG)
	(compare:CCZ
	  (unspec_volatile:SWI
	    [(match_operand:SWI 0 "memory_operand" "+m")
	     (match_operand:SI 3 "const_int_operand")]		;; model
	    UNSPECV_XCHG)
	  (match_operand:SWI 2 "const_int_operand" "i")))
   (set (match_dup 0)
	(plus:SWI (match_dup 0)
		  (match_operand:SWI 1 "const_int_operand" "i")))]
  "(unsigned HOST_WIDE_INT) INTVAL (operands[1])
   == -(unsigned HOST_WIDE_INT) INTVAL (operands[2])"
{
  /* +1/-1 become inc/dec, which have no immediate byte.  */
  if (incdec_operand (operands[1], <MODE>mode))
    {
      if (operands[1] == const1_rtx)
	return "lock{%;} %K3inc{<imodesuffix>}\t%0";
      else
	{
	  gcc_assert (operands[1] == constm1_rtx);
	  return "lock{%;} %K3dec{<imodesuffix>}\t%0";
	}
    }

  /* x86_maybe_negate_const_int negates operands[1] in place when the
     negated immediate is preferable; emit sub in that case.  */
  if (x86_maybe_negate_const_int (&operands[1], <MODE>mode))
    return "lock{%;} %K3sub{<imodesuffix>}\t{%1, %0|%0, %1}";

  return "lock{%;} %K3add{<imodesuffix>}\t{%1, %0|%0, %1}";
})
771
772 ;; Recall that xchg implicitly sets LOCK#, so adding it again wastes space.
773 ;; In addition, it is always a full barrier, so we can ignore the memory model.
;; Atomic exchange.  Per the note above, xchg with a memory operand
;; asserts LOCK# implicitly (no "lock" prefix needed) and is a full
;; barrier, so the model operand 3 only feeds the %K directive.
(define_insn "atomic_exchange<mode>"
  [(set (match_operand:SWI 0 "register_operand" "=<r>")		;; output
	(unspec_volatile:SWI
	  [(match_operand:SWI 1 "memory_operand" "+m")		;; memory
	   (match_operand:SI 3 "const_int_operand")]		;; model
	  UNSPECV_XCHG))
   (set (match_dup 1)
	(match_operand:SWI 2 "register_operand" "0"))]		;; input
  ""
  "%K3xchg{<imodesuffix>}\t{%1, %0|%0, %1}")
784
;; Atomic add with no fetched result, emitted as lock add -- or as the
;; shorter lock inc/dec for +-1, or lock sub when negating the constant
;; yields a preferable immediate.
(define_insn "atomic_add<mode>"
  [(set (match_operand:SWI 0 "memory_operand" "+m")
	(unspec_volatile:SWI
	  [(plus:SWI (match_dup 0)
		     (match_operand:SWI 1 "nonmemory_operand" "<r><i>"))
	   (match_operand:SI 2 "const_int_operand")]		;; model
	  UNSPECV_LOCK))
   (clobber (reg:CC FLAGS_REG))]
  ""
{
  /* incdec_operand only accepts +1 and -1 here.  */
  if (incdec_operand (operands[1], <MODE>mode))
    {
      if (operands[1] == constm1_rtx)
	return "lock{%;} %K2dec{<imodesuffix>}\t%0";

      gcc_assert (operands[1] == const1_rtx);
      return "lock{%;} %K2inc{<imodesuffix>}\t%0";
    }

  /* x86_maybe_negate_const_int negates operands[1] in place when the
     negated immediate is preferable.  */
  if (x86_maybe_negate_const_int (&operands[1], <MODE>mode))
    return "lock{%;} %K2sub{<imodesuffix>}\t{%1, %0|%0, %1}";

  return "lock{%;} %K2add{<imodesuffix>}\t{%1, %0|%0, %1}";
})
811
;; Atomic subtract with no fetched result; mirror of atomic_add above,
;; with the inc/dec and add/sub roles swapped.
(define_insn "atomic_sub<mode>"
  [(set (match_operand:SWI 0 "memory_operand" "+m")
	(unspec_volatile:SWI
	  [(minus:SWI (match_dup 0)
		      (match_operand:SWI 1 "nonmemory_operand" "<r><i>"))
	   (match_operand:SI 2 "const_int_operand")]		;; model
	  UNSPECV_LOCK))
   (clobber (reg:CC FLAGS_REG))]
  ""
{
  /* Subtracting -1 is inc; subtracting +1 is dec.  */
  if (incdec_operand (operands[1], <MODE>mode))
    {
      if (operands[1] == constm1_rtx)
	return "lock{%;} %K2inc{<imodesuffix>}\t%0";

      gcc_assert (operands[1] == const1_rtx);
      return "lock{%;} %K2dec{<imodesuffix>}\t%0";
    }

  /* If the constant was negated in place, flip the operation too.  */
  if (x86_maybe_negate_const_int (&operands[1], <MODE>mode))
    return "lock{%;} %K2add{<imodesuffix>}\t{%1, %0|%0, %1}";

  return "lock{%;} %K2sub{<imodesuffix>}\t{%1, %0|%0, %1}";
})
838
;; Atomic and/or/xor with no fetched result, as a single locked insn.
(define_insn "atomic_<logic><mode>"
  [(set (match_operand:SWI 0 "memory_operand" "+m")
	(unspec_volatile:SWI
	  [(any_logic:SWI (match_dup 0)
			  (match_operand:SWI 1 "nonmemory_operand" "<r><i>"))
	   (match_operand:SI 2 "const_int_operand")]		;; model
	  UNSPECV_LOCK))
   (clobber (reg:CC FLAGS_REG))]
  ""
  "lock{%;} %K2<logic>{<imodesuffix>}\t{%1, %0|%0, %1}")
849
;; Atomic bit-test-and-set via lock bts.  Operand 4 selects the result
;; form: nonzero means the caller only tests the CF-derived bit, zero
;; means the bit value must be shifted back to its original position.
(define_expand "atomic_bit_test_and_set<mode>"
  [(match_operand:SWI248 0 "register_operand")
   (match_operand:SWI248 1 "memory_operand")
   (match_operand:SWI248 2 "nonmemory_operand")
   (match_operand:SI 3 "const_int_operand") ;; model
   (match_operand:SI 4 "const_int_operand")]
  ""
{
  emit_insn (gen_atomic_bit_test_and_set<mode>_1 (operands[1], operands[2],
						  operands[3]));
  /* bts left the original bit in CF (CCCmode); materialize it.  */
  rtx tem = gen_reg_rtx (QImode);
  ix86_expand_setcc (tem, EQ, gen_rtx_REG (CCCmode, FLAGS_REG), const0_rtx);
  rtx result = convert_modes (<MODE>mode, QImode, tem, 1);
  if (operands[4] == const0_rtx)
    result = expand_simple_binop (<MODE>mode, ASHIFT, result,
				  operands[2], operands[0], 0, OPTAB_WIDEN);
  if (result != operands[0])
    emit_move_insn (operands[0], result);
  DONE;
})

;; lock bts: sets bit 1 in memory, old bit value goes to CF.
(define_insn "atomic_bit_test_and_set<mode>_1"
  [(set (reg:CCC FLAGS_REG)
	(compare:CCC
	  (unspec_volatile:SWI248
	    [(match_operand:SWI248 0 "memory_operand" "+m")
	     (match_operand:SI 2 "const_int_operand")]		;; model
	    UNSPECV_XCHG)
	  (const_int 0)))
   (set (zero_extract:SWI248 (match_dup 0)
			     (const_int 1)
			     (match_operand:SWI248 1 "nonmemory_operand" "rN"))
	(const_int 1))]
  ""
  "lock{%;} %K2bts{<imodesuffix>}\t{%1, %0|%0, %1}")

;; Atomic bit-test-and-complement via lock btc; same result handling
;; as atomic_bit_test_and_set above.
(define_expand "atomic_bit_test_and_complement<mode>"
  [(match_operand:SWI248 0 "register_operand")
   (match_operand:SWI248 1 "memory_operand")
   (match_operand:SWI248 2 "nonmemory_operand")
   (match_operand:SI 3 "const_int_operand") ;; model
   (match_operand:SI 4 "const_int_operand")]
  ""
{
  emit_insn (gen_atomic_bit_test_and_complement<mode>_1 (operands[1],
							 operands[2],
							 operands[3]));
  rtx tem = gen_reg_rtx (QImode);
  ix86_expand_setcc (tem, EQ, gen_rtx_REG (CCCmode, FLAGS_REG), const0_rtx);
  rtx result = convert_modes (<MODE>mode, QImode, tem, 1);
  if (operands[4] == const0_rtx)
    result = expand_simple_binop (<MODE>mode, ASHIFT, result,
				  operands[2], operands[0], 0, OPTAB_WIDEN);
  if (result != operands[0])
    emit_move_insn (operands[0], result);
  DONE;
})

;; lock btc: complements the bit in memory, old bit value goes to CF.
(define_insn "atomic_bit_test_and_complement<mode>_1"
  [(set (reg:CCC FLAGS_REG)
	(compare:CCC
	  (unspec_volatile:SWI248
	    [(match_operand:SWI248 0 "memory_operand" "+m")
	     (match_operand:SI 2 "const_int_operand")]		;; model
	    UNSPECV_XCHG)
	  (const_int 0)))
   (set (zero_extract:SWI248 (match_dup 0)
			     (const_int 1)
			     (match_operand:SWI248 1 "nonmemory_operand" "rN"))
	(not:SWI248 (zero_extract:SWI248 (match_dup 0)
					 (const_int 1)
					 (match_dup 1))))]
  ""
  "lock{%;} %K2btc{<imodesuffix>}\t{%1, %0|%0, %1}")
924
;; Atomic bit-test-and-reset via lock btr; same result handling as
;; atomic_bit_test_and_set above.
(define_expand "atomic_bit_test_and_reset<mode>"
  [(match_operand:SWI248 0 "register_operand")
   (match_operand:SWI248 1 "memory_operand")
   (match_operand:SWI248 2 "nonmemory_operand")
   (match_operand:SI 3 "const_int_operand") ;; model
   (match_operand:SI 4 "const_int_operand")]
  ""
{
  emit_insn (gen_atomic_bit_test_and_reset<mode>_1 (operands[1], operands[2],
						    operands[3]));
  /* btr left the original bit in CF (CCCmode); materialize it.  */
  rtx tem = gen_reg_rtx (QImode);
  ix86_expand_setcc (tem, EQ, gen_rtx_REG (CCCmode, FLAGS_REG), const0_rtx);
  rtx result = convert_modes (<MODE>mode, QImode, tem, 1);
  if (operands[4] == const0_rtx)
    result = expand_simple_binop (<MODE>mode, ASHIFT, result,
				  operands[2], operands[0], 0, OPTAB_WIDEN);
  if (result != operands[0])
    emit_move_insn (operands[0], result);
  DONE;
})

;; lock btr: clears the bit in memory, old bit value goes to CF.
(define_insn "atomic_bit_test_and_reset<mode>_1"
  [(set (reg:CCC FLAGS_REG)
	(compare:CCC
	  (unspec_volatile:SWI248
	    [(match_operand:SWI248 0 "memory_operand" "+m")
	     (match_operand:SI 2 "const_int_operand")]		;; model
	    UNSPECV_XCHG)
	  (const_int 0)))
   (set (zero_extract:SWI248 (match_dup 0)
			     (const_int 1)
			     (match_operand:SWI248 1 "nonmemory_operand" "rN"))
	(const_int 0))]
  ""
  "lock{%;} %K2btr{<imodesuffix>}\t{%1, %0|%0, %1}")
960
;; Atomic add/sub whose result is only compared against zero: the
;; comparison condition (operand 4) is read straight from the flags the
;; locked arithmetic sets.  CCGOCmode does not represent GT/LE, so
;; those conditions FAIL and fall back to the generic expansion.
(define_expand "atomic_<plusminus_mnemonic>_fetch_cmp_0<mode>"
  [(match_operand:QI 0 "register_operand")
   (plusminus:SWI (match_operand:SWI 1 "memory_operand")
		  (match_operand:SWI 2 "nonmemory_operand"))
   (match_operand:SI 3 "const_int_operand") ;; model
   (match_operand:SI 4 "const_int_operand")]
  ""
{
  if (INTVAL (operands[4]) == GT || INTVAL (operands[4]) == LE)
    FAIL;
  emit_insn (gen_atomic_<plusminus_mnemonic>_fetch_cmp_0<mode>_1 (operands[1],
								  operands[2],
								  operands[3]));
  ix86_expand_setcc (operands[0], (enum rtx_code) INTVAL (operands[4]),
		     gen_rtx_REG (CCGOCmode, FLAGS_REG), const0_rtx);
  DONE;
})
978
;; Locked add whose flags output models the new value compared with 0.
(define_insn "atomic_add_fetch_cmp_0<mode>_1"
  [(set (reg:CCGOC FLAGS_REG)
	(compare:CCGOC
	  (plus:SWI
	    (unspec_volatile:SWI
	      [(match_operand:SWI 0 "memory_operand" "+m")
	       (match_operand:SI 2 "const_int_operand")]	;; model
	      UNSPECV_XCHG)
	    (match_operand:SWI 1 "nonmemory_operand" "<r><i>"))
	  (const_int 0)))
   (set (match_dup 0)
	(plus:SWI (match_dup 0) (match_dup 1)))]
  ""
{
  /* +1/-1 become inc/dec, which have no immediate byte.  */
  if (incdec_operand (operands[1], <MODE>mode))
    {
      if (operands[1] == const1_rtx)
	return "lock{%;} %K2inc{<imodesuffix>}\t%0";
      else
	return "lock{%;} %K2dec{<imodesuffix>}\t%0";
    }

  /* x86_maybe_negate_const_int negates operands[1] in place when the
     negated immediate is preferable; emit sub in that case.  */
  if (x86_maybe_negate_const_int (&operands[1], <MODE>mode))
    return "lock{%;} %K2sub{<imodesuffix>}\t{%1, %0|%0, %1}";

  return "lock{%;} %K2add{<imodesuffix>}\t{%1, %0|%0, %1}";
})
1006
;; Locked subtract whose flags output models the new value compared
;; with 0; mirror of atomic_add_fetch_cmp_0 above with inc/dec and
;; add/sub swapped.
(define_insn "atomic_sub_fetch_cmp_0<mode>_1"
  [(set (reg:CCGOC FLAGS_REG)
	(compare:CCGOC
	  (minus:SWI
	    (unspec_volatile:SWI
	      [(match_operand:SWI 0 "memory_operand" "+m")
	       (match_operand:SI 2 "const_int_operand")]	;; model
	      UNSPECV_XCHG)
	    (match_operand:SWI 1 "nonmemory_operand" "<r><i>"))
	  (const_int 0)))
   (set (match_dup 0)
	(minus:SWI (match_dup 0) (match_dup 1)))]
  ""
{
  /* Subtracting +1 is dec; subtracting -1 is inc.  Written with a
     positive test for consistency with the sibling patterns.  */
  if (incdec_operand (operands[1], <MODE>mode))
    {
      if (operands[1] == const1_rtx)
	return "lock{%;} %K2dec{<imodesuffix>}\t%0";
      else
	return "lock{%;} %K2inc{<imodesuffix>}\t%0";
    }

  /* If the constant was negated in place, flip the operation too.  */
  if (x86_maybe_negate_const_int (&operands[1], <MODE>mode))
    return "lock{%;} %K2add{<imodesuffix>}\t{%1, %0|%0, %1}";

  return "lock{%;} %K2sub{<imodesuffix>}\t{%1, %0|%0, %1}";
})
1034
;; Atomic and/or/xor whose result is only compared against zero; like
;; the plusminus variant above, but logic ops set CCNOmode flags which
;; support all the comparison codes, so no FAIL case is needed.
(define_expand "atomic_<logic>_fetch_cmp_0<mode>"
  [(match_operand:QI 0 "register_operand")
   (any_logic:SWI (match_operand:SWI 1 "memory_operand")
		  (match_operand:SWI 2 "nonmemory_operand"))
   (match_operand:SI 3 "const_int_operand") ;; model
   (match_operand:SI 4 "const_int_operand")]
  ""
{
  emit_insn (gen_atomic_<logic>_fetch_cmp_0<mode>_1 (operands[1], operands[2],
						     operands[3]));
  ix86_expand_setcc (operands[0], (enum rtx_code) INTVAL (operands[4]),
		     gen_rtx_REG (CCNOmode, FLAGS_REG), const0_rtx);
  DONE;
})

;; Locked and/or/xor whose flags output models the new value compared
;; with 0.
(define_insn "atomic_<logic>_fetch_cmp_0<mode>_1"
  [(set (reg:CCNO FLAGS_REG)
	(compare:CCNO
	  (any_logic:SWI
	    (unspec_volatile:SWI
	      [(match_operand:SWI 0 "memory_operand" "+m")
	       (match_operand:SI 2 "const_int_operand")]	;; model
	      UNSPECV_XCHG)
	    (match_operand:SWI 1 "nonmemory_operand" "<r><i>"))
	  (const_int 0)))
   (set (match_dup 0)
	(any_logic:SWI (match_dup 0) (match_dup 1)))]
  ""
  "lock{%;} %K2<logic>{<imodesuffix>}\t{%1, %0|%0, %1}")
1064