1b8e80941Smrg#
2b8e80941Smrg# Copyright (C) 2014 Intel Corporation
3b8e80941Smrg#
4b8e80941Smrg# Permission is hereby granted, free of charge, to any person obtaining a
5b8e80941Smrg# copy of this software and associated documentation files (the "Software"),
6b8e80941Smrg# to deal in the Software without restriction, including without limitation
7b8e80941Smrg# the rights to use, copy, modify, merge, publish, distribute, sublicense,
8b8e80941Smrg# and/or sell copies of the Software, and to permit persons to whom the
9b8e80941Smrg# Software is furnished to do so, subject to the following conditions:
10b8e80941Smrg#
11b8e80941Smrg# The above copyright notice and this permission notice (including the next
12b8e80941Smrg# paragraph) shall be included in all copies or substantial portions of the
13b8e80941Smrg# Software.
14b8e80941Smrg#
15b8e80941Smrg# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16b8e80941Smrg# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17b8e80941Smrg# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18b8e80941Smrg# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19b8e80941Smrg# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20b8e80941Smrg# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21b8e80941Smrg# IN THE SOFTWARE.
22b8e80941Smrg#
23b8e80941Smrg# Authors:
24b8e80941Smrg#    Jason Ekstrand (jason@jlekstrand.net)
25b8e80941Smrg
26b8e80941Smrgfrom __future__ import print_function
27b8e80941Smrg
28b8e80941Smrgfrom collections import OrderedDict
29b8e80941Smrgimport nir_algebraic
30b8e80941Smrgfrom nir_opcodes import type_sizes
31b8e80941Smrgimport itertools
32b8e80941Smrg
# Convenience variables: single-letter wildcard names used throughout the
# search/replace patterns below.  Each matches any value and binds it to the
# corresponding name in the replacement expression.
# (Fixed: stray VCS-annotate prefixes had been fused onto these lines,
# making the assignments syntactically invalid.)
a, b, c, d, e = 'a', 'b', 'c', 'd', 'e'
39b8e80941Smrg
40b8e80941Smrg# Written in the form (<search>, <replace>) where <search> is an expression
41b8e80941Smrg# and <replace> is either an expression or a value.  An expression is
42b8e80941Smrg# defined as a tuple of the form ([~]<op>, <src0>, <src1>, <src2>, <src3>)
43b8e80941Smrg# where each source is either an expression or a value.  A value can be
44b8e80941Smrg# either a numeric constant or a string representing a variable name.
45b8e80941Smrg#
46b8e80941Smrg# If the opcode in a search expression is prefixed by a '~' character, this
47b8e80941Smrg# indicates that the operation is inexact.  Such operations will only get
48b8e80941Smrg# applied to SSA values that do not have the exact bit set.  This should be
49b8e80941Smrg# used by any optimizations that are not bit-for-bit exact.  It should not,
50b8e80941Smrg# however, be used for backend-requested lowering operations as those need to
51b8e80941Smrg# happen regardless of precision.
52b8e80941Smrg#
53b8e80941Smrg# Variable names are specified as "[#]name[@type][(cond)]" where "#" indicates
54b8e80941Smrg# that the given variable will only match constants and the type indicates that
55b8e80941Smrg# the given variable will only match values from ALU instructions with the
56b8e80941Smrg# given output type, and (cond) specifies an additional condition function
57b8e80941Smrg# (see nir_search_helpers.h).
58b8e80941Smrg#
59b8e80941Smrg# For constants, you have to be careful to make sure that it is the right
60b8e80941Smrg# type because python is unaware of the source and destination types of the
61b8e80941Smrg# opcodes.
62b8e80941Smrg#
63b8e80941Smrg# All expression types can have a bit-size specified.  For opcodes, this
64b8e80941Smrg# looks like "op@32", for variables it is "a@32" or "a@uint32" to specify a
65b8e80941Smrg# type and size.  In the search half of the expression this indicates that it
66b8e80941Smrg# should only match that particular bit-size.  In the replace half of the
67b8e80941Smrg# expression this indicates that the constructed value should have that
68b8e80941Smrg# bit-size.
69b8e80941Smrg
70b8e80941Smrgoptimizations = [
71b8e80941Smrg
72b8e80941Smrg   (('imul', a, '#b@32(is_pos_power_of_two)'), ('ishl', a, ('find_lsb', b))),
73b8e80941Smrg   (('imul', a, '#b@32(is_neg_power_of_two)'), ('ineg', ('ishl', a, ('find_lsb', ('iabs', b))))),
74b8e80941Smrg   (('unpack_64_2x32_split_x', ('imul_2x32_64(is_used_once)', a, b)), ('imul', a, b)),
75b8e80941Smrg   (('unpack_64_2x32_split_x', ('umul_2x32_64(is_used_once)', a, b)), ('imul', a, b)),
76b8e80941Smrg   (('imul_2x32_64', a, b), ('pack_64_2x32_split', ('imul', a, b), ('imul_high', a, b)), 'options->lower_mul_2x32_64'),
77b8e80941Smrg   (('umul_2x32_64', a, b), ('pack_64_2x32_split', ('imul', a, b), ('umul_high', a, b)), 'options->lower_mul_2x32_64'),
78b8e80941Smrg   (('udiv', a, 1), a),
79b8e80941Smrg   (('idiv', a, 1), a),
80b8e80941Smrg   (('umod', a, 1), 0),
81b8e80941Smrg   (('imod', a, 1), 0),
82b8e80941Smrg   (('udiv', a, '#b@32(is_pos_power_of_two)'), ('ushr', a, ('find_lsb', b))),
83b8e80941Smrg   (('idiv', a, '#b@32(is_pos_power_of_two)'), ('imul', ('isign', a), ('ushr', ('iabs', a), ('find_lsb', b))), 'options->lower_idiv'),
84b8e80941Smrg   (('idiv', a, '#b@32(is_neg_power_of_two)'), ('ineg', ('imul', ('isign', a), ('ushr', ('iabs', a), ('find_lsb', ('iabs', b))))), 'options->lower_idiv'),
85b8e80941Smrg   (('umod', a, '#b(is_pos_power_of_two)'),    ('iand', a, ('isub', b, 1))),
86b8e80941Smrg
87b8e80941Smrg   (('fneg', ('fneg', a)), a),
88b8e80941Smrg   (('ineg', ('ineg', a)), a),
89b8e80941Smrg   (('fabs', ('fabs', a)), ('fabs', a)),
90b8e80941Smrg   (('fabs', ('fneg', a)), ('fabs', a)),
91b8e80941Smrg   (('fabs', ('u2f', a)), ('u2f', a)),
92b8e80941Smrg   (('iabs', ('iabs', a)), ('iabs', a)),
93b8e80941Smrg   (('iabs', ('ineg', a)), ('iabs', a)),
94b8e80941Smrg   (('f2b', ('fneg', a)), ('f2b', a)),
95b8e80941Smrg   (('i2b', ('ineg', a)), ('i2b', a)),
96b8e80941Smrg   (('~fadd', a, 0.0), a),
97b8e80941Smrg   (('iadd', a, 0), a),
98b8e80941Smrg   (('usadd_4x8', a, 0), a),
99b8e80941Smrg   (('usadd_4x8', a, ~0), ~0),
100b8e80941Smrg   (('~fadd', ('fmul', a, b), ('fmul', a, c)), ('fmul', a, ('fadd', b, c))),
101b8e80941Smrg   (('iadd', ('imul', a, b), ('imul', a, c)), ('imul', a, ('iadd', b, c))),
102b8e80941Smrg   (('~fadd', ('fneg', a), a), 0.0),
103b8e80941Smrg   (('iadd', ('ineg', a), a), 0),
104b8e80941Smrg   (('iadd', ('ineg', a), ('iadd', a, b)), b),
105b8e80941Smrg   (('iadd', a, ('iadd', ('ineg', a), b)), b),
106b8e80941Smrg   (('~fadd', ('fneg', a), ('fadd', a, b)), b),
107b8e80941Smrg   (('~fadd', a, ('fadd', ('fneg', a), b)), b),
108b8e80941Smrg   (('~fmul', a, 0.0), 0.0),
109b8e80941Smrg   (('imul', a, 0), 0),
110b8e80941Smrg   (('umul_unorm_4x8', a, 0), 0),
111b8e80941Smrg   (('umul_unorm_4x8', a, ~0), a),
112b8e80941Smrg   (('fmul', a, 1.0), a),
113b8e80941Smrg   (('imul', a, 1), a),
114b8e80941Smrg   (('fmul', a, -1.0), ('fneg', a)),
115b8e80941Smrg   (('imul', a, -1), ('ineg', a)),
116b8e80941Smrg   # If a < 0: fsign(a)*a*a => -1*a*a => -a*a => abs(a)*a
117b8e80941Smrg   # If a > 0: fsign(a)*a*a => 1*a*a => a*a => abs(a)*a
118b8e80941Smrg   # If a == 0: fsign(a)*a*a => 0*0*0 => abs(0)*0
119b8e80941Smrg   (('fmul', ('fsign', a), ('fmul', a, a)), ('fmul', ('fabs', a), a)),
120b8e80941Smrg   (('fmul', ('fmul', ('fsign', a), a), a), ('fmul', ('fabs', a), a)),
121b8e80941Smrg   (('~ffma', 0.0, a, b), b),
122b8e80941Smrg   (('~ffma', a, 0.0, b), b),
123b8e80941Smrg   (('~ffma', a, b, 0.0), ('fmul', a, b)),
124b8e80941Smrg   (('ffma', a, 1.0, b), ('fadd', a, b)),
125b8e80941Smrg   (('ffma', 1.0, a, b), ('fadd', a, b)),
126b8e80941Smrg   (('~flrp', a, b, 0.0), a),
127b8e80941Smrg   (('~flrp', a, b, 1.0), b),
128b8e80941Smrg   (('~flrp', a, a, b), a),
129b8e80941Smrg   (('~flrp', 0.0, a, b), ('fmul', a, b)),
130b8e80941Smrg   (('~flrp', a, b, ('b2f', 'c@1')), ('bcsel', c, b, a), 'options->lower_flrp32'),
131b8e80941Smrg   (('~flrp', a, 0.0, c), ('fadd', ('fmul', ('fneg', a), c), a)),
132b8e80941Smrg   (('flrp@16', a, b, c), ('fadd', ('fmul', c, ('fsub', b, a)), a), 'options->lower_flrp16'),
133b8e80941Smrg   (('flrp@32', a, b, c), ('fadd', ('fmul', c, ('fsub', b, a)), a), 'options->lower_flrp32'),
134b8e80941Smrg   (('flrp@64', a, b, c), ('fadd', ('fmul', c, ('fsub', b, a)), a), 'options->lower_flrp64'),
135b8e80941Smrg   (('ftrunc', a), ('bcsel', ('flt', a, 0.0), ('fneg', ('ffloor', ('fabs', a))), ('ffloor', ('fabs', a))), 'options->lower_ftrunc'),
136b8e80941Smrg   (('ffloor', a), ('fsub', a, ('ffract', a)), 'options->lower_ffloor'),
137b8e80941Smrg   (('fadd', a, ('fneg', ('ffract', a))), ('ffloor', a), '!options->lower_ffloor'),
138b8e80941Smrg   (('ffract', a), ('fsub', a, ('ffloor', a)), 'options->lower_ffract'),
139b8e80941Smrg   (('fceil', a), ('fneg', ('ffloor', ('fneg', a))), 'options->lower_fceil'),
140b8e80941Smrg   (('~fadd', ('fmul', a, ('fadd', 1.0, ('fneg', ('b2f', 'c@1')))), ('fmul', b, ('b2f', c))), ('bcsel', c, b, a), 'options->lower_flrp32'),
141b8e80941Smrg   (('~fadd@32', ('fmul', a, ('fadd', 1.0, ('fneg',         c ))), ('fmul', b,         c )), ('flrp', a, b, c), '!options->lower_flrp32'),
142b8e80941Smrg   (('~fadd@64', ('fmul', a, ('fadd', 1.0, ('fneg',         c ))), ('fmul', b,         c )), ('flrp', a, b, c), '!options->lower_flrp64'),
143b8e80941Smrg   (('~fadd', a, ('fmul', ('b2f', 'c@1'), ('fadd', b, ('fneg', a)))), ('bcsel', c, b, a), 'options->lower_flrp32'),
144b8e80941Smrg   (('~fadd@32', a, ('fmul',         c , ('fadd', b, ('fneg', a)))), ('flrp', a, b, c), '!options->lower_flrp32'),
145b8e80941Smrg   (('~fadd@64', a, ('fmul',         c , ('fadd', b, ('fneg', a)))), ('flrp', a, b, c), '!options->lower_flrp64'),
146b8e80941Smrg   (('ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma'),
147b8e80941Smrg   (('~fadd', ('fmul', a, b), c), ('ffma', a, b, c), 'options->fuse_ffma'),
148b8e80941Smrg
149b8e80941Smrg   (('~fmul', ('fadd', ('iand', ('ineg', ('b2i32', 'a@bool')), ('fmul', b, c)), '#d'), '#e'),
150b8e80941Smrg    ('bcsel', a, ('fmul', ('fadd', ('fmul', b, c), d), e), ('fmul', d, e))),
151b8e80941Smrg
152b8e80941Smrg   (('fdot4', ('vec4', a, b,   c,   1.0), d), ('fdph',  ('vec3', a, b, c), d)),
153b8e80941Smrg   (('fdot4', ('vec4', a, 0.0, 0.0, 0.0), b), ('fmul', a, b)),
154b8e80941Smrg   (('fdot4', ('vec4', a, b,   0.0, 0.0), c), ('fdot2', ('vec2', a, b), c)),
155b8e80941Smrg   (('fdot4', ('vec4', a, b,   c,   0.0), d), ('fdot3', ('vec3', a, b, c), d)),
156b8e80941Smrg
157b8e80941Smrg   (('fdot3', ('vec3', a, 0.0, 0.0), b), ('fmul', a, b)),
158b8e80941Smrg   (('fdot3', ('vec3', a, b,   0.0), c), ('fdot2', ('vec2', a, b), c)),
159b8e80941Smrg
160b8e80941Smrg   # (a * #b + #c) << #d
161b8e80941Smrg   # ((a * #b) << #d) + (#c << #d)
162b8e80941Smrg   # (a * (#b << #d)) + (#c << #d)
163b8e80941Smrg   (('ishl', ('iadd', ('imul', a, '#b'), '#c'), '#d'),
164b8e80941Smrg    ('iadd', ('imul', a, ('ishl', b, d)), ('ishl', c, d))),
165b8e80941Smrg
166b8e80941Smrg   # (a * #b) << #c
167b8e80941Smrg   # a * (#b << #c)
168b8e80941Smrg   (('ishl', ('imul', a, '#b'), '#c'), ('imul', a, ('ishl', b, c))),
169b8e80941Smrg
170b8e80941Smrg   # Comparison simplifications
171b8e80941Smrg   (('~inot', ('flt', a, b)), ('fge', a, b)),
172b8e80941Smrg   (('~inot', ('fge', a, b)), ('flt', a, b)),
173b8e80941Smrg   (('~inot', ('feq', a, b)), ('fne', a, b)),
174b8e80941Smrg   (('~inot', ('fne', a, b)), ('feq', a, b)),
175b8e80941Smrg   (('inot', ('ilt', a, b)), ('ige', a, b)),
176b8e80941Smrg   (('inot', ('ult', a, b)), ('uge', a, b)),
177b8e80941Smrg   (('inot', ('ige', a, b)), ('ilt', a, b)),
178b8e80941Smrg   (('inot', ('uge', a, b)), ('ult', a, b)),
179b8e80941Smrg   (('inot', ('ieq', a, b)), ('ine', a, b)),
180b8e80941Smrg   (('inot', ('ine', a, b)), ('ieq', a, b)),
181b8e80941Smrg
182b8e80941Smrg   # 0.0 >= b2f(a)
183b8e80941Smrg   # b2f(a) <= 0.0
184b8e80941Smrg   # b2f(a) == 0.0 because b2f(a) can only be 0 or 1
185b8e80941Smrg   # inot(a)
186b8e80941Smrg   (('fge', 0.0, ('b2f', 'a@1')), ('inot', a)),
187b8e80941Smrg
188b8e80941Smrg   (('fge', ('fneg', ('b2f', 'a@1')), 0.0), ('inot', a)),
189b8e80941Smrg
190b8e80941Smrg   (('fne', ('fadd', ('b2f', 'a@1'), ('b2f', 'b@1')), 0.0), ('ior', a, b)),
191b8e80941Smrg   (('fne', ('fmax', ('b2f', 'a@1'), ('b2f', 'b@1')), 0.0), ('ior', a, b)),
192b8e80941Smrg   (('fne', ('bcsel', a, 1.0, ('b2f', 'b@1'))   , 0.0), ('ior', a, b)),
193b8e80941Smrg   (('fne', ('b2f', 'a@1'), ('fneg', ('b2f', 'b@1'))),      ('ior', a, b)),
194b8e80941Smrg   (('fne', ('fmul', ('b2f', 'a@1'), ('b2f', 'b@1')), 0.0), ('iand', a, b)),
195b8e80941Smrg   (('fne', ('fmin', ('b2f', 'a@1'), ('b2f', 'b@1')), 0.0), ('iand', a, b)),
196b8e80941Smrg   (('fne', ('bcsel', a, ('b2f', 'b@1'), 0.0)   , 0.0), ('iand', a, b)),
197b8e80941Smrg   (('fne', ('fadd', ('b2f', 'a@1'), ('fneg', ('b2f', 'b@1'))), 0.0), ('ixor', a, b)),
198b8e80941Smrg   (('fne',          ('b2f', 'a@1') ,          ('b2f', 'b@1') ),      ('ixor', a, b)),
199b8e80941Smrg   (('fne', ('fneg', ('b2f', 'a@1')), ('fneg', ('b2f', 'b@1'))),      ('ixor', a, b)),
200b8e80941Smrg   (('feq', ('fadd', ('b2f', 'a@1'), ('b2f', 'b@1')), 0.0), ('inot', ('ior', a, b))),
201b8e80941Smrg   (('feq', ('fmax', ('b2f', 'a@1'), ('b2f', 'b@1')), 0.0), ('inot', ('ior', a, b))),
202b8e80941Smrg   (('feq', ('bcsel', a, 1.0, ('b2f', 'b@1'))   , 0.0), ('inot', ('ior', a, b))),
203b8e80941Smrg   (('feq', ('b2f', 'a@1'), ('fneg', ('b2f', 'b@1'))),      ('inot', ('ior', a, b))),
204b8e80941Smrg   (('feq', ('fmul', ('b2f', 'a@1'), ('b2f', 'b@1')), 0.0), ('inot', ('iand', a, b))),
205b8e80941Smrg   (('feq', ('fmin', ('b2f', 'a@1'), ('b2f', 'b@1')), 0.0), ('inot', ('iand', a, b))),
206b8e80941Smrg   (('feq', ('bcsel', a, ('b2f', 'b@1'), 0.0)   , 0.0), ('inot', ('iand', a, b))),
207b8e80941Smrg   (('feq', ('fadd', ('b2f', 'a@1'), ('fneg', ('b2f', 'b@1'))), 0.0), ('ieq', a, b)),
208b8e80941Smrg   (('feq',          ('b2f', 'a@1') ,          ('b2f', 'b@1') ),      ('ieq', a, b)),
209b8e80941Smrg   (('feq', ('fneg', ('b2f', 'a@1')), ('fneg', ('b2f', 'b@1'))),      ('ieq', a, b)),
210b8e80941Smrg
211b8e80941Smrg   # -(b2f(a) + b2f(b)) < 0
212b8e80941Smrg   # 0 < b2f(a) + b2f(b)
213b8e80941Smrg   # 0 != b2f(a) + b2f(b)       b2f must be 0 or 1, so the sum is non-negative
214b8e80941Smrg   # a || b
215b8e80941Smrg   (('flt', ('fneg', ('fadd', ('b2f', 'a@1'), ('b2f', 'b@1'))), 0.0), ('ior', a, b)),
216b8e80941Smrg   (('flt', 0.0, ('fadd', ('b2f', 'a@1'), ('b2f', 'b@1'))), ('ior', a, b)),
217b8e80941Smrg
218b8e80941Smrg   # -(b2f(a) + b2f(b)) >= 0
219b8e80941Smrg   # 0 >= b2f(a) + b2f(b)
220b8e80941Smrg   # 0 == b2f(a) + b2f(b)       b2f must be 0 or 1, so the sum is non-negative
221b8e80941Smrg   # !(a || b)
222b8e80941Smrg   (('fge', ('fneg', ('fadd', ('b2f', 'a@1'), ('b2f', 'b@1'))), 0.0), ('inot', ('ior', a, b))),
223b8e80941Smrg   (('fge', 0.0, ('fadd', ('b2f', 'a@1'), ('b2f', 'b@1'))), ('inot', ('ior', a, b))),
224b8e80941Smrg
225b8e80941Smrg   (('flt', a, ('fneg', a)), ('flt', a, 0.0)),
226b8e80941Smrg   (('fge', a, ('fneg', a)), ('fge', a, 0.0)),
227b8e80941Smrg
228b8e80941Smrg   # Some optimizations (below) convert things like (a < b || c < b) into
229b8e80941Smrg   # (min(a, c) < b).  However, this interfers with the previous optimizations
230b8e80941Smrg   # that try to remove comparisons with negated sums of b2f.  This just
231b8e80941Smrg   # breaks that apart.
232b8e80941Smrg   (('flt', ('fmin', c, ('fneg', ('fadd', ('b2f', 'a@1'), ('b2f', 'b@1')))), 0.0),
233b8e80941Smrg    ('ior', ('flt', c, 0.0), ('ior', a, b))),
234b8e80941Smrg
235b8e80941Smrg   (('~flt', ('fadd', a, b), a), ('flt', b, 0.0)),
236b8e80941Smrg   (('~fge', ('fadd', a, b), a), ('fge', b, 0.0)),
237b8e80941Smrg   (('~feq', ('fadd', a, b), a), ('feq', b, 0.0)),
238b8e80941Smrg   (('~fne', ('fadd', a, b), a), ('fne', b, 0.0)),
239b8e80941Smrg
240b8e80941Smrg   # Cannot remove the addition from ilt or ige due to overflow.
241b8e80941Smrg   (('ieq', ('iadd', a, b), a), ('ieq', b, 0)),
242b8e80941Smrg   (('ine', ('iadd', a, b), a), ('ine', b, 0)),
243b8e80941Smrg
244b8e80941Smrg   # fmin(-b2f(a), b) >= 0.0
245b8e80941Smrg   # -b2f(a) >= 0.0 && b >= 0.0
246b8e80941Smrg   # -b2f(a) == 0.0 && b >= 0.0    -b2f can only be 0 or -1, never >0
247b8e80941Smrg   # b2f(a) == 0.0 && b >= 0.0
248b8e80941Smrg   # a == False && b >= 0.0
249b8e80941Smrg   # !a && b >= 0.0
250b8e80941Smrg   #
251b8e80941Smrg   # The fge in the second replacement is not a typo.  I leave the proof that
252b8e80941Smrg   # "fmin(-b2f(a), b) >= 0 <=> fmin(-b2f(a), b) == 0" as an exercise for the
253b8e80941Smrg   # reader.
254b8e80941Smrg   (('fge', ('fmin', ('fneg', ('b2f', 'a@1')), 'b@1'), 0.0), ('iand', ('inot', a), ('fge', b, 0.0))),
255b8e80941Smrg   (('feq', ('fmin', ('fneg', ('b2f', 'a@1')), 'b@1'), 0.0), ('iand', ('inot', a), ('fge', b, 0.0))),
256b8e80941Smrg
257b8e80941Smrg   (('feq', ('b2f', 'a@1'), 0.0), ('inot', a)),
258b8e80941Smrg   (('fne', ('b2f', 'a@1'), 0.0), a),
259b8e80941Smrg   (('ieq', ('b2i', 'a@1'), 0),   ('inot', a)),
260b8e80941Smrg   (('ine', ('b2i', 'a@1'), 0),   a),
261b8e80941Smrg
262b8e80941Smrg   (('fne', ('u2f', a), 0.0), ('ine', a, 0)),
263b8e80941Smrg   (('feq', ('u2f', a), 0.0), ('ieq', a, 0)),
264b8e80941Smrg   (('fge', ('u2f', a), 0.0), True),
265b8e80941Smrg   (('fge', 0.0, ('u2f', a)), ('uge', 0, a)),    # ieq instead?
266b8e80941Smrg   (('flt', ('u2f', a), 0.0), False),
267b8e80941Smrg   (('flt', 0.0, ('u2f', a)), ('ult', 0, a)),    # ine instead?
268b8e80941Smrg   (('fne', ('i2f', a), 0.0), ('ine', a, 0)),
269b8e80941Smrg   (('feq', ('i2f', a), 0.0), ('ieq', a, 0)),
270b8e80941Smrg   (('fge', ('i2f', a), 0.0), ('ige', a, 0)),
271b8e80941Smrg   (('fge', 0.0, ('i2f', a)), ('ige', 0, a)),
272b8e80941Smrg   (('flt', ('i2f', a), 0.0), ('ilt', a, 0)),
273b8e80941Smrg   (('flt', 0.0, ('i2f', a)), ('ilt', 0, a)),
274b8e80941Smrg
275b8e80941Smrg   # 0.0 < fabs(a)
276b8e80941Smrg   # fabs(a) > 0.0
277b8e80941Smrg   # fabs(a) != 0.0 because fabs(a) must be >= 0
278b8e80941Smrg   # a != 0.0
279b8e80941Smrg   (('~flt', 0.0, ('fabs', a)), ('fne', a, 0.0)),
280b8e80941Smrg
281b8e80941Smrg   # -fabs(a) < 0.0
282b8e80941Smrg   # fabs(a) > 0.0
283b8e80941Smrg   (('~flt', ('fneg', ('fabs', a)), 0.0), ('fne', a, 0.0)),
284b8e80941Smrg
285b8e80941Smrg   # 0.0 >= fabs(a)
286b8e80941Smrg   # 0.0 == fabs(a)   because fabs(a) must be >= 0
287b8e80941Smrg   # 0.0 == a
288b8e80941Smrg   (('fge', 0.0, ('fabs', a)), ('feq', a, 0.0)),
289b8e80941Smrg
290b8e80941Smrg   # -fabs(a) >= 0.0
291b8e80941Smrg   # 0.0 >= fabs(a)
292b8e80941Smrg   (('fge', ('fneg', ('fabs', a)), 0.0), ('feq', a, 0.0)),
293b8e80941Smrg
294b8e80941Smrg   (('fmax',                        ('b2f(is_used_once)', 'a@1'),           ('b2f', 'b@1')),           ('b2f', ('ior', a, b))),
295b8e80941Smrg   (('fmax', ('fneg(is_used_once)', ('b2f(is_used_once)', 'a@1')), ('fneg', ('b2f', 'b@1'))), ('fneg', ('b2f', ('ior', a, b)))),
296b8e80941Smrg   (('fmin',                        ('b2f(is_used_once)', 'a@1'),           ('b2f', 'b@1')),           ('b2f', ('iand', a, b))),
297b8e80941Smrg   (('fmin', ('fneg(is_used_once)', ('b2f(is_used_once)', 'a@1')), ('fneg', ('b2f', 'b@1'))), ('fneg', ('b2f', ('iand', a, b)))),
298b8e80941Smrg
299b8e80941Smrg   # fmin(b2f(a), b)
300b8e80941Smrg   # bcsel(a, fmin(b2f(a), b), fmin(b2f(a), b))
301b8e80941Smrg   # bcsel(a, fmin(b2f(True), b), fmin(b2f(False), b))
302b8e80941Smrg   # bcsel(a, fmin(1.0, b), fmin(0.0, b))
303b8e80941Smrg   #
304b8e80941Smrg   # Since b is a constant, constant folding will eliminate the fmin and the
305b8e80941Smrg   # fmax.  If b is > 1.0, the bcsel will be replaced with a b2f.
306b8e80941Smrg   (('fmin', ('b2f', 'a@1'), '#b'), ('bcsel', a, ('fmin', b, 1.0), ('fmin', b, 0.0))),
307b8e80941Smrg
308b8e80941Smrg   (('flt', ('fadd(is_used_once)', a, ('fneg', b)), 0.0), ('flt', a, b)),
309b8e80941Smrg
310b8e80941Smrg   (('fge', ('fneg', ('fabs', a)), 0.0), ('feq', a, 0.0)),
311b8e80941Smrg   (('~bcsel', ('flt', b, a), b, a), ('fmin', a, b)),
312b8e80941Smrg   (('~bcsel', ('flt', a, b), b, a), ('fmax', a, b)),
313b8e80941Smrg   (('~bcsel', ('fge', a, b), b, a), ('fmin', a, b)),
314b8e80941Smrg   (('~bcsel', ('fge', b, a), b, a), ('fmax', a, b)),
315b8e80941Smrg   (('bcsel', ('i2b', a), b, c), ('bcsel', ('ine', a, 0), b, c)),
316b8e80941Smrg   (('bcsel', ('inot', a), b, c), ('bcsel', a, c, b)),
317b8e80941Smrg   (('bcsel', a, ('bcsel', a, b, c), d), ('bcsel', a, b, d)),
318b8e80941Smrg   (('bcsel', a, b, ('bcsel', a, c, d)), ('bcsel', a, b, d)),
319b8e80941Smrg   (('bcsel', a, ('bcsel', b, c, d), ('bcsel(is_used_once)', b, c, 'e')), ('bcsel', b, c, ('bcsel', a, d, 'e'))),
320b8e80941Smrg   (('bcsel', a, ('bcsel(is_used_once)', b, c, d), ('bcsel', b, c, 'e')), ('bcsel', b, c, ('bcsel', a, d, 'e'))),
321b8e80941Smrg   (('bcsel', a, ('bcsel', b, c, d), ('bcsel(is_used_once)', b, 'e', d)), ('bcsel', b, ('bcsel', a, c, 'e'), d)),
322b8e80941Smrg   (('bcsel', a, ('bcsel(is_used_once)', b, c, d), ('bcsel', b, 'e', d)), ('bcsel', b, ('bcsel', a, c, 'e'), d)),
323b8e80941Smrg   (('bcsel', a, True, b), ('ior', a, b)),
324b8e80941Smrg   (('bcsel', a, a, b), ('ior', a, b)),
325b8e80941Smrg   (('bcsel', a, b, False), ('iand', a, b)),
326b8e80941Smrg   (('bcsel', a, b, a), ('iand', a, b)),
327b8e80941Smrg   (('fmin', a, a), a),
328b8e80941Smrg   (('fmax', a, a), a),
329b8e80941Smrg   (('imin', a, a), a),
330b8e80941Smrg   (('imax', a, a), a),
331b8e80941Smrg   (('umin', a, a), a),
332b8e80941Smrg   (('umax', a, a), a),
333b8e80941Smrg   (('fmax', ('fmax', a, b), b), ('fmax', a, b)),
334b8e80941Smrg   (('umax', ('umax', a, b), b), ('umax', a, b)),
335b8e80941Smrg   (('imax', ('imax', a, b), b), ('imax', a, b)),
336b8e80941Smrg   (('fmin', ('fmin', a, b), b), ('fmin', a, b)),
337b8e80941Smrg   (('umin', ('umin', a, b), b), ('umin', a, b)),
338b8e80941Smrg   (('imin', ('imin', a, b), b), ('imin', a, b)),
339b8e80941Smrg   (('fmax', a, ('fneg', a)), ('fabs', a)),
340b8e80941Smrg   (('imax', a, ('ineg', a)), ('iabs', a)),
341b8e80941Smrg   (('fmin', a, ('fneg', a)), ('fneg', ('fabs', a))),
342b8e80941Smrg   (('imin', a, ('ineg', a)), ('ineg', ('iabs', a))),
343b8e80941Smrg   (('fmin', a, ('fneg', ('fabs', a))), ('fneg', ('fabs', a))),
344b8e80941Smrg   (('imin', a, ('ineg', ('iabs', a))), ('ineg', ('iabs', a))),
345b8e80941Smrg   (('fmin', a, ('fabs', a)), a),
346b8e80941Smrg   (('imin', a, ('iabs', a)), a),
347b8e80941Smrg   (('fmax', a, ('fneg', ('fabs', a))), a),
348b8e80941Smrg   (('imax', a, ('ineg', ('iabs', a))), a),
349b8e80941Smrg   (('fmax', a, ('fabs', a)), ('fabs', a)),
350b8e80941Smrg   (('imax', a, ('iabs', a)), ('iabs', a)),
351b8e80941Smrg   (('fmax', a, ('fneg', a)), ('fabs', a)),
352b8e80941Smrg   (('imax', a, ('ineg', a)), ('iabs', a)),
353b8e80941Smrg   (('~fmin', ('fmax', a, 0.0), 1.0), ('fsat', a), '!options->lower_fsat'),
354b8e80941Smrg   (('~fmax', ('fmin', a, 1.0), 0.0), ('fsat', a), '!options->lower_fsat'),
355b8e80941Smrg   (('fsat', ('fsign', a)), ('b2f', ('flt', 0.0, a))),
356b8e80941Smrg   (('fsat', ('b2f', a)), ('b2f', a)),
357b8e80941Smrg   (('fsat', a), ('fmin', ('fmax', a, 0.0), 1.0), 'options->lower_fsat'),
358b8e80941Smrg   (('fsat', ('fsat', a)), ('fsat', a)),
359b8e80941Smrg   (('fmin', ('fmax', ('fmin', ('fmax', a, b), c), b), c), ('fmin', ('fmax', a, b), c)),
360b8e80941Smrg   (('imin', ('imax', ('imin', ('imax', a, b), c), b), c), ('imin', ('imax', a, b), c)),
361b8e80941Smrg   (('umin', ('umax', ('umin', ('umax', a, b), c), b), c), ('umin', ('umax', a, b), c)),
362b8e80941Smrg   (('fmax', ('fsat', a), '#b@32(is_zero_to_one)'), ('fsat', ('fmax', a, b))),
363b8e80941Smrg   (('fmin', ('fsat', a), '#b@32(is_zero_to_one)'), ('fsat', ('fmin', a, b))),
364b8e80941Smrg   (('extract_u8', ('imin', ('imax', a, 0), 0xff), 0), ('imin', ('imax', a, 0), 0xff)),
365b8e80941Smrg   (('~ior', ('flt(is_used_once)', a, b), ('flt', a, c)), ('flt', a, ('fmax', b, c))),
366b8e80941Smrg   (('~ior', ('flt(is_used_once)', a, c), ('flt', b, c)), ('flt', ('fmin', a, b), c)),
367b8e80941Smrg   (('~ior', ('fge(is_used_once)', a, b), ('fge', a, c)), ('fge', a, ('fmin', b, c))),
368b8e80941Smrg   (('~ior', ('fge(is_used_once)', a, c), ('fge', b, c)), ('fge', ('fmax', a, b), c)),
369b8e80941Smrg   (('~ior', ('flt', a, '#b'), ('flt', a, '#c')), ('flt', a, ('fmax', b, c))),
370b8e80941Smrg   (('~ior', ('flt', '#a', c), ('flt', '#b', c)), ('flt', ('fmin', a, b), c)),
371b8e80941Smrg   (('~ior', ('fge', a, '#b'), ('fge', a, '#c')), ('fge', a, ('fmin', b, c))),
372b8e80941Smrg   (('~ior', ('fge', '#a', c), ('fge', '#b', c)), ('fge', ('fmax', a, b), c)),
373b8e80941Smrg   (('~iand', ('flt(is_used_once)', a, b), ('flt', a, c)), ('flt', a, ('fmin', b, c))),
374b8e80941Smrg   (('~iand', ('flt(is_used_once)', a, c), ('flt', b, c)), ('flt', ('fmax', a, b), c)),
375b8e80941Smrg   (('~iand', ('fge(is_used_once)', a, b), ('fge', a, c)), ('fge', a, ('fmax', b, c))),
376b8e80941Smrg   (('~iand', ('fge(is_used_once)', a, c), ('fge', b, c)), ('fge', ('fmin', a, b), c)),
377b8e80941Smrg   (('~iand', ('flt', a, '#b'), ('flt', a, '#c')), ('flt', a, ('fmin', b, c))),
378b8e80941Smrg   (('~iand', ('flt', '#a', c), ('flt', '#b', c)), ('flt', ('fmax', a, b), c)),
379b8e80941Smrg   (('~iand', ('fge', a, '#b'), ('fge', a, '#c')), ('fge', a, ('fmax', b, c))),
380b8e80941Smrg   (('~iand', ('fge', '#a', c), ('fge', '#b', c)), ('fge', ('fmin', a, b), c)),
381b8e80941Smrg
382b8e80941Smrg   (('ior', ('ilt(is_used_once)', a, b), ('ilt', a, c)), ('ilt', a, ('imax', b, c))),
383b8e80941Smrg   (('ior', ('ilt(is_used_once)', a, c), ('ilt', b, c)), ('ilt', ('imin', a, b), c)),
384b8e80941Smrg   (('ior', ('ige(is_used_once)', a, b), ('ige', a, c)), ('ige', a, ('imin', b, c))),
385b8e80941Smrg   (('ior', ('ige(is_used_once)', a, c), ('ige', b, c)), ('ige', ('imax', a, b), c)),
386b8e80941Smrg   (('ior', ('ult(is_used_once)', a, b), ('ult', a, c)), ('ult', a, ('umax', b, c))),
387b8e80941Smrg   (('ior', ('ult(is_used_once)', a, c), ('ult', b, c)), ('ult', ('umin', a, b), c)),
388b8e80941Smrg   (('ior', ('uge(is_used_once)', a, b), ('uge', a, c)), ('uge', a, ('umin', b, c))),
389b8e80941Smrg   (('ior', ('uge(is_used_once)', a, c), ('uge', b, c)), ('uge', ('umax', a, b), c)),
390b8e80941Smrg   (('iand', ('ilt(is_used_once)', a, b), ('ilt', a, c)), ('ilt', a, ('imin', b, c))),
391b8e80941Smrg   (('iand', ('ilt(is_used_once)', a, c), ('ilt', b, c)), ('ilt', ('imax', a, b), c)),
392b8e80941Smrg   (('iand', ('ige(is_used_once)', a, b), ('ige', a, c)), ('ige', a, ('imax', b, c))),
393b8e80941Smrg   (('iand', ('ige(is_used_once)', a, c), ('ige', b, c)), ('ige', ('imin', a, b), c)),
394b8e80941Smrg   (('iand', ('ult(is_used_once)', a, b), ('ult', a, c)), ('ult', a, ('umin', b, c))),
395b8e80941Smrg   (('iand', ('ult(is_used_once)', a, c), ('ult', b, c)), ('ult', ('umax', a, b), c)),
396b8e80941Smrg   (('iand', ('uge(is_used_once)', a, b), ('uge', a, c)), ('uge', a, ('umax', b, c))),
397b8e80941Smrg   (('iand', ('uge(is_used_once)', a, c), ('uge', b, c)), ('uge', ('umin', a, b), c)),
398b8e80941Smrg
399b8e80941Smrg   # Common pattern like 'if (i == 0 || i == 1 || ...)'
400b8e80941Smrg   (('ior', ('ieq', a, 0), ('ieq', a, 1)), ('uge', 1, a)),
401b8e80941Smrg   (('ior', ('uge', 1, a), ('ieq', a, 2)), ('uge', 2, a)),
402b8e80941Smrg   (('ior', ('uge', 2, a), ('ieq', a, 3)), ('uge', 3, a)),
403b8e80941Smrg
404b8e80941Smrg   # The (i2f32, ...) part is an open-coded fsign.  When that is combined with
405b8e80941Smrg   # the bcsel, it's basically copysign(1.0, a).  There is no copysign in NIR,
406b8e80941Smrg   # so emit an open-coded version of that.
407b8e80941Smrg   (('bcsel@32', ('feq', a, 0.0), 1.0, ('i2f32', ('iadd', ('b2i32', ('flt', 0.0, 'a@32')), ('ineg', ('b2i32', ('flt', 'a@32', 0.0)))))),
408b8e80941Smrg    ('ior', 0x3f800000, ('iand', a, 0x80000000))),
409b8e80941Smrg
410b8e80941Smrg   (('ior', a, ('ieq', a, False)), True),
411b8e80941Smrg   (('ior', a, ('inot', a)), -1),
412b8e80941Smrg
413b8e80941Smrg   (('ine', ('ineg', ('b2i32', 'a@1')), ('ineg', ('b2i32', 'b@1'))), ('ine', a, b)),
414b8e80941Smrg   (('b2i32', ('ine', 'a@1', 'b@1')), ('b2i32', ('ixor', a, b))),
415b8e80941Smrg
416b8e80941Smrg   (('iand', ('ieq', 'a@32', 0), ('ieq', 'b@32', 0)), ('ieq', ('ior', 'a@32', 'b@32'), 0)),
417b8e80941Smrg
418b8e80941Smrg   # These patterns can result when (a < b || a < c) => (a < min(b, c))
419b8e80941Smrg   # transformations occur before constant propagation and loop-unrolling.
420b8e80941Smrg   (('~flt', a, ('fmax', b, a)), ('flt', a, b)),
421b8e80941Smrg   (('~flt', ('fmin', a, b), a), ('flt', b, a)),
422b8e80941Smrg   (('~fge', a, ('fmin', b, a)), True),
423b8e80941Smrg   (('~fge', ('fmax', a, b), a), True),
424b8e80941Smrg   (('~flt', a, ('fmin', b, a)), False),
425b8e80941Smrg   (('~flt', ('fmax', a, b), a), False),
426b8e80941Smrg   (('~fge', a, ('fmax', b, a)), ('fge', a, b)),
427b8e80941Smrg   (('~fge', ('fmin', a, b), a), ('fge', b, a)),
428b8e80941Smrg
429b8e80941Smrg   (('ilt', a, ('imax', b, a)), ('ilt', a, b)),
430b8e80941Smrg   (('ilt', ('imin', a, b), a), ('ilt', b, a)),
431b8e80941Smrg   (('ige', a, ('imin', b, a)), True),
432b8e80941Smrg   (('ige', ('imax', a, b), a), True),
433b8e80941Smrg   (('ult', a, ('umax', b, a)), ('ult', a, b)),
434b8e80941Smrg   (('ult', ('umin', a, b), a), ('ult', b, a)),
435b8e80941Smrg   (('uge', a, ('umin', b, a)), True),
436b8e80941Smrg   (('uge', ('umax', a, b), a), True),
437b8e80941Smrg   (('ilt', a, ('imin', b, a)), False),
438b8e80941Smrg   (('ilt', ('imax', a, b), a), False),
439b8e80941Smrg   (('ige', a, ('imax', b, a)), ('ige', a, b)),
440b8e80941Smrg   (('ige', ('imin', a, b), a), ('ige', b, a)),
441b8e80941Smrg   (('ult', a, ('umin', b, a)), False),
442b8e80941Smrg   (('ult', ('umax', a, b), a), False),
443b8e80941Smrg   (('uge', a, ('umax', b, a)), ('uge', a, b)),
444b8e80941Smrg   (('uge', ('umin', a, b), a), ('uge', b, a)),
445b8e80941Smrg
446b8e80941Smrg   (('ilt', '#a', ('imax', '#b', c)), ('ior', ('ilt', a, b), ('ilt', a, c))),
447b8e80941Smrg   (('ilt', ('imin', '#a', b), '#c'), ('ior', ('ilt', a, c), ('ilt', b, c))),
448b8e80941Smrg   (('ige', '#a', ('imin', '#b', c)), ('ior', ('ige', a, b), ('ige', a, c))),
449b8e80941Smrg   (('ige', ('imax', '#a', b), '#c'), ('ior', ('ige', a, c), ('ige', b, c))),
450b8e80941Smrg   (('ult', '#a', ('umax', '#b', c)), ('ior', ('ult', a, b), ('ult', a, c))),
451b8e80941Smrg   (('ult', ('umin', '#a', b), '#c'), ('ior', ('ult', a, c), ('ult', b, c))),
452b8e80941Smrg   (('uge', '#a', ('umin', '#b', c)), ('ior', ('uge', a, b), ('uge', a, c))),
453b8e80941Smrg   (('uge', ('umax', '#a', b), '#c'), ('ior', ('uge', a, c), ('uge', b, c))),
454b8e80941Smrg   (('ilt', '#a', ('imin', '#b', c)), ('iand', ('ilt', a, b), ('ilt', a, c))),
455b8e80941Smrg   (('ilt', ('imax', '#a', b), '#c'), ('iand', ('ilt', a, c), ('ilt', b, c))),
456b8e80941Smrg   (('ige', '#a', ('imax', '#b', c)), ('iand', ('ige', a, b), ('ige', a, c))),
457b8e80941Smrg   (('ige', ('imin', '#a', b), '#c'), ('iand', ('ige', a, c), ('ige', b, c))),
458b8e80941Smrg   (('ult', '#a', ('umin', '#b', c)), ('iand', ('ult', a, b), ('ult', a, c))),
459b8e80941Smrg   (('ult', ('umax', '#a', b), '#c'), ('iand', ('ult', a, c), ('ult', b, c))),
460b8e80941Smrg   (('uge', '#a', ('umax', '#b', c)), ('iand', ('uge', a, b), ('uge', a, c))),
461b8e80941Smrg   (('uge', ('umin', '#a', b), '#c'), ('iand', ('uge', a, c), ('uge', b, c))),
462b8e80941Smrg
463b8e80941Smrg   # Thanks to sign extension, the ishr(a, b) is negative if and only if a is
464b8e80941Smrg   # negative.
465b8e80941Smrg   (('bcsel', ('ilt', a, 0), ('ineg', ('ishr', a, b)), ('ishr', a, b)),
466b8e80941Smrg    ('iabs', ('ishr', a, b))),
467b8e80941Smrg   (('iabs', ('ishr', ('iabs', a), b)), ('ishr', ('iabs', a), b)),
468b8e80941Smrg
469b8e80941Smrg   (('fabs', ('slt', a, b)), ('slt', a, b)),
470b8e80941Smrg   (('fabs', ('sge', a, b)), ('sge', a, b)),
471b8e80941Smrg   (('fabs', ('seq', a, b)), ('seq', a, b)),
472b8e80941Smrg   (('fabs', ('sne', a, b)), ('sne', a, b)),
473b8e80941Smrg   (('slt', a, b), ('b2f', ('flt', a, b)), 'options->lower_scmp'),
474b8e80941Smrg   (('sge', a, b), ('b2f', ('fge', a, b)), 'options->lower_scmp'),
475b8e80941Smrg   (('seq', a, b), ('b2f', ('feq', a, b)), 'options->lower_scmp'),
476b8e80941Smrg   (('sne', a, b), ('b2f', ('fne', a, b)), 'options->lower_scmp'),
477b8e80941Smrg   (('fne', ('fneg', a), a), ('fne', a, 0.0)),
478b8e80941Smrg   (('feq', ('fneg', a), a), ('feq', a, 0.0)),
479b8e80941Smrg   # Emulating booleans
480b8e80941Smrg   (('imul', ('b2i', 'a@1'), ('b2i', 'b@1')), ('b2i', ('iand', a, b))),
481b8e80941Smrg   (('fmul', ('b2f', 'a@1'), ('b2f', 'b@1')), ('b2f', ('iand', a, b))),
482b8e80941Smrg   (('fsat', ('fadd', ('b2f', 'a@1'), ('b2f', 'b@1'))), ('b2f', ('ior', a, b))),
483b8e80941Smrg   (('iand', 'a@bool32', 1.0), ('b2f', a)),
484b8e80941Smrg   # True/False are ~0 and 0 in NIR.  b2i of True is 1, and -1 is ~0 (True).
485b8e80941Smrg   (('ineg', ('b2i32', 'a@32')), a),
486b8e80941Smrg   (('flt', ('fneg', ('b2f', 'a@1')), 0), a), # Generated by TGSI KILL_IF.
487b8e80941Smrg   (('flt', ('fsub', 0.0, ('b2f', 'a@1')), 0), a), # Generated by TGSI KILL_IF.
488b8e80941Smrg   # Comparison with the same args.  Note that these are not done for
489b8e80941Smrg   # the float versions because NaN always returns false on float
490b8e80941Smrg   # inequalities.
491b8e80941Smrg   (('ilt', a, a), False),
492b8e80941Smrg   (('ige', a, a), True),
493b8e80941Smrg   (('ieq', a, a), True),
494b8e80941Smrg   (('ine', a, a), False),
495b8e80941Smrg   (('ult', a, a), False),
496b8e80941Smrg   (('uge', a, a), True),
497b8e80941Smrg   # Logical and bit operations
498b8e80941Smrg   (('fand', a, 0.0), 0.0),
499b8e80941Smrg   (('iand', a, a), a),
500b8e80941Smrg   (('iand', a, ~0), a),
501b8e80941Smrg   (('iand', a, 0), 0),
502b8e80941Smrg   (('ior', a, a), a),
503b8e80941Smrg   (('ior', a, 0), a),
504b8e80941Smrg   (('ior', a, True), True),
505b8e80941Smrg   (('fxor', a, a), 0.0),
506b8e80941Smrg   (('ixor', a, a), 0),
507b8e80941Smrg   (('ixor', a, 0), a),
508b8e80941Smrg   (('inot', ('inot', a)), a),
509b8e80941Smrg   (('ior', ('iand', a, b), b), b),
510b8e80941Smrg   (('ior', ('ior', a, b), b), ('ior', a, b)),
511b8e80941Smrg   (('iand', ('ior', a, b), b), b),
512b8e80941Smrg   (('iand', ('iand', a, b), b), ('iand', a, b)),
513b8e80941Smrg   # DeMorgan's Laws
514b8e80941Smrg   (('iand', ('inot', a), ('inot', b)), ('inot', ('ior',  a, b))),
515b8e80941Smrg   (('ior',  ('inot', a), ('inot', b)), ('inot', ('iand', a, b))),
516b8e80941Smrg   # Shift optimizations
517b8e80941Smrg   (('ishl', 0, a), 0),
518b8e80941Smrg   (('ishl', a, 0), a),
519b8e80941Smrg   (('ishr', 0, a), 0),
520b8e80941Smrg   (('ishr', a, 0), a),
521b8e80941Smrg   (('ushr', 0, a), 0),
522b8e80941Smrg   (('ushr', a, 0), a),
523b8e80941Smrg   (('iand', 0xff, ('ushr@32', a, 24)), ('ushr', a, 24)),
524b8e80941Smrg   (('iand', 0xffff, ('ushr@32', a, 16)), ('ushr', a, 16)),
525b8e80941Smrg   # Exponential/logarithmic identities
526b8e80941Smrg   (('~fexp2', ('flog2', a)), a), # 2^lg2(a) = a
527b8e80941Smrg   (('~flog2', ('fexp2', a)), a), # lg2(2^a) = a
528b8e80941Smrg   (('fpow', a, b), ('fexp2', ('fmul', ('flog2', a), b)), 'options->lower_fpow'), # a^b = 2^(lg2(a)*b)
529b8e80941Smrg   (('~fexp2', ('fmul', ('flog2', a), b)), ('fpow', a, b), '!options->lower_fpow'), # 2^(lg2(a)*b) = a^b
530b8e80941Smrg   (('~fexp2', ('fadd', ('fmul', ('flog2', a), b), ('fmul', ('flog2', c), d))),
531b8e80941Smrg    ('~fmul', ('fpow', a, b), ('fpow', c, d)), '!options->lower_fpow'), # 2^(lg2(a) * b + lg2(c) + d) = a^b * c^d
532b8e80941Smrg   (('~fexp2', ('fmul', ('flog2', a), 2.0)), ('fmul', a, a)),
533b8e80941Smrg   (('~fexp2', ('fmul', ('flog2', a), 4.0)), ('fmul', ('fmul', a, a), ('fmul', a, a))),
534b8e80941Smrg   (('~fpow', a, 1.0), a),
535b8e80941Smrg   (('~fpow', a, 2.0), ('fmul', a, a)),
536b8e80941Smrg   (('~fpow', a, 4.0), ('fmul', ('fmul', a, a), ('fmul', a, a))),
537b8e80941Smrg   (('~fpow', 2.0, a), ('fexp2', a)),
538b8e80941Smrg   (('~fpow', ('fpow', a, 2.2), 0.454545), a),
539b8e80941Smrg   (('~fpow', ('fabs', ('fpow', a, 2.2)), 0.454545), ('fabs', a)),
540b8e80941Smrg   (('~fsqrt', ('fexp2', a)), ('fexp2', ('fmul', 0.5, a))),
541b8e80941Smrg   (('~frcp', ('fexp2', a)), ('fexp2', ('fneg', a))),
542b8e80941Smrg   (('~frsq', ('fexp2', a)), ('fexp2', ('fmul', -0.5, a))),
543b8e80941Smrg   (('~flog2', ('fsqrt', a)), ('fmul', 0.5, ('flog2', a))),
544b8e80941Smrg   (('~flog2', ('frcp', a)), ('fneg', ('flog2', a))),
545b8e80941Smrg   (('~flog2', ('frsq', a)), ('fmul', -0.5, ('flog2', a))),
546b8e80941Smrg   (('~flog2', ('fpow', a, b)), ('fmul', b, ('flog2', a))),
547b8e80941Smrg   (('~fmul', ('fexp2(is_used_once)', a), ('fexp2(is_used_once)', b)), ('fexp2', ('fadd', a, b))),
548b8e80941Smrg   # Division and reciprocal
549b8e80941Smrg   (('~fdiv', 1.0, a), ('frcp', a)),
550b8e80941Smrg   (('fdiv', a, b), ('fmul', a, ('frcp', b)), 'options->lower_fdiv'),
551b8e80941Smrg   (('~frcp', ('frcp', a)), a),
552b8e80941Smrg   (('~frcp', ('fsqrt', a)), ('frsq', a)),
553b8e80941Smrg   (('fsqrt', a), ('frcp', ('frsq', a)), 'options->lower_fsqrt'),
554b8e80941Smrg   (('~frcp', ('frsq', a)), ('fsqrt', a), '!options->lower_fsqrt'),
555b8e80941Smrg   # Boolean simplifications
556b8e80941Smrg   (('i2b32(is_used_by_if)', a), ('ine32', a, 0)),
557b8e80941Smrg   (('i2b1(is_used_by_if)', a), ('ine', a, 0)),
558b8e80941Smrg   (('ieq', a, True), a),
559b8e80941Smrg   (('ine(is_not_used_by_if)', a, True), ('inot', a)),
560b8e80941Smrg   (('ine', a, False), a),
561b8e80941Smrg   (('ieq(is_not_used_by_if)', a, False), ('inot', 'a')),
562b8e80941Smrg   (('bcsel', a, True, False), a),
563b8e80941Smrg   (('bcsel', a, False, True), ('inot', a)),
564b8e80941Smrg   (('bcsel@32', a, 1.0, 0.0), ('b2f', a)),
565b8e80941Smrg   (('bcsel@32', a, 0.0, 1.0), ('b2f', ('inot', a))),
566b8e80941Smrg   (('bcsel@32', a, -1.0, -0.0), ('fneg', ('b2f', a))),
567b8e80941Smrg   (('bcsel@32', a, -0.0, -1.0), ('fneg', ('b2f', ('inot', a)))),
568b8e80941Smrg   (('bcsel', True, b, c), b),
569b8e80941Smrg   (('bcsel', False, b, c), c),
570b8e80941Smrg   (('bcsel', a, ('b2f(is_used_once)', 'b@32'), ('b2f', 'c@32')), ('b2f', ('bcsel', a, b, c))),
571b8e80941Smrg   # The result of this should be hit by constant propagation and, in the
572b8e80941Smrg   # next round of opt_algebraic, get picked up by one of the above two.
573b8e80941Smrg   (('bcsel', '#a', b, c), ('bcsel', ('ine', 'a', 0), b, c)),
574b8e80941Smrg
575b8e80941Smrg   (('bcsel', a, b, b), b),
576b8e80941Smrg   (('fcsel', a, b, b), b),
577b8e80941Smrg
578b8e80941Smrg   # D3D Boolean emulation
579b8e80941Smrg   (('bcsel', a, -1, 0), ('ineg', ('b2i', 'a@1'))),
580b8e80941Smrg   (('bcsel', a, 0, -1), ('ineg', ('b2i', ('inot', a)))),
581b8e80941Smrg   (('iand', ('ineg', ('b2i', 'a@1')), ('ineg', ('b2i', 'b@1'))),
582b8e80941Smrg    ('ineg', ('b2i', ('iand', a, b)))),
583b8e80941Smrg   (('ior', ('ineg', ('b2i','a@1')), ('ineg', ('b2i', 'b@1'))),
584b8e80941Smrg    ('ineg', ('b2i', ('ior', a, b)))),
585b8e80941Smrg   (('ieq', ('ineg', ('b2i', 'a@1')), 0), ('inot', a)),
586b8e80941Smrg   (('ieq', ('ineg', ('b2i', 'a@1')), -1), a),
587b8e80941Smrg   (('ine', ('ineg', ('b2i', 'a@1')), 0), a),
588b8e80941Smrg   (('ine', ('ineg', ('b2i', 'a@1')), -1), ('inot', a)),
589b8e80941Smrg   (('iand', ('ineg', ('b2i', a)), 1.0), ('b2f', a)),
590b8e80941Smrg
591b8e80941Smrg   # SM5 32-bit shifts are defined to use the 5 least significant bits
592b8e80941Smrg   (('ishl', 'a@32', ('iand', 31, b)), ('ishl', a, b)),
593b8e80941Smrg   (('ishr', 'a@32', ('iand', 31, b)), ('ishr', a, b)),
594b8e80941Smrg   (('ushr', 'a@32', ('iand', 31, b)), ('ushr', a, b)),
595b8e80941Smrg
596b8e80941Smrg   # Conversions
597b8e80941Smrg   (('i2b32', ('b2i', 'a@32')), a),
598b8e80941Smrg   (('f2i', ('ftrunc', a)), ('f2i', a)),
599b8e80941Smrg   (('f2u', ('ftrunc', a)), ('f2u', a)),
600b8e80941Smrg   (('i2b', ('ineg', a)), ('i2b', a)),
601b8e80941Smrg   (('i2b', ('iabs', a)), ('i2b', a)),
602b8e80941Smrg   (('fabs', ('b2f', a)), ('b2f', a)),
603b8e80941Smrg   (('iabs', ('b2i', a)), ('b2i', a)),
604b8e80941Smrg   (('inot', ('f2b1', a)), ('feq', a, 0.0)),
605b8e80941Smrg
606b8e80941Smrg   # Ironically, mark these as imprecise because removing the conversions may
607b8e80941Smrg   # preserve more precision than doing the conversions (e.g.,
608b8e80941Smrg   # uint(float(0x81818181u)) == 0x81818200).
609b8e80941Smrg   (('~f2i32', ('i2f', 'a@32')), a),
610b8e80941Smrg   (('~f2i32', ('u2f', 'a@32')), a),
611b8e80941Smrg   (('~f2u32', ('i2f', 'a@32')), a),
612b8e80941Smrg   (('~f2u32', ('u2f', 'a@32')), a),
613b8e80941Smrg
614b8e80941Smrg   # Section 5.4.1 (Conversion and Scalar Constructors) of the GLSL 4.60 spec
615b8e80941Smrg   # says:
616b8e80941Smrg   #
617b8e80941Smrg   #    It is undefined to convert a negative floating-point value to an
618b8e80941Smrg   #    uint.
619b8e80941Smrg   #
620b8e80941Smrg   # Assuming that (uint)some_float behaves like (uint)(int)some_float allows
621b8e80941Smrg   # some optimizations in the i965 backend to proceed.
622b8e80941Smrg   (('ige', ('f2u', a), b), ('ige', ('f2i', a), b)),
623b8e80941Smrg   (('ige', b, ('f2u', a)), ('ige', b, ('f2i', a))),
624b8e80941Smrg   (('ilt', ('f2u', a), b), ('ilt', ('f2i', a), b)),
625b8e80941Smrg   (('ilt', b, ('f2u', a)), ('ilt', b, ('f2i', a))),
626b8e80941Smrg
627b8e80941Smrg   # Packing and then unpacking does nothing
628b8e80941Smrg   (('unpack_64_2x32_split_x', ('pack_64_2x32_split', a, b)), a),
629b8e80941Smrg   (('unpack_64_2x32_split_y', ('pack_64_2x32_split', a, b)), b),
630b8e80941Smrg   (('pack_64_2x32_split', ('unpack_64_2x32_split_x', a),
631b8e80941Smrg                           ('unpack_64_2x32_split_y', a)), a),
632b8e80941Smrg
633b8e80941Smrg   # Byte extraction
634b8e80941Smrg   (('ushr', 'a@16',  8), ('extract_u8', a, 1), '!options->lower_extract_byte'),
635b8e80941Smrg   (('ushr', 'a@32', 24), ('extract_u8', a, 3), '!options->lower_extract_byte'),
636b8e80941Smrg   (('ushr', 'a@64', 56), ('extract_u8', a, 7), '!options->lower_extract_byte'),
637b8e80941Smrg   (('ishr', 'a@16',  8), ('extract_i8', a, 1), '!options->lower_extract_byte'),
638b8e80941Smrg   (('ishr', 'a@32', 24), ('extract_i8', a, 3), '!options->lower_extract_byte'),
639b8e80941Smrg   (('ishr', 'a@64', 56), ('extract_i8', a, 7), '!options->lower_extract_byte'),
640b8e80941Smrg   (('iand', 0xff, a), ('extract_u8', a, 0), '!options->lower_extract_byte')
641b8e80941Smrg]
642b8e80941Smrg
643b8e80941Smrg# After the ('extract_u8', a, 0) pattern, above, triggers, there will be
644b8e80941Smrg# patterns like those below.
645b8e80941Smrgfor op in ('ushr', 'ishr'):
646b8e80941Smrg   optimizations.extend([(('extract_u8', (op, 'a@16',  8),     0), ('extract_u8', a, 1))])
647b8e80941Smrg   optimizations.extend([(('extract_u8', (op, 'a@32',  8 * i), 0), ('extract_u8', a, i)) for i in range(1, 4)])
648b8e80941Smrg   optimizations.extend([(('extract_u8', (op, 'a@64',  8 * i), 0), ('extract_u8', a, i)) for i in range(1, 8)])
649b8e80941Smrg
650b8e80941Smrgoptimizations.extend([(('extract_u8', ('extract_u16', a, 1), 0), ('extract_u8', a, 2))])
651b8e80941Smrg
652b8e80941Smrg# After the ('extract_[iu]8', a, 3) patterns, above, trigger, there will be
653b8e80941Smrg# patterns like those below.
654b8e80941Smrgfor op in ('extract_u8', 'extract_i8'):
655b8e80941Smrg   optimizations.extend([((op, ('ishl', 'a@16',      8),     1), (op, a, 0))])
656b8e80941Smrg   optimizations.extend([((op, ('ishl', 'a@32', 24 - 8 * i), 3), (op, a, i)) for i in range(2, -1, -1)])
657b8e80941Smrg   optimizations.extend([((op, ('ishl', 'a@64', 56 - 8 * i), 7), (op, a, i)) for i in range(6, -1, -1)])
658b8e80941Smrg
659b8e80941Smrgoptimizations.extend([
660b8e80941Smrg    # Word extraction
661b8e80941Smrg   (('ushr', ('ishl', 'a@32', 16), 16), ('extract_u16', a, 0), '!options->lower_extract_word'),
662b8e80941Smrg   (('ushr', 'a@32', 16), ('extract_u16', a, 1), '!options->lower_extract_word'),
663b8e80941Smrg   (('ishr', ('ishl', 'a@32', 16), 16), ('extract_i16', a, 0), '!options->lower_extract_word'),
664b8e80941Smrg   (('ishr', 'a@32', 16), ('extract_i16', a, 1), '!options->lower_extract_word'),
665b8e80941Smrg   (('iand', 0xffff, a), ('extract_u16', a, 0), '!options->lower_extract_word'),
666b8e80941Smrg
667b8e80941Smrg   # Subtracts
668b8e80941Smrg   (('~fsub', a, ('fsub', 0.0, b)), ('fadd', a, b)),
669b8e80941Smrg   (('isub', a, ('isub', 0, b)), ('iadd', a, b)),
670b8e80941Smrg   (('ussub_4x8', a, 0), a),
671b8e80941Smrg   (('ussub_4x8', a, ~0), 0),
672b8e80941Smrg   (('fsub', a, b), ('fadd', a, ('fneg', b)), 'options->lower_sub'),
673b8e80941Smrg   (('isub', a, b), ('iadd', a, ('ineg', b)), 'options->lower_sub'),
674b8e80941Smrg   (('fneg', a), ('fsub', 0.0, a), 'options->lower_negate'),
675b8e80941Smrg   (('ineg', a), ('isub', 0, a), 'options->lower_negate'),
676b8e80941Smrg   (('~fadd', a, ('fsub', 0.0, b)), ('fsub', a, b)),
677b8e80941Smrg   (('iadd', a, ('isub', 0, b)), ('isub', a, b)),
678b8e80941Smrg   (('fabs', ('fsub', 0.0, a)), ('fabs', a)),
679b8e80941Smrg   (('iabs', ('isub', 0, a)), ('iabs', a)),
680b8e80941Smrg
681b8e80941Smrg   # Propagate negation up multiplication chains
682b8e80941Smrg   (('fmul', ('fneg', a), b), ('fneg', ('fmul', a, b))),
683b8e80941Smrg   (('imul', ('ineg', a), b), ('ineg', ('imul', a, b))),
684b8e80941Smrg
685b8e80941Smrg   # Propagate constants up multiplication chains
686b8e80941Smrg   (('~fmul(is_used_once)', ('fmul(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c'), ('fmul', ('fmul', a, c), b)),
687b8e80941Smrg   (('imul(is_used_once)', ('imul(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c'), ('imul', ('imul', a, c), b)),
688b8e80941Smrg   (('~fadd(is_used_once)', ('fadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c'), ('fadd', ('fadd', a, c), b)),
689b8e80941Smrg   (('iadd(is_used_once)', ('iadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c'), ('iadd', ('iadd', a, c), b)),
690b8e80941Smrg
691b8e80941Smrg   # Reassociate constants in add/mul chains so they can be folded together.
692b8e80941Smrg   # For now, we mostly only handle cases where the constants are separated by
693b8e80941Smrg   # a single non-constant.  We could do better eventually.
694b8e80941Smrg   (('~fmul', '#a', ('fmul', 'b(is_not_const)', '#c')), ('fmul', ('fmul', a, c), b)),
695b8e80941Smrg   (('imul', '#a', ('imul', 'b(is_not_const)', '#c')), ('imul', ('imul', a, c), b)),
696b8e80941Smrg   (('~fadd', '#a',          ('fadd', 'b(is_not_const)', '#c')),  ('fadd', ('fadd', a,          c),           b)),
697b8e80941Smrg   (('~fadd', '#a', ('fneg', ('fadd', 'b(is_not_const)', '#c'))), ('fadd', ('fadd', a, ('fneg', c)), ('fneg', b))),
698b8e80941Smrg   (('iadd', '#a', ('iadd', 'b(is_not_const)', '#c')), ('iadd', ('iadd', a, c), b)),
699b8e80941Smrg
700b8e80941Smrg   # By definition...
701b8e80941Smrg   (('bcsel', ('ige', ('find_lsb', a), 0), ('find_lsb', a), -1), ('find_lsb', a)),
702b8e80941Smrg   (('bcsel', ('ige', ('ifind_msb', a), 0), ('ifind_msb', a), -1), ('ifind_msb', a)),
703b8e80941Smrg   (('bcsel', ('ige', ('ufind_msb', a), 0), ('ufind_msb', a), -1), ('ufind_msb', a)),
704b8e80941Smrg
705b8e80941Smrg   (('bcsel', ('ine', a, 0), ('find_lsb', a), -1), ('find_lsb', a)),
706b8e80941Smrg   (('bcsel', ('ine', a, 0), ('ifind_msb', a), -1), ('ifind_msb', a)),
707b8e80941Smrg   (('bcsel', ('ine', a, 0), ('ufind_msb', a), -1), ('ufind_msb', a)),
708b8e80941Smrg
709b8e80941Smrg   (('bcsel', ('ine', a, -1), ('ifind_msb', a), -1), ('ifind_msb', a)),
710b8e80941Smrg
711b8e80941Smrg   # Misc. lowering
712b8e80941Smrg   (('fmod@16', a, b), ('fsub', a, ('fmul', b, ('ffloor', ('fdiv', a, b)))), 'options->lower_fmod16'),
713b8e80941Smrg   (('fmod@32', a, b), ('fsub', a, ('fmul', b, ('ffloor', ('fdiv', a, b)))), 'options->lower_fmod32'),
714b8e80941Smrg   (('fmod@64', a, b), ('fsub', a, ('fmul', b, ('ffloor', ('fdiv', a, b)))), 'options->lower_fmod64'),
715b8e80941Smrg   (('frem', a, b), ('fsub', a, ('fmul', b, ('ftrunc', ('fdiv', a, b)))), 'options->lower_fmod32'),
716b8e80941Smrg   (('uadd_carry@32', a, b), ('b2i', ('ult', ('iadd', a, b), a)), 'options->lower_uadd_carry'),
717b8e80941Smrg   (('usub_borrow@32', a, b), ('b2i', ('ult', a, b)), 'options->lower_usub_borrow'),
718b8e80941Smrg
719b8e80941Smrg   (('bitfield_insert', 'base', 'insert', 'offset', 'bits'),
720b8e80941Smrg    ('bcsel', ('ilt', 31, 'bits'), 'insert',
721b8e80941Smrg              ('bfi', ('bfm', 'bits', 'offset'), 'insert', 'base')),
722b8e80941Smrg    'options->lower_bitfield_insert'),
723b8e80941Smrg   (('ihadd', a, b), ('iadd', ('iand', a, b), ('ishr', ('ixor', a, b), 1)), 'options->lower_hadd'),
724b8e80941Smrg   (('uhadd', a, b), ('iadd', ('iand', a, b), ('ushr', ('ixor', a, b), 1)), 'options->lower_hadd'),
725b8e80941Smrg   (('irhadd', a, b), ('isub', ('ior', a, b), ('ishr', ('ixor', a, b), 1)), 'options->lower_hadd'),
726b8e80941Smrg   (('urhadd', a, b), ('isub', ('ior', a, b), ('ushr', ('ixor', a, b), 1)), 'options->lower_hadd'),
727b8e80941Smrg   (('uadd_sat', a, b), ('bcsel', ('ult', ('iadd', a, b), a), -1, ('iadd', a, b)), 'options->lower_add_sat'),
728b8e80941Smrg   (('usub_sat', a, b), ('bcsel', ('ult', a, b), 0, ('isub', a, b)), 'options->lower_add_sat'),
729b8e80941Smrg
730b8e80941Smrg   # Alternative lowering that doesn't rely on bfi.
731b8e80941Smrg   (('bitfield_insert', 'base', 'insert', 'offset', 'bits'),
732b8e80941Smrg    ('bcsel', ('ilt', 31, 'bits'),
733b8e80941Smrg     'insert',
734b8e80941Smrg     ('ior',
735b8e80941Smrg      ('iand', 'base', ('inot', ('bfm', 'bits', 'offset'))),
736b8e80941Smrg      ('iand', ('ishl', 'insert', 'offset'), ('bfm', 'bits', 'offset')))),
737b8e80941Smrg    'options->lower_bitfield_insert_to_shifts'),
738b8e80941Smrg
739b8e80941Smrg   # bfm lowering -- note that the NIR opcode is undefined if either arg is 32.
740b8e80941Smrg   (('bfm', 'bits', 'offset'),
741b8e80941Smrg    ('ishl', ('isub', ('ishl', 1, 'bits'), 1), 'offset'),
742b8e80941Smrg    'options->lower_bfm'),
743b8e80941Smrg
744b8e80941Smrg   (('ibitfield_extract', 'value', 'offset', 'bits'),
745b8e80941Smrg    ('bcsel', ('ilt', 31, 'bits'), 'value',
746b8e80941Smrg              ('ibfe', 'value', 'offset', 'bits')),
747b8e80941Smrg    'options->lower_bitfield_extract'),
748b8e80941Smrg
749b8e80941Smrg   (('ubitfield_extract', 'value', 'offset', 'bits'),
750b8e80941Smrg    ('bcsel', ('ult', 31, 'bits'), 'value',
751b8e80941Smrg              ('ubfe', 'value', 'offset', 'bits')),
752b8e80941Smrg    'options->lower_bitfield_extract'),
753b8e80941Smrg
754b8e80941Smrg   (('ibitfield_extract', 'value', 'offset', 'bits'),
755b8e80941Smrg    ('bcsel', ('ieq', 0, 'bits'),
756b8e80941Smrg     0,
757b8e80941Smrg     ('ishr',
758b8e80941Smrg       ('ishl', 'value', ('isub', ('isub', 32, 'bits'), 'offset')),
759b8e80941Smrg       ('isub', 32, 'bits'))),
760b8e80941Smrg    'options->lower_bitfield_extract_to_shifts'),
761b8e80941Smrg
762b8e80941Smrg   (('ubitfield_extract', 'value', 'offset', 'bits'),
763b8e80941Smrg    ('iand',
764b8e80941Smrg     ('ushr', 'value', 'offset'),
765b8e80941Smrg     ('bcsel', ('ieq', 'bits', 32),
766b8e80941Smrg      0xffffffff,
767b8e80941Smrg      ('bfm', 'bits', 0))),
768b8e80941Smrg    'options->lower_bitfield_extract_to_shifts'),
769b8e80941Smrg
770b8e80941Smrg   (('ifind_msb', 'value'),
771b8e80941Smrg    ('ufind_msb', ('bcsel', ('ilt', 'value', 0), ('inot', 'value'), 'value')),
772b8e80941Smrg    'options->lower_ifind_msb'),
773b8e80941Smrg
774b8e80941Smrg   (('find_lsb', 'value'),
775b8e80941Smrg    ('ufind_msb', ('iand', 'value', ('ineg', 'value'))),
776b8e80941Smrg    'options->lower_find_lsb'),
777b8e80941Smrg
778b8e80941Smrg   (('extract_i8', a, 'b@32'),
779b8e80941Smrg    ('ishr', ('ishl', a, ('imul', ('isub', 3, b), 8)), 24),
780b8e80941Smrg    'options->lower_extract_byte'),
781b8e80941Smrg
782b8e80941Smrg   (('extract_u8', a, 'b@32'),
783b8e80941Smrg    ('iand', ('ushr', a, ('imul', b, 8)), 0xff),
784b8e80941Smrg    'options->lower_extract_byte'),
785b8e80941Smrg
786b8e80941Smrg   (('extract_i16', a, 'b@32'),
787b8e80941Smrg    ('ishr', ('ishl', a, ('imul', ('isub', 1, b), 16)), 16),
788b8e80941Smrg    'options->lower_extract_word'),
789b8e80941Smrg
790b8e80941Smrg   (('extract_u16', a, 'b@32'),
791b8e80941Smrg    ('iand', ('ushr', a, ('imul', b, 16)), 0xffff),
792b8e80941Smrg    'options->lower_extract_word'),
793b8e80941Smrg
794b8e80941Smrg    (('pack_unorm_2x16', 'v'),
795b8e80941Smrg     ('pack_uvec2_to_uint',
796b8e80941Smrg        ('f2u32', ('fround_even', ('fmul', ('fsat', 'v'), 65535.0)))),
797b8e80941Smrg     'options->lower_pack_unorm_2x16'),
798b8e80941Smrg
799b8e80941Smrg    (('pack_unorm_4x8', 'v'),
800b8e80941Smrg     ('pack_uvec4_to_uint',
801b8e80941Smrg        ('f2u32', ('fround_even', ('fmul', ('fsat', 'v'), 255.0)))),
802b8e80941Smrg     'options->lower_pack_unorm_4x8'),
803b8e80941Smrg
804b8e80941Smrg    (('pack_snorm_2x16', 'v'),
805b8e80941Smrg     ('pack_uvec2_to_uint',
806b8e80941Smrg        ('f2i32', ('fround_even', ('fmul', ('fmin', 1.0, ('fmax', -1.0, 'v')), 32767.0)))),
807b8e80941Smrg     'options->lower_pack_snorm_2x16'),
808b8e80941Smrg
809b8e80941Smrg    (('pack_snorm_4x8', 'v'),
810b8e80941Smrg     ('pack_uvec4_to_uint',
811b8e80941Smrg        ('f2i32', ('fround_even', ('fmul', ('fmin', 1.0, ('fmax', -1.0, 'v')), 127.0)))),
812b8e80941Smrg     'options->lower_pack_snorm_4x8'),
813b8e80941Smrg
814b8e80941Smrg    (('unpack_unorm_2x16', 'v'),
815b8e80941Smrg     ('fdiv', ('u2f32', ('vec2', ('extract_u16', 'v', 0),
816b8e80941Smrg                                  ('extract_u16', 'v', 1))),
817b8e80941Smrg              65535.0),
818b8e80941Smrg     'options->lower_unpack_unorm_2x16'),
819b8e80941Smrg
820b8e80941Smrg    (('unpack_unorm_4x8', 'v'),
821b8e80941Smrg     ('fdiv', ('u2f32', ('vec4', ('extract_u8', 'v', 0),
822b8e80941Smrg                                  ('extract_u8', 'v', 1),
823b8e80941Smrg                                  ('extract_u8', 'v', 2),
824b8e80941Smrg                                  ('extract_u8', 'v', 3))),
825b8e80941Smrg              255.0),
826b8e80941Smrg     'options->lower_unpack_unorm_4x8'),
827b8e80941Smrg
828b8e80941Smrg    (('unpack_snorm_2x16', 'v'),
829b8e80941Smrg     ('fmin', 1.0, ('fmax', -1.0, ('fdiv', ('i2f', ('vec2', ('extract_i16', 'v', 0),
830b8e80941Smrg                                                            ('extract_i16', 'v', 1))),
831b8e80941Smrg                                           32767.0))),
832b8e80941Smrg     'options->lower_unpack_snorm_2x16'),
833b8e80941Smrg
834b8e80941Smrg    (('unpack_snorm_4x8', 'v'),
835b8e80941Smrg     ('fmin', 1.0, ('fmax', -1.0, ('fdiv', ('i2f', ('vec4', ('extract_i8', 'v', 0),
836b8e80941Smrg                                                            ('extract_i8', 'v', 1),
837b8e80941Smrg                                                            ('extract_i8', 'v', 2),
838b8e80941Smrg                                                            ('extract_i8', 'v', 3))),
839b8e80941Smrg                                           127.0))),
840b8e80941Smrg     'options->lower_unpack_snorm_4x8'),
841b8e80941Smrg
842b8e80941Smrg   (('isign', a), ('imin', ('imax', a, -1), 1), 'options->lower_isign'),
843b8e80941Smrg   (('fsign', a), ('fsub', ('b2f', ('flt', 0.0, a)), ('b2f', ('flt', a, 0.0))), 'options->lower_fsign'),
844b8e80941Smrg])
845b8e80941Smrg
846b8e80941Smrg# bit_size dependent lowerings
847b8e80941Smrgfor bit_size in [8, 16, 32, 64]:
848b8e80941Smrg   # convenience constants
849b8e80941Smrg   intmax = (1 << (bit_size - 1)) - 1
850b8e80941Smrg   intmin = 1 << (bit_size - 1)
851b8e80941Smrg
852b8e80941Smrg   optimizations += [
853b8e80941Smrg      (('iadd_sat@' + str(bit_size), a, b),
854b8e80941Smrg       ('bcsel', ('ige', b, 1), ('bcsel', ('ilt', ('iadd', a, b), a), intmax, ('iadd', a, b)),
855b8e80941Smrg                                ('bcsel', ('ilt', a, ('iadd', a, b)), intmin, ('iadd', a, b))), 'options->lower_add_sat'),
856b8e80941Smrg      (('isub_sat@' + str(bit_size), a, b),
857b8e80941Smrg       ('bcsel', ('ilt', b, 0), ('bcsel', ('ilt', ('isub', a, b), a), intmax, ('isub', a, b)),
858b8e80941Smrg                                ('bcsel', ('ilt', a, ('isub', a, b)), intmin, ('isub', a, b))), 'options->lower_add_sat'),
859b8e80941Smrg   ]
860b8e80941Smrg
861b8e80941Smrginvert = OrderedDict([('feq', 'fne'), ('fne', 'feq'), ('fge', 'flt'), ('flt', 'fge')])
862b8e80941Smrg
863b8e80941Smrgfor left, right in itertools.combinations_with_replacement(invert.keys(), 2):
864b8e80941Smrg   optimizations.append((('inot', ('ior(is_used_once)', (left, a, b), (right, c, d))),
865b8e80941Smrg                         ('iand', (invert[left], a, b), (invert[right], c, d))))
866b8e80941Smrg   optimizations.append((('inot', ('iand(is_used_once)', (left, a, b), (right, c, d))),
867b8e80941Smrg                         ('ior', (invert[left], a, b), (invert[right], c, d))))
868b8e80941Smrg
869b8e80941Smrg# Optimize x2bN(b2x(x)) -> x
870b8e80941Smrgfor size in type_sizes('bool'):
871b8e80941Smrg    aN = 'a@' + str(size)
872b8e80941Smrg    f2bN = 'f2b' + str(size)
873b8e80941Smrg    i2bN = 'i2b' + str(size)
874b8e80941Smrg    optimizations.append(((f2bN, ('b2f', aN)), a))
875b8e80941Smrg    optimizations.append(((i2bN, ('b2i', aN)), a))
876b8e80941Smrg
877b8e80941Smrg# Optimize x2yN(b2x(x)) -> b2y
878b8e80941Smrgfor x, y in itertools.product(['f', 'u', 'i'], ['f', 'u', 'i']):
879b8e80941Smrg   if x != 'f' and y != 'f' and x != y:
880b8e80941Smrg      continue
881b8e80941Smrg
882b8e80941Smrg   b2x = 'b2f' if x == 'f' else 'b2i'
883b8e80941Smrg   b2y = 'b2f' if y == 'f' else 'b2i'
884b8e80941Smrg   x2yN = '{}2{}'.format(x, y)
885b8e80941Smrg   optimizations.append(((x2yN, (b2x, a)), (b2y, a)))
886b8e80941Smrg
887b8e80941Smrg# Optimize away x2xN(a@N)
888b8e80941Smrgfor t in ['int', 'uint', 'float']:
889b8e80941Smrg   for N in type_sizes(t):
890b8e80941Smrg      x2xN = '{0}2{0}{1}'.format(t[0], N)
891b8e80941Smrg      aN = 'a@{0}'.format(N)
892b8e80941Smrg      optimizations.append(((x2xN, aN), a))
893b8e80941Smrg
894b8e80941Smrg# Optimize x2xN(y2yM(a@P)) -> y2yN(a) for integers
895b8e80941Smrg# In particular, we can optimize away everything except upcast of downcast and
896b8e80941Smrg# upcasts where the type differs from the other cast
897b8e80941Smrgfor N, M in itertools.product(type_sizes('uint'), type_sizes('uint')):
898b8e80941Smrg   if N < M:
899b8e80941Smrg      # The outer cast is a down-cast.  It doesn't matter what the size of the
900b8e80941Smrg      # argument of the inner cast is because we'll never been in the upcast
901b8e80941Smrg      # of downcast case.  Regardless of types, we'll always end up with y2yN
902b8e80941Smrg      # in the end.
903b8e80941Smrg      for x, y in itertools.product(['i', 'u'], ['i', 'u']):
904b8e80941Smrg         x2xN = '{0}2{0}{1}'.format(x, N)
905b8e80941Smrg         y2yM = '{0}2{0}{1}'.format(y, M)
906b8e80941Smrg         y2yN = '{0}2{0}{1}'.format(y, N)
907b8e80941Smrg         optimizations.append(((x2xN, (y2yM, a)), (y2yN, a)))
908b8e80941Smrg   elif N > M:
909b8e80941Smrg      # If the outer cast is an up-cast, we have to be more careful about the
910b8e80941Smrg      # size of the argument of the inner cast and with types.  In this case,
911b8e80941Smrg      # the type is always the type of type up-cast which is given by the
912b8e80941Smrg      # outer cast.
913b8e80941Smrg      for P in type_sizes('uint'):
914b8e80941Smrg         # We can't optimize away up-cast of down-cast.
915b8e80941Smrg         if M < P:
916b8e80941Smrg            continue
917b8e80941Smrg
918b8e80941Smrg         # Because we're doing down-cast of down-cast, the types always have
919b8e80941Smrg         # to match between the two casts
920b8e80941Smrg         for x in ['i', 'u']:
921b8e80941Smrg            x2xN = '{0}2{0}{1}'.format(x, N)
922b8e80941Smrg            x2xM = '{0}2{0}{1}'.format(x, M)
923b8e80941Smrg            aP = 'a@{0}'.format(P)
924b8e80941Smrg            optimizations.append(((x2xN, (x2xM, aP)), (x2xN, a)))
925b8e80941Smrg   else:
926b8e80941Smrg      # The N == M case is handled by other optimizations
927b8e80941Smrg      pass
928b8e80941Smrg
929b8e80941Smrgdef fexp2i(exp, bits):
930b8e80941Smrg   # We assume that exp is already in the right range.
931b8e80941Smrg   if bits == 16:
932b8e80941Smrg      return ('i2i16', ('ishl', ('iadd', exp, 15), 10))
933b8e80941Smrg   elif bits == 32:
934b8e80941Smrg      return ('ishl', ('iadd', exp, 127), 23)
935b8e80941Smrg   elif bits == 64:
936b8e80941Smrg      return ('pack_64_2x32_split', 0, ('ishl', ('iadd', exp, 1023), 20))
937b8e80941Smrg   else:
938b8e80941Smrg      assert False
939b8e80941Smrg
940b8e80941Smrgdef ldexp(f, exp, bits):
941b8e80941Smrg   # First, we clamp exp to a reasonable range.  The maximum possible range
942b8e80941Smrg   # for a normal exponent is [-126, 127] and, throwing in denormals, you get
943b8e80941Smrg   # a maximum range of [-149, 127].  This means that we can potentially have
944b8e80941Smrg   # a swing of +-276.  If you start with FLT_MAX, you actually have to do
945b8e80941Smrg   # ldexp(FLT_MAX, -278) to get it to flush all the way to zero.  The GLSL
946b8e80941Smrg   # spec, on the other hand, only requires that we handle an exponent value
947b8e80941Smrg   # in the range [-126, 128].  This implementation is *mostly* correct; it
948b8e80941Smrg   # handles a range on exp of [-252, 254] which allows you to create any
949b8e80941Smrg   # value (including denorms if the hardware supports it) and to adjust the
950b8e80941Smrg   # exponent of any normal value to anything you want.
951b8e80941Smrg   if bits == 16:
952b8e80941Smrg      exp = ('imin', ('imax', exp, -28), 30)
953b8e80941Smrg   elif bits == 32:
954b8e80941Smrg      exp = ('imin', ('imax', exp, -252), 254)
955b8e80941Smrg   elif bits == 64:
956b8e80941Smrg      exp = ('imin', ('imax', exp, -2044), 2046)
957b8e80941Smrg   else:
958b8e80941Smrg      assert False
959b8e80941Smrg
960b8e80941Smrg   # Now we compute two powers of 2, one for exp/2 and one for exp-exp/2.
961b8e80941Smrg   # (We use ishr which isn't the same for -1, but the -1 case still works
962b8e80941Smrg   # since we use exp-exp/2 as the second exponent.)  While the spec
963b8e80941Smrg   # technically defines ldexp as f * 2.0^exp, simply multiplying once doesn't
964b8e80941Smrg   # work with denormals and doesn't allow for the full swing in exponents
965b8e80941Smrg   # that you can get with normalized values.  Instead, we create two powers
966b8e80941Smrg   # of two and multiply by them each in turn.  That way the effective range
967b8e80941Smrg   # of our exponent is doubled.
968b8e80941Smrg   pow2_1 = fexp2i(('ishr', exp, 1), bits)
969b8e80941Smrg   pow2_2 = fexp2i(('isub', exp, ('ishr', exp, 1)), bits)
970b8e80941Smrg   return ('fmul', ('fmul', f, pow2_1), pow2_2)
971b8e80941Smrg
972b8e80941Smrgoptimizations += [
973b8e80941Smrg   (('ldexp@16', 'x', 'exp'), ldexp('x', 'exp', 16), 'options->lower_ldexp'),
974b8e80941Smrg   (('ldexp@32', 'x', 'exp'), ldexp('x', 'exp', 32), 'options->lower_ldexp'),
975b8e80941Smrg   (('ldexp@64', 'x', 'exp'), ldexp('x', 'exp', 64), 'options->lower_ldexp'),
976b8e80941Smrg]
977b8e80941Smrg
978b8e80941Smrg# Unreal Engine 4 demo applications open-codes bitfieldReverse()
979b8e80941Smrgdef bitfield_reverse(u):
980b8e80941Smrg    step1 = ('ior', ('ishl', u, 16), ('ushr', u, 16))
981b8e80941Smrg    step2 = ('ior', ('ishl', ('iand', step1, 0x00ff00ff), 8), ('ushr', ('iand', step1, 0xff00ff00), 8))
982b8e80941Smrg    step3 = ('ior', ('ishl', ('iand', step2, 0x0f0f0f0f), 4), ('ushr', ('iand', step2, 0xf0f0f0f0), 4))
983b8e80941Smrg    step4 = ('ior', ('ishl', ('iand', step3, 0x33333333), 2), ('ushr', ('iand', step3, 0xcccccccc), 2))
984b8e80941Smrg    step5 = ('ior', ('ishl', ('iand', step4, 0x55555555), 1), ('ushr', ('iand', step4, 0xaaaaaaaa), 1))
985b8e80941Smrg
986b8e80941Smrg    return step5
987b8e80941Smrg
988b8e80941Smrgoptimizations += [(bitfield_reverse('x@32'), ('bitfield_reverse', 'x'), '!options->lower_bitfield_reverse')]
989b8e80941Smrg
990b8e80941Smrg# For any float comparison operation, "cmp", if you have "a == a && a cmp b"
991b8e80941Smrg# then the "a == a" is redundant because it's equivalent to "a is not NaN"
992b8e80941Smrg# and, if a is a NaN then the second comparison will fail anyway.
993b8e80941Smrgfor op in ['flt', 'fge', 'feq']:
994b8e80941Smrg   optimizations += [
995b8e80941Smrg      (('iand', ('feq', a, a), (op, a, b)), (op, a, b)),
996b8e80941Smrg      (('iand', ('feq', a, a), (op, b, a)), (op, b, a)),
997b8e80941Smrg   ]
998b8e80941Smrg
999b8e80941Smrg# Add optimizations to handle the case where the result of a ternary is
1000b8e80941Smrg# compared to a constant.  This way we can take things like
1001b8e80941Smrg#
1002b8e80941Smrg# (a ? 0 : 1) > 0
1003b8e80941Smrg#
1004b8e80941Smrg# and turn it into
1005b8e80941Smrg#
1006b8e80941Smrg# a ? (0 > 0) : (1 > 0)
1007b8e80941Smrg#
1008b8e80941Smrg# which constant folding will eat for lunch.  The resulting ternary will
1009b8e80941Smrg# further get cleaned up by the boolean reductions above and we will be
1010b8e80941Smrg# left with just the original variable "a".
1011b8e80941Smrgfor op in ['flt', 'fge', 'feq', 'fne',
1012b8e80941Smrg           'ilt', 'ige', 'ieq', 'ine', 'ult', 'uge']:
1013b8e80941Smrg   optimizations += [
1014b8e80941Smrg      ((op, ('bcsel', 'a', '#b', '#c'), '#d'),
1015b8e80941Smrg       ('bcsel', 'a', (op, 'b', 'd'), (op, 'c', 'd'))),
1016b8e80941Smrg      ((op, '#d', ('bcsel', a, '#b', '#c')),
1017b8e80941Smrg       ('bcsel', 'a', (op, 'd', 'b'), (op, 'd', 'c'))),
1018b8e80941Smrg   ]
1019b8e80941Smrg
1020b8e80941Smrg
1021b8e80941Smrg# For example, this converts things like
1022b8e80941Smrg#
1023b8e80941Smrg#    1 + mix(0, a - 1, condition)
1024b8e80941Smrg#
1025b8e80941Smrg# into
1026b8e80941Smrg#
1027b8e80941Smrg#    mix(1, (a-1)+1, condition)
1028b8e80941Smrg#
1029b8e80941Smrg# Other optimizations will rearrange the constants.
1030b8e80941Smrgfor op in ['fadd', 'fmul', 'iadd', 'imul']:
1031b8e80941Smrg   optimizations += [
1032b8e80941Smrg      ((op, ('bcsel(is_used_once)', a, '#b', c), '#d'), ('bcsel', a, (op, b, d), (op, c, d)))
1033b8e80941Smrg   ]
1034b8e80941Smrg
1035b8e80941Smrg# For derivatives in compute shaders, GLSL_NV_compute_shader_derivatives
1036b8e80941Smrg# states:
1037b8e80941Smrg#
1038b8e80941Smrg#     If neither layout qualifier is specified, derivatives in compute shaders
1039b8e80941Smrg#     return zero, which is consistent with the handling of built-in texture
1040b8e80941Smrg#     functions like texture() in GLSL 4.50 compute shaders.
1041b8e80941Smrgfor op in ['fddx', 'fddx_fine', 'fddx_coarse',
1042b8e80941Smrg           'fddy', 'fddy_fine', 'fddy_coarse']:
1043b8e80941Smrg   optimizations += [
1044b8e80941Smrg      ((op, 'a'), 0.0, 'info->stage == MESA_SHADER_COMPUTE && info->cs.derivative_group == DERIVATIVE_GROUP_NONE')
1045b8e80941Smrg]
1046b8e80941Smrg
1047b8e80941Smrg# This section contains "late" optimizations that should be run before
1048b8e80941Smrg# creating ffmas and calling regular optimizations for the final time.
1049b8e80941Smrg# Optimizations should go here if they help code generation and conflict
1050b8e80941Smrg# with the regular optimizations.
1051b8e80941Smrgbefore_ffma_optimizations = [
1052b8e80941Smrg   # Propagate constants down multiplication chains
1053b8e80941Smrg   (('~fmul(is_used_once)', ('fmul(is_used_once)', 'a(is_not_const)', '#b'), 'c(is_not_const)'), ('fmul', ('fmul', a, c), b)),
1054b8e80941Smrg   (('imul(is_used_once)', ('imul(is_used_once)', 'a(is_not_const)', '#b'), 'c(is_not_const)'), ('imul', ('imul', a, c), b)),
1055b8e80941Smrg   (('~fadd(is_used_once)', ('fadd(is_used_once)', 'a(is_not_const)', '#b'), 'c(is_not_const)'), ('fadd', ('fadd', a, c), b)),
1056b8e80941Smrg   (('iadd(is_used_once)', ('iadd(is_used_once)', 'a(is_not_const)', '#b'), 'c(is_not_const)'), ('iadd', ('iadd', a, c), b)),
1057b8e80941Smrg
1058b8e80941Smrg   (('~fadd', ('fmul', a, b), ('fmul', a, c)), ('fmul', a, ('fadd', b, c))),
1059b8e80941Smrg   (('iadd', ('imul', a, b), ('imul', a, c)), ('imul', a, ('iadd', b, c))),
1060b8e80941Smrg   (('~fadd', ('fneg', a), a), 0.0),
1061b8e80941Smrg   (('iadd', ('ineg', a), a), 0),
1062b8e80941Smrg   (('iadd', ('ineg', a), ('iadd', a, b)), b),
1063b8e80941Smrg   (('iadd', a, ('iadd', ('ineg', a), b)), b),
1064b8e80941Smrg   (('~fadd', ('fneg', a), ('fadd', a, b)), b),
1065b8e80941Smrg   (('~fadd', a, ('fadd', ('fneg', a), b)), b),
1066b8e80941Smrg]
1067b8e80941Smrg
1068b8e80941Smrg# This section contains "late" optimizations that should be run after the
1069b8e80941Smrg# regular optimizations have finished.  Optimizations should go here if
1070b8e80941Smrg# they help code generation but do not necessarily produce code that is
1071b8e80941Smrg# more easily optimizable.
1072b8e80941Smrglate_optimizations = [
1073b8e80941Smrg   # Most of these optimizations aren't quite safe when you get infinity or
1074b8e80941Smrg   # Nan involved but the first one should be fine.
1075b8e80941Smrg   (('flt',          ('fadd', a, b),  0.0), ('flt',          a, ('fneg', b))),
1076b8e80941Smrg   (('flt', ('fneg', ('fadd', a, b)), 0.0), ('flt', ('fneg', a),         b)),
1077b8e80941Smrg   (('~fge',          ('fadd', a, b),  0.0), ('fge',          a, ('fneg', b))),
1078b8e80941Smrg   (('~fge', ('fneg', ('fadd', a, b)), 0.0), ('fge', ('fneg', a),         b)),
1079b8e80941Smrg   (('~feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))),
1080b8e80941Smrg   (('~fne', ('fadd', a, b), 0.0), ('fne', a, ('fneg', b))),
1081b8e80941Smrg
1082b8e80941Smrg   (('~fge', ('fmin(is_used_once)', ('fadd(is_used_once)', a, b), ('fadd', c, d)), 0.0), ('iand', ('fge', a, ('fneg', b)), ('fge', c, ('fneg', d)))),
1083b8e80941Smrg
1084b8e80941Smrg   (('fdot2', a, b), ('fdot_replicated2', a, b), 'options->fdot_replicates'),
1085b8e80941Smrg   (('fdot3', a, b), ('fdot_replicated3', a, b), 'options->fdot_replicates'),
1086b8e80941Smrg   (('fdot4', a, b), ('fdot_replicated4', a, b), 'options->fdot_replicates'),
1087b8e80941Smrg   (('fdph', a, b), ('fdph_replicated', a, b), 'options->fdot_replicates'),
1088b8e80941Smrg
1089b8e80941Smrg   # we do these late so that we don't get in the way of creating ffmas
1090b8e80941Smrg   (('fmin', ('fadd(is_used_once)', '#c', a), ('fadd(is_used_once)', '#c', b)), ('fadd', c, ('fmin', a, b))),
1091b8e80941Smrg   (('fmax', ('fadd(is_used_once)', '#c', a), ('fadd(is_used_once)', '#c', b)), ('fadd', c, ('fmax', a, b))),
1092b8e80941Smrg
1093b8e80941Smrg   (('bcsel', a, 0, ('b2f32', ('inot', 'b@bool'))), ('b2f32', ('inot', ('ior', a, b)))),
1094b8e80941Smrg]
1095b8e80941Smrg
1096b8e80941Smrgprint(nir_algebraic.AlgebraicPass("nir_opt_algebraic", optimizations).render())
1097b8e80941Smrgprint(nir_algebraic.AlgebraicPass("nir_opt_algebraic_before_ffma",
1098b8e80941Smrg                                  before_ffma_optimizations).render())
1099b8e80941Smrgprint(nir_algebraic.AlgebraicPass("nir_opt_algebraic_late",
1100b8e80941Smrg                                  late_optimizations).render())
1101