#
# Copyright (C) 2020 Microsoft Corporation
#
# Copyright (C) 2018 Alyssa Rosenzweig
#
# Copyright (C) 2016 Intel Corporation
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
import argparse
import sys
import math

# Pattern variable used by the nir_algebraic rule tuples below.
a = 'a'

# The nir_lower_bit_size() pass gets rid of all 8-bit ALUs but inserts new
# u2u8 and i2i8 operations to convert the result back to the original type
# after the arithmetic operation is done. Those u2u8 and i2i8 operations,
# like any other 8-bit operations, are not supported by DXIL and need to be
# discarded; the dxil_nir_lower_8bit_conv() pass is here for that.
# Similarly, some hardware doesn't support 16-bit values.

# Rewrite rules accumulated by remove_unsupported_casts() further down.
no_8bit_conv = []
no_16bit_conv = []
def remove_unsupported_casts(arr, bit_size, mask, max_unsigned_float, min_signed_float, max_signed_float):
    """Append algebraic rewrites to *arr* that fold away casts routed
    through the unsupported intermediate size *bit_size*.

    Each rule rewrites a two-step conversion sequence (outer cast applied to
    an inner down-cast to *bit_size*) into an equivalent sequence that never
    materializes a *bit_size* value: integer sources are truncated with a
    mask (unsigned) or a shift pair (signed), and float sources are clamped
    to the representable range given by *max_unsigned_float*,
    *min_signed_float* and *max_signed_float*.
    """
    all_sizes = (16, 32, 64)
    for dst_base in ('u2u', 'i2i', 'u2f', 'i2f'):
        for dst_sz in all_sizes:
            if dst_sz == bit_size:
                continue
            outer_op = dst_base + str(dst_sz)
            is_unsigned = outer_op[0] == 'u'
            # Integral inner casts: the truncation to bit_size is emulated
            # by masking (unsigned) or a shift-left/shift-right pair that
            # sign-extends (signed).
            for trunc_base in ('u2u', 'i2i'):
                inner_op = trunc_base + str(bit_size)
                for src_sz in all_sizes:
                    if src_sz == bit_size:
                        continue
                    orig_seq = (outer_op, (inner_op, 'a@' + str(src_sz)))
                    if is_unsigned:
                        new_seq = ('iand', a, mask)
                    else:
                        shift = src_sz - bit_size
                        new_seq = ('ishr', ('ishl', a, shift), shift)
                    # Make sure the destination is the right type/size.
                    if dst_sz != src_sz or outer_op[2] != inner_op[0]:
                        new_seq = (outer_op, new_seq)
                    arr.append((orig_seq, new_seq))
            # Float inner casts.
            for trunc_base in ('f2u', 'f2i'):
                inner_op = trunc_base + str(bit_size)
                if outer_op[2] == 'f':
                    # Float source and float destination: clamp via
                    # fmin/fmax, then ensure the right float size.
                    for src_sz in all_sizes:
                        if src_sz == bit_size:
                            continue
                        orig_seq = (outer_op, (inner_op, 'a@' + str(src_sz)))
                        if is_unsigned:
                            new_seq = ('fmin', ('fmax', a, 0.0), max_unsigned_float)
                        else:
                            new_seq = ('fmin', ('fmax', a, min_signed_float), max_signed_float)
                        if dst_sz != src_sz:
                            new_seq = ('f2f' + str(dst_sz), new_seq)
                        arr.append((orig_seq, new_seq))
                else:
                    # Float source, integral destination: convert straight
                    # to the destination integer size, then truncate.
                    orig_seq = (outer_op, (inner_op, a))
                    float_conv = ('f2' + inner_op[2] + str(dst_sz), a)
                    if is_unsigned:
                        new_seq = ('iand', float_conv, mask)
                    else:
                        shift = dst_sz - bit_size
                        new_seq = ('ishr', ('ishl', float_conv, shift), shift)
                    arr.append((orig_seq, new_seq))
907ec681f3Smrg
# Populate the cast-elimination tables for the two unsupported sizes.
remove_unsupported_casts(no_8bit_conv, 8, 0xff, 255.0, -128.0, 127.0)
remove_unsupported_casts(no_16bit_conv, 16, 0xffff, 65535.0, -32768.0, 32767.0)

# Boolean conversions: reduce every x2b form to a plain compare against zero.
lower_x2b = [
  (('b2b32', 'a'), ('b2i32', 'a')),
  (('b2b1', 'a'), ('i2b1', 'a')),
  (('i2b1', 'a'), ('ine', a, 0)),
  (('f2b1', 'a'), ('fneu', a, 0)),
]

# 32-bit <-> 16-bit float round-trips can use the half packing opcodes
# instead of real 16-bit conversions.
no_16bit_conv += [
  (('f2f32', ('u2u16', 'a@32')), ('unpack_half_2x16_split_x', 'a')),
  (('u2u32', ('f2f16_rtz', 'a@32')), ('pack_half_2x16_split', 'a', 0)),
]

# inot has no DXIL equivalent; xor with all-ones instead.
lower_inot = [
    (('inot', a), ('ixor', a, -1)),
]
1097ec681f3Smrg
def main():
    """CLI entry point: add the nir_algebraic import path, then emit the passes."""
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('-p', '--import-path', required=True)
    opts = arg_parser.parse_args()
    # nir_algebraic lives in the Mesa build tree, not on the default path.
    sys.path.insert(0, opts.import_path)
    run()
1167ec681f3Smrg
1177ec681f3Smrg
def run():
    """Render every lowering pass defined in this module as C source on stdout."""
    import nir_algebraic  # pylint: disable=import-error

    print('#include "dxil_nir.h"')

    # (pass name, rewrite table) pairs, emitted in this exact order.
    passes = (
        ("dxil_nir_lower_8bit_conv", no_8bit_conv),
        ("dxil_nir_lower_16bit_conv", no_16bit_conv),
        ("dxil_nir_lower_x2b", lower_x2b),
        ("dxil_nir_lower_inot", lower_inot),
    )
    for pass_name, xforms in passes:
        print(nir_algebraic.AlgebraicPass(pass_name, xforms).render())
1317ec681f3Smrg
# Script entry guard: only generate output when executed directly.
if __name__ == '__main__':
    main()
134