101e04c3fSmrg/*
201e04c3fSmrg * Copyright © 2015 Intel Corporation
301e04c3fSmrg *
401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a
501e04c3fSmrg * copy of this software and associated documentation files (the "Software"),
601e04c3fSmrg * to deal in the Software without restriction, including without limitation
701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the
901e04c3fSmrg * Software is furnished to do so, subject to the following conditions:
1001e04c3fSmrg *
1101e04c3fSmrg * The above copyright notice and this permission notice (including the next
1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the
1301e04c3fSmrg * Software.
1401e04c3fSmrg *
1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
2101e04c3fSmrg * IN THE SOFTWARE.
2201e04c3fSmrg *
2301e04c3fSmrg */
2401e04c3fSmrg
2501e04c3fSmrg#include "nir.h"
2601e04c3fSmrg#include "nir_builder.h"
2701e04c3fSmrg
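/*
 * lowers:
 *
 * packDouble2x32(foo) -> packDouble2x32Split(foo.x, foo.y)
 * unpackDouble2x32(foo) -> vec2(unpackDouble2x32_x(foo), unpackDouble2x32_y(foo))
 * packInt2x32(foo) -> packInt2x32Split(foo.x, foo.y)
 * unpackInt2x32(foo) -> vec2(unpackInt2x32_x(foo), unpackInt2x32_y(foo))
 *
 * and, written with NIR opcode names:
 *
 * pack_32_2x16(foo) -> pack_32_2x16_split(foo.x, foo.y)
 * unpack_32_2x16(foo) -> vec2(unpack_32_2x16_split_x(foo), unpack_32_2x16_split_y(foo))
 * pack_64_4x16(foo) -> pack_64_2x32_split(pack_32_2x16_split(foo.x, foo.y),
 *                                         pack_32_2x16_split(foo.z, foo.w))
 * unpack_64_4x16(foo) -> vec4(unpack_32_2x16_split_x(lo), unpack_32_2x16_split_y(lo),
 *                             unpack_32_2x16_split_x(hi), unpack_32_2x16_split_y(hi))
 *    where lo = unpack_64_2x32_split_x(foo) and hi = unpack_64_2x32_split_y(foo)
 * pack_32_4x8(foo) -> pack_32_4x8_split(foo.x, foo.y, foo.z, foo.w)
 */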
3601e04c3fSmrg
3701e04c3fSmrgstatic nir_ssa_def *
3801e04c3fSmrglower_pack_64_from_32(nir_builder *b, nir_ssa_def *src)
3901e04c3fSmrg{
4001e04c3fSmrg   return nir_pack_64_2x32_split(b, nir_channel(b, src, 0),
4101e04c3fSmrg                                    nir_channel(b, src, 1));
4201e04c3fSmrg}
4301e04c3fSmrg
4401e04c3fSmrgstatic nir_ssa_def *
4501e04c3fSmrglower_unpack_64_to_32(nir_builder *b, nir_ssa_def *src)
4601e04c3fSmrg{
4701e04c3fSmrg   return nir_vec2(b, nir_unpack_64_2x32_split_x(b, src),
4801e04c3fSmrg                      nir_unpack_64_2x32_split_y(b, src));
4901e04c3fSmrg}
5001e04c3fSmrg
5101e04c3fSmrgstatic nir_ssa_def *
5201e04c3fSmrglower_pack_32_from_16(nir_builder *b, nir_ssa_def *src)
5301e04c3fSmrg{
5401e04c3fSmrg   return nir_pack_32_2x16_split(b, nir_channel(b, src, 0),
5501e04c3fSmrg                                    nir_channel(b, src, 1));
5601e04c3fSmrg}
5701e04c3fSmrg
5801e04c3fSmrgstatic nir_ssa_def *
5901e04c3fSmrglower_unpack_32_to_16(nir_builder *b, nir_ssa_def *src)
6001e04c3fSmrg{
6101e04c3fSmrg   return nir_vec2(b, nir_unpack_32_2x16_split_x(b, src),
6201e04c3fSmrg                      nir_unpack_32_2x16_split_y(b, src));
6301e04c3fSmrg}
6401e04c3fSmrg
6501e04c3fSmrgstatic nir_ssa_def *
6601e04c3fSmrglower_pack_64_from_16(nir_builder *b, nir_ssa_def *src)
6701e04c3fSmrg{
6801e04c3fSmrg   nir_ssa_def *xy = nir_pack_32_2x16_split(b, nir_channel(b, src, 0),
6901e04c3fSmrg                                               nir_channel(b, src, 1));
7001e04c3fSmrg
7101e04c3fSmrg   nir_ssa_def *zw = nir_pack_32_2x16_split(b, nir_channel(b, src, 2),
7201e04c3fSmrg                                               nir_channel(b, src, 3));
7301e04c3fSmrg
7401e04c3fSmrg   return nir_pack_64_2x32_split(b, xy, zw);
7501e04c3fSmrg}
7601e04c3fSmrg
7701e04c3fSmrgstatic nir_ssa_def *
7801e04c3fSmrglower_unpack_64_to_16(nir_builder *b, nir_ssa_def *src)
7901e04c3fSmrg{
8001e04c3fSmrg   nir_ssa_def *xy = nir_unpack_64_2x32_split_x(b, src);
8101e04c3fSmrg   nir_ssa_def *zw = nir_unpack_64_2x32_split_y(b, src);
8201e04c3fSmrg
8301e04c3fSmrg   return nir_vec4(b, nir_unpack_32_2x16_split_x(b, xy),
8401e04c3fSmrg                      nir_unpack_32_2x16_split_y(b, xy),
8501e04c3fSmrg                      nir_unpack_32_2x16_split_x(b, zw),
8601e04c3fSmrg                      nir_unpack_32_2x16_split_y(b, zw));
8701e04c3fSmrg}
8801e04c3fSmrg
897ec681f3Smrgstatic nir_ssa_def *
907ec681f3Smrglower_pack_32_from_8(nir_builder *b, nir_ssa_def *src)
917ec681f3Smrg{
927ec681f3Smrg   return nir_pack_32_4x8_split(b, nir_channel(b, src, 0),
937ec681f3Smrg                                   nir_channel(b, src, 1),
947ec681f3Smrg                                   nir_channel(b, src, 2),
957ec681f3Smrg                                   nir_channel(b, src, 3));
967ec681f3Smrg}
977ec681f3Smrg
9801e04c3fSmrgstatic bool
997ec681f3Smrglower_pack_instr(nir_builder *b, nir_instr *instr, void *data)
10001e04c3fSmrg{
1017ec681f3Smrg   if (instr->type != nir_instr_type_alu)
1027ec681f3Smrg      return false;
1037ec681f3Smrg
1047ec681f3Smrg   nir_alu_instr *alu_instr = (nir_alu_instr *) instr;
1057ec681f3Smrg
1067ec681f3Smrg   if (alu_instr->op != nir_op_pack_64_2x32 &&
1077ec681f3Smrg       alu_instr->op != nir_op_unpack_64_2x32 &&
1087ec681f3Smrg       alu_instr->op != nir_op_pack_64_4x16 &&
1097ec681f3Smrg       alu_instr->op != nir_op_unpack_64_4x16 &&
1107ec681f3Smrg       alu_instr->op != nir_op_pack_32_2x16 &&
1117ec681f3Smrg       alu_instr->op != nir_op_unpack_32_2x16 &&
1127ec681f3Smrg       alu_instr->op != nir_op_pack_32_4x8)
1137ec681f3Smrg      return false;
1147ec681f3Smrg
1157ec681f3Smrg   b->cursor = nir_before_instr(&alu_instr->instr);
1167ec681f3Smrg
1177ec681f3Smrg   nir_ssa_def *src = nir_ssa_for_alu_src(b, alu_instr, 0);
1187ec681f3Smrg   nir_ssa_def *dest;
1197ec681f3Smrg
1207ec681f3Smrg   switch (alu_instr->op) {
1217ec681f3Smrg   case nir_op_pack_64_2x32:
1227ec681f3Smrg      dest = lower_pack_64_from_32(b, src);
1237ec681f3Smrg      break;
1247ec681f3Smrg   case nir_op_unpack_64_2x32:
1257ec681f3Smrg      dest = lower_unpack_64_to_32(b, src);
1267ec681f3Smrg      break;
1277ec681f3Smrg   case nir_op_pack_64_4x16:
1287ec681f3Smrg      dest = lower_pack_64_from_16(b, src);
1297ec681f3Smrg      break;
1307ec681f3Smrg   case nir_op_unpack_64_4x16:
1317ec681f3Smrg      dest = lower_unpack_64_to_16(b, src);
1327ec681f3Smrg      break;
1337ec681f3Smrg   case nir_op_pack_32_2x16:
1347ec681f3Smrg      dest = lower_pack_32_from_16(b, src);
1357ec681f3Smrg      break;
1367ec681f3Smrg   case nir_op_unpack_32_2x16:
1377ec681f3Smrg      dest = lower_unpack_32_to_16(b, src);
1387ec681f3Smrg      break;
1397ec681f3Smrg   case nir_op_pack_32_4x8:
1407ec681f3Smrg      dest = lower_pack_32_from_8(b, src);
1417ec681f3Smrg      break;
1427ec681f3Smrg   default:
1437ec681f3Smrg      unreachable("Impossible opcode");
14401e04c3fSmrg   }
1457ec681f3Smrg   nir_ssa_def_rewrite_uses(&alu_instr->dest.dest.ssa, dest);
1467ec681f3Smrg   nir_instr_remove(&alu_instr->instr);
14701e04c3fSmrg
1487ec681f3Smrg   return true;
14901e04c3fSmrg}
15001e04c3fSmrg
15101e04c3fSmrgbool
15201e04c3fSmrgnir_lower_pack(nir_shader *shader)
15301e04c3fSmrg{
1547ec681f3Smrg   return nir_shader_instructions_pass(shader, lower_pack_instr,
1557ec681f3Smrg         nir_metadata_block_index | nir_metadata_dominance, NULL);
15601e04c3fSmrg}
157