101e04c3fSmrg/* 201e04c3fSmrg * Copyright © 2015 Intel Corporation 301e04c3fSmrg * 401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a 501e04c3fSmrg * copy of this software and associated documentation files (the "Software"), 601e04c3fSmrg * to deal in the Software without restriction, including without limitation 701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the 901e04c3fSmrg * Software is furnished to do so, subject to the following conditions: 1001e04c3fSmrg * 1101e04c3fSmrg * The above copyright notice and this permission notice (including the next 1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the 1301e04c3fSmrg * Software. 1401e04c3fSmrg * 1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 2101e04c3fSmrg * IN THE SOFTWARE. 2201e04c3fSmrg * 2301e04c3fSmrg */ 2401e04c3fSmrg 2501e04c3fSmrg#include "nir.h" 2601e04c3fSmrg#include "nir_builder.h" 2701e04c3fSmrg 2801e04c3fSmrg/* 2901e04c3fSmrg * lowers: 3001e04c3fSmrg * 3101e04c3fSmrg * packDouble2x32(foo) -> packDouble2x32Split(foo.x, foo.y) 3201e04c3fSmrg * unpackDouble2x32(foo) -> vec2(unpackDouble2x32_x(foo), unpackDouble2x32_y(foo)) 3301e04c3fSmrg * packInt2x32(foo) -> packInt2x32Split(foo.x, foo.y) 3401e04c3fSmrg * unpackInt2x32(foo) -> vec2(unpackInt2x32_x(foo), unpackInt2x32_y(foo)) 3501e04c3fSmrg */ 3601e04c3fSmrg 3701e04c3fSmrgstatic nir_ssa_def * 3801e04c3fSmrglower_pack_64_from_32(nir_builder *b, nir_ssa_def *src) 3901e04c3fSmrg{ 4001e04c3fSmrg return nir_pack_64_2x32_split(b, nir_channel(b, src, 0), 4101e04c3fSmrg nir_channel(b, src, 1)); 4201e04c3fSmrg} 4301e04c3fSmrg 4401e04c3fSmrgstatic nir_ssa_def * 4501e04c3fSmrglower_unpack_64_to_32(nir_builder *b, nir_ssa_def *src) 4601e04c3fSmrg{ 4701e04c3fSmrg return nir_vec2(b, nir_unpack_64_2x32_split_x(b, src), 4801e04c3fSmrg nir_unpack_64_2x32_split_y(b, src)); 4901e04c3fSmrg} 5001e04c3fSmrg 5101e04c3fSmrgstatic nir_ssa_def * 5201e04c3fSmrglower_pack_32_from_16(nir_builder *b, nir_ssa_def *src) 5301e04c3fSmrg{ 5401e04c3fSmrg return nir_pack_32_2x16_split(b, nir_channel(b, src, 0), 5501e04c3fSmrg nir_channel(b, src, 1)); 5601e04c3fSmrg} 5701e04c3fSmrg 5801e04c3fSmrgstatic nir_ssa_def * 5901e04c3fSmrglower_unpack_32_to_16(nir_builder *b, nir_ssa_def *src) 6001e04c3fSmrg{ 6101e04c3fSmrg return nir_vec2(b, nir_unpack_32_2x16_split_x(b, src), 6201e04c3fSmrg nir_unpack_32_2x16_split_y(b, src)); 6301e04c3fSmrg} 6401e04c3fSmrg 6501e04c3fSmrgstatic nir_ssa_def * 6601e04c3fSmrglower_pack_64_from_16(nir_builder *b, nir_ssa_def *src) 6701e04c3fSmrg{ 6801e04c3fSmrg nir_ssa_def *xy = nir_pack_32_2x16_split(b, nir_channel(b, src, 0), 6901e04c3fSmrg nir_channel(b, src, 1)); 7001e04c3fSmrg 7101e04c3fSmrg nir_ssa_def *zw = nir_pack_32_2x16_split(b, nir_channel(b, src, 2), 7201e04c3fSmrg nir_channel(b, src, 3)); 7301e04c3fSmrg 7401e04c3fSmrg return nir_pack_64_2x32_split(b, xy, zw); 7501e04c3fSmrg} 7601e04c3fSmrg 7701e04c3fSmrgstatic nir_ssa_def * 7801e04c3fSmrglower_unpack_64_to_16(nir_builder *b, nir_ssa_def *src) 7901e04c3fSmrg{ 8001e04c3fSmrg nir_ssa_def *xy = nir_unpack_64_2x32_split_x(b, src); 8101e04c3fSmrg nir_ssa_def *zw = nir_unpack_64_2x32_split_y(b, src); 8201e04c3fSmrg 8301e04c3fSmrg return nir_vec4(b, nir_unpack_32_2x16_split_x(b, xy), 8401e04c3fSmrg nir_unpack_32_2x16_split_y(b, xy), 8501e04c3fSmrg nir_unpack_32_2x16_split_x(b, zw), 8601e04c3fSmrg nir_unpack_32_2x16_split_y(b, zw)); 8701e04c3fSmrg} 8801e04c3fSmrg 897ec681f3Smrgstatic nir_ssa_def * 907ec681f3Smrglower_pack_32_from_8(nir_builder *b, nir_ssa_def *src) 917ec681f3Smrg{ 927ec681f3Smrg return nir_pack_32_4x8_split(b, nir_channel(b, src, 0), 937ec681f3Smrg nir_channel(b, src, 1), 947ec681f3Smrg nir_channel(b, src, 2), 957ec681f3Smrg nir_channel(b, src, 3)); 967ec681f3Smrg} 977ec681f3Smrg 9801e04c3fSmrgstatic bool 997ec681f3Smrglower_pack_instr(nir_builder *b, nir_instr *instr, void *data) 10001e04c3fSmrg{ 1017ec681f3Smrg if (instr->type != nir_instr_type_alu) 1027ec681f3Smrg return false; 1037ec681f3Smrg 1047ec681f3Smrg nir_alu_instr *alu_instr = (nir_alu_instr *) instr; 1057ec681f3Smrg 1067ec681f3Smrg if (alu_instr->op != nir_op_pack_64_2x32 && 1077ec681f3Smrg alu_instr->op != nir_op_unpack_64_2x32 && 1087ec681f3Smrg alu_instr->op != nir_op_pack_64_4x16 && 1097ec681f3Smrg alu_instr->op != nir_op_unpack_64_4x16 && 1107ec681f3Smrg alu_instr->op != nir_op_pack_32_2x16 && 1117ec681f3Smrg alu_instr->op != nir_op_unpack_32_2x16 && 1127ec681f3Smrg alu_instr->op != nir_op_pack_32_4x8) 1137ec681f3Smrg return false; 1147ec681f3Smrg 1157ec681f3Smrg b->cursor = nir_before_instr(&alu_instr->instr); 1167ec681f3Smrg 1177ec681f3Smrg nir_ssa_def *src = nir_ssa_for_alu_src(b, alu_instr, 0); 1187ec681f3Smrg nir_ssa_def *dest; 1197ec681f3Smrg 1207ec681f3Smrg switch (alu_instr->op) { 1217ec681f3Smrg case nir_op_pack_64_2x32: 1227ec681f3Smrg dest = lower_pack_64_from_32(b, src); 1237ec681f3Smrg break; 1247ec681f3Smrg case nir_op_unpack_64_2x32: 1257ec681f3Smrg dest = lower_unpack_64_to_32(b, src); 1267ec681f3Smrg break; 1277ec681f3Smrg case nir_op_pack_64_4x16: 1287ec681f3Smrg dest = lower_pack_64_from_16(b, src); 1297ec681f3Smrg break; 1307ec681f3Smrg case nir_op_unpack_64_4x16: 1317ec681f3Smrg dest = lower_unpack_64_to_16(b, src); 1327ec681f3Smrg break; 1337ec681f3Smrg case nir_op_pack_32_2x16: 1347ec681f3Smrg dest = lower_pack_32_from_16(b, src); 1357ec681f3Smrg break; 1367ec681f3Smrg case nir_op_unpack_32_2x16: 1377ec681f3Smrg dest = lower_unpack_32_to_16(b, src); 1387ec681f3Smrg break; 1397ec681f3Smrg case nir_op_pack_32_4x8: 1407ec681f3Smrg dest = lower_pack_32_from_8(b, src); 1417ec681f3Smrg break; 1427ec681f3Smrg default: 1437ec681f3Smrg unreachable("Impossible opcode"); 14401e04c3fSmrg } 1457ec681f3Smrg nir_ssa_def_rewrite_uses(&alu_instr->dest.dest.ssa, dest); 1467ec681f3Smrg nir_instr_remove(&alu_instr->instr); 14701e04c3fSmrg 1487ec681f3Smrg return true; 14901e04c3fSmrg} 15001e04c3fSmrg 15101e04c3fSmrgbool 15201e04c3fSmrgnir_lower_pack(nir_shader *shader) 15301e04c3fSmrg{ 1547ec681f3Smrg return nir_shader_instructions_pass(shader, lower_pack_instr, 1557ec681f3Smrg nir_metadata_block_index | nir_metadata_dominance, NULL); 15601e04c3fSmrg} 157