1/* 2 * Copyright © 2015 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 */ 24 25#include "nir.h" 26#include "nir_builder.h" 27 28/* 29 * lowers: 30 * 31 * packDouble2x32(foo) -> packDouble2x32Split(foo.x, foo.y) 32 * unpackDouble2x32(foo) -> vec2(unpackDouble2x32_x(foo), unpackDouble2x32_y(foo)) 33 * packInt2x32(foo) -> packInt2x32Split(foo.x, foo.y) 34 * unpackInt2x32(foo) -> vec2(unpackInt2x32_x(foo), unpackInt2x32_y(foo)) 35 */ 36 37static nir_ssa_def * 38lower_pack_64_from_32(nir_builder *b, nir_ssa_def *src) 39{ 40 return nir_pack_64_2x32_split(b, nir_channel(b, src, 0), 41 nir_channel(b, src, 1)); 42} 43 44static nir_ssa_def * 45lower_unpack_64_to_32(nir_builder *b, nir_ssa_def *src) 46{ 47 return nir_vec2(b, nir_unpack_64_2x32_split_x(b, src), 48 nir_unpack_64_2x32_split_y(b, src)); 49} 50 51static nir_ssa_def * 52lower_pack_32_from_16(nir_builder *b, nir_ssa_def *src) 53{ 54 return nir_pack_32_2x16_split(b, nir_channel(b, src, 0), 55 nir_channel(b, src, 1)); 56} 57 58static nir_ssa_def * 59lower_unpack_32_to_16(nir_builder *b, nir_ssa_def *src) 60{ 61 return nir_vec2(b, nir_unpack_32_2x16_split_x(b, src), 62 nir_unpack_32_2x16_split_y(b, src)); 63} 64 65static nir_ssa_def * 66lower_pack_64_from_16(nir_builder *b, nir_ssa_def *src) 67{ 68 nir_ssa_def *xy = nir_pack_32_2x16_split(b, nir_channel(b, src, 0), 69 nir_channel(b, src, 1)); 70 71 nir_ssa_def *zw = nir_pack_32_2x16_split(b, nir_channel(b, src, 2), 72 nir_channel(b, src, 3)); 73 74 return nir_pack_64_2x32_split(b, xy, zw); 75} 76 77static nir_ssa_def * 78lower_unpack_64_to_16(nir_builder *b, nir_ssa_def *src) 79{ 80 nir_ssa_def *xy = nir_unpack_64_2x32_split_x(b, src); 81 nir_ssa_def *zw = nir_unpack_64_2x32_split_y(b, src); 82 83 return nir_vec4(b, nir_unpack_32_2x16_split_x(b, xy), 84 nir_unpack_32_2x16_split_y(b, xy), 85 nir_unpack_32_2x16_split_x(b, zw), 86 nir_unpack_32_2x16_split_y(b, zw)); 87} 88 89static bool 90lower_pack_impl(nir_function_impl *impl) 91{ 92 nir_builder b; 93 nir_builder_init(&b, impl); 94 bool progress = false; 95 96 nir_foreach_block(block, impl) { 97 nir_foreach_instr_safe(instr, block) { 98 if (instr->type != nir_instr_type_alu) 99 continue; 100 101 nir_alu_instr *alu_instr = (nir_alu_instr *) instr; 102 103 if (alu_instr->op != nir_op_pack_64_2x32 && 104 alu_instr->op != nir_op_unpack_64_2x32 && 105 alu_instr->op != nir_op_pack_64_4x16 && 106 alu_instr->op != nir_op_unpack_64_4x16 && 107 alu_instr->op != nir_op_pack_32_2x16 && 108 alu_instr->op != nir_op_unpack_32_2x16) 109 continue; 110 111 b.cursor = nir_before_instr(&alu_instr->instr); 112 113 nir_ssa_def *src = nir_ssa_for_alu_src(&b, alu_instr, 0); 114 nir_ssa_def *dest; 115 116 switch (alu_instr->op) { 117 case nir_op_pack_64_2x32: 118 dest = lower_pack_64_from_32(&b, src); 119 break; 120 case nir_op_unpack_64_2x32: 121 dest = lower_unpack_64_to_32(&b, src); 122 break; 123 case nir_op_pack_64_4x16: 124 dest = lower_pack_64_from_16(&b, src); 125 break; 126 case nir_op_unpack_64_4x16: 127 dest = lower_unpack_64_to_16(&b, src); 128 break; 129 case nir_op_pack_32_2x16: 130 dest = lower_pack_32_from_16(&b, src); 131 break; 132 case nir_op_unpack_32_2x16: 133 dest = lower_unpack_32_to_16(&b, src); 134 break; 135 default: 136 unreachable("Impossible opcode"); 137 } 138 139 nir_ssa_def_rewrite_uses(&alu_instr->dest.dest.ssa, nir_src_for_ssa(dest)); 140 nir_instr_remove(&alu_instr->instr); 141 nir_metadata_preserve(impl, nir_metadata_block_index | 142 nir_metadata_dominance); 143 progress = true; 144 } 145 } 146 147 return progress; 148} 149 150bool 151nir_lower_pack(nir_shader *shader) 152{ 153 bool progress = false; 154 155 nir_foreach_function(function, shader) { 156 if (function->impl) 157 progress |= lower_pack_impl(function->impl); 158 } 159 160 return false; 161} 162