1/*
2 * Copyright © 2018-2019 Igalia S.L.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include "ir3_nir.h"
25#include "compiler/nir/nir_builder.h"
26
27/**
28 * This pass moves to NIR certain offset computations for different I/O
29 * ops that are currently implemented on the IR3 backend compiler, to
30 * give NIR a chance to optimize them:
31 *
32 * - Dword-offset for SSBO load, store and atomics: A new, similar intrinsic
33 *   is emitted that replaces the original one, adding a new source that
34 *   holds the result of the original byte-offset source divided by 4.
35 */
36
37
38/* Returns the ir3-specific intrinsic opcode corresponding to an SSBO
39 * instruction that is handled by this pass. It also conveniently returns
40 * the offset source index in @offset_src_idx.
41 *
42 * If @intrinsic is not SSBO, or it is not handled by the pass, -1 is
43 * returned.
44 */
45static int
46get_ir3_intrinsic_for_ssbo_intrinsic(unsigned intrinsic,
47									 uint8_t *offset_src_idx)
48{
49	debug_assert(offset_src_idx);
50
51	*offset_src_idx = 1;
52
53	switch (intrinsic) {
54	case nir_intrinsic_store_ssbo:
55		*offset_src_idx = 2;
56		return nir_intrinsic_store_ssbo_ir3;
57	case nir_intrinsic_load_ssbo:
58		return nir_intrinsic_load_ssbo_ir3;
59	case nir_intrinsic_ssbo_atomic_add:
60		return nir_intrinsic_ssbo_atomic_add_ir3;
61	case nir_intrinsic_ssbo_atomic_imin:
62		return nir_intrinsic_ssbo_atomic_imin_ir3;
63	case nir_intrinsic_ssbo_atomic_umin:
64		return nir_intrinsic_ssbo_atomic_umin_ir3;
65	case nir_intrinsic_ssbo_atomic_imax:
66		return nir_intrinsic_ssbo_atomic_imax_ir3;
67	case nir_intrinsic_ssbo_atomic_umax:
68		return nir_intrinsic_ssbo_atomic_umax_ir3;
69	case nir_intrinsic_ssbo_atomic_and:
70		return nir_intrinsic_ssbo_atomic_and_ir3;
71	case nir_intrinsic_ssbo_atomic_or:
72		return nir_intrinsic_ssbo_atomic_or_ir3;
73	case nir_intrinsic_ssbo_atomic_xor:
74		return nir_intrinsic_ssbo_atomic_xor_ir3;
75	case nir_intrinsic_ssbo_atomic_exchange:
76		return nir_intrinsic_ssbo_atomic_exchange_ir3;
77	case nir_intrinsic_ssbo_atomic_comp_swap:
78		return nir_intrinsic_ssbo_atomic_comp_swap_ir3;
79	default:
80		break;
81	}
82
83	return -1;
84}
85
86static nir_ssa_def *
87check_and_propagate_bit_shift32(nir_builder *b, nir_ssa_def *offset,
88								nir_alu_instr *alu_instr, int32_t direction,
89								int32_t shift)
90{
91	debug_assert(alu_instr->src[1].src.is_ssa);
92	nir_ssa_def *shift_ssa = alu_instr->src[1].src.ssa;
93
94	/* Only propagate if the shift is a const value so we can check value range
95	 * statically.
96	 */
97	nir_const_value *const_val = nir_src_as_const_value(alu_instr->src[1].src);
98	if (!const_val)
99		return NULL;
100
101	int32_t current_shift = const_val[0].i32 * direction;
102	int32_t new_shift = current_shift + shift;
103
104	/* If the merge would reverse the direction, bail out.
105	 * e.g, 'x << 2' then 'x >> 4' is not 'x >> 2'.
106	 */
107	if (current_shift * new_shift < 0)
108		return NULL;
109
110	/* If the propagation would overflow an int32_t, bail out too to be on the
111	 * safe side.
112	 */
113	if (new_shift < -31 || new_shift > 31)
114		return NULL;
115
116	b->cursor = nir_before_instr(&alu_instr->instr);
117
118	/* Add or substract shift depending on the final direction (SHR vs. SHL). */
119	if (shift * direction < 0)
120		shift_ssa = nir_isub(b, shift_ssa, nir_imm_int(b, abs(shift)));
121	else
122		shift_ssa = nir_iadd(b, shift_ssa, nir_imm_int(b, abs(shift)));
123
124	return shift_ssa;
125}
126
127nir_ssa_def *
128ir3_nir_try_propagate_bit_shift(nir_builder *b, nir_ssa_def *offset, int32_t shift)
129{
130	nir_instr *offset_instr = offset->parent_instr;
131	if (offset_instr->type != nir_instr_type_alu)
132		return NULL;
133
134	nir_alu_instr *alu = nir_instr_as_alu(offset_instr);
135	nir_ssa_def *shift_ssa;
136	nir_ssa_def *new_offset = NULL;
137
138	switch (alu->op) {
139	case nir_op_ishl:
140		shift_ssa = check_and_propagate_bit_shift32(b, offset, alu, 1, shift);
141		if (shift_ssa)
142			new_offset = nir_ishl(b, alu->src[0].src.ssa, shift_ssa);
143		break;
144	case nir_op_ishr:
145		shift_ssa = check_and_propagate_bit_shift32(b, offset, alu, -1, shift);
146		if (shift_ssa)
147			new_offset = nir_ishr(b, alu->src[0].src.ssa, shift_ssa);
148		break;
149	case nir_op_ushr:
150		shift_ssa = check_and_propagate_bit_shift32(b, offset, alu, -1, shift);
151		if (shift_ssa)
152			new_offset = nir_ushr(b, alu->src[0].src.ssa, shift_ssa);
153		break;
154	default:
155		return NULL;
156	}
157
158	return new_offset;
159}
160
/* Replaces @intrinsic with the equivalent ir3-specific intrinsic
 * @ir3_ssbo_opcode, which carries one extra (trailing) source holding the
 * original byte-offset divided by 4 (the dword offset).
 *
 * @offset_src_idx is the index of the byte-offset source in the original
 * intrinsic. Always returns true (the replacement is unconditional).
 *
 * NOTE: the order of operations below (filling sources before insertion,
 * rewriting the trailing source after insertion) matters for NIR's SSA
 * use-list bookkeeping — do not reorder casually.
 */
static bool
lower_offset_for_ssbo(nir_intrinsic_instr *intrinsic, nir_builder *b,
					  unsigned ir3_ssbo_opcode, uint8_t offset_src_idx)
{
	unsigned num_srcs = nir_intrinsic_infos[intrinsic->intrinsic].num_srcs;

	bool has_dest = nir_intrinsic_infos[intrinsic->intrinsic].has_dest;
	nir_ssa_def *new_dest = NULL;

	/* Here we create a new intrinsic and copy over all contents from the old one. */

	nir_intrinsic_instr *new_intrinsic;
	nir_src *target_src;

	/* 'offset_src_idx' holds the index of the source that represent the offset. */
	new_intrinsic =
		nir_intrinsic_instr_create(b->shader, ir3_ssbo_opcode);

	debug_assert(intrinsic->src[offset_src_idx].is_ssa);
	nir_ssa_def *offset = intrinsic->src[offset_src_idx].ssa;

	/* Since we don't have value range checking, we first try to propagate
	 * the division by 4 ('offset >> 2') into another bit-shift instruction that
	 * possibly defines the offset. If that's the case, we emit a similar
	 * instructions adjusting (merging) the shift value.
	 *
	 * Here we use the convention that shifting right is negative while shifting
	 * left is positive. So 'x / 4' ~ 'x >> 2' or 'x << -2'.
	 */
	nir_ssa_def *new_offset = ir3_nir_try_propagate_bit_shift(b, offset, -2);

	/* The new source that will hold the dword-offset is always the last
	 * one for every intrinsic.
	 */
	target_src = &new_intrinsic->src[num_srcs];
	/* Placeholder value: it is properly rewritten (with use-list updates)
	 * after the new intrinsic has been inserted, further below.
	 */
	*target_src = nir_src_for_ssa(offset);

	if (has_dest) {
		debug_assert(intrinsic->dest.is_ssa);
		nir_ssa_def *dest = &intrinsic->dest.ssa;
		nir_ssa_dest_init(&new_intrinsic->instr, &new_intrinsic->dest,
						  dest->num_components, dest->bit_size, NULL);
		new_dest = &new_intrinsic->dest.ssa;
	}

	/* Copy the original sources; the extra trailing dword-offset source was
	 * already set above.
	 */
	for (unsigned i = 0; i < num_srcs; i++)
		new_intrinsic->src[i] = nir_src_for_ssa(intrinsic->src[i].ssa);

	for (unsigned i = 0; i < NIR_INTRINSIC_MAX_CONST_INDEX; i++)
		new_intrinsic->const_index[i] = intrinsic->const_index[i];

	new_intrinsic->num_components = intrinsic->num_components;

	b->cursor = nir_before_instr(&intrinsic->instr);

	/* If we managed to propagate the division by 4, just use the new offset
	 * register and don't emit the SHR.
	 */
	if (new_offset)
		offset = new_offset;
	else
		offset = nir_ushr(b, offset, nir_imm_int(b, 2));

	/* Insert the new intrinsic right before the old one. */
	nir_builder_instr_insert(b, &new_intrinsic->instr);

	/* Replace the last source of the new intrinsic by the result of
	 * the offset divided by 4.
	 */
	nir_instr_rewrite_src(&new_intrinsic->instr,
						  target_src,
						  nir_src_for_ssa(offset));

	if (has_dest) {
		/* Replace the uses of the original destination by that
		 * of the new intrinsic.
		 */
		nir_ssa_def_rewrite_uses(&intrinsic->dest.ssa,
								 nir_src_for_ssa(new_dest));
	}

	/* Finally remove the original intrinsic. */
	nir_instr_remove(&intrinsic->instr);

	return true;
}
247
248static bool
249lower_io_offsets_block(nir_block *block, nir_builder *b, void *mem_ctx)
250{
251	bool progress = false;
252
253	nir_foreach_instr_safe(instr, block) {
254		if (instr->type != nir_instr_type_intrinsic)
255			continue;
256
257		nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
258
259		/* SSBO */
260		int ir3_intrinsic;
261		uint8_t offset_src_idx;
262		ir3_intrinsic = get_ir3_intrinsic_for_ssbo_intrinsic(intr->intrinsic,
263															 &offset_src_idx);
264		if (ir3_intrinsic != -1) {
265			progress |= lower_offset_for_ssbo(intr, b, (unsigned) ir3_intrinsic,
266											  offset_src_idx);
267		}
268	}
269
270	return progress;
271}
272
273static bool
274lower_io_offsets_func(nir_function_impl *impl)
275{
276	void *mem_ctx = ralloc_parent(impl);
277	nir_builder b;
278	nir_builder_init(&b, impl);
279
280	bool progress = false;
281	nir_foreach_block_safe(block, impl) {
282		progress |= lower_io_offsets_block(block, &b, mem_ctx);
283	}
284
285	if (progress) {
286		nir_metadata_preserve(impl, nir_metadata_block_index |
287									nir_metadata_dominance);
288	}
289
290	return progress;
291}
292
293bool
294ir3_nir_lower_io_offsets(nir_shader *shader)
295{
296	bool progress = false;
297
298	nir_foreach_function(function, shader) {
299		if (function->impl)
300			progress |= lower_io_offsets_func(function->impl);
301	}
302
303	return progress;
304}
305