/*
 * Copyright © 2019 Google, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "ir3_nir.h"
#include "ir3_compiler.h"
#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"
#include "mesa/main/macros.h"

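/* Compute the (16 * 4)-byte-aligned window that covers a constant-offset
 * UBO load.  For example, a 4-component 32-bit load at byte offset 72
 * touches bytes [72, 88), which widens to the aligned window [64, 128).
 */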
static inline struct ir3_ubo_range
get_ubo_load_range(nir_intrinsic_instr *instr)
{
	struct ir3_ubo_range r;

	const int offset = nir_src_as_uint(instr->src[1]);
	const int bytes = nir_intrinsic_dest_components(instr) * 4;

	r.start = ROUND_DOWN_TO(offset, 16 * 4);
	r.end = ALIGN(offset + bytes, 16 * 4);

	return r;
}

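/* Accumulate the aligned extent of every constant-offset UBO load, per
 * block, so we know how much of each block is worth uploading to the
 * constant file.  Loads with a non-constant block index or offset are
 * skipped; they can't be lowered.
 */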
static void
gather_ubo_ranges(nir_intrinsic_instr *instr,
				  struct ir3_ubo_analysis_state *state)
{
	if (!nir_src_is_const(instr->src[0]))
		return;

	if (!nir_src_is_const(instr->src[1]))
		return;

	const struct ir3_ubo_range r = get_ubo_load_range(instr);
	const uint32_t block = nir_src_as_uint(instr->src[0]);

	/* if UBO lowering is disabled, we still want to lower block 0
	 * (which is normal uniforms):
	 */
	if ((block > 0) && (ir3_shader_debug & IR3_DBG_NOUBOOPT))
		return;

	if (r.start < state->range[block].start)
		state->range[block].start = r.start;
	if (state->range[block].end < r.end)
		state->range[block].end = r.end;
}

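/* Rewrite a load_ubo that falls inside one of the ranges chosen for upload
 * into a load_uniform reading from the block's slot in the constant file.
 * Loads we can't (or chose not to) lower are left alone and will be pulled
 * from memory at runtime.
 */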
static void
lower_ubo_load_to_uniform(nir_intrinsic_instr *instr, nir_builder *b,
						  struct ir3_ubo_analysis_state *state)
{
	/* We don't lower dynamic block index UBO loads to load_uniform, but we
	 * could probably with some effort determine a block stride in number of
	 * registers.
	 */
	if (!nir_src_is_const(instr->src[0]))
		return;

	const uint32_t block = nir_src_as_uint(instr->src[0]);

	if (block > 0) {
		/* We don't lower dynamic array indexing either, but we definitely should.
		 * We don't have a good way of determining the range of the dynamic
		 * access, so for now just fall back to pulling.
		 */
		if (!nir_src_is_const(instr->src[1]))
			return;

		/* After gathering the UBO access ranges, we limit the total
		 * upload. Reject if we're now outside the range.
		 */
		const struct ir3_ubo_range r = get_ubo_load_range(instr);
		if (!(state->range[block].start <= r.start &&
			  r.end <= state->range[block].end))
			return;
	}

	b->cursor = nir_before_instr(&instr->instr);

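	/* load_ubo offsets are in bytes, while the load_uniform we emit takes
	 * its offset in dwords.  Try to propagate the right-shift by two into
	 * whatever computed the offset; failing that, emit an explicit shift.
	 */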
	nir_ssa_def *ubo_offset = nir_ssa_for_src(b, instr->src[1], 1);
	nir_ssa_def *new_offset = ir3_nir_try_propagate_bit_shift(b, ubo_offset, -2);
	if (new_offset)
		ubo_offset = new_offset;
	else
		ubo_offset = nir_ushr(b, ubo_offset, nir_imm_int(b, 2));

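	/* The uploaded ranges are packed back-to-back in the constant file, so
	 * rebase the dword offset from the block's own address space to where
	 * its range actually landed (range[block].offset, in bytes, hence the
	 * divide by four).
	 */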
	const int range_offset =
		(state->range[block].offset - state->range[block].start) / 4;
	nir_ssa_def *uniform_offset =
		nir_iadd(b, ubo_offset, nir_imm_int(b, range_offset));

	nir_intrinsic_instr *uniform =
		nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_uniform);
	uniform->num_components = instr->num_components;
	uniform->src[0] = nir_src_for_ssa(uniform_offset);
	nir_ssa_dest_init(&uniform->instr, &uniform->dest,
					  uniform->num_components, instr->dest.ssa.bit_size,
					  instr->dest.ssa.name);
	nir_builder_instr_insert(b, &uniform->instr);
	nir_ssa_def_rewrite_uses(&instr->dest.ssa,
							 nir_src_for_ssa(&uniform->dest.ssa));

	nir_instr_remove(&instr->instr);

	state->lower_count++;
}

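/* Two passes over the shader: first gather the ranges of each UBO that are
 * accessed with constant offsets, then assign those ranges space in the
 * constant file (up to a fixed upload budget) and rewrite the loads that
 * landed inside an uploaded range.  Returns true if anything was lowered.
 */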
bool
ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader *shader)
{
	struct ir3_ubo_analysis_state *state = &shader->ubo_state;

	memset(state, 0, sizeof(*state));
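	/* Block 0 is the regular uniform file.  As far as this pass is
	 * concerned, num_uniforms appears to be counted in vec4s, hence the
	 * scale by 16 to get bytes before rounding up to range granularity.
	 */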
	state->range[0].end = align(nir->num_uniforms * 16, 16 * 4); /* align to 4*vec4 */

	nir_foreach_function(function, nir) {
		if (function->impl) {
			nir_foreach_block(block, function->impl) {
				nir_foreach_instr(instr, block) {
					if (instr->type == nir_instr_type_intrinsic &&
						nir_instr_as_intrinsic(instr)->intrinsic == nir_intrinsic_load_ubo)
						gather_ubo_ranges(nir_instr_as_intrinsic(instr), state);
				}
			}
		}
	}

	/* For now, everything we upload is accessed statically and thus will be
	 * used by the shader. Once we can upload dynamically indexed data, we may
	 * upload sparsely accessed arrays, at which point we probably want to
	 * give priority to smaller UBOs, on the assumption that big UBOs will be
	 * accessed dynamically.  Alternatively, we can track statically and
	 * dynamically accessed ranges separately and upload static ranges
	 * first.
	 */
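	/* Pack the gathered ranges into the upload, clamping the total at
	 * max_upload bytes.  For blocks other than 0, loads that fall outside
	 * a truncated range will fail the bounds check in
	 * lower_ubo_load_to_uniform() and stay as pulled load_ubo instructions.
	 */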
	const uint32_t max_upload = 16 * 1024;
	uint32_t offset = 0;
	for (uint32_t i = 0; i < ARRAY_SIZE(state->range); i++) {
		uint32_t range_size = state->range[i].end - state->range[i].start;

		debug_assert(offset <= max_upload);
		state->range[i].offset = offset;
		if (offset + range_size > max_upload) {
			range_size = max_upload - offset;
			state->range[i].end = state->range[i].start + range_size;
		}
		offset += range_size;
	}
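	/* Total number of bytes the driver needs to upload to the constant
	 * file for this shader.
	 */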
	state->size = offset;

	nir_foreach_function(function, nir) {
		if (function->impl) {
			nir_builder builder;
			nir_builder_init(&builder, function->impl);
			nir_foreach_block(block, function->impl) {
				nir_foreach_instr_safe(instr, block) {
					if (instr->type == nir_instr_type_intrinsic &&
						nir_instr_as_intrinsic(instr)->intrinsic == nir_intrinsic_load_ubo)
						lower_ubo_load_to_uniform(nir_instr_as_intrinsic(instr), &builder, state);
				}
			}

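			/* Only individual instructions were replaced; the CFG itself is
			 * untouched, so block indices and dominance info remain valid.
			 */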
			nir_metadata_preserve(function->impl, nir_metadata_block_index |
								  nir_metadata_dominance);
		}
	}

	return state->lower_count > 0;
}