1/*
2 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 *    Rob Clark <robclark@freedesktop.org>
25 */
26
27#include "util/u_string.h"
28#include "util/u_memory.h"
29#include "util/u_format.h"
30
31#include "drm/freedreno_drmif.h"
32
33#include "ir3_shader.h"
34#include "ir3_compiler.h"
35#include "ir3_nir.h"
36
/* Type-size callback (passed to nir_lower_io by ir3_shader_from_nir()):
 * reports how many attribute slots 'type' occupies.  The 'bindless'
 * argument is part of the signature but is not consulted here.
 */
int
ir3_glsl_type_size(const struct glsl_type *type, bool bindless)
{
	const int slots = glsl_count_attribute_slots(type, false);

	return slots;
}
42
43static void
44delete_variant(struct ir3_shader_variant *v)
45{
46	if (v->ir)
47		ir3_destroy(v->ir);
48	if (v->bo)
49		fd_bo_del(v->bo);
50	if (v->immediates)
51		free(v->immediates);
52	free(v);
53}
54
55/* for vertex shader, the inputs are loaded into registers before the shader
56 * is executed, so max_regs from the shader instructions might not properly
57 * reflect the # of registers actually used, especially in case passthrough
58 * varyings.
59 *
60 * Likewise, for fragment shader, we can have some regs which are passed
61 * input values but never touched by the resulting shader (ie. as result
62 * of dead code elimination or simply because we don't know how to turn
63 * the reg off.
64 */
65static void
66fixup_regfootprint(struct ir3_shader_variant *v, uint32_t gpu_id)
67{
68	unsigned i;
69
70	for (i = 0; i < v->inputs_count; i++) {
71		/* skip frag inputs fetch via bary.f since their reg's are
72		 * not written by gpu before shader starts (and in fact the
73		 * regid's might not even be valid)
74		 */
75		if (v->inputs[i].bary)
76			continue;
77
78		/* ignore high regs that are global to all threads in a warp
79		 * (they exist by default) (a5xx+)
80		 */
81		if (v->inputs[i].regid >= regid(48,0))
82			continue;
83
84		if (v->inputs[i].compmask) {
85			unsigned n = util_last_bit(v->inputs[i].compmask) - 1;
86			int32_t regid = v->inputs[i].regid + n;
87			if (v->inputs[i].half) {
88				if (gpu_id < 500) {
89					v->info.max_half_reg = MAX2(v->info.max_half_reg, regid >> 2);
90				} else {
91					v->info.max_reg = MAX2(v->info.max_reg, regid >> 3);
92				}
93			} else {
94				v->info.max_reg = MAX2(v->info.max_reg, regid >> 2);
95			}
96		}
97	}
98
99	for (i = 0; i < v->outputs_count; i++) {
100		int32_t regid = v->outputs[i].regid + 3;
101		if (v->outputs[i].half) {
102			if (gpu_id < 500) {
103				v->info.max_half_reg = MAX2(v->info.max_half_reg, regid >> 2);
104			} else {
105				v->info.max_reg = MAX2(v->info.max_reg, regid >> 3);
106			}
107		} else {
108			v->info.max_reg = MAX2(v->info.max_reg, regid >> 2);
109		}
110	}
111}
112
113/* wrapper for ir3_assemble() which does some info fixup based on
114 * shader state.  Non-static since used by ir3_cmdline too.
115 */
116void * ir3_shader_assemble(struct ir3_shader_variant *v, uint32_t gpu_id)
117{
118	void *bin;
119
120	bin = ir3_assemble(v->ir, &v->info, gpu_id);
121	if (!bin)
122		return NULL;
123
124	if (gpu_id >= 400) {
125		v->instrlen = v->info.sizedwords / (2 * 16);
126	} else {
127		v->instrlen = v->info.sizedwords / (2 * 4);
128	}
129
130	/* NOTE: if relative addressing is used, we set constlen in
131	 * the compiler (to worst-case value) since we don't know in
132	 * the assembler what the max addr reg value can be:
133	 */
134	v->constlen = MAX2(v->constlen, v->info.max_const + 1);
135
136	fixup_regfootprint(v, gpu_id);
137
138	return bin;
139}
140
141static void
142assemble_variant(struct ir3_shader_variant *v)
143{
144	struct ir3_compiler *compiler = v->shader->compiler;
145	struct shader_info *info = &v->shader->nir->info;
146	uint32_t gpu_id = compiler->gpu_id;
147	uint32_t sz, *bin;
148
149	bin = ir3_shader_assemble(v, gpu_id);
150	sz = v->info.sizedwords * 4;
151
152	v->bo = fd_bo_new(compiler->dev, sz,
153			DRM_FREEDRENO_GEM_CACHE_WCOMBINE |
154			DRM_FREEDRENO_GEM_TYPE_KMEM,
155			"%s:%s", ir3_shader_stage(v->shader), info->name);
156
157	memcpy(fd_bo_map(v->bo), bin, sz);
158
159	if (ir3_shader_debug & IR3_DBG_DISASM) {
160		struct ir3_shader_key key = v->key;
161		printf("disassemble: type=%d, k={bp=%u,cts=%u,hp=%u}", v->type,
162			v->binning_pass, key.color_two_side, key.half_precision);
163		ir3_shader_disasm(v, bin, stdout);
164	}
165
166	if (shader_debug_enabled(v->shader->type)) {
167		fprintf(stderr, "Native code for unnamed %s shader %s:\n",
168			_mesa_shader_stage_to_string(v->shader->type),
169			v->shader->nir->info.name);
170		if (v->shader->type == MESA_SHADER_FRAGMENT)
171			fprintf(stderr, "SIMD0\n");
172		ir3_shader_disasm(v, bin, stderr);
173	}
174
175	free(bin);
176
177	/* no need to keep the ir around beyond this point: */
178	ir3_destroy(v->ir);
179	v->ir = NULL;
180}
181
182static struct ir3_shader_variant *
183create_variant(struct ir3_shader *shader, struct ir3_shader_key *key,
184		bool binning_pass)
185{
186	struct ir3_shader_variant *v = CALLOC_STRUCT(ir3_shader_variant);
187	int ret;
188
189	if (!v)
190		return NULL;
191
192	v->id = ++shader->variant_count;
193	v->shader = shader;
194	v->binning_pass = binning_pass;
195	v->key = *key;
196	v->type = shader->type;
197
198	ret = ir3_compile_shader_nir(shader->compiler, v);
199	if (ret) {
200		debug_error("compile failed!");
201		goto fail;
202	}
203
204	assemble_variant(v);
205	if (!v->bo) {
206		debug_error("assemble failed!");
207		goto fail;
208	}
209
210	return v;
211
212fail:
213	delete_variant(v);
214	return NULL;
215}
216
217static inline struct ir3_shader_variant *
218shader_variant(struct ir3_shader *shader, struct ir3_shader_key *key,
219		bool *created)
220{
221	struct ir3_shader_variant *v;
222
223	*created = false;
224
225	for (v = shader->variants; v; v = v->next)
226		if (ir3_shader_key_equal(key, &v->key))
227			return v;
228
229	/* compile new variant if it doesn't exist already: */
230	v = create_variant(shader, key, false);
231	if (v) {
232		v->next = shader->variants;
233		shader->variants = v;
234		*created = true;
235	}
236
237	return v;
238}
239
240struct ir3_shader_variant *
241ir3_shader_get_variant(struct ir3_shader *shader, struct ir3_shader_key *key,
242		bool binning_pass, bool *created)
243{
244	struct ir3_shader_variant *v =
245			shader_variant(shader, key, created);
246
247	if (v && binning_pass) {
248		if (!v->binning)
249			v->binning = create_variant(shader, key, true);
250		return v->binning;
251	}
252
253	return v;
254}
255
256void
257ir3_shader_destroy(struct ir3_shader *shader)
258{
259	struct ir3_shader_variant *v, *t;
260	for (v = shader->variants; v; ) {
261		t = v;
262		v = v->next;
263		delete_variant(t);
264	}
265	ralloc_free(shader->nir);
266	free(shader);
267}
268
269struct ir3_shader *
270ir3_shader_from_nir(struct ir3_compiler *compiler, nir_shader *nir)
271{
272	struct ir3_shader *shader = CALLOC_STRUCT(ir3_shader);
273
274	shader->compiler = compiler;
275	shader->id = ++shader->compiler->shader_count;
276	shader->type = nir->info.stage;
277
278	NIR_PASS_V(nir, nir_lower_io, nir_var_all, ir3_glsl_type_size,
279			   (nir_lower_io_options)0);
280
281	if (nir->info.stage == MESA_SHADER_FRAGMENT) {
282		/* NOTE: lower load_barycentric_at_sample first, since it
283		 * produces load_barycentric_at_offset:
284		 */
285		NIR_PASS_V(nir, ir3_nir_lower_load_barycentric_at_sample);
286		NIR_PASS_V(nir, ir3_nir_lower_load_barycentric_at_offset);
287
288		NIR_PASS_V(nir, ir3_nir_move_varying_inputs);
289	}
290
291	NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
292
293	/* do first pass optimization, ignoring the key: */
294	shader->nir = ir3_optimize_nir(shader, nir, NULL);
295	if (ir3_shader_debug & IR3_DBG_DISASM) {
296		printf("dump nir%d: type=%d", shader->id, shader->type);
297		nir_print_shader(shader->nir, stdout);
298	}
299
300	return shader;
301}
302
/* Print "; <name>: r<N>.<c>" for register id 'r', where N is r >> 2 and
 * the component letter is picked by the low two bits.  Nothing is printed
 * when r equals regid(63,0) (presumably the "no register" marker).
 */
static void dump_reg(FILE *out, const char *name, uint32_t r)
{
	if (r == regid(63,0))
		return;

	fprintf(out, "; %s: r%d.%c\n", name, r >> 2, "xyzw"[r & 0x3]);
}
308
/* Look up the register assigned to output 'slot' of the variant and
 * print it under the label 'name' via dump_reg().
 */
static void dump_output(FILE *out, struct ir3_shader_variant *so,
		unsigned slot, const char *name)
{
	dump_reg(out, name, ir3_find_output_regid(so, slot));
}
316
317void
318ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin, FILE *out)
319{
320	struct ir3 *ir = so->ir;
321	struct ir3_register *reg;
322	const char *type = ir3_shader_stage(so->shader);
323	uint8_t regid;
324	unsigned i;
325
326	for (i = 0; i < ir->ninputs; i++) {
327		if (!ir->inputs[i]) {
328			fprintf(out, "; in%d unused\n", i);
329			continue;
330		}
331		reg = ir->inputs[i]->regs[0];
332		regid = reg->num;
333		fprintf(out, "@in(%sr%d.%c)\tin%d\n",
334				(reg->flags & IR3_REG_HALF) ? "h" : "",
335				(regid >> 2), "xyzw"[regid & 0x3], i);
336	}
337
338	for (i = 0; i < ir->noutputs; i++) {
339		if (!ir->outputs[i]) {
340			fprintf(out, "; out%d unused\n", i);
341			continue;
342		}
343		/* kill shows up as a virtual output.. skip it! */
344		if (is_kill(ir->outputs[i]))
345			continue;
346		reg = ir->outputs[i]->regs[0];
347		regid = reg->num;
348		fprintf(out, "@out(%sr%d.%c)\tout%d\n",
349				(reg->flags & IR3_REG_HALF) ? "h" : "",
350				(regid >> 2), "xyzw"[regid & 0x3], i);
351	}
352
353	for (i = 0; i < so->immediates_count; i++) {
354		fprintf(out, "@const(c%d.x)\t", so->constbase.immediate + i);
355		fprintf(out, "0x%08x, 0x%08x, 0x%08x, 0x%08x\n",
356				so->immediates[i].val[0],
357				so->immediates[i].val[1],
358				so->immediates[i].val[2],
359				so->immediates[i].val[3]);
360	}
361
362	disasm_a3xx(bin, so->info.sizedwords, 0, out, ir->compiler->gpu_id);
363
364	switch (so->type) {
365	case MESA_SHADER_VERTEX:
366		fprintf(out, "; %s: outputs:", type);
367		for (i = 0; i < so->outputs_count; i++) {
368			uint8_t regid = so->outputs[i].regid;
369			fprintf(out, " r%d.%c (%s)",
370					(regid >> 2), "xyzw"[regid & 0x3],
371					gl_varying_slot_name(so->outputs[i].slot));
372		}
373		fprintf(out, "\n");
374		fprintf(out, "; %s: inputs:", type);
375		for (i = 0; i < so->inputs_count; i++) {
376			uint8_t regid = so->inputs[i].regid;
377			fprintf(out, " r%d.%c (cm=%x,il=%u,b=%u)",
378					(regid >> 2), "xyzw"[regid & 0x3],
379					so->inputs[i].compmask,
380					so->inputs[i].inloc,
381					so->inputs[i].bary);
382		}
383		fprintf(out, "\n");
384		break;
385	case MESA_SHADER_FRAGMENT:
386		fprintf(out, "; %s: outputs:", type);
387		for (i = 0; i < so->outputs_count; i++) {
388			uint8_t regid = so->outputs[i].regid;
389			fprintf(out, " r%d.%c (%s)",
390					(regid >> 2), "xyzw"[regid & 0x3],
391					gl_frag_result_name(so->outputs[i].slot));
392		}
393		fprintf(out, "\n");
394		fprintf(out, "; %s: inputs:", type);
395		for (i = 0; i < so->inputs_count; i++) {
396			uint8_t regid = so->inputs[i].regid;
397			fprintf(out, " r%d.%c (%s,cm=%x,il=%u,b=%u)",
398					(regid >> 2), "xyzw"[regid & 0x3],
399					gl_varying_slot_name(so->inputs[i].slot),
400					so->inputs[i].compmask,
401					so->inputs[i].inloc,
402					so->inputs[i].bary);
403		}
404		fprintf(out, "\n");
405		break;
406	default:
407		/* TODO */
408		break;
409	}
410
411	/* print generic shader info: */
412	fprintf(out, "; %s prog %d/%d: %u instructions, %d half, %d full\n",
413			type, so->shader->id, so->id,
414			so->info.instrs_count,
415			so->info.max_half_reg + 1,
416			so->info.max_reg + 1);
417
418	fprintf(out, "; %d const, %u constlen\n",
419			so->info.max_const + 1,
420			so->constlen);
421
422	fprintf(out, "; %u (ss), %u (sy)\n", so->info.ss, so->info.sy);
423
424	fprintf(out, "; max_sun=%u\n", ir->max_sun);
425
426	/* print shader type specific info: */
427	switch (so->type) {
428	case MESA_SHADER_VERTEX:
429		dump_output(out, so, VARYING_SLOT_POS, "pos");
430		dump_output(out, so, VARYING_SLOT_PSIZ, "psize");
431		break;
432	case MESA_SHADER_FRAGMENT:
433		dump_reg(out, "pos (ij_pixel)",
434			ir3_find_sysval_regid(so, SYSTEM_VALUE_BARYCENTRIC_PIXEL));
435		dump_reg(out, "pos (ij_centroid)",
436			ir3_find_sysval_regid(so, SYSTEM_VALUE_BARYCENTRIC_CENTROID));
437		dump_reg(out, "pos (ij_size)",
438			ir3_find_sysval_regid(so, SYSTEM_VALUE_BARYCENTRIC_SIZE));
439		dump_output(out, so, FRAG_RESULT_DEPTH, "posz");
440		if (so->color0_mrt) {
441			dump_output(out, so, FRAG_RESULT_COLOR, "color");
442		} else {
443			dump_output(out, so, FRAG_RESULT_DATA0, "data0");
444			dump_output(out, so, FRAG_RESULT_DATA1, "data1");
445			dump_output(out, so, FRAG_RESULT_DATA2, "data2");
446			dump_output(out, so, FRAG_RESULT_DATA3, "data3");
447			dump_output(out, so, FRAG_RESULT_DATA4, "data4");
448			dump_output(out, so, FRAG_RESULT_DATA5, "data5");
449			dump_output(out, so, FRAG_RESULT_DATA6, "data6");
450			dump_output(out, so, FRAG_RESULT_DATA7, "data7");
451		}
452		/* these two are hard-coded since we don't know how to
453		 * program them to anything but all 0's...
454		 */
455		if (so->frag_coord)
456			fprintf(out, "; fragcoord: r0.x\n");
457		if (so->frag_face)
458			fprintf(out, "; fragface: hr0.x\n");
459		break;
460	default:
461		/* TODO */
462		break;
463	}
464
465	fprintf(out, "\n");
466}
467
468uint64_t
469ir3_shader_outputs(const struct ir3_shader *so)
470{
471	return so->nir->info.outputs_written;
472}
473