1/*
2 * Copyright 2016 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25#include "si_shader_internal.h"
26#include "si_pipe.h"
27#include "ac_llvm_util.h"
28#include "util/u_memory.h"
29
/* Calling-convention IDs assigned to LLVM functions for the AMDGPU hardware
 * shader stages. The numeric values correspond to LLVM's amdgpu_* calling
 * conventions — they must stay in sync with the LLVM version in use.
 */
enum si_llvm_calling_convention {
	RADEON_LLVM_AMDGPU_VS = 87,	/* vertex shader */
	RADEON_LLVM_AMDGPU_GS = 88,	/* geometry shader */
	RADEON_LLVM_AMDGPU_PS = 89,	/* pixel (fragment) shader */
	RADEON_LLVM_AMDGPU_CS = 90,	/* compute shader */
	RADEON_LLVM_AMDGPU_HS = 93,	/* hull (tess control) shader */
};
37
/* Context handed to the LLVM diagnostic handler: where to forward messages
 * and whether a compile error was observed. */
struct si_llvm_diagnostics {
	struct pipe_debug_callback *debug;
	unsigned retval;	/* set to 1 when an LLVMDSError diagnostic fires */
};
42
43static void si_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
44{
45	struct si_llvm_diagnostics *diag = (struct si_llvm_diagnostics *)context;
46	LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di);
47	char *description = LLVMGetDiagInfoDescription(di);
48	const char *severity_str = NULL;
49
50	switch (severity) {
51	case LLVMDSError:
52		severity_str = "error";
53		break;
54	case LLVMDSWarning:
55		severity_str = "warning";
56		break;
57	case LLVMDSRemark:
58		severity_str = "remark";
59		break;
60	case LLVMDSNote:
61		severity_str = "note";
62		break;
63	default:
64		severity_str = "unknown";
65	}
66
67	pipe_debug_message(diag->debug, SHADER_INFO,
68			   "LLVM diagnostic (%s): %s", severity_str, description);
69
70	if (severity == LLVMDSError) {
71		diag->retval = 1;
72		fprintf(stderr,"LLVM triggered Diagnostic Handler: %s\n", description);
73	}
74
75	LLVMDisposeMessage(description);
76}
77
78/**
79 * Compile an LLVM module to machine code.
80 *
81 * @returns 0 for success, 1 for failure
82 */
83unsigned si_llvm_compile(LLVMModuleRef M, struct ac_shader_binary *binary,
84			 struct ac_llvm_compiler *compiler,
85			 struct pipe_debug_callback *debug,
86			 bool less_optimized)
87{
88	struct ac_compiler_passes *passes =
89		less_optimized && compiler->low_opt_passes ?
90			compiler->low_opt_passes : compiler->passes;
91	struct si_llvm_diagnostics diag;
92	LLVMContextRef llvm_ctx;
93
94	diag.debug = debug;
95	diag.retval = 0;
96
97	/* Setup Diagnostic Handler*/
98	llvm_ctx = LLVMGetModuleContext(M);
99
100	LLVMContextSetDiagnosticHandler(llvm_ctx, si_diagnostic_handler, &diag);
101
102	/* Compile IR. */
103	if (!ac_compile_module_to_binary(passes, M, binary))
104		diag.retval = 1;
105
106	if (diag.retval != 0)
107		pipe_debug_message(debug, SHADER_INFO, "LLVM compile failed");
108	return diag.retval;
109}
110
111LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base,
112			  enum tgsi_opcode_type type)
113{
114	struct si_shader_context *ctx = si_shader_context(bld_base);
115
116	switch (type) {
117	case TGSI_TYPE_UNSIGNED:
118	case TGSI_TYPE_SIGNED:
119		return ctx->ac.i32;
120	case TGSI_TYPE_UNSIGNED64:
121	case TGSI_TYPE_SIGNED64:
122		return ctx->ac.i64;
123	case TGSI_TYPE_DOUBLE:
124		return ctx->ac.f64;
125	case TGSI_TYPE_UNTYPED:
126	case TGSI_TYPE_FLOAT:
127		return ctx->ac.f32;
128	default: break;
129	}
130	return 0;
131}
132
133LLVMValueRef bitcast(struct lp_build_tgsi_context *bld_base,
134		     enum tgsi_opcode_type type, LLVMValueRef value)
135{
136	struct si_shader_context *ctx = si_shader_context(bld_base);
137	LLVMTypeRef dst_type = tgsi2llvmtype(bld_base, type);
138
139	if (dst_type)
140		return LLVMBuildBitCast(ctx->ac.builder, value, dst_type, "");
141	else
142		return value;
143}
144
145/**
146 * Return a value that is equal to the given i32 \p index if it lies in [0,num)
147 * or an undefined value in the same interval otherwise.
148 */
149LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx,
150				 LLVMValueRef index,
151				 unsigned num)
152{
153	LLVMBuilderRef builder = ctx->ac.builder;
154	LLVMValueRef c_max = LLVMConstInt(ctx->i32, num - 1, 0);
155	LLVMValueRef cc;
156
157	if (util_is_power_of_two_or_zero(num)) {
158		index = LLVMBuildAnd(builder, index, c_max, "");
159	} else {
160		/* In theory, this MAX pattern should result in code that is
161		 * as good as the bit-wise AND above.
162		 *
163		 * In practice, LLVM generates worse code (at the time of
164		 * writing), because its value tracking is not strong enough.
165		 */
166		cc = LLVMBuildICmp(builder, LLVMIntULE, index, c_max, "");
167		index = LLVMBuildSelect(builder, cc, index, c_max, "");
168	}
169
170	return index;
171}
172
173static LLVMValueRef emit_swizzle(struct lp_build_tgsi_context *bld_base,
174				 LLVMValueRef value,
175				 unsigned swizzle_x,
176				 unsigned swizzle_y,
177				 unsigned swizzle_z,
178				 unsigned swizzle_w)
179{
180	struct si_shader_context *ctx = si_shader_context(bld_base);
181	LLVMValueRef swizzles[4];
182
183	swizzles[0] = LLVMConstInt(ctx->i32, swizzle_x, 0);
184	swizzles[1] = LLVMConstInt(ctx->i32, swizzle_y, 0);
185	swizzles[2] = LLVMConstInt(ctx->i32, swizzle_z, 0);
186	swizzles[3] = LLVMConstInt(ctx->i32, swizzle_w, 0);
187
188	return LLVMBuildShuffleVector(ctx->ac.builder,
189				      value,
190				      LLVMGetUndef(LLVMTypeOf(value)),
191				      LLVMConstVector(swizzles, 4), "");
192}
193
194/**
195 * Return the description of the array covering the given temporary register
196 * index.
197 */
198static unsigned
199get_temp_array_id(struct lp_build_tgsi_context *bld_base,
200		  unsigned reg_index,
201		  const struct tgsi_ind_register *reg)
202{
203	struct si_shader_context *ctx = si_shader_context(bld_base);
204	unsigned num_arrays = ctx->bld_base.info->array_max[TGSI_FILE_TEMPORARY];
205	unsigned i;
206
207	if (reg && reg->ArrayID > 0 && reg->ArrayID <= num_arrays)
208		return reg->ArrayID;
209
210	for (i = 0; i < num_arrays; i++) {
211		const struct tgsi_array_info *array = &ctx->temp_arrays[i];
212
213		if (reg_index >= array->range.First && reg_index <= array->range.Last)
214			return i + 1;
215	}
216
217	return 0;
218}
219
220static struct tgsi_declaration_range
221get_array_range(struct lp_build_tgsi_context *bld_base,
222		unsigned File, unsigned reg_index,
223		const struct tgsi_ind_register *reg)
224{
225	struct si_shader_context *ctx = si_shader_context(bld_base);
226	struct tgsi_declaration_range range;
227
228	if (File == TGSI_FILE_TEMPORARY) {
229		unsigned array_id = get_temp_array_id(bld_base, reg_index, reg);
230		if (array_id)
231			return ctx->temp_arrays[array_id - 1].range;
232	}
233
234	range.First = 0;
235	range.Last = bld_base->info->file_max[File];
236	return range;
237}
238
239/**
240 * For indirect registers, construct a pointer directly to the requested
241 * element using getelementptr if possible.
242 *
243 * Returns NULL if the insertelement/extractelement fallback for array access
244 * must be used.
245 */
246static LLVMValueRef
247get_pointer_into_array(struct si_shader_context *ctx,
248		       unsigned file,
249		       unsigned swizzle,
250		       unsigned reg_index,
251		       const struct tgsi_ind_register *reg_indirect)
252{
253	unsigned array_id;
254	struct tgsi_array_info *array;
255	LLVMValueRef idxs[2];
256	LLVMValueRef index;
257	LLVMValueRef alloca;
258
259	if (file != TGSI_FILE_TEMPORARY)
260		return NULL;
261
262	array_id = get_temp_array_id(&ctx->bld_base, reg_index, reg_indirect);
263	if (!array_id)
264		return NULL;
265
266	alloca = ctx->temp_array_allocas[array_id - 1];
267	if (!alloca)
268		return NULL;
269
270	array = &ctx->temp_arrays[array_id - 1];
271
272	if (!(array->writemask & (1 << swizzle)))
273		return ctx->undef_alloca;
274
275	index = si_get_indirect_index(ctx, reg_indirect, 1,
276				      reg_index - ctx->temp_arrays[array_id - 1].range.First);
277
278	/* Ensure that the index is within a valid range, to guard against
279	 * VM faults and overwriting critical data (e.g. spilled resource
280	 * descriptors).
281	 *
282	 * TODO It should be possible to avoid the additional instructions
283	 * if LLVM is changed so that it guarantuees:
284	 * 1. the scratch space descriptor isolates the current wave (this
285	 *    could even save the scratch offset SGPR at the cost of an
286	 *    additional SALU instruction)
287	 * 2. the memory for allocas must be allocated at the _end_ of the
288	 *    scratch space (after spilled registers)
289	 */
290	index = si_llvm_bound_index(ctx, index, array->range.Last - array->range.First + 1);
291
292	index = ac_build_imad(&ctx->ac, index,
293			      LLVMConstInt(ctx->i32, util_bitcount(array->writemask), 0),
294			      LLVMConstInt(ctx->i32,
295					   util_bitcount(array->writemask & ((1 << swizzle) - 1)), 0));
296	idxs[0] = ctx->i32_0;
297	idxs[1] = index;
298	return LLVMBuildGEP(ctx->ac.builder, alloca, idxs, 2, "");
299}
300
301LLVMValueRef
302si_llvm_emit_fetch_64bit(struct lp_build_tgsi_context *bld_base,
303			 LLVMTypeRef type,
304			 LLVMValueRef ptr,
305			 LLVMValueRef ptr2)
306{
307	struct si_shader_context *ctx = si_shader_context(bld_base);
308	LLVMValueRef values[2] = {
309		ac_to_integer(&ctx->ac, ptr),
310		ac_to_integer(&ctx->ac, ptr2),
311	};
312	LLVMValueRef result = ac_build_gather_values(&ctx->ac, values, 2);
313	return LLVMBuildBitCast(ctx->ac.builder, result, type, "");
314}
315
316static LLVMValueRef
317emit_array_fetch(struct lp_build_tgsi_context *bld_base,
318		 unsigned File, enum tgsi_opcode_type type,
319		 struct tgsi_declaration_range range,
320		 unsigned swizzle_in)
321{
322	struct si_shader_context *ctx = si_shader_context(bld_base);
323	unsigned i, size = range.Last - range.First + 1;
324	LLVMTypeRef vec = LLVMVectorType(tgsi2llvmtype(bld_base, type), size);
325	LLVMValueRef result = LLVMGetUndef(vec);
326	unsigned swizzle = swizzle_in;
327	struct tgsi_full_src_register tmp_reg = {};
328	tmp_reg.Register.File = File;
329	if (tgsi_type_is_64bit(type))
330		swizzle |= (swizzle_in + 1) << 16;
331
332	for (i = 0; i < size; ++i) {
333		tmp_reg.Register.Index = i + range.First;
334
335		LLVMValueRef temp = si_llvm_emit_fetch(bld_base, &tmp_reg, type, swizzle);
336		result = LLVMBuildInsertElement(ctx->ac.builder, result, temp,
337			LLVMConstInt(ctx->i32, i, 0), "array_vector");
338	}
339	return result;
340}
341
342static LLVMValueRef
343load_value_from_array(struct lp_build_tgsi_context *bld_base,
344		      unsigned file,
345		      enum tgsi_opcode_type type,
346		      unsigned swizzle,
347		      unsigned reg_index,
348		      const struct tgsi_ind_register *reg_indirect)
349{
350	struct si_shader_context *ctx = si_shader_context(bld_base);
351	LLVMBuilderRef builder = ctx->ac.builder;
352	LLVMValueRef ptr;
353
354	ptr = get_pointer_into_array(ctx, file, swizzle, reg_index, reg_indirect);
355	if (ptr) {
356		LLVMValueRef val = LLVMBuildLoad(builder, ptr, "");
357		if (tgsi_type_is_64bit(type)) {
358			LLVMValueRef ptr_hi, val_hi;
359			ptr_hi = LLVMBuildGEP(builder, ptr, &ctx->i32_1, 1, "");
360			val_hi = LLVMBuildLoad(builder, ptr_hi, "");
361			val = si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
362						       val, val_hi);
363		}
364
365		return val;
366	} else {
367		struct tgsi_declaration_range range =
368			get_array_range(bld_base, file, reg_index, reg_indirect);
369		LLVMValueRef index =
370			si_get_indirect_index(ctx, reg_indirect, 1, reg_index - range.First);
371		LLVMValueRef array =
372			emit_array_fetch(bld_base, file, type, range, swizzle);
373		return LLVMBuildExtractElement(builder, array, index, "");
374	}
375}
376
/* Store \p value into one channel of an indirectly-addressed register.
 * Uses a direct GEP into the backing alloca when possible; otherwise
 * inserts into a fetched vector and writes the whole range back. */
static void
store_value_to_array(struct lp_build_tgsi_context *bld_base,
		     LLVMValueRef value,
		     unsigned file,
		     unsigned chan_index,
		     unsigned reg_index,
		     const struct tgsi_ind_register *reg_indirect)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	LLVMBuilderRef builder = ctx->ac.builder;
	LLVMValueRef ptr;

	ptr = get_pointer_into_array(ctx, file, chan_index, reg_index, reg_indirect);
	if (ptr) {
		LLVMBuildStore(builder, value, ptr);
	} else {
		unsigned i, size;
		struct tgsi_declaration_range range = get_array_range(bld_base, file, reg_index, reg_indirect);
		LLVMValueRef index = si_get_indirect_index(ctx, reg_indirect, 1, reg_index - range.First);
		LLVMValueRef array =
			emit_array_fetch(bld_base, file, TGSI_TYPE_FLOAT, range, chan_index);
		LLVMValueRef temp_ptr;

		/* Insert the new value at the dynamic index, then write the
		 * whole vector back element by element. */
		array = LLVMBuildInsertElement(builder, array, value, index, "");

		size = range.Last - range.First + 1;
		for (i = 0; i < size; ++i) {
			switch(file) {
			case TGSI_FILE_OUTPUT:
				temp_ptr = ctx->outputs[i + range.First][chan_index];
				break;

			case TGSI_FILE_TEMPORARY:
				/* Skip temporaries beyond the declared count. */
				if (range.First + i >= ctx->temps_count)
					continue;
				temp_ptr = ctx->temps[(i + range.First) * TGSI_NUM_CHANNELS + chan_index];
				break;

			default:
				/* Other register files are not writable here. */
				continue;
			}
			value = LLVMBuildExtractElement(builder, array,
				LLVMConstInt(ctx->i32, i, 0), "");
			LLVMBuildStore(builder, value, temp_ptr);
		}
	}
}
424
425/* If this is true, preload FS inputs at the beginning of shaders. Otherwise,
426 * reload them at each use. This must be true if the shader is using
427 * derivatives and KILL, because KILL can leave the WQM and then a lazy
428 * input load isn't in the WQM anymore.
429 */
430static bool si_preload_fs_inputs(struct si_shader_context *ctx)
431{
432	struct si_shader_selector *sel = ctx->shader->selector;
433
434	return sel->info.uses_derivatives &&
435	       sel->info.uses_kill;
436}
437
438static LLVMValueRef
439get_output_ptr(struct lp_build_tgsi_context *bld_base, unsigned index,
440	       unsigned chan)
441{
442	struct si_shader_context *ctx = si_shader_context(bld_base);
443
444	assert(index <= ctx->bld_base.info->file_max[TGSI_FILE_OUTPUT]);
445	return ctx->outputs[index][chan];
446}
447
/* Fetch one channel of a TGSI source register as an LLVM value of the
 * requested type. swizzle_in == ~0 fetches all four channels and gathers
 * them into a vector. For 64-bit types, the low 16 bits of swizzle_in
 * select the first 32-bit half, the high 16 bits the second half. */
LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
				const struct tgsi_full_src_register *reg,
				enum tgsi_opcode_type type,
				unsigned swizzle_in)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	LLVMBuilderRef builder = ctx->ac.builder;
	LLVMValueRef result = NULL, ptr, ptr2;
	unsigned swizzle = swizzle_in & 0xffff;

	/* ~0 means: fetch every channel and return them as one vector. */
	if (swizzle_in == ~0) {
		LLVMValueRef values[TGSI_NUM_CHANNELS];
		unsigned chan;
		for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
			values[chan] = si_llvm_emit_fetch(bld_base, reg, type, chan);
		}
		return ac_build_gather_values(&ctx->ac, values,
					      TGSI_NUM_CHANNELS);
	}

	/* Indirect addressing goes through the array-load path. */
	if (reg->Register.Indirect) {
		LLVMValueRef load = load_value_from_array(bld_base, reg->Register.File, type,
				swizzle, reg->Register.Index, &reg->Indirect);
		return bitcast(bld_base, type, load);
	}

	switch(reg->Register.File) {
	case TGSI_FILE_IMMEDIATE: {
		LLVMTypeRef ctype = tgsi2llvmtype(bld_base, type);
		if (tgsi_type_is_64bit(type)) {
			/* Build a <2 x i32> from the two immediate halves,
			 * then bitcast to the 64-bit destination type. */
			result = LLVMGetUndef(LLVMVectorType(ctx->i32, 2));
			result = LLVMConstInsertElement(result,
							ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle],
							ctx->i32_0);
			result = LLVMConstInsertElement(result,
							ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + (swizzle_in >> 16)],
							ctx->i32_1);
			return LLVMConstBitCast(result, ctype);
		} else {
			return LLVMConstBitCast(ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle], ctype);
		}
	}

	case TGSI_FILE_INPUT: {
		unsigned index = reg->Register.Index;
		LLVMValueRef input[4];

		/* I don't think doing this for vertex shaders is beneficial.
		 * For those, we want to make sure the VMEM loads are executed
		 * only once. Fragment shaders don't care much, because
		 * v_interp instructions are much cheaper than VMEM loads.
		 */
		if (!si_preload_fs_inputs(ctx) &&
		    ctx->bld_base.info->processor == PIPE_SHADER_FRAGMENT)
			ctx->load_input(ctx, index, &ctx->input_decls[index], input);
		else
			memcpy(input, &ctx->inputs[index * 4], sizeof(input));

		result = input[swizzle];

		if (tgsi_type_is_64bit(type)) {
			ptr = result;
			ptr2 = input[swizzle_in >> 16];
			return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
							ptr, ptr2);
		}
		break;
	}

	case TGSI_FILE_TEMPORARY:
		/* Out-of-range temporaries read as undef. */
		if (reg->Register.Index >= ctx->temps_count)
			return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
		ptr = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle];
		if (tgsi_type_is_64bit(type)) {
			ptr2 = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + (swizzle_in >> 16)];
			return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
							LLVMBuildLoad(builder, ptr, ""),
							LLVMBuildLoad(builder, ptr2, ""));
		}
		result = LLVMBuildLoad(builder, ptr, "");
		break;

	case TGSI_FILE_OUTPUT:
		ptr = get_output_ptr(bld_base, reg->Register.Index, swizzle);
		if (tgsi_type_is_64bit(type)) {
			ptr2 = get_output_ptr(bld_base, reg->Register.Index, (swizzle_in >> 16));
			return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
							LLVMBuildLoad(builder, ptr, ""),
							LLVMBuildLoad(builder, ptr2, ""));
		}
		result = LLVMBuildLoad(builder, ptr, "");
		break;

	default:
		/* Unsupported register files read as undef. */
		return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
	}

	return bitcast(bld_base, type, result);
}
547
548static LLVMValueRef fetch_system_value(struct lp_build_tgsi_context *bld_base,
549				       const struct tgsi_full_src_register *reg,
550				       enum tgsi_opcode_type type,
551				       unsigned swizzle_in)
552{
553	struct si_shader_context *ctx = si_shader_context(bld_base);
554	LLVMBuilderRef builder = ctx->ac.builder;
555	LLVMValueRef cval = ctx->system_values[reg->Register.Index];
556	unsigned swizzle = swizzle_in & 0xffff;
557
558	if (tgsi_type_is_64bit(type)) {
559		LLVMValueRef lo, hi;
560
561		assert(swizzle == 0 || swizzle == 2);
562
563		lo = LLVMBuildExtractElement(
564			builder, cval, LLVMConstInt(ctx->i32, swizzle, 0), "");
565		hi = LLVMBuildExtractElement(
566			builder, cval, LLVMConstInt(ctx->i32, (swizzle_in >> 16), 0), "");
567
568		return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
569						lo, hi);
570	}
571
572	if (LLVMGetTypeKind(LLVMTypeOf(cval)) == LLVMVectorTypeKind) {
573		cval = LLVMBuildExtractElement(
574			builder, cval, LLVMConstInt(ctx->i32, swizzle, 0), "");
575	} else {
576		assert(swizzle == 0);
577	}
578
579	return bitcast(bld_base, type, cval);
580}
581
/* Allocate storage (allocas) and perform any preloading for one TGSI
 * declaration: address regs, temporaries (optionally array-backed),
 * inputs, system values, outputs, and compute shared memory. */
static void emit_declaration(struct lp_build_tgsi_context *bld_base,
			     const struct tgsi_full_declaration *decl)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	LLVMBuilderRef builder = ctx->ac.builder;
	unsigned first, last, i;
	switch(decl->Declaration.File) {
	case TGSI_FILE_ADDRESS:
	{
		 unsigned idx;
		/* One i32 alloca per channel of each address register. */
		for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
			unsigned chan;
			for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
				 ctx->addrs[idx][chan] = ac_build_alloca_undef(
					&ctx->ac, ctx->i32, "");
			}
		}
		break;
	}

	case TGSI_FILE_TEMPORARY:
	{
		char name[18] = "";
		LLVMValueRef array_alloca = NULL;
		unsigned decl_size;
		unsigned writemask = decl->Declaration.UsageMask;
		first = decl->Range.First;
		last = decl->Range.Last;
		/* Four channels per declared register. */
		decl_size = 4 * ((last - first) + 1);

		if (decl->Declaration.Array) {
			unsigned id = decl->Array.ArrayID - 1;
			unsigned array_size;

			/* Only written channels occupy space in the alloca. */
			writemask &= ctx->temp_arrays[id].writemask;
			ctx->temp_arrays[id].writemask = writemask;
			array_size = ((last - first) + 1) * util_bitcount(writemask);

			/* If the array has more than 16 elements, store it
			 * in memory using an alloca that spans the entire
			 * array.
			 *
			 * Otherwise, store each array element individually.
			 * We will then generate vectors (per-channel, up to
			 * <16 x float> if the usagemask is a single bit) for
			 * indirect addressing.
			 *
			 * Note that 16 is the number of vector elements that
			 * LLVM will store in a register, so theoretically an
			 * array with up to 4 * 16 = 64 elements could be
			 * handled this way, but whether that's a good idea
			 * depends on VGPR register pressure elsewhere.
			 *
			 * FIXME: We shouldn't need to have the non-alloca
			 * code path for arrays. LLVM should be smart enough to
			 * promote allocas into registers when profitable.
			 */
			if (array_size > 16 ||
			    !ctx->screen->llvm_has_working_vgpr_indexing) {
				array_alloca = ac_build_alloca_undef(&ctx->ac,
					LLVMArrayType(ctx->f32,
						      array_size), "array");
				ctx->temp_array_allocas[id] = array_alloca;
			}
		}

		/* Lazily allocate the pointer table for all temporaries. */
		if (!ctx->temps_count) {
			ctx->temps_count = bld_base->info->file_max[TGSI_FILE_TEMPORARY] + 1;
			ctx->temps = MALLOC(TGSI_NUM_CHANNELS * ctx->temps_count * sizeof(LLVMValueRef));
		}
		if (!array_alloca) {
			/* One scalar alloca per channel. */
			for (i = 0; i < decl_size; ++i) {
#ifdef DEBUG
				snprintf(name, sizeof(name), "TEMP%d.%c",
					 first + i / 4, "xyzw"[i % 4]);
#endif
				ctx->temps[first * TGSI_NUM_CHANNELS + i] =
					ac_build_alloca_undef(&ctx->ac,
							      ctx->f32,
							      name);
			}
		} else {
			LLVMValueRef idxs[2] = {
				ctx->i32_0,
				NULL
			};
			unsigned j = 0;

			if (writemask != TGSI_WRITEMASK_XYZW &&
			    !ctx->undef_alloca) {
				/* Create a dummy alloca. We use it so that we
				 * have a pointer that is safe to load from if
				 * a shader ever reads from a channel that
				 * it never writes to.
				 */
				ctx->undef_alloca = ac_build_alloca_undef(
					&ctx->ac, ctx->f32, "undef");
			}

			/* Point each written channel into the packed array
			 * alloca; unwritten channels get the dummy alloca. */
			for (i = 0; i < decl_size; ++i) {
				LLVMValueRef ptr;
				if (writemask & (1 << (i % 4))) {
#ifdef DEBUG
					snprintf(name, sizeof(name), "TEMP%d.%c",
						 first + i / 4, "xyzw"[i % 4]);
#endif
					idxs[1] = LLVMConstInt(ctx->i32, j, 0);
					ptr = LLVMBuildGEP(builder, array_alloca, idxs, 2, name);
					j++;
				} else {
					ptr = ctx->undef_alloca;
				}
				ctx->temps[first * TGSI_NUM_CHANNELS + i] = ptr;
			}
		}
		break;
	}
	case TGSI_FILE_INPUT:
	{
		unsigned idx;
		/* Record a per-slot copy of the declaration; preload the
		 * input now unless FS inputs are loaded lazily at each use
		 * (see si_preload_fs_inputs). */
		for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
			if (ctx->load_input &&
			    ctx->input_decls[idx].Declaration.File != TGSI_FILE_INPUT) {
				ctx->input_decls[idx] = *decl;
				ctx->input_decls[idx].Range.First = idx;
				ctx->input_decls[idx].Range.Last = idx;
				ctx->input_decls[idx].Semantic.Index += idx - decl->Range.First;

				if (si_preload_fs_inputs(ctx) ||
				    bld_base->info->processor != PIPE_SHADER_FRAGMENT)
					ctx->load_input(ctx, idx, &ctx->input_decls[idx],
							&ctx->inputs[idx * 4]);
			}
		}
	}
	break;

	case TGSI_FILE_SYSTEM_VALUE:
	{
		unsigned idx;
		for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
			si_load_system_value(ctx, idx, decl);
		}
	}
	break;

	case TGSI_FILE_OUTPUT:
	{
		char name[16] = "";
		unsigned idx;
		/* One f32 alloca per channel; skip already-declared outputs. */
		for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
			unsigned chan;
			assert(idx < RADEON_LLVM_MAX_OUTPUTS);
			if (ctx->outputs[idx][0])
				continue;
			for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
#ifdef DEBUG
				snprintf(name, sizeof(name), "OUT%d.%c",
					 idx, "xyzw"[chan % 4]);
#endif
				ctx->outputs[idx][chan] = ac_build_alloca_undef(
					&ctx->ac, ctx->f32, name);
			}
		}
		break;
	}

	case TGSI_FILE_MEMORY:
		si_tgsi_declare_compute_memory(ctx, decl);
		break;

	default:
		break;
	}
}
757
/* Store the computed values dst[] into TGSI destination register \p index
 * of \p inst, honoring the writemask, saturation, indirect addressing,
 * and 64-bit destinations (which occupy channel pairs x/y and z/w). */
void si_llvm_emit_store(struct lp_build_tgsi_context *bld_base,
			const struct tgsi_full_instruction *inst,
			const struct tgsi_opcode_info *info,
			unsigned index,
			LLVMValueRef dst[4])
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	const struct tgsi_full_dst_register *reg = &inst->Dst[index];
	LLVMBuilderRef builder = ctx->ac.builder;
	LLVMValueRef temp_ptr, temp_ptr2 = NULL;
	bool is_vec_store = false;
	enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);

	if (dst[0]) {
		LLVMTypeKind k = LLVMGetTypeKind(LLVMTypeOf(dst[0]));
		is_vec_store = (k == LLVMVectorTypeKind);
	}

	/* A vector in dst[0]: split it into per-channel scalars and
	 * re-enter the store callback with those. */
	if (is_vec_store) {
		LLVMValueRef values[4] = {};
		uint32_t writemask = reg->Register.WriteMask;
		while (writemask) {
			unsigned chan = u_bit_scan(&writemask);
			LLVMValueRef index = LLVMConstInt(ctx->i32, chan, 0);
			values[chan]  = LLVMBuildExtractElement(ctx->ac.builder,
							dst[0], index, "");
		}
		bld_base->emit_store(bld_base, inst, info, index, values);
		return;
	}

	uint32_t writemask = reg->Register.WriteMask;
	while (writemask) {
		unsigned chan_index = u_bit_scan(&writemask);
		LLVMValueRef value = dst[chan_index];

		/* 64-bit values are handled at their even channel; skip the
		 * odd half of each pair. */
		if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
			continue;
		if (inst->Instruction.Saturate)
			value = ac_build_clamp(&ctx->ac, value);

		if (reg->Register.File == TGSI_FILE_ADDRESS) {
			temp_ptr = ctx->addrs[reg->Register.Index][chan_index];
			LLVMBuildStore(builder, value, temp_ptr);
			continue;
		}

		/* Allocas hold f32 values; cast non-64-bit data to float. */
		if (!tgsi_type_is_64bit(dtype))
			value = ac_to_float(&ctx->ac, value);

		if (reg->Register.Indirect) {
			unsigned file = reg->Register.File;
			unsigned reg_index = reg->Register.Index;
			store_value_to_array(bld_base, value, file, chan_index,
					     reg_index, &reg->Indirect);
		} else {
			/* Direct store: locate the channel alloca (and the
			 * neighboring one for 64-bit destinations). */
			switch(reg->Register.File) {
			case TGSI_FILE_OUTPUT:
				temp_ptr = ctx->outputs[reg->Register.Index][chan_index];
				if (tgsi_type_is_64bit(dtype))
					temp_ptr2 = ctx->outputs[reg->Register.Index][chan_index + 1];
				break;

			case TGSI_FILE_TEMPORARY:
			{
				if (reg->Register.Index >= ctx->temps_count)
					continue;

				temp_ptr = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index];
				if (tgsi_type_is_64bit(dtype))
					temp_ptr2 = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index + 1];

				break;
			}
			default:
				return;
			}
			if (!tgsi_type_is_64bit(dtype))
				LLVMBuildStore(builder, value, temp_ptr);
			else {
				/* Split the 64-bit value into two 32-bit
				 * halves and store them separately. */
				LLVMValueRef ptr = LLVMBuildBitCast(builder, value,
								    LLVMVectorType(ctx->i32, 2), "");
				LLVMValueRef val2;
				value = LLVMBuildExtractElement(builder, ptr,
								ctx->i32_0, "");
				val2 = LLVMBuildExtractElement(builder, ptr,
							       ctx->i32_1, "");

				LLVMBuildStore(builder, ac_to_float(&ctx->ac, value), temp_ptr);
				LLVMBuildStore(builder, ac_to_float(&ctx->ac, val2), temp_ptr2);
			}
		}
	}
}
852
/* Map the TGSI program counter to the line label used in emitted IR. */
static int get_line(int pc)
{
	/* Subtract 1 so that the number shown is that of the corresponding
	 * opcode in the TGSI dump, e.g. an if block has the same suffix as
	 * the instruction number of the corresponding TGSI IF.
	 */
	const int tgsi_line = pc - 1;
	return tgsi_line;
}
861
862static void bgnloop_emit(const struct lp_build_tgsi_action *action,
863			 struct lp_build_tgsi_context *bld_base,
864			 struct lp_build_emit_data *emit_data)
865{
866	struct si_shader_context *ctx = si_shader_context(bld_base);
867	ac_build_bgnloop(&ctx->ac, get_line(bld_base->pc));
868}
869
870static void brk_emit(const struct lp_build_tgsi_action *action,
871		     struct lp_build_tgsi_context *bld_base,
872		     struct lp_build_emit_data *emit_data)
873{
874	struct si_shader_context *ctx = si_shader_context(bld_base);
875	ac_build_break(&ctx->ac);
876}
877
878static void cont_emit(const struct lp_build_tgsi_action *action,
879		      struct lp_build_tgsi_context *bld_base,
880		      struct lp_build_emit_data *emit_data)
881{
882	struct si_shader_context *ctx = si_shader_context(bld_base);
883	ac_build_continue(&ctx->ac);
884}
885
886static void else_emit(const struct lp_build_tgsi_action *action,
887		      struct lp_build_tgsi_context *bld_base,
888		      struct lp_build_emit_data *emit_data)
889{
890	struct si_shader_context *ctx = si_shader_context(bld_base);
891	ac_build_else(&ctx->ac, get_line(bld_base->pc));
892}
893
894static void endif_emit(const struct lp_build_tgsi_action *action,
895		       struct lp_build_tgsi_context *bld_base,
896		       struct lp_build_emit_data *emit_data)
897{
898	struct si_shader_context *ctx = si_shader_context(bld_base);
899	ac_build_endif(&ctx->ac, get_line(bld_base->pc));
900}
901
902static void endloop_emit(const struct lp_build_tgsi_action *action,
903			 struct lp_build_tgsi_context *bld_base,
904			 struct lp_build_emit_data *emit_data)
905{
906	struct si_shader_context *ctx = si_shader_context(bld_base);
907	ac_build_endloop(&ctx->ac, get_line(bld_base->pc));
908}
909
910static void if_emit(const struct lp_build_tgsi_action *action,
911		    struct lp_build_tgsi_context *bld_base,
912		    struct lp_build_emit_data *emit_data)
913{
914	struct si_shader_context *ctx = si_shader_context(bld_base);
915	ac_build_if(&ctx->ac, emit_data->args[0], get_line(bld_base->pc));
916}
917
918static void uif_emit(const struct lp_build_tgsi_action *action,
919		     struct lp_build_tgsi_context *bld_base,
920		     struct lp_build_emit_data *emit_data)
921{
922	struct si_shader_context *ctx = si_shader_context(bld_base);
923	ac_build_uif(&ctx->ac, emit_data->args[0], get_line(bld_base->pc));
924}
925
926static void emit_immediate(struct lp_build_tgsi_context *bld_base,
927			   const struct tgsi_full_immediate *imm)
928{
929	unsigned i;
930	struct si_shader_context *ctx = si_shader_context(bld_base);
931
932	for (i = 0; i < 4; ++i) {
933		ctx->imms[ctx->imms_num * TGSI_NUM_CHANNELS + i] =
934				LLVMConstInt(ctx->i32, imm->u[i].Uint, false   );
935	}
936
937	ctx->imms_num++;
938}
939
940void si_llvm_context_init(struct si_shader_context *ctx,
941			  struct si_screen *sscreen,
942			  struct ac_llvm_compiler *compiler)
943{
944	struct lp_type type;
945
946	/* Initialize the gallivm object:
947	 * We are only using the module, context, and builder fields of this struct.
948	 * This should be enough for us to be able to pass our gallivm struct to the
949	 * helper functions in the gallivm module.
950	 */
951	memset(ctx, 0, sizeof(*ctx));
952	ctx->screen = sscreen;
953	ctx->compiler = compiler;
954
955	ac_llvm_context_init(&ctx->ac, sscreen->info.chip_class, sscreen->info.family);
956	ctx->ac.module = ac_create_module(compiler->tm, ctx->ac.context);
957
958	enum ac_float_mode float_mode =
959		sscreen->debug_flags & DBG(UNSAFE_MATH) ?
960			AC_FLOAT_MODE_UNSAFE_FP_MATH :
961			AC_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH;
962	ctx->ac.builder = ac_create_builder(ctx->ac.context, float_mode);
963
964	ctx->gallivm.context = ctx->ac.context;
965	ctx->gallivm.module = ctx->ac.module;
966	ctx->gallivm.builder = ctx->ac.builder;
967
968	struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
969
970	type.floating = true;
971	type.fixed = false;
972	type.sign = true;
973	type.norm = false;
974	type.width = 32;
975	type.length = 1;
976
977	lp_build_context_init(&bld_base->base, &ctx->gallivm, type);
978	lp_build_context_init(&ctx->bld_base.uint_bld, &ctx->gallivm, lp_uint_type(type));
979	lp_build_context_init(&ctx->bld_base.int_bld, &ctx->gallivm, lp_int_type(type));
980	type.width *= 2;
981	lp_build_context_init(&ctx->bld_base.dbl_bld, &ctx->gallivm, type);
982	lp_build_context_init(&ctx->bld_base.uint64_bld, &ctx->gallivm, lp_uint_type(type));
983	lp_build_context_init(&ctx->bld_base.int64_bld, &ctx->gallivm, lp_int_type(type));
984
985	bld_base->soa = 1;
986	bld_base->emit_swizzle = emit_swizzle;
987	bld_base->emit_declaration = emit_declaration;
988	bld_base->emit_immediate = emit_immediate;
989
990	bld_base->op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
991	bld_base->op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
992	bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
993	bld_base->op_actions[TGSI_OPCODE_IF].emit = if_emit;
994	bld_base->op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
995	bld_base->op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
996	bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
997	bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
998
999	si_shader_context_init_alu(&ctx->bld_base);
1000	si_shader_context_init_mem(ctx);
1001
1002	ctx->voidt = LLVMVoidTypeInContext(ctx->ac.context);
1003	ctx->i1 = LLVMInt1TypeInContext(ctx->ac.context);
1004	ctx->i8 = LLVMInt8TypeInContext(ctx->ac.context);
1005	ctx->i32 = LLVMInt32TypeInContext(ctx->ac.context);
1006	ctx->i64 = LLVMInt64TypeInContext(ctx->ac.context);
1007	ctx->i128 = LLVMIntTypeInContext(ctx->ac.context, 128);
1008	ctx->f32 = LLVMFloatTypeInContext(ctx->ac.context);
1009	ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
1010	ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
1011	ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
1012	ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
1013
1014	ctx->i32_0 = LLVMConstInt(ctx->i32, 0, 0);
1015	ctx->i32_1 = LLVMConstInt(ctx->i32, 1, 0);
1016	ctx->i1false = LLVMConstInt(ctx->i1, 0, 0);
1017	ctx->i1true = LLVMConstInt(ctx->i1, 1, 0);
1018}
1019
1020/* Set the context to a certain TGSI shader. Can be called repeatedly
1021 * to change the shader. */
1022void si_llvm_context_set_tgsi(struct si_shader_context *ctx,
1023			      struct si_shader *shader)
1024{
1025	const struct tgsi_shader_info *info = NULL;
1026	const struct tgsi_token *tokens = NULL;
1027
1028	if (shader && shader->selector) {
1029		info = &shader->selector->info;
1030		tokens = shader->selector->tokens;
1031	}
1032
1033	ctx->shader = shader;
1034	ctx->type = info ? info->processor : -1;
1035	ctx->bld_base.info = info;
1036
1037	/* Clean up the old contents. */
1038	FREE(ctx->temp_arrays);
1039	ctx->temp_arrays = NULL;
1040	FREE(ctx->temp_array_allocas);
1041	ctx->temp_array_allocas = NULL;
1042
1043	FREE(ctx->imms);
1044	ctx->imms = NULL;
1045	ctx->imms_num = 0;
1046
1047	FREE(ctx->temps);
1048	ctx->temps = NULL;
1049	ctx->temps_count = 0;
1050
1051	if (!info)
1052		return;
1053
1054	ctx->num_const_buffers = util_last_bit(info->const_buffers_declared);
1055	ctx->num_shader_buffers = util_last_bit(info->shader_buffers_declared);
1056
1057	ctx->num_samplers = util_last_bit(info->samplers_declared);
1058	ctx->num_images = util_last_bit(info->images_declared);
1059
1060	if (!tokens)
1061		return;
1062
1063	if (info->array_max[TGSI_FILE_TEMPORARY] > 0) {
1064		int size = info->array_max[TGSI_FILE_TEMPORARY];
1065
1066		ctx->temp_arrays = CALLOC(size, sizeof(ctx->temp_arrays[0]));
1067		ctx->temp_array_allocas = CALLOC(size, sizeof(ctx->temp_array_allocas[0]));
1068
1069		tgsi_scan_arrays(tokens, TGSI_FILE_TEMPORARY, size,
1070				 ctx->temp_arrays);
1071	}
1072	if (info->file_max[TGSI_FILE_IMMEDIATE] >= 0) {
1073		int size = info->file_max[TGSI_FILE_IMMEDIATE] + 1;
1074		ctx->imms = MALLOC(size * TGSI_NUM_CHANNELS * sizeof(LLVMValueRef));
1075	}
1076
1077	/* Re-set these to start with a clean slate. */
1078	ctx->bld_base.num_instructions = 0;
1079	ctx->bld_base.pc = 0;
1080	memset(ctx->outputs, 0, sizeof(ctx->outputs));
1081
1082	ctx->bld_base.emit_store = si_llvm_emit_store;
1083	ctx->bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = si_llvm_emit_fetch;
1084	ctx->bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = si_llvm_emit_fetch;
1085	ctx->bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = si_llvm_emit_fetch;
1086	ctx->bld_base.emit_fetch_funcs[TGSI_FILE_OUTPUT] = si_llvm_emit_fetch;
1087	ctx->bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = fetch_system_value;
1088}
1089
1090void si_llvm_create_func(struct si_shader_context *ctx,
1091			 const char *name,
1092			 LLVMTypeRef *return_types, unsigned num_return_elems,
1093			 LLVMTypeRef *ParamTypes, unsigned ParamCount)
1094{
1095	LLVMTypeRef main_fn_type, ret_type;
1096	LLVMBasicBlockRef main_fn_body;
1097	enum si_llvm_calling_convention call_conv;
1098	unsigned real_shader_type;
1099
1100	if (num_return_elems)
1101		ret_type = LLVMStructTypeInContext(ctx->ac.context,
1102						   return_types,
1103						   num_return_elems, true);
1104	else
1105		ret_type = ctx->voidt;
1106
1107	/* Setup the function */
1108	ctx->return_type = ret_type;
1109	main_fn_type = LLVMFunctionType(ret_type, ParamTypes, ParamCount, 0);
1110	ctx->main_fn = LLVMAddFunction(ctx->gallivm.module, name, main_fn_type);
1111	main_fn_body = LLVMAppendBasicBlockInContext(ctx->ac.context,
1112			ctx->main_fn, "main_body");
1113	LLVMPositionBuilderAtEnd(ctx->ac.builder, main_fn_body);
1114
1115	real_shader_type = ctx->type;
1116
1117	/* LS is merged into HS (TCS), and ES is merged into GS. */
1118	if (ctx->screen->info.chip_class >= GFX9) {
1119		if (ctx->shader->key.as_ls)
1120			real_shader_type = PIPE_SHADER_TESS_CTRL;
1121		else if (ctx->shader->key.as_es)
1122			real_shader_type = PIPE_SHADER_GEOMETRY;
1123	}
1124
1125	switch (real_shader_type) {
1126	case PIPE_SHADER_VERTEX:
1127	case PIPE_SHADER_TESS_EVAL:
1128		call_conv = RADEON_LLVM_AMDGPU_VS;
1129		break;
1130	case PIPE_SHADER_TESS_CTRL:
1131		call_conv = RADEON_LLVM_AMDGPU_HS;
1132		break;
1133	case PIPE_SHADER_GEOMETRY:
1134		call_conv = RADEON_LLVM_AMDGPU_GS;
1135		break;
1136	case PIPE_SHADER_FRAGMENT:
1137		call_conv = RADEON_LLVM_AMDGPU_PS;
1138		break;
1139	case PIPE_SHADER_COMPUTE:
1140		call_conv = RADEON_LLVM_AMDGPU_CS;
1141		break;
1142	default:
1143		unreachable("Unhandle shader type");
1144	}
1145
1146	LLVMSetFunctionCallConv(ctx->main_fn, call_conv);
1147}
1148
1149void si_llvm_optimize_module(struct si_shader_context *ctx)
1150{
1151	/* Dump LLVM IR before any optimization passes */
1152	if (ctx->screen->debug_flags & DBG(PREOPT_IR) &&
1153	    si_can_dump_shader(ctx->screen, ctx->type))
1154		LLVMDumpModule(ctx->gallivm.module);
1155
1156	/* Run the pass */
1157	LLVMRunPassManager(ctx->compiler->passmgr, ctx->gallivm.module);
1158	LLVMDisposeBuilder(ctx->ac.builder);
1159}
1160
1161void si_llvm_dispose(struct si_shader_context *ctx)
1162{
1163	LLVMDisposeModule(ctx->gallivm.module);
1164	LLVMContextDispose(ctx->gallivm.context);
1165	FREE(ctx->temp_arrays);
1166	ctx->temp_arrays = NULL;
1167	FREE(ctx->temp_array_allocas);
1168	ctx->temp_array_allocas = NULL;
1169	FREE(ctx->temps);
1170	ctx->temps = NULL;
1171	ctx->temps_count = 0;
1172	FREE(ctx->imms);
1173	ctx->imms = NULL;
1174	ctx->imms_num = 0;
1175	ac_llvm_context_dispose(&ctx->ac);
1176}
1177