/*
 * Copyright (c) 2012 Rob Clark <robdclark@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
23
24#include "ir3.h"
25
26#include <stdlib.h>
27#include <stdio.h>
28#include <string.h>
29#include <assert.h>
30#include <stdbool.h>
31#include <errno.h>
32
33#include "util/bitscan.h"
34#include "util/ralloc.h"
35#include "util/u_math.h"
36
37#include "instr-a3xx.h"
38#include "ir3_compiler.h"
39
/* Allocate 'sz' zero-initialized bytes parented to 'shader'.  Every
 * allocation made through here hangs off the shader, so that everything
 * can be freed easily in one shot.
 */
void * ir3_alloc(struct ir3 *shader, int sz)
{
	/* TODO: don't use rzalloc */
	void *mem = rzalloc_size(shader, sz);

	return mem;
}
47
48struct ir3 * ir3_create(struct ir3_compiler *compiler,
49		gl_shader_stage type, unsigned nin, unsigned nout)
50{
51	struct ir3 *shader = rzalloc(NULL, struct ir3);
52
53	shader->compiler = compiler;
54	shader->type = type;
55	shader->ninputs = nin;
56	shader->inputs = ir3_alloc(shader, sizeof(shader->inputs[0]) * nin);
57
58	shader->noutputs = nout;
59	shader->outputs = ir3_alloc(shader, sizeof(shader->outputs[0]) * nout);
60
61	list_inithead(&shader->block_list);
62	list_inithead(&shader->array_list);
63
64	return shader;
65}
66
/* Release a shader by freeing its ralloc context. */
void ir3_destroy(struct ir3 *shader)
{
	ralloc_free(shader);
}
71
/* Encoder sanity check: if 'cond' does not hold, trip the debug assert and
 * make the enclosing function return -1.  Only usable inside functions that
 * return int.
 */
#define iassert(cond) do { \
	if (!(cond)) { \
		debug_assert(cond); \
		return -1; \
	} } while (0)

/* Check that 'reg' has the expected width: IR3_REG_HALF must be clear when
 * 'full' is true, and set otherwise.  Fails like iassert().
 *
 * NOTE: there is deliberately no semicolon after while (0), so the macro
 * expands to a single statement and is safe in an unbraced if/else.
 */
#define iassert_type(reg, full) do { \
	if ((full)) { \
		iassert(!((reg)->flags & IR3_REG_HALF)); \
	} else { \
		iassert((reg)->flags & IR3_REG_HALF); \
	} } while (0)
84
85static uint32_t reg(struct ir3_register *reg, struct ir3_info *info,
86		uint32_t repeat, uint32_t valid_flags)
87{
88	reg_t val = { .dummy32 = 0 };
89
90	if (reg->flags & ~valid_flags) {
91		debug_printf("INVALID FLAGS: %x vs %x\n",
92				reg->flags, valid_flags);
93	}
94
95	if (!(reg->flags & IR3_REG_R))
96		repeat = 0;
97
98	if (reg->flags & IR3_REG_IMMED) {
99		val.iim_val = reg->iim_val;
100	} else {
101		unsigned components;
102		int16_t max;
103
104		if (reg->flags & IR3_REG_RELATIV) {
105			components = reg->size;
106			val.idummy10 = reg->array.offset;
107			max = (reg->array.offset + repeat + components - 1);
108		} else {
109			components = util_last_bit(reg->wrmask);
110			val.comp = reg->num & 0x3;
111			val.num  = reg->num >> 2;
112			max = (reg->num + repeat + components - 1);
113		}
114
115		if (reg->flags & IR3_REG_CONST) {
116			info->max_const = MAX2(info->max_const, max >> 2);
117		} else if (val.num == 63) {
118			/* ignore writes to dummy register r63.x */
119		} else if (max < regid(48, 0)) {
120			if (reg->flags & IR3_REG_HALF) {
121				if (info->gpu_id >= 600) {
122					/* starting w/ a6xx, half regs conflict with full regs: */
123					info->max_reg = MAX2(info->max_reg, max >> 3);
124				} else {
125					info->max_half_reg = MAX2(info->max_half_reg, max >> 2);
126				}
127			} else {
128				info->max_reg = MAX2(info->max_reg, max >> 2);
129			}
130		}
131	}
132
133	return val.dummy32;
134}
135
136static int emit_cat0(struct ir3_instruction *instr, void *ptr,
137		struct ir3_info *info)
138{
139	instr_cat0_t *cat0 = ptr;
140
141	if (info->gpu_id >= 500) {
142		cat0->a5xx.immed = instr->cat0.immed;
143	} else if (info->gpu_id >= 400) {
144		cat0->a4xx.immed = instr->cat0.immed;
145	} else {
146		cat0->a3xx.immed = instr->cat0.immed;
147	}
148	cat0->repeat   = instr->repeat;
149	cat0->ss       = !!(instr->flags & IR3_INSTR_SS);
150	cat0->inv      = instr->cat0.inv;
151	cat0->comp     = instr->cat0.comp;
152	cat0->opc      = instr->opc;
153	cat0->jmp_tgt  = !!(instr->flags & IR3_INSTR_JP);
154	cat0->sync     = !!(instr->flags & IR3_INSTR_SY);
155	cat0->opc_cat  = 0;
156
157	return 0;
158}
159
160static int emit_cat1(struct ir3_instruction *instr, void *ptr,
161		struct ir3_info *info)
162{
163	struct ir3_register *dst = instr->regs[0];
164	struct ir3_register *src = instr->regs[1];
165	instr_cat1_t *cat1 = ptr;
166
167	iassert(instr->regs_count == 2);
168	iassert_type(dst, type_size(instr->cat1.dst_type) == 32);
169	if (!(src->flags & IR3_REG_IMMED))
170		iassert_type(src, type_size(instr->cat1.src_type) == 32);
171
172	if (src->flags & IR3_REG_IMMED) {
173		cat1->iim_val = src->iim_val;
174		cat1->src_im  = 1;
175	} else if (src->flags & IR3_REG_RELATIV) {
176		cat1->off       = reg(src, info, instr->repeat,
177				IR3_REG_R | IR3_REG_CONST | IR3_REG_HALF | IR3_REG_RELATIV);
178		cat1->src_rel   = 1;
179		cat1->src_rel_c = !!(src->flags & IR3_REG_CONST);
180	} else {
181		cat1->src  = reg(src, info, instr->repeat,
182				IR3_REG_R | IR3_REG_CONST | IR3_REG_HALF);
183		cat1->src_c     = !!(src->flags & IR3_REG_CONST);
184	}
185
186	cat1->dst      = reg(dst, info, instr->repeat,
187			IR3_REG_RELATIV | IR3_REG_EVEN |
188			IR3_REG_R | IR3_REG_POS_INF | IR3_REG_HALF);
189	cat1->repeat   = instr->repeat;
190	cat1->src_r    = !!(src->flags & IR3_REG_R);
191	cat1->ss       = !!(instr->flags & IR3_INSTR_SS);
192	cat1->ul       = !!(instr->flags & IR3_INSTR_UL);
193	cat1->dst_type = instr->cat1.dst_type;
194	cat1->dst_rel  = !!(dst->flags & IR3_REG_RELATIV);
195	cat1->src_type = instr->cat1.src_type;
196	cat1->even     = !!(dst->flags & IR3_REG_EVEN);
197	cat1->pos_inf  = !!(dst->flags & IR3_REG_POS_INF);
198	cat1->jmp_tgt  = !!(instr->flags & IR3_INSTR_JP);
199	cat1->sync     = !!(instr->flags & IR3_INSTR_SY);
200	cat1->opc_cat  = 1;
201
202	return 0;
203}
204
205static int emit_cat2(struct ir3_instruction *instr, void *ptr,
206		struct ir3_info *info)
207{
208	struct ir3_register *dst = instr->regs[0];
209	struct ir3_register *src1 = instr->regs[1];
210	struct ir3_register *src2 = instr->regs[2];
211	instr_cat2_t *cat2 = ptr;
212	unsigned absneg = ir3_cat2_absneg(instr->opc);
213
214	iassert((instr->regs_count == 2) || (instr->regs_count == 3));
215
216	if (instr->nop) {
217		iassert(!instr->repeat);
218		iassert(instr->nop <= 3);
219
220		cat2->src1_r = instr->nop & 0x1;
221		cat2->src2_r = (instr->nop >> 1) & 0x1;
222	} else {
223		cat2->src1_r = !!(src1->flags & IR3_REG_R);
224		if (src2)
225			cat2->src2_r = !!(src2->flags & IR3_REG_R);
226	}
227
228	if (src1->flags & IR3_REG_RELATIV) {
229		iassert(src1->array.offset < (1 << 10));
230		cat2->rel1.src1      = reg(src1, info, instr->repeat,
231				IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R |
232				IR3_REG_HALF | absneg);
233		cat2->rel1.src1_c    = !!(src1->flags & IR3_REG_CONST);
234		cat2->rel1.src1_rel  = 1;
235	} else if (src1->flags & IR3_REG_CONST) {
236		iassert(src1->num < (1 << 12));
237		cat2->c1.src1   = reg(src1, info, instr->repeat,
238				IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF);
239		cat2->c1.src1_c = 1;
240	} else {
241		iassert(src1->num < (1 << 11));
242		cat2->src1 = reg(src1, info, instr->repeat,
243				IR3_REG_IMMED | IR3_REG_R | IR3_REG_HALF |
244				absneg);
245	}
246	cat2->src1_im  = !!(src1->flags & IR3_REG_IMMED);
247	cat2->src1_neg = !!(src1->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT));
248	cat2->src1_abs = !!(src1->flags & (IR3_REG_FABS | IR3_REG_SABS));
249
250	if (src2) {
251		iassert((src2->flags & IR3_REG_IMMED) ||
252				!((src1->flags ^ src2->flags) & IR3_REG_HALF));
253
254		if (src2->flags & IR3_REG_RELATIV) {
255			iassert(src2->array.offset < (1 << 10));
256			cat2->rel2.src2      = reg(src2, info, instr->repeat,
257					IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R |
258					IR3_REG_HALF | absneg);
259			cat2->rel2.src2_c    = !!(src2->flags & IR3_REG_CONST);
260			cat2->rel2.src2_rel  = 1;
261		} else if (src2->flags & IR3_REG_CONST) {
262			iassert(src2->num < (1 << 12));
263			cat2->c2.src2   = reg(src2, info, instr->repeat,
264					IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF);
265			cat2->c2.src2_c = 1;
266		} else {
267			iassert(src2->num < (1 << 11));
268			cat2->src2 = reg(src2, info, instr->repeat,
269					IR3_REG_IMMED | IR3_REG_R | IR3_REG_HALF |
270					absneg);
271		}
272
273		cat2->src2_im  = !!(src2->flags & IR3_REG_IMMED);
274		cat2->src2_neg = !!(src2->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT));
275		cat2->src2_abs = !!(src2->flags & (IR3_REG_FABS | IR3_REG_SABS));
276	}
277
278	cat2->dst      = reg(dst, info, instr->repeat,
279			IR3_REG_R | IR3_REG_EI | IR3_REG_HALF);
280	cat2->repeat   = instr->repeat;
281	cat2->sat      = !!(instr->flags & IR3_INSTR_SAT);
282	cat2->ss       = !!(instr->flags & IR3_INSTR_SS);
283	cat2->ul       = !!(instr->flags & IR3_INSTR_UL);
284	cat2->dst_half = !!((src1->flags ^ dst->flags) & IR3_REG_HALF);
285	cat2->ei       = !!(dst->flags & IR3_REG_EI);
286	cat2->cond     = instr->cat2.condition;
287	cat2->full     = ! (src1->flags & IR3_REG_HALF);
288	cat2->opc      = instr->opc;
289	cat2->jmp_tgt  = !!(instr->flags & IR3_INSTR_JP);
290	cat2->sync     = !!(instr->flags & IR3_INSTR_SY);
291	cat2->opc_cat  = 2;
292
293	return 0;
294}
295
296static int emit_cat3(struct ir3_instruction *instr, void *ptr,
297		struct ir3_info *info)
298{
299	struct ir3_register *dst = instr->regs[0];
300	struct ir3_register *src1 = instr->regs[1];
301	struct ir3_register *src2 = instr->regs[2];
302	struct ir3_register *src3 = instr->regs[3];
303	unsigned absneg = ir3_cat3_absneg(instr->opc);
304	instr_cat3_t *cat3 = ptr;
305	uint32_t src_flags = 0;
306
307	switch (instr->opc) {
308	case OPC_MAD_F16:
309	case OPC_MAD_U16:
310	case OPC_MAD_S16:
311	case OPC_SEL_B16:
312	case OPC_SEL_S16:
313	case OPC_SEL_F16:
314	case OPC_SAD_S16:
315	case OPC_SAD_S32:  // really??
316		src_flags |= IR3_REG_HALF;
317		break;
318	default:
319		break;
320	}
321
322	iassert(instr->regs_count == 4);
323	iassert(!((src1->flags ^ src_flags) & IR3_REG_HALF));
324	iassert(!((src2->flags ^ src_flags) & IR3_REG_HALF));
325	iassert(!((src3->flags ^ src_flags) & IR3_REG_HALF));
326
327	if (instr->nop) {
328		iassert(!instr->repeat);
329		iassert(instr->nop <= 3);
330
331		cat3->src1_r = instr->nop & 0x1;
332		cat3->src2_r = (instr->nop >> 1) & 0x1;
333	} else {
334		cat3->src1_r = !!(src1->flags & IR3_REG_R);
335		cat3->src2_r = !!(src2->flags & IR3_REG_R);
336	}
337
338	if (src1->flags & IR3_REG_RELATIV) {
339		iassert(src1->array.offset < (1 << 10));
340		cat3->rel1.src1      = reg(src1, info, instr->repeat,
341				IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R |
342				IR3_REG_HALF | absneg);
343		cat3->rel1.src1_c    = !!(src1->flags & IR3_REG_CONST);
344		cat3->rel1.src1_rel  = 1;
345	} else if (src1->flags & IR3_REG_CONST) {
346		iassert(src1->num < (1 << 12));
347		cat3->c1.src1   = reg(src1, info, instr->repeat,
348				IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF);
349		cat3->c1.src1_c = 1;
350	} else {
351		iassert(src1->num < (1 << 11));
352		cat3->src1 = reg(src1, info, instr->repeat,
353				IR3_REG_R | IR3_REG_HALF | absneg);
354	}
355
356	cat3->src1_neg = !!(src1->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT));
357
358	cat3->src2     = reg(src2, info, instr->repeat,
359			IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF | absneg);
360	cat3->src2_c   = !!(src2->flags & IR3_REG_CONST);
361	cat3->src2_neg = !!(src2->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT));
362
363	if (src3->flags & IR3_REG_RELATIV) {
364		iassert(src3->array.offset < (1 << 10));
365		cat3->rel2.src3      = reg(src3, info, instr->repeat,
366				IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R |
367				IR3_REG_HALF | absneg);
368		cat3->rel2.src3_c    = !!(src3->flags & IR3_REG_CONST);
369		cat3->rel2.src3_rel  = 1;
370	} else if (src3->flags & IR3_REG_CONST) {
371		iassert(src3->num < (1 << 12));
372		cat3->c2.src3   = reg(src3, info, instr->repeat,
373				IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF);
374		cat3->c2.src3_c = 1;
375	} else {
376		iassert(src3->num < (1 << 11));
377		cat3->src3 = reg(src3, info, instr->repeat,
378				IR3_REG_R | IR3_REG_HALF | absneg);
379	}
380
381	cat3->src3_neg = !!(src3->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT));
382	cat3->src3_r   = !!(src3->flags & IR3_REG_R);
383
384	cat3->dst      = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
385	cat3->repeat   = instr->repeat;
386	cat3->sat      = !!(instr->flags & IR3_INSTR_SAT);
387	cat3->ss       = !!(instr->flags & IR3_INSTR_SS);
388	cat3->ul       = !!(instr->flags & IR3_INSTR_UL);
389	cat3->dst_half = !!((src_flags ^ dst->flags) & IR3_REG_HALF);
390	cat3->opc      = instr->opc;
391	cat3->jmp_tgt  = !!(instr->flags & IR3_INSTR_JP);
392	cat3->sync     = !!(instr->flags & IR3_INSTR_SY);
393	cat3->opc_cat  = 3;
394
395	return 0;
396}
397
398static int emit_cat4(struct ir3_instruction *instr, void *ptr,
399		struct ir3_info *info)
400{
401	struct ir3_register *dst = instr->regs[0];
402	struct ir3_register *src = instr->regs[1];
403	instr_cat4_t *cat4 = ptr;
404
405	iassert(instr->regs_count == 2);
406
407	if (src->flags & IR3_REG_RELATIV) {
408		iassert(src->array.offset < (1 << 10));
409		cat4->rel.src      = reg(src, info, instr->repeat,
410				IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_FNEG |
411				IR3_REG_FABS | IR3_REG_R | IR3_REG_HALF);
412		cat4->rel.src_c    = !!(src->flags & IR3_REG_CONST);
413		cat4->rel.src_rel  = 1;
414	} else if (src->flags & IR3_REG_CONST) {
415		iassert(src->num < (1 << 12));
416		cat4->c.src   = reg(src, info, instr->repeat,
417				IR3_REG_CONST | IR3_REG_FNEG | IR3_REG_FABS |
418				IR3_REG_R | IR3_REG_HALF);
419		cat4->c.src_c = 1;
420	} else {
421		iassert(src->num < (1 << 11));
422		cat4->src = reg(src, info, instr->repeat,
423				IR3_REG_IMMED | IR3_REG_FNEG | IR3_REG_FABS |
424				IR3_REG_R | IR3_REG_HALF);
425	}
426
427	cat4->src_im   = !!(src->flags & IR3_REG_IMMED);
428	cat4->src_neg  = !!(src->flags & IR3_REG_FNEG);
429	cat4->src_abs  = !!(src->flags & IR3_REG_FABS);
430	cat4->src_r    = !!(src->flags & IR3_REG_R);
431
432	cat4->dst      = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
433	cat4->repeat   = instr->repeat;
434	cat4->sat      = !!(instr->flags & IR3_INSTR_SAT);
435	cat4->ss       = !!(instr->flags & IR3_INSTR_SS);
436	cat4->ul       = !!(instr->flags & IR3_INSTR_UL);
437	cat4->dst_half = !!((src->flags ^ dst->flags) & IR3_REG_HALF);
438	cat4->full     = ! (src->flags & IR3_REG_HALF);
439	cat4->opc      = instr->opc;
440	cat4->jmp_tgt  = !!(instr->flags & IR3_INSTR_JP);
441	cat4->sync     = !!(instr->flags & IR3_INSTR_SY);
442	cat4->opc_cat  = 4;
443
444	return 0;
445}
446
447static int emit_cat5(struct ir3_instruction *instr, void *ptr,
448		struct ir3_info *info)
449{
450	struct ir3_register *dst = instr->regs[0];
451	/* To simplify things when there could be zero, one, or two args other
452	 * than tex/sampler idx, we use the first src reg in the ir to hold
453	 * samp_tex hvec2:
454	 */
455	struct ir3_register *src1;
456	struct ir3_register *src2;
457	instr_cat5_t *cat5 = ptr;
458
459	iassert((instr->regs_count == 2) ||
460			(instr->regs_count == 3) || (instr->regs_count == 4));
461
462	switch (instr->opc) {
463	case OPC_DSX:
464	case OPC_DSXPP_1:
465	case OPC_DSY:
466	case OPC_DSYPP_1:
467	case OPC_RGETPOS:
468	case OPC_RGETINFO:
469		iassert((instr->flags & IR3_INSTR_S2EN) == 0);
470		src1 = instr->regs[1];
471		src2 = instr->regs_count > 2 ? instr->regs[2] : NULL;
472		break;
473	default:
474		src1 = instr->regs[2];
475		src2 = instr->regs_count > 3 ? instr->regs[3] : NULL;
476		break;
477	}
478
479	assume(src1 || !src2);
480
481	if (src1) {
482		cat5->full = ! (src1->flags & IR3_REG_HALF);
483		cat5->src1 = reg(src1, info, instr->repeat, IR3_REG_HALF);
484	}
485
486	if (instr->flags & IR3_INSTR_S2EN) {
487		struct ir3_register *samp_tex = instr->regs[1];
488		if (src2) {
489			iassert(!((src1->flags ^ src2->flags) & IR3_REG_HALF));
490			cat5->s2en.src2 = reg(src2, info, instr->repeat, IR3_REG_HALF);
491		}
492		iassert(samp_tex->flags & IR3_REG_HALF);
493		cat5->s2en.src3 = reg(samp_tex, info, instr->repeat, IR3_REG_HALF);
494		iassert(!(instr->cat5.samp | instr->cat5.tex));
495	} else {
496		if (src2) {
497			iassert(!((src1->flags ^ src2->flags) & IR3_REG_HALF));
498			cat5->norm.src2 = reg(src2, info, instr->repeat, IR3_REG_HALF);
499		}
500		cat5->norm.samp = instr->cat5.samp;
501		cat5->norm.tex  = instr->cat5.tex;
502	}
503
504	cat5->dst      = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
505	cat5->wrmask   = dst->wrmask;
506	cat5->type     = instr->cat5.type;
507	cat5->is_3d    = !!(instr->flags & IR3_INSTR_3D);
508	cat5->is_a     = !!(instr->flags & IR3_INSTR_A);
509	cat5->is_s     = !!(instr->flags & IR3_INSTR_S);
510	cat5->is_s2en  = !!(instr->flags & IR3_INSTR_S2EN);
511	cat5->is_o     = !!(instr->flags & IR3_INSTR_O);
512	cat5->is_p     = !!(instr->flags & IR3_INSTR_P);
513	cat5->opc      = instr->opc;
514	cat5->jmp_tgt  = !!(instr->flags & IR3_INSTR_JP);
515	cat5->sync     = !!(instr->flags & IR3_INSTR_SY);
516	cat5->opc_cat  = 5;
517
518	return 0;
519}
520
521static int emit_cat6_a6xx(struct ir3_instruction *instr, void *ptr,
522		struct ir3_info *info)
523{
524	struct ir3_register *src1, *src2;
525	instr_cat6_a6xx_t *cat6 = ptr;
526	bool has_dest = (instr->opc == OPC_LDIB);
527
528	/* first reg should be SSBO binding point: */
529	iassert(instr->regs[1]->flags & IR3_REG_IMMED);
530
531	src1 = instr->regs[2];
532
533	if (has_dest) {
534		/* the src2 field in the instruction is actually the destination
535		 * register for load instructions:
536		 */
537		src2 = instr->regs[0];
538	} else {
539		src2 = instr->regs[3];
540	}
541
542	cat6->type      = instr->cat6.type;
543	cat6->d         = instr->cat6.d - 1;
544	cat6->typed     = instr->cat6.typed;
545	cat6->type_size = instr->cat6.iim_val - 1;
546	cat6->opc       = instr->opc;
547	cat6->jmp_tgt   = !!(instr->flags & IR3_INSTR_JP);
548	cat6->sync      = !!(instr->flags & IR3_INSTR_SY);
549	cat6->opc_cat   = 6;
550
551	cat6->src1 = reg(src1, info, instr->repeat, 0);
552	cat6->src2 = reg(src2, info, instr->repeat, 0);
553	cat6->ssbo = instr->regs[1]->iim_val;
554
555	switch (instr->opc) {
556	case OPC_ATOMIC_ADD:
557	case OPC_ATOMIC_SUB:
558	case OPC_ATOMIC_XCHG:
559	case OPC_ATOMIC_INC:
560	case OPC_ATOMIC_DEC:
561	case OPC_ATOMIC_CMPXCHG:
562	case OPC_ATOMIC_MIN:
563	case OPC_ATOMIC_MAX:
564	case OPC_ATOMIC_AND:
565	case OPC_ATOMIC_OR:
566	case OPC_ATOMIC_XOR:
567		cat6->pad1 = 0x1;
568		cat6->pad2 = 0xc;
569		cat6->pad3 = 0x0;
570		cat6->pad4 = 0x3;
571		break;
572	case OPC_STIB:
573		cat6->pad1 = 0x0;
574		cat6->pad2 = 0xc;
575		cat6->pad3 = 0x0;
576		cat6->pad4 = 0x2;
577		break;
578	case OPC_LDIB:
579		cat6->pad1 = 0x1;
580		cat6->pad2 = 0xc;
581		cat6->pad3 = 0x0;
582		cat6->pad4 = 0x2;
583		break;
584	case OPC_LDC:
585		cat6->pad1 = 0x0;
586		cat6->pad2 = 0x8;
587		cat6->pad3 = 0x0;
588		cat6->pad4 = 0x2;
589		break;
590	default:
591		iassert(0);
592	}
593
594	return 0;
595}
596
597static int emit_cat6(struct ir3_instruction *instr, void *ptr,
598		struct ir3_info *info)
599{
600	struct ir3_register *dst, *src1, *src2;
601	instr_cat6_t *cat6 = ptr;
602
603	/* In a6xx we start using a new instruction encoding for some of
604	 * these instructions:
605	 */
606	if (info->gpu_id >= 600) {
607		switch (instr->opc) {
608		case OPC_ATOMIC_ADD:
609		case OPC_ATOMIC_SUB:
610		case OPC_ATOMIC_XCHG:
611		case OPC_ATOMIC_INC:
612		case OPC_ATOMIC_DEC:
613		case OPC_ATOMIC_CMPXCHG:
614		case OPC_ATOMIC_MIN:
615		case OPC_ATOMIC_MAX:
616		case OPC_ATOMIC_AND:
617		case OPC_ATOMIC_OR:
618		case OPC_ATOMIC_XOR:
619			/* The shared variants of these still use the old encoding: */
620			if (!(instr->flags & IR3_INSTR_G))
621				break;
622			/* fallthrough */
623		case OPC_STIB:
624		case OPC_LDIB:
625		case OPC_LDC:
626			return emit_cat6_a6xx(instr, ptr, info);
627		default:
628			break;
629		}
630	}
631
632	bool type_full = type_size(instr->cat6.type) == 32;
633
634	cat6->type     = instr->cat6.type;
635	cat6->opc      = instr->opc;
636	cat6->jmp_tgt  = !!(instr->flags & IR3_INSTR_JP);
637	cat6->sync     = !!(instr->flags & IR3_INSTR_SY);
638	cat6->g        = !!(instr->flags & IR3_INSTR_G);
639	cat6->opc_cat  = 6;
640
641	switch (instr->opc) {
642	case OPC_RESINFO:
643	case OPC_RESFMT:
644		iassert_type(instr->regs[0], type_full); /* dst */
645		iassert_type(instr->regs[1], type_full); /* src1 */
646		break;
647	case OPC_L2G:
648	case OPC_G2L:
649		iassert_type(instr->regs[0], true);      /* dst */
650		iassert_type(instr->regs[1], true);      /* src1 */
651		break;
652	case OPC_STG:
653	case OPC_STL:
654	case OPC_STP:
655	case OPC_STLW:
656	case OPC_STIB:
657		/* no dst, so regs[0] is dummy */
658		iassert_type(instr->regs[1], true);      /* dst */
659		iassert_type(instr->regs[2], type_full); /* src1 */
660		iassert_type(instr->regs[3], true);      /* src2 */
661		break;
662	default:
663		iassert_type(instr->regs[0], type_full); /* dst */
664		iassert_type(instr->regs[1], true);      /* src1 */
665		if (instr->regs_count > 2)
666			iassert_type(instr->regs[2], true);  /* src1 */
667		break;
668	}
669
670	/* the "dst" for a store instruction is (from the perspective
671	 * of data flow in the shader, ie. register use/def, etc) in
672	 * fact a register that is read by the instruction, rather
673	 * than written:
674	 */
675	if (is_store(instr)) {
676		iassert(instr->regs_count >= 3);
677
678		dst  = instr->regs[1];
679		src1 = instr->regs[2];
680		src2 = (instr->regs_count >= 4) ? instr->regs[3] : NULL;
681	} else {
682		iassert(instr->regs_count >= 2);
683
684		dst  = instr->regs[0];
685		src1 = instr->regs[1];
686		src2 = (instr->regs_count >= 3) ? instr->regs[2] : NULL;
687	}
688
689	/* TODO we need a more comprehensive list about which instructions
690	 * can be encoded which way.  Or possibly use IR3_INSTR_0 flag to
691	 * indicate to use the src_off encoding even if offset is zero
692	 * (but then what to do about dst_off?)
693	 */
694	if (is_atomic(instr->opc)) {
695		instr_cat6ldgb_t *ldgb = ptr;
696
697		/* maybe these two bits both determine the instruction encoding? */
698		cat6->src_off = false;
699
700		ldgb->d = instr->cat6.d - 1;
701		ldgb->typed = instr->cat6.typed;
702		ldgb->type_size = instr->cat6.iim_val - 1;
703
704		ldgb->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
705
706		if (ldgb->g) {
707			struct ir3_register *src3 = instr->regs[3];
708			struct ir3_register *src4 = instr->regs[4];
709
710			/* first src is src_ssbo: */
711			iassert(src1->flags & IR3_REG_IMMED);
712			ldgb->src_ssbo = src1->uim_val;
713
714			ldgb->src1 = reg(src2, info, instr->repeat, IR3_REG_IMMED);
715			ldgb->src1_im = !!(src2->flags & IR3_REG_IMMED);
716			ldgb->src2 = reg(src3, info, instr->repeat, IR3_REG_IMMED);
717			ldgb->src2_im = !!(src3->flags & IR3_REG_IMMED);
718
719			ldgb->src3 = reg(src4, info, instr->repeat, 0);
720			ldgb->pad0 = 0x1;
721			ldgb->pad3 = 0x1;
722		} else {
723			ldgb->src1 = reg(src1, info, instr->repeat, IR3_REG_IMMED);
724			ldgb->src1_im = !!(src1->flags & IR3_REG_IMMED);
725			ldgb->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED);
726			ldgb->src2_im = !!(src2->flags & IR3_REG_IMMED);
727			ldgb->pad0 = 0x1;
728			ldgb->pad3 = 0x0;
729		}
730
731		return 0;
732	} else if (instr->opc == OPC_LDGB) {
733		struct ir3_register *src3 = instr->regs[3];
734		instr_cat6ldgb_t *ldgb = ptr;
735
736		/* maybe these two bits both determine the instruction encoding? */
737		cat6->src_off = false;
738
739		ldgb->d = instr->cat6.d - 1;
740		ldgb->typed = instr->cat6.typed;
741		ldgb->type_size = instr->cat6.iim_val - 1;
742
743		ldgb->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
744
745		/* first src is src_ssbo: */
746		iassert(src1->flags & IR3_REG_IMMED);
747		ldgb->src_ssbo = src1->uim_val;
748
749		/* then next two are src1/src2: */
750		ldgb->src1 = reg(src2, info, instr->repeat, IR3_REG_IMMED);
751		ldgb->src1_im = !!(src2->flags & IR3_REG_IMMED);
752		ldgb->src2 = reg(src3, info, instr->repeat, IR3_REG_IMMED);
753		ldgb->src2_im = !!(src3->flags & IR3_REG_IMMED);
754
755		ldgb->pad0 = 0x0;
756		ldgb->pad3 = 0x1;
757
758		return 0;
759	} else if (instr->opc == OPC_RESINFO) {
760		instr_cat6ldgb_t *ldgb = ptr;
761
762		ldgb->d = instr->cat6.d - 1;
763
764		ldgb->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
765
766		/* first src is src_ssbo: */
767		iassert(src1->flags & IR3_REG_IMMED);
768		ldgb->src_ssbo = src1->uim_val;
769
770		return 0;
771	} else if ((instr->opc == OPC_STGB) || (instr->opc == OPC_STIB)) {
772		struct ir3_register *src3 = instr->regs[4];
773		instr_cat6stgb_t *stgb = ptr;
774
775		/* maybe these two bits both determine the instruction encoding? */
776		cat6->src_off = true;
777		stgb->pad3 = 0x2;
778
779		stgb->d = instr->cat6.d - 1;
780		stgb->typed = instr->cat6.typed;
781		stgb->type_size = instr->cat6.iim_val - 1;
782
783		/* first src is dst_ssbo: */
784		iassert(dst->flags & IR3_REG_IMMED);
785		stgb->dst_ssbo = dst->uim_val;
786
787		/* then src1/src2/src3: */
788		stgb->src1 = reg(src1, info, instr->repeat, 0);
789		stgb->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED);
790		stgb->src2_im = !!(src2->flags & IR3_REG_IMMED);
791		stgb->src3 = reg(src3, info, instr->repeat, IR3_REG_IMMED);
792		stgb->src3_im = !!(src3->flags & IR3_REG_IMMED);
793
794		return 0;
795	} else if (instr->cat6.src_offset || (instr->opc == OPC_LDG) ||
796			(instr->opc == OPC_LDL)) {
797		instr_cat6a_t *cat6a = ptr;
798
799		cat6->src_off = true;
800
801		cat6a->src1 = reg(src1, info, instr->repeat, IR3_REG_IMMED);
802		cat6a->src1_im = !!(src1->flags & IR3_REG_IMMED);
803		if (src2) {
804			cat6a->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED);
805			cat6a->src2_im = !!(src2->flags & IR3_REG_IMMED);
806		}
807		cat6a->off = instr->cat6.src_offset;
808	} else {
809		instr_cat6b_t *cat6b = ptr;
810
811		cat6->src_off = false;
812
813		cat6b->src1 = reg(src1, info, instr->repeat, IR3_REG_IMMED | IR3_REG_HALF);
814		cat6b->src1_im = !!(src1->flags & IR3_REG_IMMED);
815		if (src2) {
816			cat6b->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED);
817			cat6b->src2_im = !!(src2->flags & IR3_REG_IMMED);
818		}
819	}
820
821	if (instr->cat6.dst_offset || (instr->opc == OPC_STG) ||
822			(instr->opc == OPC_STL)) {
823		instr_cat6c_t *cat6c = ptr;
824		cat6->dst_off = true;
825		cat6c->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
826		cat6c->off = instr->cat6.dst_offset;
827	} else {
828		instr_cat6d_t *cat6d = ptr;
829		cat6->dst_off = false;
830		cat6d->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
831	}
832
833	return 0;
834}
835
836static int emit_cat7(struct ir3_instruction *instr, void *ptr,
837		struct ir3_info *info)
838{
839	instr_cat7_t *cat7 = ptr;
840
841	cat7->ss      = !!(instr->flags & IR3_INSTR_SS);
842	cat7->w       = instr->cat7.w;
843	cat7->r       = instr->cat7.r;
844	cat7->l       = instr->cat7.l;
845	cat7->g       = instr->cat7.g;
846	cat7->opc     = instr->opc;
847	cat7->jmp_tgt = !!(instr->flags & IR3_INSTR_JP);
848	cat7->sync    = !!(instr->flags & IR3_INSTR_SY);
849	cat7->opc_cat = 7;
850
851	return 0;
852}
853
854static int (*emit[])(struct ir3_instruction *instr, void *ptr,
855		struct ir3_info *info) = {
856	emit_cat0, emit_cat1, emit_cat2, emit_cat3, emit_cat4, emit_cat5, emit_cat6,
857	emit_cat7,
858};
859
860void * ir3_assemble(struct ir3 *shader, struct ir3_info *info,
861		uint32_t gpu_id)
862{
863	uint32_t *ptr, *dwords;
864
865	info->gpu_id        = gpu_id;
866	info->max_reg       = -1;
867	info->max_half_reg  = -1;
868	info->max_const     = -1;
869	info->instrs_count  = 0;
870	info->sizedwords    = 0;
871	info->ss = info->sy = 0;
872
873	list_for_each_entry (struct ir3_block, block, &shader->block_list, node) {
874		list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
875			info->sizedwords += 2;
876		}
877	}
878
879	/* need an integer number of instruction "groups" (sets of 16
880	 * instructions on a4xx or sets of 4 instructions on a3xx),
881	 * so pad out w/ NOPs if needed: (NOTE each instruction is 64bits)
882	 */
883	if (gpu_id >= 400) {
884		info->sizedwords = align(info->sizedwords, 16 * 2);
885	} else {
886		info->sizedwords = align(info->sizedwords, 4 * 2);
887	}
888
889	ptr = dwords = calloc(4, info->sizedwords);
890
891	list_for_each_entry (struct ir3_block, block, &shader->block_list, node) {
892		list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
893			int ret = emit[opc_cat(instr->opc)](instr, dwords, info);
894			if (ret)
895				goto fail;
896			info->instrs_count += 1 + instr->repeat + instr->nop;
897			dwords += 2;
898
899			if (instr->flags & IR3_INSTR_SS)
900				info->ss++;
901
902			if (instr->flags & IR3_INSTR_SY)
903				info->sy++;
904		}
905	}
906
907	return ptr;
908
909fail:
910	free(ptr);
911	return NULL;
912}
913
914static struct ir3_register * reg_create(struct ir3 *shader,
915		int num, int flags)
916{
917	struct ir3_register *reg =
918			ir3_alloc(shader, sizeof(struct ir3_register));
919	reg->wrmask = 1;
920	reg->flags = flags;
921	reg->num = num;
922	if (shader->compiler->gpu_id >= 600)
923		reg->merged = true;
924	return reg;
925}
926
927static void insert_instr(struct ir3_block *block,
928		struct ir3_instruction *instr)
929{
930	struct ir3 *shader = block->shader;
931#ifdef DEBUG
932	instr->serialno = ++shader->instr_count;
933#endif
934	list_addtail(&instr->node, &block->instr_list);
935
936	if (is_input(instr))
937		array_insert(shader, shader->baryfs, instr);
938}
939
940struct ir3_block * ir3_block_create(struct ir3 *shader)
941{
942	struct ir3_block *block = ir3_alloc(shader, sizeof(*block));
943#ifdef DEBUG
944	block->serialno = ++shader->block_count;
945#endif
946	block->shader = shader;
947	list_inithead(&block->node);
948	list_inithead(&block->instr_list);
949	return block;
950}
951
952static struct ir3_instruction *instr_create(struct ir3_block *block, int nreg)
953{
954	struct ir3_instruction *instr;
955	unsigned sz = sizeof(*instr) + (nreg * sizeof(instr->regs[0]));
956	char *ptr = ir3_alloc(block->shader, sz);
957
958	instr = (struct ir3_instruction *)ptr;
959	ptr  += sizeof(*instr);
960	instr->regs = (struct ir3_register **)ptr;
961
962#ifdef DEBUG
963	instr->regs_max = nreg;
964#endif
965
966	return instr;
967}
968
969struct ir3_instruction * ir3_instr_create2(struct ir3_block *block,
970		opc_t opc, int nreg)
971{
972	struct ir3_instruction *instr = instr_create(block, nreg);
973	instr->block = block;
974	instr->opc = opc;
975	insert_instr(block, instr);
976	return instr;
977}
978
979struct ir3_instruction * ir3_instr_create(struct ir3_block *block, opc_t opc)
980{
981	/* NOTE: we could be slightly more clever, at least for non-meta,
982	 * and choose # of regs based on category.
983	 */
984	return ir3_instr_create2(block, opc, 4);
985}
986
987struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr)
988{
989	struct ir3_instruction *new_instr = instr_create(instr->block,
990			instr->regs_count);
991	struct ir3_register **regs;
992	unsigned i;
993
994	regs = new_instr->regs;
995	*new_instr = *instr;
996	new_instr->regs = regs;
997
998	insert_instr(instr->block, new_instr);
999
1000	/* clone registers: */
1001	new_instr->regs_count = 0;
1002	for (i = 0; i < instr->regs_count; i++) {
1003		struct ir3_register *reg = instr->regs[i];
1004		struct ir3_register *new_reg =
1005				ir3_reg_create(new_instr, reg->num, reg->flags);
1006		*new_reg = *reg;
1007	}
1008
1009	return new_instr;
1010}
1011
1012/* Add a false dependency to instruction, to ensure it is scheduled first: */
1013void ir3_instr_add_dep(struct ir3_instruction *instr, struct ir3_instruction *dep)
1014{
1015	array_insert(instr, instr->deps, dep);
1016}
1017
1018struct ir3_register * ir3_reg_create(struct ir3_instruction *instr,
1019		int num, int flags)
1020{
1021	struct ir3 *shader = instr->block->shader;
1022	struct ir3_register *reg = reg_create(shader, num, flags);
1023#ifdef DEBUG
1024	debug_assert(instr->regs_count < instr->regs_max);
1025#endif
1026	instr->regs[instr->regs_count++] = reg;
1027	return reg;
1028}
1029
1030struct ir3_register * ir3_reg_clone(struct ir3 *shader,
1031		struct ir3_register *reg)
1032{
1033	struct ir3_register *new_reg = reg_create(shader, 0, 0);
1034	*new_reg = *reg;
1035	return new_reg;
1036}
1037
1038void
1039ir3_instr_set_address(struct ir3_instruction *instr,
1040		struct ir3_instruction *addr)
1041{
1042	if (instr->address != addr) {
1043		struct ir3 *ir = instr->block->shader;
1044		instr->address = addr;
1045		array_insert(ir, ir->indirects, instr);
1046	}
1047}
1048
1049void
1050ir3_block_clear_mark(struct ir3_block *block)
1051{
1052	list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node)
1053		instr->flags &= ~IR3_INSTR_MARK;
1054}
1055
1056void
1057ir3_clear_mark(struct ir3 *ir)
1058{
1059	list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
1060		ir3_block_clear_mark(block);
1061	}
1062}
1063
1064/* note: this will destroy instr->depth, don't do it until after sched! */
1065unsigned
1066ir3_count_instructions(struct ir3 *ir)
1067{
1068	unsigned cnt = 0;
1069	list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
1070		list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
1071			instr->ip = cnt++;
1072		}
1073		block->start_ip = list_first_entry(&block->instr_list, struct ir3_instruction, node)->ip;
1074		block->end_ip = list_last_entry(&block->instr_list, struct ir3_instruction, node)->ip;
1075	}
1076	return cnt;
1077}
1078
1079struct ir3_array *
1080ir3_lookup_array(struct ir3 *ir, unsigned id)
1081{
1082	list_for_each_entry (struct ir3_array, arr, &ir->array_list, node)
1083		if (arr->id == id)
1084			return arr;
1085	return NULL;
1086}
1087