1b8e80941Smrg/*
2b8e80941Smrg * Copyright (c) 2012 Rob Clark <robdclark@gmail.com>
3b8e80941Smrg *
4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5b8e80941Smrg * copy of this software and associated documentation files (the "Software"),
6b8e80941Smrg * to deal in the Software without restriction, including without limitation
7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the
9b8e80941Smrg * Software is furnished to do so, subject to the following conditions:
10b8e80941Smrg *
11b8e80941Smrg * The above copyright notice and this permission notice (including the next
12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the
13b8e80941Smrg * Software.
14b8e80941Smrg *
15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20b8e80941Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21b8e80941Smrg * SOFTWARE.
22b8e80941Smrg */
23b8e80941Smrg
24b8e80941Smrg#include "ir3.h"
25b8e80941Smrg
26b8e80941Smrg#include <stdlib.h>
27b8e80941Smrg#include <stdio.h>
28b8e80941Smrg#include <string.h>
29b8e80941Smrg#include <assert.h>
30b8e80941Smrg#include <stdbool.h>
31b8e80941Smrg#include <errno.h>
32b8e80941Smrg
33b8e80941Smrg#include "util/bitscan.h"
34b8e80941Smrg#include "util/ralloc.h"
35b8e80941Smrg#include "util/u_math.h"
36b8e80941Smrg
37b8e80941Smrg#include "instr-a3xx.h"
38b8e80941Smrg#include "ir3_compiler.h"
39b8e80941Smrg
/* Allocate 'sz' bytes on behalf of 'shader'.
 *
 * The long-term intent is a simple allocator that carves allocations out
 * of an up-front allocated heap so that everything can be freed in one
 * shot.  For now this just forwards to rzalloc_size() with the shader as
 * the ralloc context.
 */
void * ir3_alloc(struct ir3 *shader, int sz)
{
	void *mem = rzalloc_size(shader, sz); /* TODO: don't use rzalloc */

	return mem;
}
47b8e80941Smrg
48b8e80941Smrgstruct ir3 * ir3_create(struct ir3_compiler *compiler,
49b8e80941Smrg		gl_shader_stage type, unsigned nin, unsigned nout)
50b8e80941Smrg{
51b8e80941Smrg	struct ir3 *shader = rzalloc(NULL, struct ir3);
52b8e80941Smrg
53b8e80941Smrg	shader->compiler = compiler;
54b8e80941Smrg	shader->type = type;
55b8e80941Smrg	shader->ninputs = nin;
56b8e80941Smrg	shader->inputs = ir3_alloc(shader, sizeof(shader->inputs[0]) * nin);
57b8e80941Smrg
58b8e80941Smrg	shader->noutputs = nout;
59b8e80941Smrg	shader->outputs = ir3_alloc(shader, sizeof(shader->outputs[0]) * nout);
60b8e80941Smrg
61b8e80941Smrg	list_inithead(&shader->block_list);
62b8e80941Smrg	list_inithead(&shader->array_list);
63b8e80941Smrg
64b8e80941Smrg	return shader;
65b8e80941Smrg}
66b8e80941Smrg
/* Release a shader created by ir3_create() by handing it to ralloc_free(). */
void ir3_destroy(struct ir3 *shader)
{
	ralloc_free(shader);
}
71b8e80941Smrg
/* Encoder sanity check: if 'cond' does not hold, trip debug_assert() and
 * make the *enclosing function* return -1.  Only usable in functions that
 * return int.  Note that 'cond' is evaluated a second time on failure.
 */
#define iassert(cond) do { \
	if (cond) \
		break; \
	debug_assert(cond); \
	return -1; \
} while (0)
77b8e80941Smrg
/* Check that a register's precision matches what the instruction expects:
 * when 'full' is non-zero the register must NOT carry IR3_REG_HALF,
 * otherwise it must.  On mismatch the enclosing function returns -1 (see
 * iassert()).
 *
 * The definition deliberately does not end in a semicolon; the caller
 * supplies it, so the macro behaves like a single statement and can be
 * used as the body of an if/else.
 */
#define iassert_type(reg, full) do { \
	if ((full)) { \
		iassert(!((reg)->flags & IR3_REG_HALF)); \
	} else { \
		iassert((reg)->flags & IR3_REG_HALF); \
	} } while (0)
84b8e80941Smrg
85b8e80941Smrgstatic uint32_t reg(struct ir3_register *reg, struct ir3_info *info,
86b8e80941Smrg		uint32_t repeat, uint32_t valid_flags)
87b8e80941Smrg{
88b8e80941Smrg	reg_t val = { .dummy32 = 0 };
89b8e80941Smrg
90b8e80941Smrg	if (reg->flags & ~valid_flags) {
91b8e80941Smrg		debug_printf("INVALID FLAGS: %x vs %x\n",
92b8e80941Smrg				reg->flags, valid_flags);
93b8e80941Smrg	}
94b8e80941Smrg
95b8e80941Smrg	if (!(reg->flags & IR3_REG_R))
96b8e80941Smrg		repeat = 0;
97b8e80941Smrg
98b8e80941Smrg	if (reg->flags & IR3_REG_IMMED) {
99b8e80941Smrg		val.iim_val = reg->iim_val;
100b8e80941Smrg	} else {
101b8e80941Smrg		unsigned components;
102b8e80941Smrg		int16_t max;
103b8e80941Smrg
104b8e80941Smrg		if (reg->flags & IR3_REG_RELATIV) {
105b8e80941Smrg			components = reg->size;
106b8e80941Smrg			val.idummy10 = reg->array.offset;
107b8e80941Smrg			max = (reg->array.offset + repeat + components - 1);
108b8e80941Smrg		} else {
109b8e80941Smrg			components = util_last_bit(reg->wrmask);
110b8e80941Smrg			val.comp = reg->num & 0x3;
111b8e80941Smrg			val.num  = reg->num >> 2;
112b8e80941Smrg			max = (reg->num + repeat + components - 1);
113b8e80941Smrg		}
114b8e80941Smrg
115b8e80941Smrg		if (reg->flags & IR3_REG_CONST) {
116b8e80941Smrg			info->max_const = MAX2(info->max_const, max >> 2);
117b8e80941Smrg		} else if (val.num == 63) {
118b8e80941Smrg			/* ignore writes to dummy register r63.x */
119b8e80941Smrg		} else if (max < regid(48, 0)) {
120b8e80941Smrg			if (reg->flags & IR3_REG_HALF) {
121b8e80941Smrg				if (info->gpu_id >= 600) {
122b8e80941Smrg					/* starting w/ a6xx, half regs conflict with full regs: */
123b8e80941Smrg					info->max_reg = MAX2(info->max_reg, max >> 3);
124b8e80941Smrg				} else {
125b8e80941Smrg					info->max_half_reg = MAX2(info->max_half_reg, max >> 2);
126b8e80941Smrg				}
127b8e80941Smrg			} else {
128b8e80941Smrg				info->max_reg = MAX2(info->max_reg, max >> 2);
129b8e80941Smrg			}
130b8e80941Smrg		}
131b8e80941Smrg	}
132b8e80941Smrg
133b8e80941Smrg	return val.dummy32;
134b8e80941Smrg}
135b8e80941Smrg
136b8e80941Smrgstatic int emit_cat0(struct ir3_instruction *instr, void *ptr,
137b8e80941Smrg		struct ir3_info *info)
138b8e80941Smrg{
139b8e80941Smrg	instr_cat0_t *cat0 = ptr;
140b8e80941Smrg
141b8e80941Smrg	if (info->gpu_id >= 500) {
142b8e80941Smrg		cat0->a5xx.immed = instr->cat0.immed;
143b8e80941Smrg	} else if (info->gpu_id >= 400) {
144b8e80941Smrg		cat0->a4xx.immed = instr->cat0.immed;
145b8e80941Smrg	} else {
146b8e80941Smrg		cat0->a3xx.immed = instr->cat0.immed;
147b8e80941Smrg	}
148b8e80941Smrg	cat0->repeat   = instr->repeat;
149b8e80941Smrg	cat0->ss       = !!(instr->flags & IR3_INSTR_SS);
150b8e80941Smrg	cat0->inv      = instr->cat0.inv;
151b8e80941Smrg	cat0->comp     = instr->cat0.comp;
152b8e80941Smrg	cat0->opc      = instr->opc;
153b8e80941Smrg	cat0->jmp_tgt  = !!(instr->flags & IR3_INSTR_JP);
154b8e80941Smrg	cat0->sync     = !!(instr->flags & IR3_INSTR_SY);
155b8e80941Smrg	cat0->opc_cat  = 0;
156b8e80941Smrg
157b8e80941Smrg	return 0;
158b8e80941Smrg}
159b8e80941Smrg
160b8e80941Smrgstatic int emit_cat1(struct ir3_instruction *instr, void *ptr,
161b8e80941Smrg		struct ir3_info *info)
162b8e80941Smrg{
163b8e80941Smrg	struct ir3_register *dst = instr->regs[0];
164b8e80941Smrg	struct ir3_register *src = instr->regs[1];
165b8e80941Smrg	instr_cat1_t *cat1 = ptr;
166b8e80941Smrg
167b8e80941Smrg	iassert(instr->regs_count == 2);
168b8e80941Smrg	iassert_type(dst, type_size(instr->cat1.dst_type) == 32);
169b8e80941Smrg	if (!(src->flags & IR3_REG_IMMED))
170b8e80941Smrg		iassert_type(src, type_size(instr->cat1.src_type) == 32);
171b8e80941Smrg
172b8e80941Smrg	if (src->flags & IR3_REG_IMMED) {
173b8e80941Smrg		cat1->iim_val = src->iim_val;
174b8e80941Smrg		cat1->src_im  = 1;
175b8e80941Smrg	} else if (src->flags & IR3_REG_RELATIV) {
176b8e80941Smrg		cat1->off       = reg(src, info, instr->repeat,
177b8e80941Smrg				IR3_REG_R | IR3_REG_CONST | IR3_REG_HALF | IR3_REG_RELATIV);
178b8e80941Smrg		cat1->src_rel   = 1;
179b8e80941Smrg		cat1->src_rel_c = !!(src->flags & IR3_REG_CONST);
180b8e80941Smrg	} else {
181b8e80941Smrg		cat1->src  = reg(src, info, instr->repeat,
182b8e80941Smrg				IR3_REG_R | IR3_REG_CONST | IR3_REG_HALF);
183b8e80941Smrg		cat1->src_c     = !!(src->flags & IR3_REG_CONST);
184b8e80941Smrg	}
185b8e80941Smrg
186b8e80941Smrg	cat1->dst      = reg(dst, info, instr->repeat,
187b8e80941Smrg			IR3_REG_RELATIV | IR3_REG_EVEN |
188b8e80941Smrg			IR3_REG_R | IR3_REG_POS_INF | IR3_REG_HALF);
189b8e80941Smrg	cat1->repeat   = instr->repeat;
190b8e80941Smrg	cat1->src_r    = !!(src->flags & IR3_REG_R);
191b8e80941Smrg	cat1->ss       = !!(instr->flags & IR3_INSTR_SS);
192b8e80941Smrg	cat1->ul       = !!(instr->flags & IR3_INSTR_UL);
193b8e80941Smrg	cat1->dst_type = instr->cat1.dst_type;
194b8e80941Smrg	cat1->dst_rel  = !!(dst->flags & IR3_REG_RELATIV);
195b8e80941Smrg	cat1->src_type = instr->cat1.src_type;
196b8e80941Smrg	cat1->even     = !!(dst->flags & IR3_REG_EVEN);
197b8e80941Smrg	cat1->pos_inf  = !!(dst->flags & IR3_REG_POS_INF);
198b8e80941Smrg	cat1->jmp_tgt  = !!(instr->flags & IR3_INSTR_JP);
199b8e80941Smrg	cat1->sync     = !!(instr->flags & IR3_INSTR_SY);
200b8e80941Smrg	cat1->opc_cat  = 1;
201b8e80941Smrg
202b8e80941Smrg	return 0;
203b8e80941Smrg}
204b8e80941Smrg
205b8e80941Smrgstatic int emit_cat2(struct ir3_instruction *instr, void *ptr,
206b8e80941Smrg		struct ir3_info *info)
207b8e80941Smrg{
208b8e80941Smrg	struct ir3_register *dst = instr->regs[0];
209b8e80941Smrg	struct ir3_register *src1 = instr->regs[1];
210b8e80941Smrg	struct ir3_register *src2 = instr->regs[2];
211b8e80941Smrg	instr_cat2_t *cat2 = ptr;
212b8e80941Smrg	unsigned absneg = ir3_cat2_absneg(instr->opc);
213b8e80941Smrg
214b8e80941Smrg	iassert((instr->regs_count == 2) || (instr->regs_count == 3));
215b8e80941Smrg
216b8e80941Smrg	if (instr->nop) {
217b8e80941Smrg		iassert(!instr->repeat);
218b8e80941Smrg		iassert(instr->nop <= 3);
219b8e80941Smrg
220b8e80941Smrg		cat2->src1_r = instr->nop & 0x1;
221b8e80941Smrg		cat2->src2_r = (instr->nop >> 1) & 0x1;
222b8e80941Smrg	} else {
223b8e80941Smrg		cat2->src1_r = !!(src1->flags & IR3_REG_R);
224b8e80941Smrg		if (src2)
225b8e80941Smrg			cat2->src2_r = !!(src2->flags & IR3_REG_R);
226b8e80941Smrg	}
227b8e80941Smrg
228b8e80941Smrg	if (src1->flags & IR3_REG_RELATIV) {
229b8e80941Smrg		iassert(src1->array.offset < (1 << 10));
230b8e80941Smrg		cat2->rel1.src1      = reg(src1, info, instr->repeat,
231b8e80941Smrg				IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R |
232b8e80941Smrg				IR3_REG_HALF | absneg);
233b8e80941Smrg		cat2->rel1.src1_c    = !!(src1->flags & IR3_REG_CONST);
234b8e80941Smrg		cat2->rel1.src1_rel  = 1;
235b8e80941Smrg	} else if (src1->flags & IR3_REG_CONST) {
236b8e80941Smrg		iassert(src1->num < (1 << 12));
237b8e80941Smrg		cat2->c1.src1   = reg(src1, info, instr->repeat,
238b8e80941Smrg				IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF);
239b8e80941Smrg		cat2->c1.src1_c = 1;
240b8e80941Smrg	} else {
241b8e80941Smrg		iassert(src1->num < (1 << 11));
242b8e80941Smrg		cat2->src1 = reg(src1, info, instr->repeat,
243b8e80941Smrg				IR3_REG_IMMED | IR3_REG_R | IR3_REG_HALF |
244b8e80941Smrg				absneg);
245b8e80941Smrg	}
246b8e80941Smrg	cat2->src1_im  = !!(src1->flags & IR3_REG_IMMED);
247b8e80941Smrg	cat2->src1_neg = !!(src1->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT));
248b8e80941Smrg	cat2->src1_abs = !!(src1->flags & (IR3_REG_FABS | IR3_REG_SABS));
249b8e80941Smrg
250b8e80941Smrg	if (src2) {
251b8e80941Smrg		iassert((src2->flags & IR3_REG_IMMED) ||
252b8e80941Smrg				!((src1->flags ^ src2->flags) & IR3_REG_HALF));
253b8e80941Smrg
254b8e80941Smrg		if (src2->flags & IR3_REG_RELATIV) {
255b8e80941Smrg			iassert(src2->array.offset < (1 << 10));
256b8e80941Smrg			cat2->rel2.src2      = reg(src2, info, instr->repeat,
257b8e80941Smrg					IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R |
258b8e80941Smrg					IR3_REG_HALF | absneg);
259b8e80941Smrg			cat2->rel2.src2_c    = !!(src2->flags & IR3_REG_CONST);
260b8e80941Smrg			cat2->rel2.src2_rel  = 1;
261b8e80941Smrg		} else if (src2->flags & IR3_REG_CONST) {
262b8e80941Smrg			iassert(src2->num < (1 << 12));
263b8e80941Smrg			cat2->c2.src2   = reg(src2, info, instr->repeat,
264b8e80941Smrg					IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF);
265b8e80941Smrg			cat2->c2.src2_c = 1;
266b8e80941Smrg		} else {
267b8e80941Smrg			iassert(src2->num < (1 << 11));
268b8e80941Smrg			cat2->src2 = reg(src2, info, instr->repeat,
269b8e80941Smrg					IR3_REG_IMMED | IR3_REG_R | IR3_REG_HALF |
270b8e80941Smrg					absneg);
271b8e80941Smrg		}
272b8e80941Smrg
273b8e80941Smrg		cat2->src2_im  = !!(src2->flags & IR3_REG_IMMED);
274b8e80941Smrg		cat2->src2_neg = !!(src2->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT));
275b8e80941Smrg		cat2->src2_abs = !!(src2->flags & (IR3_REG_FABS | IR3_REG_SABS));
276b8e80941Smrg	}
277b8e80941Smrg
278b8e80941Smrg	cat2->dst      = reg(dst, info, instr->repeat,
279b8e80941Smrg			IR3_REG_R | IR3_REG_EI | IR3_REG_HALF);
280b8e80941Smrg	cat2->repeat   = instr->repeat;
281b8e80941Smrg	cat2->sat      = !!(instr->flags & IR3_INSTR_SAT);
282b8e80941Smrg	cat2->ss       = !!(instr->flags & IR3_INSTR_SS);
283b8e80941Smrg	cat2->ul       = !!(instr->flags & IR3_INSTR_UL);
284b8e80941Smrg	cat2->dst_half = !!((src1->flags ^ dst->flags) & IR3_REG_HALF);
285b8e80941Smrg	cat2->ei       = !!(dst->flags & IR3_REG_EI);
286b8e80941Smrg	cat2->cond     = instr->cat2.condition;
287b8e80941Smrg	cat2->full     = ! (src1->flags & IR3_REG_HALF);
288b8e80941Smrg	cat2->opc      = instr->opc;
289b8e80941Smrg	cat2->jmp_tgt  = !!(instr->flags & IR3_INSTR_JP);
290b8e80941Smrg	cat2->sync     = !!(instr->flags & IR3_INSTR_SY);
291b8e80941Smrg	cat2->opc_cat  = 2;
292b8e80941Smrg
293b8e80941Smrg	return 0;
294b8e80941Smrg}
295b8e80941Smrg
296b8e80941Smrgstatic int emit_cat3(struct ir3_instruction *instr, void *ptr,
297b8e80941Smrg		struct ir3_info *info)
298b8e80941Smrg{
299b8e80941Smrg	struct ir3_register *dst = instr->regs[0];
300b8e80941Smrg	struct ir3_register *src1 = instr->regs[1];
301b8e80941Smrg	struct ir3_register *src2 = instr->regs[2];
302b8e80941Smrg	struct ir3_register *src3 = instr->regs[3];
303b8e80941Smrg	unsigned absneg = ir3_cat3_absneg(instr->opc);
304b8e80941Smrg	instr_cat3_t *cat3 = ptr;
305b8e80941Smrg	uint32_t src_flags = 0;
306b8e80941Smrg
307b8e80941Smrg	switch (instr->opc) {
308b8e80941Smrg	case OPC_MAD_F16:
309b8e80941Smrg	case OPC_MAD_U16:
310b8e80941Smrg	case OPC_MAD_S16:
311b8e80941Smrg	case OPC_SEL_B16:
312b8e80941Smrg	case OPC_SEL_S16:
313b8e80941Smrg	case OPC_SEL_F16:
314b8e80941Smrg	case OPC_SAD_S16:
315b8e80941Smrg	case OPC_SAD_S32:  // really??
316b8e80941Smrg		src_flags |= IR3_REG_HALF;
317b8e80941Smrg		break;
318b8e80941Smrg	default:
319b8e80941Smrg		break;
320b8e80941Smrg	}
321b8e80941Smrg
322b8e80941Smrg	iassert(instr->regs_count == 4);
323b8e80941Smrg	iassert(!((src1->flags ^ src_flags) & IR3_REG_HALF));
324b8e80941Smrg	iassert(!((src2->flags ^ src_flags) & IR3_REG_HALF));
325b8e80941Smrg	iassert(!((src3->flags ^ src_flags) & IR3_REG_HALF));
326b8e80941Smrg
327b8e80941Smrg	if (instr->nop) {
328b8e80941Smrg		iassert(!instr->repeat);
329b8e80941Smrg		iassert(instr->nop <= 3);
330b8e80941Smrg
331b8e80941Smrg		cat3->src1_r = instr->nop & 0x1;
332b8e80941Smrg		cat3->src2_r = (instr->nop >> 1) & 0x1;
333b8e80941Smrg	} else {
334b8e80941Smrg		cat3->src1_r = !!(src1->flags & IR3_REG_R);
335b8e80941Smrg		cat3->src2_r = !!(src2->flags & IR3_REG_R);
336b8e80941Smrg	}
337b8e80941Smrg
338b8e80941Smrg	if (src1->flags & IR3_REG_RELATIV) {
339b8e80941Smrg		iassert(src1->array.offset < (1 << 10));
340b8e80941Smrg		cat3->rel1.src1      = reg(src1, info, instr->repeat,
341b8e80941Smrg				IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R |
342b8e80941Smrg				IR3_REG_HALF | absneg);
343b8e80941Smrg		cat3->rel1.src1_c    = !!(src1->flags & IR3_REG_CONST);
344b8e80941Smrg		cat3->rel1.src1_rel  = 1;
345b8e80941Smrg	} else if (src1->flags & IR3_REG_CONST) {
346b8e80941Smrg		iassert(src1->num < (1 << 12));
347b8e80941Smrg		cat3->c1.src1   = reg(src1, info, instr->repeat,
348b8e80941Smrg				IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF);
349b8e80941Smrg		cat3->c1.src1_c = 1;
350b8e80941Smrg	} else {
351b8e80941Smrg		iassert(src1->num < (1 << 11));
352b8e80941Smrg		cat3->src1 = reg(src1, info, instr->repeat,
353b8e80941Smrg				IR3_REG_R | IR3_REG_HALF | absneg);
354b8e80941Smrg	}
355b8e80941Smrg
356b8e80941Smrg	cat3->src1_neg = !!(src1->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT));
357b8e80941Smrg
358b8e80941Smrg	cat3->src2     = reg(src2, info, instr->repeat,
359b8e80941Smrg			IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF | absneg);
360b8e80941Smrg	cat3->src2_c   = !!(src2->flags & IR3_REG_CONST);
361b8e80941Smrg	cat3->src2_neg = !!(src2->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT));
362b8e80941Smrg
363b8e80941Smrg	if (src3->flags & IR3_REG_RELATIV) {
364b8e80941Smrg		iassert(src3->array.offset < (1 << 10));
365b8e80941Smrg		cat3->rel2.src3      = reg(src3, info, instr->repeat,
366b8e80941Smrg				IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R |
367b8e80941Smrg				IR3_REG_HALF | absneg);
368b8e80941Smrg		cat3->rel2.src3_c    = !!(src3->flags & IR3_REG_CONST);
369b8e80941Smrg		cat3->rel2.src3_rel  = 1;
370b8e80941Smrg	} else if (src3->flags & IR3_REG_CONST) {
371b8e80941Smrg		iassert(src3->num < (1 << 12));
372b8e80941Smrg		cat3->c2.src3   = reg(src3, info, instr->repeat,
373b8e80941Smrg				IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF);
374b8e80941Smrg		cat3->c2.src3_c = 1;
375b8e80941Smrg	} else {
376b8e80941Smrg		iassert(src3->num < (1 << 11));
377b8e80941Smrg		cat3->src3 = reg(src3, info, instr->repeat,
378b8e80941Smrg				IR3_REG_R | IR3_REG_HALF | absneg);
379b8e80941Smrg	}
380b8e80941Smrg
381b8e80941Smrg	cat3->src3_neg = !!(src3->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT));
382b8e80941Smrg	cat3->src3_r   = !!(src3->flags & IR3_REG_R);
383b8e80941Smrg
384b8e80941Smrg	cat3->dst      = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
385b8e80941Smrg	cat3->repeat   = instr->repeat;
386b8e80941Smrg	cat3->sat      = !!(instr->flags & IR3_INSTR_SAT);
387b8e80941Smrg	cat3->ss       = !!(instr->flags & IR3_INSTR_SS);
388b8e80941Smrg	cat3->ul       = !!(instr->flags & IR3_INSTR_UL);
389b8e80941Smrg	cat3->dst_half = !!((src_flags ^ dst->flags) & IR3_REG_HALF);
390b8e80941Smrg	cat3->opc      = instr->opc;
391b8e80941Smrg	cat3->jmp_tgt  = !!(instr->flags & IR3_INSTR_JP);
392b8e80941Smrg	cat3->sync     = !!(instr->flags & IR3_INSTR_SY);
393b8e80941Smrg	cat3->opc_cat  = 3;
394b8e80941Smrg
395b8e80941Smrg	return 0;
396b8e80941Smrg}
397b8e80941Smrg
398b8e80941Smrgstatic int emit_cat4(struct ir3_instruction *instr, void *ptr,
399b8e80941Smrg		struct ir3_info *info)
400b8e80941Smrg{
401b8e80941Smrg	struct ir3_register *dst = instr->regs[0];
402b8e80941Smrg	struct ir3_register *src = instr->regs[1];
403b8e80941Smrg	instr_cat4_t *cat4 = ptr;
404b8e80941Smrg
405b8e80941Smrg	iassert(instr->regs_count == 2);
406b8e80941Smrg
407b8e80941Smrg	if (src->flags & IR3_REG_RELATIV) {
408b8e80941Smrg		iassert(src->array.offset < (1 << 10));
409b8e80941Smrg		cat4->rel.src      = reg(src, info, instr->repeat,
410b8e80941Smrg				IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_FNEG |
411b8e80941Smrg				IR3_REG_FABS | IR3_REG_R | IR3_REG_HALF);
412b8e80941Smrg		cat4->rel.src_c    = !!(src->flags & IR3_REG_CONST);
413b8e80941Smrg		cat4->rel.src_rel  = 1;
414b8e80941Smrg	} else if (src->flags & IR3_REG_CONST) {
415b8e80941Smrg		iassert(src->num < (1 << 12));
416b8e80941Smrg		cat4->c.src   = reg(src, info, instr->repeat,
417b8e80941Smrg				IR3_REG_CONST | IR3_REG_FNEG | IR3_REG_FABS |
418b8e80941Smrg				IR3_REG_R | IR3_REG_HALF);
419b8e80941Smrg		cat4->c.src_c = 1;
420b8e80941Smrg	} else {
421b8e80941Smrg		iassert(src->num < (1 << 11));
422b8e80941Smrg		cat4->src = reg(src, info, instr->repeat,
423b8e80941Smrg				IR3_REG_IMMED | IR3_REG_FNEG | IR3_REG_FABS |
424b8e80941Smrg				IR3_REG_R | IR3_REG_HALF);
425b8e80941Smrg	}
426b8e80941Smrg
427b8e80941Smrg	cat4->src_im   = !!(src->flags & IR3_REG_IMMED);
428b8e80941Smrg	cat4->src_neg  = !!(src->flags & IR3_REG_FNEG);
429b8e80941Smrg	cat4->src_abs  = !!(src->flags & IR3_REG_FABS);
430b8e80941Smrg	cat4->src_r    = !!(src->flags & IR3_REG_R);
431b8e80941Smrg
432b8e80941Smrg	cat4->dst      = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
433b8e80941Smrg	cat4->repeat   = instr->repeat;
434b8e80941Smrg	cat4->sat      = !!(instr->flags & IR3_INSTR_SAT);
435b8e80941Smrg	cat4->ss       = !!(instr->flags & IR3_INSTR_SS);
436b8e80941Smrg	cat4->ul       = !!(instr->flags & IR3_INSTR_UL);
437b8e80941Smrg	cat4->dst_half = !!((src->flags ^ dst->flags) & IR3_REG_HALF);
438b8e80941Smrg	cat4->full     = ! (src->flags & IR3_REG_HALF);
439b8e80941Smrg	cat4->opc      = instr->opc;
440b8e80941Smrg	cat4->jmp_tgt  = !!(instr->flags & IR3_INSTR_JP);
441b8e80941Smrg	cat4->sync     = !!(instr->flags & IR3_INSTR_SY);
442b8e80941Smrg	cat4->opc_cat  = 4;
443b8e80941Smrg
444b8e80941Smrg	return 0;
445b8e80941Smrg}
446b8e80941Smrg
447b8e80941Smrgstatic int emit_cat5(struct ir3_instruction *instr, void *ptr,
448b8e80941Smrg		struct ir3_info *info)
449b8e80941Smrg{
450b8e80941Smrg	struct ir3_register *dst = instr->regs[0];
451b8e80941Smrg	/* To simplify things when there could be zero, one, or two args other
452b8e80941Smrg	 * than tex/sampler idx, we use the first src reg in the ir to hold
453b8e80941Smrg	 * samp_tex hvec2:
454b8e80941Smrg	 */
455b8e80941Smrg	struct ir3_register *src1;
456b8e80941Smrg	struct ir3_register *src2;
457b8e80941Smrg	instr_cat5_t *cat5 = ptr;
458b8e80941Smrg
459b8e80941Smrg	iassert((instr->regs_count == 2) ||
460b8e80941Smrg			(instr->regs_count == 3) || (instr->regs_count == 4));
461b8e80941Smrg
462b8e80941Smrg	switch (instr->opc) {
463b8e80941Smrg	case OPC_DSX:
464b8e80941Smrg	case OPC_DSXPP_1:
465b8e80941Smrg	case OPC_DSY:
466b8e80941Smrg	case OPC_DSYPP_1:
467b8e80941Smrg	case OPC_RGETPOS:
468b8e80941Smrg	case OPC_RGETINFO:
469b8e80941Smrg		iassert((instr->flags & IR3_INSTR_S2EN) == 0);
470b8e80941Smrg		src1 = instr->regs[1];
471b8e80941Smrg		src2 = instr->regs_count > 2 ? instr->regs[2] : NULL;
472b8e80941Smrg		break;
473b8e80941Smrg	default:
474b8e80941Smrg		src1 = instr->regs[2];
475b8e80941Smrg		src2 = instr->regs_count > 3 ? instr->regs[3] : NULL;
476b8e80941Smrg		break;
477b8e80941Smrg	}
478b8e80941Smrg
479b8e80941Smrg	assume(src1 || !src2);
480b8e80941Smrg
481b8e80941Smrg	if (src1) {
482b8e80941Smrg		cat5->full = ! (src1->flags & IR3_REG_HALF);
483b8e80941Smrg		cat5->src1 = reg(src1, info, instr->repeat, IR3_REG_HALF);
484b8e80941Smrg	}
485b8e80941Smrg
486b8e80941Smrg	if (instr->flags & IR3_INSTR_S2EN) {
487b8e80941Smrg		struct ir3_register *samp_tex = instr->regs[1];
488b8e80941Smrg		if (src2) {
489b8e80941Smrg			iassert(!((src1->flags ^ src2->flags) & IR3_REG_HALF));
490b8e80941Smrg			cat5->s2en.src2 = reg(src2, info, instr->repeat, IR3_REG_HALF);
491b8e80941Smrg		}
492b8e80941Smrg		iassert(samp_tex->flags & IR3_REG_HALF);
493b8e80941Smrg		cat5->s2en.src3 = reg(samp_tex, info, instr->repeat, IR3_REG_HALF);
494b8e80941Smrg		iassert(!(instr->cat5.samp | instr->cat5.tex));
495b8e80941Smrg	} else {
496b8e80941Smrg		if (src2) {
497b8e80941Smrg			iassert(!((src1->flags ^ src2->flags) & IR3_REG_HALF));
498b8e80941Smrg			cat5->norm.src2 = reg(src2, info, instr->repeat, IR3_REG_HALF);
499b8e80941Smrg		}
500b8e80941Smrg		cat5->norm.samp = instr->cat5.samp;
501b8e80941Smrg		cat5->norm.tex  = instr->cat5.tex;
502b8e80941Smrg	}
503b8e80941Smrg
504b8e80941Smrg	cat5->dst      = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
505b8e80941Smrg	cat5->wrmask   = dst->wrmask;
506b8e80941Smrg	cat5->type     = instr->cat5.type;
507b8e80941Smrg	cat5->is_3d    = !!(instr->flags & IR3_INSTR_3D);
508b8e80941Smrg	cat5->is_a     = !!(instr->flags & IR3_INSTR_A);
509b8e80941Smrg	cat5->is_s     = !!(instr->flags & IR3_INSTR_S);
510b8e80941Smrg	cat5->is_s2en  = !!(instr->flags & IR3_INSTR_S2EN);
511b8e80941Smrg	cat5->is_o     = !!(instr->flags & IR3_INSTR_O);
512b8e80941Smrg	cat5->is_p     = !!(instr->flags & IR3_INSTR_P);
513b8e80941Smrg	cat5->opc      = instr->opc;
514b8e80941Smrg	cat5->jmp_tgt  = !!(instr->flags & IR3_INSTR_JP);
515b8e80941Smrg	cat5->sync     = !!(instr->flags & IR3_INSTR_SY);
516b8e80941Smrg	cat5->opc_cat  = 5;
517b8e80941Smrg
518b8e80941Smrg	return 0;
519b8e80941Smrg}
520b8e80941Smrg
521b8e80941Smrgstatic int emit_cat6_a6xx(struct ir3_instruction *instr, void *ptr,
522b8e80941Smrg		struct ir3_info *info)
523b8e80941Smrg{
524b8e80941Smrg	struct ir3_register *src1, *src2;
525b8e80941Smrg	instr_cat6_a6xx_t *cat6 = ptr;
526b8e80941Smrg	bool has_dest = (instr->opc == OPC_LDIB);
527b8e80941Smrg
528b8e80941Smrg	/* first reg should be SSBO binding point: */
529b8e80941Smrg	iassert(instr->regs[1]->flags & IR3_REG_IMMED);
530b8e80941Smrg
531b8e80941Smrg	src1 = instr->regs[2];
532b8e80941Smrg
533b8e80941Smrg	if (has_dest) {
534b8e80941Smrg		/* the src2 field in the instruction is actually the destination
535b8e80941Smrg		 * register for load instructions:
536b8e80941Smrg		 */
537b8e80941Smrg		src2 = instr->regs[0];
538b8e80941Smrg	} else {
539b8e80941Smrg		src2 = instr->regs[3];
540b8e80941Smrg	}
541b8e80941Smrg
542b8e80941Smrg	cat6->type      = instr->cat6.type;
543b8e80941Smrg	cat6->d         = instr->cat6.d - 1;
544b8e80941Smrg	cat6->typed     = instr->cat6.typed;
545b8e80941Smrg	cat6->type_size = instr->cat6.iim_val - 1;
546b8e80941Smrg	cat6->opc       = instr->opc;
547b8e80941Smrg	cat6->jmp_tgt   = !!(instr->flags & IR3_INSTR_JP);
548b8e80941Smrg	cat6->sync      = !!(instr->flags & IR3_INSTR_SY);
549b8e80941Smrg	cat6->opc_cat   = 6;
550b8e80941Smrg
551b8e80941Smrg	cat6->src1 = reg(src1, info, instr->repeat, 0);
552b8e80941Smrg	cat6->src2 = reg(src2, info, instr->repeat, 0);
553b8e80941Smrg	cat6->ssbo = instr->regs[1]->iim_val;
554b8e80941Smrg
555b8e80941Smrg	switch (instr->opc) {
556b8e80941Smrg	case OPC_ATOMIC_ADD:
557b8e80941Smrg	case OPC_ATOMIC_SUB:
558b8e80941Smrg	case OPC_ATOMIC_XCHG:
559b8e80941Smrg	case OPC_ATOMIC_INC:
560b8e80941Smrg	case OPC_ATOMIC_DEC:
561b8e80941Smrg	case OPC_ATOMIC_CMPXCHG:
562b8e80941Smrg	case OPC_ATOMIC_MIN:
563b8e80941Smrg	case OPC_ATOMIC_MAX:
564b8e80941Smrg	case OPC_ATOMIC_AND:
565b8e80941Smrg	case OPC_ATOMIC_OR:
566b8e80941Smrg	case OPC_ATOMIC_XOR:
567b8e80941Smrg		cat6->pad1 = 0x1;
568b8e80941Smrg		cat6->pad2 = 0xc;
569b8e80941Smrg		cat6->pad3 = 0x0;
570b8e80941Smrg		cat6->pad4 = 0x3;
571b8e80941Smrg		break;
572b8e80941Smrg	case OPC_STIB:
573b8e80941Smrg		cat6->pad1 = 0x0;
574b8e80941Smrg		cat6->pad2 = 0xc;
575b8e80941Smrg		cat6->pad3 = 0x0;
576b8e80941Smrg		cat6->pad4 = 0x2;
577b8e80941Smrg		break;
578b8e80941Smrg	case OPC_LDIB:
579b8e80941Smrg		cat6->pad1 = 0x1;
580b8e80941Smrg		cat6->pad2 = 0xc;
581b8e80941Smrg		cat6->pad3 = 0x0;
582b8e80941Smrg		cat6->pad4 = 0x2;
583b8e80941Smrg		break;
584b8e80941Smrg	case OPC_LDC:
585b8e80941Smrg		cat6->pad1 = 0x0;
586b8e80941Smrg		cat6->pad2 = 0x8;
587b8e80941Smrg		cat6->pad3 = 0x0;
588b8e80941Smrg		cat6->pad4 = 0x2;
589b8e80941Smrg		break;
590b8e80941Smrg	default:
591b8e80941Smrg		iassert(0);
592b8e80941Smrg	}
593b8e80941Smrg
594b8e80941Smrg	return 0;
595b8e80941Smrg}
596b8e80941Smrg
597b8e80941Smrgstatic int emit_cat6(struct ir3_instruction *instr, void *ptr,
598b8e80941Smrg		struct ir3_info *info)
599b8e80941Smrg{
600b8e80941Smrg	struct ir3_register *dst, *src1, *src2;
601b8e80941Smrg	instr_cat6_t *cat6 = ptr;
602b8e80941Smrg
603b8e80941Smrg	/* In a6xx we start using a new instruction encoding for some of
604b8e80941Smrg	 * these instructions:
605b8e80941Smrg	 */
606b8e80941Smrg	if (info->gpu_id >= 600) {
607b8e80941Smrg		switch (instr->opc) {
608b8e80941Smrg		case OPC_ATOMIC_ADD:
609b8e80941Smrg		case OPC_ATOMIC_SUB:
610b8e80941Smrg		case OPC_ATOMIC_XCHG:
611b8e80941Smrg		case OPC_ATOMIC_INC:
612b8e80941Smrg		case OPC_ATOMIC_DEC:
613b8e80941Smrg		case OPC_ATOMIC_CMPXCHG:
614b8e80941Smrg		case OPC_ATOMIC_MIN:
615b8e80941Smrg		case OPC_ATOMIC_MAX:
616b8e80941Smrg		case OPC_ATOMIC_AND:
617b8e80941Smrg		case OPC_ATOMIC_OR:
618b8e80941Smrg		case OPC_ATOMIC_XOR:
619b8e80941Smrg			/* The shared variants of these still use the old encoding: */
620b8e80941Smrg			if (!(instr->flags & IR3_INSTR_G))
621b8e80941Smrg				break;
622b8e80941Smrg			/* fallthrough */
623b8e80941Smrg		case OPC_STIB:
624b8e80941Smrg		case OPC_LDIB:
625b8e80941Smrg		case OPC_LDC:
626b8e80941Smrg			return emit_cat6_a6xx(instr, ptr, info);
627b8e80941Smrg		default:
628b8e80941Smrg			break;
629b8e80941Smrg		}
630b8e80941Smrg	}
631b8e80941Smrg
632b8e80941Smrg	bool type_full = type_size(instr->cat6.type) == 32;
633b8e80941Smrg
634b8e80941Smrg	cat6->type     = instr->cat6.type;
635b8e80941Smrg	cat6->opc      = instr->opc;
636b8e80941Smrg	cat6->jmp_tgt  = !!(instr->flags & IR3_INSTR_JP);
637b8e80941Smrg	cat6->sync     = !!(instr->flags & IR3_INSTR_SY);
638b8e80941Smrg	cat6->g        = !!(instr->flags & IR3_INSTR_G);
639b8e80941Smrg	cat6->opc_cat  = 6;
640b8e80941Smrg
641b8e80941Smrg	switch (instr->opc) {
642b8e80941Smrg	case OPC_RESINFO:
643b8e80941Smrg	case OPC_RESFMT:
644b8e80941Smrg		iassert_type(instr->regs[0], type_full); /* dst */
645b8e80941Smrg		iassert_type(instr->regs[1], type_full); /* src1 */
646b8e80941Smrg		break;
647b8e80941Smrg	case OPC_L2G:
648b8e80941Smrg	case OPC_G2L:
649b8e80941Smrg		iassert_type(instr->regs[0], true);      /* dst */
650b8e80941Smrg		iassert_type(instr->regs[1], true);      /* src1 */
651b8e80941Smrg		break;
652b8e80941Smrg	case OPC_STG:
653b8e80941Smrg	case OPC_STL:
654b8e80941Smrg	case OPC_STP:
655b8e80941Smrg	case OPC_STLW:
656b8e80941Smrg	case OPC_STIB:
657b8e80941Smrg		/* no dst, so regs[0] is dummy */
658b8e80941Smrg		iassert_type(instr->regs[1], true);      /* dst */
659b8e80941Smrg		iassert_type(instr->regs[2], type_full); /* src1 */
660b8e80941Smrg		iassert_type(instr->regs[3], true);      /* src2 */
661b8e80941Smrg		break;
662b8e80941Smrg	default:
663b8e80941Smrg		iassert_type(instr->regs[0], type_full); /* dst */
664b8e80941Smrg		iassert_type(instr->regs[1], true);      /* src1 */
665b8e80941Smrg		if (instr->regs_count > 2)
666b8e80941Smrg			iassert_type(instr->regs[2], true);  /* src1 */
667b8e80941Smrg		break;
668b8e80941Smrg	}
669b8e80941Smrg
670b8e80941Smrg	/* the "dst" for a store instruction is (from the perspective
671b8e80941Smrg	 * of data flow in the shader, ie. register use/def, etc) in
672b8e80941Smrg	 * fact a register that is read by the instruction, rather
673b8e80941Smrg	 * than written:
674b8e80941Smrg	 */
675b8e80941Smrg	if (is_store(instr)) {
676b8e80941Smrg		iassert(instr->regs_count >= 3);
677b8e80941Smrg
678b8e80941Smrg		dst  = instr->regs[1];
679b8e80941Smrg		src1 = instr->regs[2];
680b8e80941Smrg		src2 = (instr->regs_count >= 4) ? instr->regs[3] : NULL;
681b8e80941Smrg	} else {
682b8e80941Smrg		iassert(instr->regs_count >= 2);
683b8e80941Smrg
684b8e80941Smrg		dst  = instr->regs[0];
685b8e80941Smrg		src1 = instr->regs[1];
686b8e80941Smrg		src2 = (instr->regs_count >= 3) ? instr->regs[2] : NULL;
687b8e80941Smrg	}
688b8e80941Smrg
689b8e80941Smrg	/* TODO we need a more comprehensive list about which instructions
690b8e80941Smrg	 * can be encoded which way.  Or possibly use IR3_INSTR_0 flag to
691b8e80941Smrg	 * indicate to use the src_off encoding even if offset is zero
692b8e80941Smrg	 * (but then what to do about dst_off?)
693b8e80941Smrg	 */
694b8e80941Smrg	if (is_atomic(instr->opc)) {
695b8e80941Smrg		instr_cat6ldgb_t *ldgb = ptr;
696b8e80941Smrg
697b8e80941Smrg		/* maybe these two bits both determine the instruction encoding? */
698b8e80941Smrg		cat6->src_off = false;
699b8e80941Smrg
700b8e80941Smrg		ldgb->d = instr->cat6.d - 1;
701b8e80941Smrg		ldgb->typed = instr->cat6.typed;
702b8e80941Smrg		ldgb->type_size = instr->cat6.iim_val - 1;
703b8e80941Smrg
704b8e80941Smrg		ldgb->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
705b8e80941Smrg
706b8e80941Smrg		if (ldgb->g) {
707b8e80941Smrg			struct ir3_register *src3 = instr->regs[3];
708b8e80941Smrg			struct ir3_register *src4 = instr->regs[4];
709b8e80941Smrg
710b8e80941Smrg			/* first src is src_ssbo: */
711b8e80941Smrg			iassert(src1->flags & IR3_REG_IMMED);
712b8e80941Smrg			ldgb->src_ssbo = src1->uim_val;
713b8e80941Smrg
714b8e80941Smrg			ldgb->src1 = reg(src2, info, instr->repeat, IR3_REG_IMMED);
715b8e80941Smrg			ldgb->src1_im = !!(src2->flags & IR3_REG_IMMED);
716b8e80941Smrg			ldgb->src2 = reg(src3, info, instr->repeat, IR3_REG_IMMED);
717b8e80941Smrg			ldgb->src2_im = !!(src3->flags & IR3_REG_IMMED);
718b8e80941Smrg
719b8e80941Smrg			ldgb->src3 = reg(src4, info, instr->repeat, 0);
720b8e80941Smrg			ldgb->pad0 = 0x1;
721b8e80941Smrg			ldgb->pad3 = 0x1;
722b8e80941Smrg		} else {
723b8e80941Smrg			ldgb->src1 = reg(src1, info, instr->repeat, IR3_REG_IMMED);
724b8e80941Smrg			ldgb->src1_im = !!(src1->flags & IR3_REG_IMMED);
725b8e80941Smrg			ldgb->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED);
726b8e80941Smrg			ldgb->src2_im = !!(src2->flags & IR3_REG_IMMED);
727b8e80941Smrg			ldgb->pad0 = 0x1;
728b8e80941Smrg			ldgb->pad3 = 0x0;
729b8e80941Smrg		}
730b8e80941Smrg
731b8e80941Smrg		return 0;
732b8e80941Smrg	} else if (instr->opc == OPC_LDGB) {
733b8e80941Smrg		struct ir3_register *src3 = instr->regs[3];
734b8e80941Smrg		instr_cat6ldgb_t *ldgb = ptr;
735b8e80941Smrg
736b8e80941Smrg		/* maybe these two bits both determine the instruction encoding? */
737b8e80941Smrg		cat6->src_off = false;
738b8e80941Smrg
739b8e80941Smrg		ldgb->d = instr->cat6.d - 1;
740b8e80941Smrg		ldgb->typed = instr->cat6.typed;
741b8e80941Smrg		ldgb->type_size = instr->cat6.iim_val - 1;
742b8e80941Smrg
743b8e80941Smrg		ldgb->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
744b8e80941Smrg
745b8e80941Smrg		/* first src is src_ssbo: */
746b8e80941Smrg		iassert(src1->flags & IR3_REG_IMMED);
747b8e80941Smrg		ldgb->src_ssbo = src1->uim_val;
748b8e80941Smrg
749b8e80941Smrg		/* then next two are src1/src2: */
750b8e80941Smrg		ldgb->src1 = reg(src2, info, instr->repeat, IR3_REG_IMMED);
751b8e80941Smrg		ldgb->src1_im = !!(src2->flags & IR3_REG_IMMED);
752b8e80941Smrg		ldgb->src2 = reg(src3, info, instr->repeat, IR3_REG_IMMED);
753b8e80941Smrg		ldgb->src2_im = !!(src3->flags & IR3_REG_IMMED);
754b8e80941Smrg
755b8e80941Smrg		ldgb->pad0 = 0x0;
756b8e80941Smrg		ldgb->pad3 = 0x1;
757b8e80941Smrg
758b8e80941Smrg		return 0;
759b8e80941Smrg	} else if (instr->opc == OPC_RESINFO) {
760b8e80941Smrg		instr_cat6ldgb_t *ldgb = ptr;
761b8e80941Smrg
762b8e80941Smrg		ldgb->d = instr->cat6.d - 1;
763b8e80941Smrg
764b8e80941Smrg		ldgb->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
765b8e80941Smrg
766b8e80941Smrg		/* first src is src_ssbo: */
767b8e80941Smrg		iassert(src1->flags & IR3_REG_IMMED);
768b8e80941Smrg		ldgb->src_ssbo = src1->uim_val;
769b8e80941Smrg
770b8e80941Smrg		return 0;
771b8e80941Smrg	} else if ((instr->opc == OPC_STGB) || (instr->opc == OPC_STIB)) {
772b8e80941Smrg		struct ir3_register *src3 = instr->regs[4];
773b8e80941Smrg		instr_cat6stgb_t *stgb = ptr;
774b8e80941Smrg
775b8e80941Smrg		/* maybe these two bits both determine the instruction encoding? */
776b8e80941Smrg		cat6->src_off = true;
777b8e80941Smrg		stgb->pad3 = 0x2;
778b8e80941Smrg
779b8e80941Smrg		stgb->d = instr->cat6.d - 1;
780b8e80941Smrg		stgb->typed = instr->cat6.typed;
781b8e80941Smrg		stgb->type_size = instr->cat6.iim_val - 1;
782b8e80941Smrg
783b8e80941Smrg		/* first src is dst_ssbo: */
784b8e80941Smrg		iassert(dst->flags & IR3_REG_IMMED);
785b8e80941Smrg		stgb->dst_ssbo = dst->uim_val;
786b8e80941Smrg
787b8e80941Smrg		/* then src1/src2/src3: */
788b8e80941Smrg		stgb->src1 = reg(src1, info, instr->repeat, 0);
789b8e80941Smrg		stgb->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED);
790b8e80941Smrg		stgb->src2_im = !!(src2->flags & IR3_REG_IMMED);
791b8e80941Smrg		stgb->src3 = reg(src3, info, instr->repeat, IR3_REG_IMMED);
792b8e80941Smrg		stgb->src3_im = !!(src3->flags & IR3_REG_IMMED);
793b8e80941Smrg
794b8e80941Smrg		return 0;
795b8e80941Smrg	} else if (instr->cat6.src_offset || (instr->opc == OPC_LDG) ||
796b8e80941Smrg			(instr->opc == OPC_LDL)) {
797b8e80941Smrg		instr_cat6a_t *cat6a = ptr;
798b8e80941Smrg
799b8e80941Smrg		cat6->src_off = true;
800b8e80941Smrg
801b8e80941Smrg		cat6a->src1 = reg(src1, info, instr->repeat, IR3_REG_IMMED);
802b8e80941Smrg		cat6a->src1_im = !!(src1->flags & IR3_REG_IMMED);
803b8e80941Smrg		if (src2) {
804b8e80941Smrg			cat6a->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED);
805b8e80941Smrg			cat6a->src2_im = !!(src2->flags & IR3_REG_IMMED);
806b8e80941Smrg		}
807b8e80941Smrg		cat6a->off = instr->cat6.src_offset;
808b8e80941Smrg	} else {
809b8e80941Smrg		instr_cat6b_t *cat6b = ptr;
810b8e80941Smrg
811b8e80941Smrg		cat6->src_off = false;
812b8e80941Smrg
813b8e80941Smrg		cat6b->src1 = reg(src1, info, instr->repeat, IR3_REG_IMMED | IR3_REG_HALF);
814b8e80941Smrg		cat6b->src1_im = !!(src1->flags & IR3_REG_IMMED);
815b8e80941Smrg		if (src2) {
816b8e80941Smrg			cat6b->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED);
817b8e80941Smrg			cat6b->src2_im = !!(src2->flags & IR3_REG_IMMED);
818b8e80941Smrg		}
819b8e80941Smrg	}
820b8e80941Smrg
821b8e80941Smrg	if (instr->cat6.dst_offset || (instr->opc == OPC_STG) ||
822b8e80941Smrg			(instr->opc == OPC_STL)) {
823b8e80941Smrg		instr_cat6c_t *cat6c = ptr;
824b8e80941Smrg		cat6->dst_off = true;
825b8e80941Smrg		cat6c->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
826b8e80941Smrg		cat6c->off = instr->cat6.dst_offset;
827b8e80941Smrg	} else {
828b8e80941Smrg		instr_cat6d_t *cat6d = ptr;
829b8e80941Smrg		cat6->dst_off = false;
830b8e80941Smrg		cat6d->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
831b8e80941Smrg	}
832b8e80941Smrg
833b8e80941Smrg	return 0;
834b8e80941Smrg}
835b8e80941Smrg
836b8e80941Smrgstatic int emit_cat7(struct ir3_instruction *instr, void *ptr,
837b8e80941Smrg		struct ir3_info *info)
838b8e80941Smrg{
839b8e80941Smrg	instr_cat7_t *cat7 = ptr;
840b8e80941Smrg
841b8e80941Smrg	cat7->ss      = !!(instr->flags & IR3_INSTR_SS);
842b8e80941Smrg	cat7->w       = instr->cat7.w;
843b8e80941Smrg	cat7->r       = instr->cat7.r;
844b8e80941Smrg	cat7->l       = instr->cat7.l;
845b8e80941Smrg	cat7->g       = instr->cat7.g;
846b8e80941Smrg	cat7->opc     = instr->opc;
847b8e80941Smrg	cat7->jmp_tgt = !!(instr->flags & IR3_INSTR_JP);
848b8e80941Smrg	cat7->sync    = !!(instr->flags & IR3_INSTR_SY);
849b8e80941Smrg	cat7->opc_cat = 7;
850b8e80941Smrg
851b8e80941Smrg	return 0;
852b8e80941Smrg}
853b8e80941Smrg
854b8e80941Smrgstatic int (*emit[])(struct ir3_instruction *instr, void *ptr,
855b8e80941Smrg		struct ir3_info *info) = {
856b8e80941Smrg	emit_cat0, emit_cat1, emit_cat2, emit_cat3, emit_cat4, emit_cat5, emit_cat6,
857b8e80941Smrg	emit_cat7,
858b8e80941Smrg};
859b8e80941Smrg
860b8e80941Smrgvoid * ir3_assemble(struct ir3 *shader, struct ir3_info *info,
861b8e80941Smrg		uint32_t gpu_id)
862b8e80941Smrg{
863b8e80941Smrg	uint32_t *ptr, *dwords;
864b8e80941Smrg
865b8e80941Smrg	info->gpu_id        = gpu_id;
866b8e80941Smrg	info->max_reg       = -1;
867b8e80941Smrg	info->max_half_reg  = -1;
868b8e80941Smrg	info->max_const     = -1;
869b8e80941Smrg	info->instrs_count  = 0;
870b8e80941Smrg	info->sizedwords    = 0;
871b8e80941Smrg	info->ss = info->sy = 0;
872b8e80941Smrg
873b8e80941Smrg	list_for_each_entry (struct ir3_block, block, &shader->block_list, node) {
874b8e80941Smrg		list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
875b8e80941Smrg			info->sizedwords += 2;
876b8e80941Smrg		}
877b8e80941Smrg	}
878b8e80941Smrg
879b8e80941Smrg	/* need an integer number of instruction "groups" (sets of 16
880b8e80941Smrg	 * instructions on a4xx or sets of 4 instructions on a3xx),
881b8e80941Smrg	 * so pad out w/ NOPs if needed: (NOTE each instruction is 64bits)
882b8e80941Smrg	 */
883b8e80941Smrg	if (gpu_id >= 400) {
884b8e80941Smrg		info->sizedwords = align(info->sizedwords, 16 * 2);
885b8e80941Smrg	} else {
886b8e80941Smrg		info->sizedwords = align(info->sizedwords, 4 * 2);
887b8e80941Smrg	}
888b8e80941Smrg
889b8e80941Smrg	ptr = dwords = calloc(4, info->sizedwords);
890b8e80941Smrg
891b8e80941Smrg	list_for_each_entry (struct ir3_block, block, &shader->block_list, node) {
892b8e80941Smrg		list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
893b8e80941Smrg			int ret = emit[opc_cat(instr->opc)](instr, dwords, info);
894b8e80941Smrg			if (ret)
895b8e80941Smrg				goto fail;
896b8e80941Smrg			info->instrs_count += 1 + instr->repeat + instr->nop;
897b8e80941Smrg			dwords += 2;
898b8e80941Smrg
899b8e80941Smrg			if (instr->flags & IR3_INSTR_SS)
900b8e80941Smrg				info->ss++;
901b8e80941Smrg
902b8e80941Smrg			if (instr->flags & IR3_INSTR_SY)
903b8e80941Smrg				info->sy++;
904b8e80941Smrg		}
905b8e80941Smrg	}
906b8e80941Smrg
907b8e80941Smrg	return ptr;
908b8e80941Smrg
909b8e80941Smrgfail:
910b8e80941Smrg	free(ptr);
911b8e80941Smrg	return NULL;
912b8e80941Smrg}
913b8e80941Smrg
914b8e80941Smrgstatic struct ir3_register * reg_create(struct ir3 *shader,
915b8e80941Smrg		int num, int flags)
916b8e80941Smrg{
917b8e80941Smrg	struct ir3_register *reg =
918b8e80941Smrg			ir3_alloc(shader, sizeof(struct ir3_register));
919b8e80941Smrg	reg->wrmask = 1;
920b8e80941Smrg	reg->flags = flags;
921b8e80941Smrg	reg->num = num;
922b8e80941Smrg	if (shader->compiler->gpu_id >= 600)
923b8e80941Smrg		reg->merged = true;
924b8e80941Smrg	return reg;
925b8e80941Smrg}
926b8e80941Smrg
927b8e80941Smrgstatic void insert_instr(struct ir3_block *block,
928b8e80941Smrg		struct ir3_instruction *instr)
929b8e80941Smrg{
930b8e80941Smrg	struct ir3 *shader = block->shader;
931b8e80941Smrg#ifdef DEBUG
932b8e80941Smrg	instr->serialno = ++shader->instr_count;
933b8e80941Smrg#endif
934b8e80941Smrg	list_addtail(&instr->node, &block->instr_list);
935b8e80941Smrg
936b8e80941Smrg	if (is_input(instr))
937b8e80941Smrg		array_insert(shader, shader->baryfs, instr);
938b8e80941Smrg}
939b8e80941Smrg
940b8e80941Smrgstruct ir3_block * ir3_block_create(struct ir3 *shader)
941b8e80941Smrg{
942b8e80941Smrg	struct ir3_block *block = ir3_alloc(shader, sizeof(*block));
943b8e80941Smrg#ifdef DEBUG
944b8e80941Smrg	block->serialno = ++shader->block_count;
945b8e80941Smrg#endif
946b8e80941Smrg	block->shader = shader;
947b8e80941Smrg	list_inithead(&block->node);
948b8e80941Smrg	list_inithead(&block->instr_list);
949b8e80941Smrg	return block;
950b8e80941Smrg}
951b8e80941Smrg
952b8e80941Smrgstatic struct ir3_instruction *instr_create(struct ir3_block *block, int nreg)
953b8e80941Smrg{
954b8e80941Smrg	struct ir3_instruction *instr;
955b8e80941Smrg	unsigned sz = sizeof(*instr) + (nreg * sizeof(instr->regs[0]));
956b8e80941Smrg	char *ptr = ir3_alloc(block->shader, sz);
957b8e80941Smrg
958b8e80941Smrg	instr = (struct ir3_instruction *)ptr;
959b8e80941Smrg	ptr  += sizeof(*instr);
960b8e80941Smrg	instr->regs = (struct ir3_register **)ptr;
961b8e80941Smrg
962b8e80941Smrg#ifdef DEBUG
963b8e80941Smrg	instr->regs_max = nreg;
964b8e80941Smrg#endif
965b8e80941Smrg
966b8e80941Smrg	return instr;
967b8e80941Smrg}
968b8e80941Smrg
969b8e80941Smrgstruct ir3_instruction * ir3_instr_create2(struct ir3_block *block,
970b8e80941Smrg		opc_t opc, int nreg)
971b8e80941Smrg{
972b8e80941Smrg	struct ir3_instruction *instr = instr_create(block, nreg);
973b8e80941Smrg	instr->block = block;
974b8e80941Smrg	instr->opc = opc;
975b8e80941Smrg	insert_instr(block, instr);
976b8e80941Smrg	return instr;
977b8e80941Smrg}
978b8e80941Smrg
979b8e80941Smrgstruct ir3_instruction * ir3_instr_create(struct ir3_block *block, opc_t opc)
980b8e80941Smrg{
981b8e80941Smrg	/* NOTE: we could be slightly more clever, at least for non-meta,
982b8e80941Smrg	 * and choose # of regs based on category.
983b8e80941Smrg	 */
984b8e80941Smrg	return ir3_instr_create2(block, opc, 4);
985b8e80941Smrg}
986b8e80941Smrg
987b8e80941Smrgstruct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr)
988b8e80941Smrg{
989b8e80941Smrg	struct ir3_instruction *new_instr = instr_create(instr->block,
990b8e80941Smrg			instr->regs_count);
991b8e80941Smrg	struct ir3_register **regs;
992b8e80941Smrg	unsigned i;
993b8e80941Smrg
994b8e80941Smrg	regs = new_instr->regs;
995b8e80941Smrg	*new_instr = *instr;
996b8e80941Smrg	new_instr->regs = regs;
997b8e80941Smrg
998b8e80941Smrg	insert_instr(instr->block, new_instr);
999b8e80941Smrg
1000b8e80941Smrg	/* clone registers: */
1001b8e80941Smrg	new_instr->regs_count = 0;
1002b8e80941Smrg	for (i = 0; i < instr->regs_count; i++) {
1003b8e80941Smrg		struct ir3_register *reg = instr->regs[i];
1004b8e80941Smrg		struct ir3_register *new_reg =
1005b8e80941Smrg				ir3_reg_create(new_instr, reg->num, reg->flags);
1006b8e80941Smrg		*new_reg = *reg;
1007b8e80941Smrg	}
1008b8e80941Smrg
1009b8e80941Smrg	return new_instr;
1010b8e80941Smrg}
1011b8e80941Smrg
1012b8e80941Smrg/* Add a false dependency to instruction, to ensure it is scheduled first: */
1013b8e80941Smrgvoid ir3_instr_add_dep(struct ir3_instruction *instr, struct ir3_instruction *dep)
1014b8e80941Smrg{
1015b8e80941Smrg	array_insert(instr, instr->deps, dep);
1016b8e80941Smrg}
1017b8e80941Smrg
1018b8e80941Smrgstruct ir3_register * ir3_reg_create(struct ir3_instruction *instr,
1019b8e80941Smrg		int num, int flags)
1020b8e80941Smrg{
1021b8e80941Smrg	struct ir3 *shader = instr->block->shader;
1022b8e80941Smrg	struct ir3_register *reg = reg_create(shader, num, flags);
1023b8e80941Smrg#ifdef DEBUG
1024b8e80941Smrg	debug_assert(instr->regs_count < instr->regs_max);
1025b8e80941Smrg#endif
1026b8e80941Smrg	instr->regs[instr->regs_count++] = reg;
1027b8e80941Smrg	return reg;
1028b8e80941Smrg}
1029b8e80941Smrg
1030b8e80941Smrgstruct ir3_register * ir3_reg_clone(struct ir3 *shader,
1031b8e80941Smrg		struct ir3_register *reg)
1032b8e80941Smrg{
1033b8e80941Smrg	struct ir3_register *new_reg = reg_create(shader, 0, 0);
1034b8e80941Smrg	*new_reg = *reg;
1035b8e80941Smrg	return new_reg;
1036b8e80941Smrg}
1037b8e80941Smrg
1038b8e80941Smrgvoid
1039b8e80941Smrgir3_instr_set_address(struct ir3_instruction *instr,
1040b8e80941Smrg		struct ir3_instruction *addr)
1041b8e80941Smrg{
1042b8e80941Smrg	if (instr->address != addr) {
1043b8e80941Smrg		struct ir3 *ir = instr->block->shader;
1044b8e80941Smrg		instr->address = addr;
1045b8e80941Smrg		array_insert(ir, ir->indirects, instr);
1046b8e80941Smrg	}
1047b8e80941Smrg}
1048b8e80941Smrg
1049b8e80941Smrgvoid
1050b8e80941Smrgir3_block_clear_mark(struct ir3_block *block)
1051b8e80941Smrg{
1052b8e80941Smrg	list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node)
1053b8e80941Smrg		instr->flags &= ~IR3_INSTR_MARK;
1054b8e80941Smrg}
1055b8e80941Smrg
1056b8e80941Smrgvoid
1057b8e80941Smrgir3_clear_mark(struct ir3 *ir)
1058b8e80941Smrg{
1059b8e80941Smrg	list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
1060b8e80941Smrg		ir3_block_clear_mark(block);
1061b8e80941Smrg	}
1062b8e80941Smrg}
1063b8e80941Smrg
1064b8e80941Smrg/* note: this will destroy instr->depth, don't do it until after sched! */
1065b8e80941Smrgunsigned
1066b8e80941Smrgir3_count_instructions(struct ir3 *ir)
1067b8e80941Smrg{
1068b8e80941Smrg	unsigned cnt = 0;
1069b8e80941Smrg	list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
1070b8e80941Smrg		list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
1071b8e80941Smrg			instr->ip = cnt++;
1072b8e80941Smrg		}
1073b8e80941Smrg		block->start_ip = list_first_entry(&block->instr_list, struct ir3_instruction, node)->ip;
1074b8e80941Smrg		block->end_ip = list_last_entry(&block->instr_list, struct ir3_instruction, node)->ip;
1075b8e80941Smrg	}
1076b8e80941Smrg	return cnt;
1077b8e80941Smrg}
1078b8e80941Smrg
1079b8e80941Smrgstruct ir3_array *
1080b8e80941Smrgir3_lookup_array(struct ir3 *ir, unsigned id)
1081b8e80941Smrg{
1082b8e80941Smrg	list_for_each_entry (struct ir3_array, arr, &ir->array_list, node)
1083b8e80941Smrg		if (arr->id == id)
1084b8e80941Smrg			return arr;
1085b8e80941Smrg	return NULL;
1086b8e80941Smrg}
1087