/*
 * Copyright (c) 2013 Rob Clark <robdclark@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
23
24#ifndef INSTR_A3XX_H_
25#define INSTR_A3XX_H_
26
/* Shorthand for the GCC/Clang attribute that lays a struct or union out
 * without inter-member padding; applied to every encoding type below.
 */
#define PACKED __attribute__((__packed__))
28
29#include <stdint.h>
30#include <stdio.h>
31#include <stdbool.h>
32#include <assert.h>
33
/* size of largest OPC field of all the instruction categories: */
#define NOPC_BITS 6

/* Combine an instruction category and a per-category opcode into one
 * value: the category occupies the bits above the low NOPC_BITS bits,
 * the opcode the bits below.
 *
 * The category is scaled by multiplication rather than with '<<' so that
 * a negative category (-1 is used for meta instructions) never
 * left-shifts a negative value, and both arguments are parenthesized so
 * that an arbitrary expression can be passed for either one.
 */
#define _OPC(cat, opc)   (((cat) * (1 << NOPC_BITS)) | (opc))
38
39typedef enum {
40	/* category 0: */
41	OPC_NOP             = _OPC(0, 0),
42	OPC_BR              = _OPC(0, 1),
43	OPC_JUMP            = _OPC(0, 2),
44	OPC_CALL            = _OPC(0, 3),
45	OPC_RET             = _OPC(0, 4),
46	OPC_KILL            = _OPC(0, 5),
47	OPC_END             = _OPC(0, 6),
48	OPC_EMIT            = _OPC(0, 7),
49	OPC_CUT             = _OPC(0, 8),
50	OPC_CHMASK          = _OPC(0, 9),
51	OPC_CHSH            = _OPC(0, 10),
52	OPC_FLOW_REV        = _OPC(0, 11),
53
54	/* category 1: */
55	OPC_MOV             = _OPC(1, 0),
56
57	/* category 2: */
58	OPC_ADD_F           = _OPC(2, 0),
59	OPC_MIN_F           = _OPC(2, 1),
60	OPC_MAX_F           = _OPC(2, 2),
61	OPC_MUL_F           = _OPC(2, 3),
62	OPC_SIGN_F          = _OPC(2, 4),
63	OPC_CMPS_F          = _OPC(2, 5),
64	OPC_ABSNEG_F        = _OPC(2, 6),
65	OPC_CMPV_F          = _OPC(2, 7),
66	/* 8 - invalid */
67	OPC_FLOOR_F         = _OPC(2, 9),
68	OPC_CEIL_F          = _OPC(2, 10),
69	OPC_RNDNE_F         = _OPC(2, 11),
70	OPC_RNDAZ_F         = _OPC(2, 12),
71	OPC_TRUNC_F         = _OPC(2, 13),
72	/* 14-15 - invalid */
73	OPC_ADD_U           = _OPC(2, 16),
74	OPC_ADD_S           = _OPC(2, 17),
75	OPC_SUB_U           = _OPC(2, 18),
76	OPC_SUB_S           = _OPC(2, 19),
77	OPC_CMPS_U          = _OPC(2, 20),
78	OPC_CMPS_S          = _OPC(2, 21),
79	OPC_MIN_U           = _OPC(2, 22),
80	OPC_MIN_S           = _OPC(2, 23),
81	OPC_MAX_U           = _OPC(2, 24),
82	OPC_MAX_S           = _OPC(2, 25),
83	OPC_ABSNEG_S        = _OPC(2, 26),
84	/* 27 - invalid */
85	OPC_AND_B           = _OPC(2, 28),
86	OPC_OR_B            = _OPC(2, 29),
87	OPC_NOT_B           = _OPC(2, 30),
88	OPC_XOR_B           = _OPC(2, 31),
89	/* 32 - invalid */
90	OPC_CMPV_U          = _OPC(2, 33),
91	OPC_CMPV_S          = _OPC(2, 34),
92	/* 35-47 - invalid */
93	OPC_MUL_U           = _OPC(2, 48),
94	OPC_MUL_S           = _OPC(2, 49),
95	OPC_MULL_U          = _OPC(2, 50),
96	OPC_BFREV_B         = _OPC(2, 51),
97	OPC_CLZ_S           = _OPC(2, 52),
98	OPC_CLZ_B           = _OPC(2, 53),
99	OPC_SHL_B           = _OPC(2, 54),
100	OPC_SHR_B           = _OPC(2, 55),
101	OPC_ASHR_B          = _OPC(2, 56),
102	OPC_BARY_F          = _OPC(2, 57),
103	OPC_MGEN_B          = _OPC(2, 58),
104	OPC_GETBIT_B        = _OPC(2, 59),
105	OPC_SETRM           = _OPC(2, 60),
106	OPC_CBITS_B         = _OPC(2, 61),
107	OPC_SHB             = _OPC(2, 62),
108	OPC_MSAD            = _OPC(2, 63),
109
110	/* category 3: */
111	OPC_MAD_U16         = _OPC(3, 0),
112	OPC_MADSH_U16       = _OPC(3, 1),
113	OPC_MAD_S16         = _OPC(3, 2),
114	OPC_MADSH_M16       = _OPC(3, 3),   /* should this be .s16? */
115	OPC_MAD_U24         = _OPC(3, 4),
116	OPC_MAD_S24         = _OPC(3, 5),
117	OPC_MAD_F16         = _OPC(3, 6),
118	OPC_MAD_F32         = _OPC(3, 7),
119	OPC_SEL_B16         = _OPC(3, 8),
120	OPC_SEL_B32         = _OPC(3, 9),
121	OPC_SEL_S16         = _OPC(3, 10),
122	OPC_SEL_S32         = _OPC(3, 11),
123	OPC_SEL_F16         = _OPC(3, 12),
124	OPC_SEL_F32         = _OPC(3, 13),
125	OPC_SAD_S16         = _OPC(3, 14),
126	OPC_SAD_S32         = _OPC(3, 15),
127
128	/* category 4: */
129	OPC_RCP             = _OPC(4, 0),
130	OPC_RSQ             = _OPC(4, 1),
131	OPC_LOG2            = _OPC(4, 2),
132	OPC_EXP2            = _OPC(4, 3),
133	OPC_SIN             = _OPC(4, 4),
134	OPC_COS             = _OPC(4, 5),
135	OPC_SQRT            = _OPC(4, 6),
136	// 7-63 - invalid
137
138	/* category 5: */
139	OPC_ISAM            = _OPC(5, 0),
140	OPC_ISAML           = _OPC(5, 1),
141	OPC_ISAMM           = _OPC(5, 2),
142	OPC_SAM             = _OPC(5, 3),
143	OPC_SAMB            = _OPC(5, 4),
144	OPC_SAML            = _OPC(5, 5),
145	OPC_SAMGQ           = _OPC(5, 6),
146	OPC_GETLOD          = _OPC(5, 7),
147	OPC_CONV            = _OPC(5, 8),
148	OPC_CONVM           = _OPC(5, 9),
149	OPC_GETSIZE         = _OPC(5, 10),
150	OPC_GETBUF          = _OPC(5, 11),
151	OPC_GETPOS          = _OPC(5, 12),
152	OPC_GETINFO         = _OPC(5, 13),
153	OPC_DSX             = _OPC(5, 14),
154	OPC_DSY             = _OPC(5, 15),
155	OPC_GATHER4R        = _OPC(5, 16),
156	OPC_GATHER4G        = _OPC(5, 17),
157	OPC_GATHER4B        = _OPC(5, 18),
158	OPC_GATHER4A        = _OPC(5, 19),
159	OPC_SAMGP0          = _OPC(5, 20),
160	OPC_SAMGP1          = _OPC(5, 21),
161	OPC_SAMGP2          = _OPC(5, 22),
162	OPC_SAMGP3          = _OPC(5, 23),
163	OPC_DSXPP_1         = _OPC(5, 24),
164	OPC_DSYPP_1         = _OPC(5, 25),
165	OPC_RGETPOS         = _OPC(5, 26),
166	OPC_RGETINFO        = _OPC(5, 27),
167
168	/* category 6: */
169	OPC_LDG             = _OPC(6, 0),        /* load-global */
170	OPC_LDL             = _OPC(6, 1),
171	OPC_LDP             = _OPC(6, 2),
172	OPC_STG             = _OPC(6, 3),        /* store-global */
173	OPC_STL             = _OPC(6, 4),
174	OPC_STP             = _OPC(6, 5),
175	OPC_LDIB            = _OPC(6, 6),
176	OPC_G2L             = _OPC(6, 7),
177	OPC_L2G             = _OPC(6, 8),
178	OPC_PREFETCH        = _OPC(6, 9),
179	OPC_LDLW            = _OPC(6, 10),
180	OPC_STLW            = _OPC(6, 11),
181	OPC_RESFMT          = _OPC(6, 14),
182	OPC_RESINFO         = _OPC(6, 15),
183	OPC_ATOMIC_ADD      = _OPC(6, 16),
184	OPC_ATOMIC_SUB      = _OPC(6, 17),
185	OPC_ATOMIC_XCHG     = _OPC(6, 18),
186	OPC_ATOMIC_INC      = _OPC(6, 19),
187	OPC_ATOMIC_DEC      = _OPC(6, 20),
188	OPC_ATOMIC_CMPXCHG  = _OPC(6, 21),
189	OPC_ATOMIC_MIN      = _OPC(6, 22),
190	OPC_ATOMIC_MAX      = _OPC(6, 23),
191	OPC_ATOMIC_AND      = _OPC(6, 24),
192	OPC_ATOMIC_OR       = _OPC(6, 25),
193	OPC_ATOMIC_XOR      = _OPC(6, 26),
194	OPC_LDGB            = _OPC(6, 27),
195	OPC_STGB            = _OPC(6, 28),
196	OPC_STIB            = _OPC(6, 29),
197	OPC_LDC             = _OPC(6, 30),
198	OPC_LDLV            = _OPC(6, 31),
199
200	/* category 7: */
201	OPC_BAR             = _OPC(7, 0),
202	OPC_FENCE           = _OPC(7, 1),
203
204	/* meta instructions (category -1): */
205	/* placeholder instr to mark shader inputs: */
206	OPC_META_INPUT      = _OPC(-1, 0),
207	/* The "fan-in" and "fan-out" instructions are used for keeping
208	 * track of instructions that write to multiple dst registers
209	 * (fan-out) like texture sample instructions, or read multiple
210	 * consecutive scalar registers (fan-in) (bary.f, texture samp)
211	 */
212	OPC_META_FO         = _OPC(-1, 2),
213	OPC_META_FI         = _OPC(-1, 3),
214
215} opc_t;
216
/* Split a combined opcode value back into its two parts: opc_cat()
 * yields the bits above the low NOPC_BITS bits as an int, opc_op() the
 * low NOPC_BITS bits as an unsigned.
 */
#define opc_cat(opc) ((int)((opc) >> NOPC_BITS))
#define opc_op(opc)  ((unsigned)((opc) & ((1 << NOPC_BITS) - 1)))
219
/* Operand data types.  The numeric values are fixed explicitly (0..7),
 * which fits the 3-bit type fields declared in the encodings below.
 */
typedef enum {
	TYPE_F16 = 0,
	TYPE_F32 = 1,
	TYPE_U16 = 2,
	TYPE_U32 = 3,
	TYPE_S16 = 4,
	TYPE_S32 = 5,
	TYPE_U8  = 6,
	TYPE_S8  = 7,  /* XXX I assume? */
} type_t;
230
231static inline uint32_t type_size(type_t type)
232{
233	switch (type) {
234	case TYPE_F32:
235	case TYPE_U32:
236	case TYPE_S32:
237		return 32;
238	case TYPE_F16:
239	case TYPE_U16:
240	case TYPE_S16:
241		return 16;
242	case TYPE_U8:
243	case TYPE_S8:
244		return 8;
245	default:
246		assert(0); /* invalid type */
247		return 0;
248	}
249}
250
251static inline int type_float(type_t type)
252{
253	return (type == TYPE_F32) || (type == TYPE_F16);
254}
255
256static inline int type_uint(type_t type)
257{
258	return (type == TYPE_U32) || (type == TYPE_U16) || (type == TYPE_U8);
259}
260
261static inline int type_sint(type_t type)
262{
263	return (type == TYPE_S32) || (type == TYPE_S16) || (type == TYPE_S8);
264}
265
266typedef union PACKED {
267	/* normal gpr or const src register: */
268	struct PACKED {
269		uint32_t comp  : 2;
270		uint32_t num   : 10;
271	};
272	/* for immediate val: */
273	int32_t  iim_val   : 11;
274	/* to make compiler happy: */
275	uint32_t dummy32;
276	uint32_t dummy10   : 10;
277	int32_t  idummy10  : 10;
278	uint32_t dummy11   : 11;
279	uint32_t dummy12   : 12;
280	uint32_t dummy13   : 13;
281	uint32_t dummy8    : 8;
282} reg_t;
283
/* special registers (values are compared against reg_t.num): */
#define REG_A0 61       /* address register */
#define REG_P0 62       /* predicate register */
287
288static inline int reg_special(reg_t reg)
289{
290	return (reg.num == REG_A0) || (reg.num == REG_P0);
291}
292
293typedef struct PACKED {
294	/* dword0: */
295	union PACKED {
296		struct PACKED {
297			int16_t  immed    : 16;
298			uint32_t dummy1   : 16;
299		} a3xx;
300		struct PACKED {
301			int32_t  immed    : 20;
302			uint32_t dummy1   : 12;
303		} a4xx;
304		struct PACKED {
305			int32_t immed     : 32;
306		} a5xx;
307	};
308
309	/* dword1: */
310	uint32_t dummy2   : 8;
311	uint32_t repeat   : 3;
312	uint32_t dummy3   : 1;
313	uint32_t ss       : 1;
314	uint32_t dummy4   : 7;
315	uint32_t inv      : 1;
316	uint32_t comp     : 2;
317	uint32_t opc      : 4;
318	uint32_t jmp_tgt  : 1;
319	uint32_t sync     : 1;
320	uint32_t opc_cat  : 3;
321} instr_cat0_t;
322
323typedef struct PACKED {
324	/* dword0: */
325	union PACKED {
326		/* for normal src register: */
327		struct PACKED {
328			uint32_t src : 11;
329			/* at least low bit of pad must be zero or it will
330			 * look like a address relative src
331			 */
332			uint32_t pad : 21;
333		};
334		/* for address relative: */
335		struct PACKED {
336			int32_t  off : 10;
337			uint32_t src_rel_c : 1;
338			uint32_t src_rel : 1;
339			uint32_t unknown : 20;
340		};
341		/* for immediate: */
342		int32_t  iim_val;
343		uint32_t uim_val;
344		float    fim_val;
345	};
346
347	/* dword1: */
348	uint32_t dst        : 8;
349	uint32_t repeat     : 3;
350	uint32_t src_r      : 1;
351	uint32_t ss         : 1;
352	uint32_t ul         : 1;
353	uint32_t dst_type   : 3;
354	uint32_t dst_rel    : 1;
355	uint32_t src_type   : 3;
356	uint32_t src_c      : 1;
357	uint32_t src_im     : 1;
358	uint32_t even       : 1;
359	uint32_t pos_inf    : 1;
360	uint32_t must_be_0  : 2;
361	uint32_t jmp_tgt    : 1;
362	uint32_t sync       : 1;
363	uint32_t opc_cat    : 3;
364} instr_cat1_t;
365
366typedef struct PACKED {
367	/* dword0: */
368	union PACKED {
369		struct PACKED {
370			uint32_t src1         : 11;
371			uint32_t must_be_zero1: 2;
372			uint32_t src1_im      : 1;   /* immediate */
373			uint32_t src1_neg     : 1;   /* negate */
374			uint32_t src1_abs     : 1;   /* absolute value */
375		};
376		struct PACKED {
377			uint32_t src1         : 10;
378			uint32_t src1_c       : 1;   /* relative-const */
379			uint32_t src1_rel     : 1;   /* relative address */
380			uint32_t must_be_zero : 1;
381			uint32_t dummy        : 3;
382		} rel1;
383		struct PACKED {
384			uint32_t src1         : 12;
385			uint32_t src1_c       : 1;   /* const */
386			uint32_t dummy        : 3;
387		} c1;
388	};
389
390	union PACKED {
391		struct PACKED {
392			uint32_t src2         : 11;
393			uint32_t must_be_zero2: 2;
394			uint32_t src2_im      : 1;   /* immediate */
395			uint32_t src2_neg     : 1;   /* negate */
396			uint32_t src2_abs     : 1;   /* absolute value */
397		};
398		struct PACKED {
399			uint32_t src2         : 10;
400			uint32_t src2_c       : 1;   /* relative-const */
401			uint32_t src2_rel     : 1;   /* relative address */
402			uint32_t must_be_zero : 1;
403			uint32_t dummy        : 3;
404		} rel2;
405		struct PACKED {
406			uint32_t src2         : 12;
407			uint32_t src2_c       : 1;   /* const */
408			uint32_t dummy        : 3;
409		} c2;
410	};
411
412	/* dword1: */
413	uint32_t dst      : 8;
414	uint32_t repeat   : 2;
415	uint32_t sat      : 1;
416	uint32_t src1_r   : 1;   /* doubles as nop0 if repeat==0 */
417	uint32_t ss       : 1;
418	uint32_t ul       : 1;   /* dunno */
419	uint32_t dst_half : 1;   /* or widen/narrow.. ie. dst hrN <-> rN */
420	uint32_t ei       : 1;
421	uint32_t cond     : 3;
422	uint32_t src2_r   : 1;   /* doubles as nop1 if repeat==0 */
423	uint32_t full     : 1;   /* not half */
424	uint32_t opc      : 6;
425	uint32_t jmp_tgt  : 1;
426	uint32_t sync     : 1;
427	uint32_t opc_cat  : 3;
428} instr_cat2_t;
429
430typedef struct PACKED {
431	/* dword0: */
432	union PACKED {
433		struct PACKED {
434			uint32_t src1         : 11;
435			uint32_t must_be_zero1: 2;
436			uint32_t src2_c       : 1;
437			uint32_t src1_neg     : 1;
438			uint32_t src2_r       : 1;  /* doubles as nop1 if repeat==0 */
439		};
440		struct PACKED {
441			uint32_t src1         : 10;
442			uint32_t src1_c       : 1;
443			uint32_t src1_rel     : 1;
444			uint32_t must_be_zero : 1;
445			uint32_t dummy        : 3;
446		} rel1;
447		struct PACKED {
448			uint32_t src1         : 12;
449			uint32_t src1_c       : 1;
450			uint32_t dummy        : 3;
451		} c1;
452	};
453
454	union PACKED {
455		struct PACKED {
456			uint32_t src3         : 11;
457			uint32_t must_be_zero2: 2;
458			uint32_t src3_r       : 1;
459			uint32_t src2_neg     : 1;
460			uint32_t src3_neg     : 1;
461		};
462		struct PACKED {
463			uint32_t src3         : 10;
464			uint32_t src3_c       : 1;
465			uint32_t src3_rel     : 1;
466			uint32_t must_be_zero : 1;
467			uint32_t dummy        : 3;
468		} rel2;
469		struct PACKED {
470			uint32_t src3         : 12;
471			uint32_t src3_c       : 1;
472			uint32_t dummy        : 3;
473		} c2;
474	};
475
476	/* dword1: */
477	uint32_t dst      : 8;
478	uint32_t repeat   : 2;
479	uint32_t sat      : 1;
480	uint32_t src1_r   : 1;   /* doubles as nop0 if repeat==0 */
481	uint32_t ss       : 1;
482	uint32_t ul       : 1;
483	uint32_t dst_half : 1;   /* or widen/narrow.. ie. dst hrN <-> rN */
484	uint32_t src2     : 8;
485	uint32_t opc      : 4;
486	uint32_t jmp_tgt  : 1;
487	uint32_t sync     : 1;
488	uint32_t opc_cat  : 3;
489} instr_cat3_t;
490
491static inline bool instr_cat3_full(instr_cat3_t *cat3)
492{
493	switch (_OPC(3, cat3->opc)) {
494	case OPC_MAD_F16:
495	case OPC_MAD_U16:
496	case OPC_MAD_S16:
497	case OPC_SEL_B16:
498	case OPC_SEL_S16:
499	case OPC_SEL_F16:
500	case OPC_SAD_S16:
501	case OPC_SAD_S32:  // really??
502		return false;
503	default:
504		return true;
505	}
506}
507
508typedef struct PACKED {
509	/* dword0: */
510	union PACKED {
511		struct PACKED {
512			uint32_t src          : 11;
513			uint32_t must_be_zero1: 2;
514			uint32_t src_im       : 1;   /* immediate */
515			uint32_t src_neg      : 1;   /* negate */
516			uint32_t src_abs      : 1;   /* absolute value */
517		};
518		struct PACKED {
519			uint32_t src          : 10;
520			uint32_t src_c        : 1;   /* relative-const */
521			uint32_t src_rel      : 1;   /* relative address */
522			uint32_t must_be_zero : 1;
523			uint32_t dummy        : 3;
524		} rel;
525		struct PACKED {
526			uint32_t src          : 12;
527			uint32_t src_c        : 1;   /* const */
528			uint32_t dummy        : 3;
529		} c;
530	};
531	uint32_t dummy1   : 16;  /* seem to be ignored */
532
533	/* dword1: */
534	uint32_t dst      : 8;
535	uint32_t repeat   : 2;
536	uint32_t sat      : 1;
537	uint32_t src_r    : 1;
538	uint32_t ss       : 1;
539	uint32_t ul       : 1;
540	uint32_t dst_half : 1;   /* or widen/narrow.. ie. dst hrN <-> rN */
541	uint32_t dummy2   : 5;   /* seem to be ignored */
542	uint32_t full     : 1;   /* not half */
543	uint32_t opc      : 6;
544	uint32_t jmp_tgt  : 1;
545	uint32_t sync     : 1;
546	uint32_t opc_cat  : 3;
547} instr_cat4_t;
548
549typedef struct PACKED {
550	/* dword0: */
551	union PACKED {
552		/* normal case: */
553		struct PACKED {
554			uint32_t full     : 1;   /* not half */
555			uint32_t src1     : 8;
556			uint32_t src2     : 8;
557			uint32_t dummy1   : 4;   /* seem to be ignored */
558			uint32_t samp     : 4;
559			uint32_t tex      : 7;
560		} norm;
561		/* s2en case: */
562		struct PACKED {
563			uint32_t full     : 1;   /* not half */
564			uint32_t src1     : 8;
565			uint32_t src2     : 11;
566			uint32_t dummy1   : 1;
567			uint32_t src3     : 8;
568			uint32_t dummy2   : 3;
569		} s2en;
570		/* same in either case: */
571		// XXX I think, confirm this
572		struct PACKED {
573			uint32_t full     : 1;   /* not half */
574			uint32_t src1     : 8;
575			uint32_t pad      : 23;
576		};
577	};
578
579	/* dword1: */
580	uint32_t dst      : 8;
581	uint32_t wrmask   : 4;   /* write-mask */
582	uint32_t type     : 3;
583	uint32_t dummy2   : 1;   /* seems to be ignored */
584	uint32_t is_3d    : 1;
585
586	uint32_t is_a     : 1;
587	uint32_t is_s     : 1;
588	uint32_t is_s2en  : 1;
589	uint32_t is_o     : 1;
590	uint32_t is_p     : 1;
591
592	uint32_t opc      : 5;
593	uint32_t jmp_tgt  : 1;
594	uint32_t sync     : 1;
595	uint32_t opc_cat  : 3;
596} instr_cat5_t;
597
/* dword0 encoding for src_off: [src1 + off], src2:
 *
 * The lowest bit of dword0 is named mustbe1 here; 'off' is a signed
 * 13-bit field.  dword1 is left opaque in this view.
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t mustbe1  : 1;
	int32_t  off      : 13;
	uint32_t src1     : 8;
	uint32_t src1_im  : 1;
	uint32_t src2_im  : 1;
	uint32_t src2     : 8;

	/* dword1: */
	uint32_t dword1;
} instr_cat6a_t;
611
/* dword0 encoding for !src_off: [src1], src2
 *
 * The lowest bit of dword0 is named mustbe0 here and src1 is 13 bits
 * wide.  dword1 is left opaque in this view.
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t mustbe0  : 1;
	uint32_t src1     : 13;
	uint32_t ignore0  : 8;
	uint32_t src1_im  : 1;
	uint32_t src2_im  : 1;
	uint32_t src2     : 8;

	/* dword1: */
	uint32_t dword1;
} instr_cat6b_t;
625
/* dword1 encoding for dst_off:
 *
 * dword0 is left opaque in this view; dword1 starts with a signed 8-bit
 * offset followed by a bit named mustbe1 and the destination.
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t dword0;

	/* note: there is some weird stuff going on where sometimes
	 * cat6->a.off is involved.. but that seems like a bug in
	 * the blob, since it is used even if !cat6->src_off
	 * It would make sense for there to be some more bits to
	 * bring us to 11 bits worth of offset, but not sure..
	 */
	int32_t off       : 8;
	uint32_t mustbe1  : 1;
	uint32_t dst      : 8;
	uint32_t pad1     : 15;
} instr_cat6c_t;
642
/* dword1 encoding for !dst_off:
 *
 * dword0 is left opaque in this view; dword1 starts with the destination
 * followed by a bit named mustbe0 and an 8-bit idx field.
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t dword0;

	/* dword1: */
	uint32_t dst      : 8;
	uint32_t mustbe0  : 1;
	uint32_t idx      : 8;
	uint32_t pad0     : 15;
} instr_cat6d_t;
653
/* ldgb and atomics..
 *
 * ldgb:      pad0=0, pad3=1
 * atomic .g: pad0=1, pad3=1
 *        .l: pad0=1, pad3=0
 *
 * The trailing pad2/pad4 fields overlay bits that other cat6 views name
 * (type and opc/jmp_tgt/sync/opc_cat respectively).
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t pad0     : 1;
	uint32_t src3     : 8;
	uint32_t d        : 2;
	uint32_t typed    : 1;
	uint32_t type_size : 2;
	uint32_t src1     : 8;
	uint32_t src1_im  : 1;
	uint32_t src2_im  : 1;
	uint32_t src2     : 8;

	/* dword1: */
	uint32_t dst      : 8;
	uint32_t mustbe0  : 1;
	uint32_t src_ssbo : 8;
	uint32_t pad2     : 3;  /* type */
	uint32_t g        : 1;
	uint32_t pad3     : 1;
	uint32_t pad4     : 10; /* opc/jmp_tgt/sync/opc_cat */
} instr_cat6ldgb_t;
681
/* stgb, pad0=0, pad3=2
 *
 * The trailing pad2/pad4 fields overlay bits that other cat6 views name
 * (type and opc/jmp_tgt/sync/opc_cat respectively).
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t mustbe1  : 1;  /* ??? */
	uint32_t src1     : 8;
	uint32_t d        : 2;
	uint32_t typed    : 1;
	uint32_t type_size : 2;
	uint32_t pad0     : 9;
	uint32_t src2_im  : 1;
	uint32_t src2     : 8;

	/* dword1: */
	uint32_t src3     : 8;
	uint32_t src3_im  : 1;
	uint32_t dst_ssbo : 8;
	uint32_t pad2     : 3;  /* type */
	uint32_t pad3     : 2;
	uint32_t pad4     : 10; /* opc/jmp_tgt/sync/opc_cat */
} instr_cat6stgb_t;
703
704typedef union PACKED {
705	instr_cat6a_t a;
706	instr_cat6b_t b;
707	instr_cat6c_t c;
708	instr_cat6d_t d;
709	instr_cat6ldgb_t ldgb;
710	instr_cat6stgb_t stgb;
711	struct PACKED {
712		/* dword0: */
713		uint32_t src_off  : 1;
714		uint32_t pad1     : 31;
715
716		/* dword1: */
717		uint32_t pad2     : 8;
718		uint32_t dst_off  : 1;
719		uint32_t pad3     : 8;
720		uint32_t type     : 3;
721		uint32_t g        : 1;  /* or in some cases it means dst immed */
722		uint32_t pad4     : 1;
723		uint32_t opc      : 5;
724		uint32_t jmp_tgt  : 1;
725		uint32_t sync     : 1;
726		uint32_t opc_cat  : 3;
727	};
728} instr_cat6_t;
729
/**
 * Alternative category 6 layout in which the 5-bit opcode sits in
 * dword0 rather than dword1.
 *
 * For atomic ops (which return a value):
 *
 *    pad1=1, pad2=c, pad3=0, pad4=3
 *    src1    - vecN offset/coords
 *    src2.x  - is actually dest register
 *    src2.y  - is 'data' except for cmpxchg where src2.y is 'compare'
 *              and src2.z is 'data'
 *
 * For stib (which does not return a value):
 *    pad1=0, pad2=c, pad3=0, pad4=2
 *    src1    - vecN offset/coords
 *    src2    - value to store
 *
 * For ldib:
 *    pad1=1, pad2=c, pad3=0, pad4=2
 *    src1    - vecN offset/coords
 *
 * for ldc (load from UBO using descriptor):
 *    pad1=0, pad2=8, pad3=0, pad4=2
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t pad1     : 9;
	uint32_t d        : 2;
	uint32_t typed    : 1;
	uint32_t type_size : 2;
	uint32_t opc      : 5;
	uint32_t pad2     : 5;
	uint32_t src1     : 8;  /* coordinate/offset */

	/* dword1: */
	uint32_t src2     : 8;  /* or the dst for load instructions */
	uint32_t pad3     : 1;  /* mustbe0 ?? or zero means imm vs reg for ssbo?? */
	uint32_t ssbo     : 8;  /* ssbo/image binding point */
	uint32_t type     : 3;
	uint32_t pad4     : 7;
	uint32_t jmp_tgt  : 1;
	uint32_t sync     : 1;
	uint32_t opc_cat  : 3;
} instr_cat6_a6xx_t;
771
/* Category 7 instruction encoding.  dword0 is entirely padding; dword1
 * carries the write/read/local/global flags, a 4-bit opcode and the
 * common trailer bits.
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t pad1     : 32;

	/* dword1: */
	uint32_t pad2     : 12;
	uint32_t ss       : 1;  /* maybe in the encoding, but blob only uses (sy) */
	uint32_t pad3     : 6;
	uint32_t w        : 1;  /* write */
	uint32_t r        : 1;  /* read */
	uint32_t l        : 1;  /* local */
	uint32_t g        : 1;  /* global */
	uint32_t opc      : 4;  /* presumed, but only a couple known OPCs */
	uint32_t jmp_tgt  : 1;  /* (jp) */
	uint32_t sync     : 1;  /* (sy) */
	uint32_t opc_cat  : 3;
} instr_cat7_t;
789
790typedef union PACKED {
791	instr_cat0_t cat0;
792	instr_cat1_t cat1;
793	instr_cat2_t cat2;
794	instr_cat3_t cat3;
795	instr_cat4_t cat4;
796	instr_cat5_t cat5;
797	instr_cat6_t cat6;
798	instr_cat6_a6xx_t cat6_a6xx;
799	instr_cat7_t cat7;
800	struct PACKED {
801		/* dword0: */
802		uint32_t pad1     : 32;
803
804		/* dword1: */
805		uint32_t pad2     : 12;
806		uint32_t ss       : 1;  /* cat1-cat4 (cat0??) and cat7 (?) */
807		uint32_t ul       : 1;  /* cat2-cat4 (and cat1 in blob.. which may be bug??) */
808		uint32_t pad3     : 13;
809		uint32_t jmp_tgt  : 1;
810		uint32_t sync     : 1;
811		uint32_t opc_cat  : 3;
812
813	};
814} instr_t;
815
816static inline uint32_t instr_repeat(instr_t *instr)
817{
818	switch (instr->opc_cat) {
819	case 0:  return instr->cat0.repeat;
820	case 1:  return instr->cat1.repeat;
821	case 2:  return instr->cat2.repeat;
822	case 3:  return instr->cat3.repeat;
823	case 4:  return instr->cat4.repeat;
824	default: return 0;
825	}
826}
827
828static inline bool instr_sat(instr_t *instr)
829{
830	switch (instr->opc_cat) {
831	case 2:  return instr->cat2.sat;
832	case 3:  return instr->cat3.sat;
833	case 4:  return instr->cat4.sat;
834	default: return false;
835	}
836}
837
838/* We can probably drop the gpu_id arg, but keeping it for now so we can
839 * assert if we see something we think should be new encoding on an older
840 * gpu.
841 */
842static inline bool is_cat6_legacy(instr_t *instr, unsigned gpu_id)
843{
844	instr_cat6_a6xx_t *cat6 = &instr->cat6_a6xx;
845
846	/* At least one of these two bits is pad in all the possible
847	 * "legacy" cat6 encodings, and a analysis of all the pre-a6xx
848	 * cmdstream traces I have indicates that the pad bit is zero
849	 * in all cases.  So we can use this to detect new encoding:
850	 */
851	if ((cat6->pad2 & 0x8) && (cat6->pad4 & 0x2)) {
852		assert(gpu_id >= 600);
853		assert(instr->cat6.opc == 0);
854		return false;
855	}
856
857	return true;
858}
859
860static inline uint32_t instr_opc(instr_t *instr, unsigned gpu_id)
861{
862	switch (instr->opc_cat) {
863	case 0:  return instr->cat0.opc;
864	case 1:  return 0;
865	case 2:  return instr->cat2.opc;
866	case 3:  return instr->cat3.opc;
867	case 4:  return instr->cat4.opc;
868	case 5:  return instr->cat5.opc;
869	case 6:
870		if (!is_cat6_legacy(instr, gpu_id))
871			return instr->cat6_a6xx.opc;
872		return instr->cat6.opc;
873	case 7:  return instr->cat7.opc;
874	default: return 0;
875	}
876}
877
878static inline bool is_mad(opc_t opc)
879{
880	switch (opc) {
881	case OPC_MAD_U16:
882	case OPC_MAD_S16:
883	case OPC_MAD_U24:
884	case OPC_MAD_S24:
885	case OPC_MAD_F16:
886	case OPC_MAD_F32:
887		return true;
888	default:
889		return false;
890	}
891}
892
893static inline bool is_madsh(opc_t opc)
894{
895	switch (opc) {
896	case OPC_MADSH_U16:
897	case OPC_MADSH_M16:
898		return true;
899	default:
900		return false;
901	}
902}
903
904static inline bool is_atomic(opc_t opc)
905{
906	switch (opc) {
907	case OPC_ATOMIC_ADD:
908	case OPC_ATOMIC_SUB:
909	case OPC_ATOMIC_XCHG:
910	case OPC_ATOMIC_INC:
911	case OPC_ATOMIC_DEC:
912	case OPC_ATOMIC_CMPXCHG:
913	case OPC_ATOMIC_MIN:
914	case OPC_ATOMIC_MAX:
915	case OPC_ATOMIC_AND:
916	case OPC_ATOMIC_OR:
917	case OPC_ATOMIC_XOR:
918		return true;
919	default:
920		return false;
921	}
922}
923
924static inline bool is_ssbo(opc_t opc)
925{
926	switch (opc) {
927	case OPC_RESFMT:
928	case OPC_RESINFO:
929	case OPC_LDGB:
930	case OPC_STGB:
931	case OPC_STIB:
932		return true;
933	default:
934		return false;
935	}
936}
937
938static inline bool is_isam(opc_t opc)
939{
940	switch (opc) {
941	case OPC_ISAM:
942	case OPC_ISAML:
943	case OPC_ISAMM:
944		return true;
945	default:
946		return false;
947	}
948}
949
/* Disassembler entry point; only declared here, defined elsewhere.
 * NOTE(review): presumably 'dwords'/'sizedwords' describe the
 * instruction buffer and its length in 32-bit words and 'out' receives
 * the text; the meaning of 'level' and of the return value is not
 * visible from this header -- confirm against the implementation.
 */
int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out, unsigned gpu_id);
951
952#endif /* INSTR_A3XX_H_ */
953