1/*
2 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28  * Authors:
29  *   Keith Whitwell <keithw@vmware.com>
30  */
31
32
33#ifndef BRW_EU_H
34#define BRW_EU_H
35
36#include <stdbool.h>
37#include <stdio.h>
38#include "brw_inst.h"
39#include "brw_compiler.h"
40#include "brw_eu_defines.h"
41#include "brw_reg.h"
42#include "brw_disasm_info.h"
43
44#ifdef __cplusplus
45extern "C" {
46#endif
47
/* Number of entries in the instruction-state stack array of
 * struct brw_codegen.
 */
#define BRW_EU_MAX_INSN_STACK 5
49
50struct brw_insn_state {
51   /* One of BRW_EXECUTE_* */
52   unsigned exec_size:3;
53
54   /* Group in units of channels */
55   unsigned group:5;
56
57   /* Compression control on gfx4-5 */
58   bool compressed:1;
59
60   /* One of BRW_MASK_* */
61   unsigned mask_control:1;
62
63   /* Scheduling info for Gfx12+ */
64   struct tgl_swsb swsb;
65
66   bool saturate:1;
67
68   /* One of BRW_ALIGN_* */
69   unsigned access_mode:1;
70
71   /* One of BRW_PREDICATE_* */
72   enum brw_predicate predicate:4;
73
74   bool pred_inv:1;
75
76   /* Flag subreg.  Bottom bit is subreg, top bit is reg */
77   unsigned flag_subreg:2;
78
79   bool acc_wr_control:1;
80};
81
82
/* A helper for accessing the last instruction emitted.  This makes it easy
 * to set various bits on an instruction without having to create a temporary
 * variable and assign the emitted instruction to it.
 *
 * The macro expands to an expression that uses a variable named `p` from the
 * surrounding scope and indexes p->store with p->nr_insn - 1, so it is only
 * meaningful once at least one instruction has been emitted.
 */
#define brw_last_inst (&p->store[p->nr_insn - 1])
88
89struct brw_codegen {
90   brw_inst *store;
91   int store_size;
92   unsigned nr_insn;
93   unsigned int next_insn_offset;
94
95   void *mem_ctx;
96
97   /* Allow clients to push/pop instruction state:
98    */
99   struct brw_insn_state stack[BRW_EU_MAX_INSN_STACK];
100   struct brw_insn_state *current;
101
102   /** Whether or not the user wants automatic exec sizes
103    *
104    * If true, codegen will try to automatically infer the exec size of an
105    * instruction from the width of the destination register.  If false, it
106    * will take whatever is set by brw_set_default_exec_size verbatim.
107    *
108    * This is set to true by default in brw_init_codegen.
109    */
110   bool automatic_exec_sizes;
111
112   bool single_program_flow;
113   const struct intel_device_info *devinfo;
114
115   /* Control flow stacks:
116    * - if_stack contains IF and ELSE instructions which must be patched
117    *   (and popped) once the matching ENDIF instruction is encountered.
118    *
119    *   Just store the instruction pointer(an index).
120    */
121   int *if_stack;
122   int if_stack_depth;
123   int if_stack_array_size;
124
125   /**
126    * loop_stack contains the instruction pointers of the starts of loops which
127    * must be patched (and popped) once the matching WHILE instruction is
128    * encountered.
129    */
130   int *loop_stack;
131   /**
132    * pre-gfx6, the BREAK and CONT instructions had to tell how many IF/ENDIF
133    * blocks they were popping out of, to fix up the mask stack.  This tracks
134    * the IF/ENDIF nesting in each current nested loop level.
135    */
136   int *if_depth_in_loop;
137   int loop_stack_depth;
138   int loop_stack_array_size;
139
140   struct brw_shader_reloc *relocs;
141   int num_relocs;
142   int reloc_array_size;
143};
144
/* One entry of a singly linked list of labels, each carrying an offset and
 * a number.
 */
struct brw_label {
   int               offset;
   int               number;
   struct brw_label *next;
};
150
151void brw_pop_insn_state( struct brw_codegen *p );
152void brw_push_insn_state( struct brw_codegen *p );
153unsigned brw_get_default_exec_size(struct brw_codegen *p);
154unsigned brw_get_default_group(struct brw_codegen *p);
155unsigned brw_get_default_access_mode(struct brw_codegen *p);
156struct tgl_swsb brw_get_default_swsb(struct brw_codegen *p);
157void brw_set_default_exec_size(struct brw_codegen *p, unsigned value);
158void brw_set_default_mask_control( struct brw_codegen *p, unsigned value );
159void brw_set_default_saturate( struct brw_codegen *p, bool enable );
160void brw_set_default_access_mode( struct brw_codegen *p, unsigned access_mode );
161void brw_inst_set_compression(const struct intel_device_info *devinfo,
162                              brw_inst *inst, bool on);
163void brw_set_default_compression(struct brw_codegen *p, bool on);
164void brw_inst_set_group(const struct intel_device_info *devinfo,
165                        brw_inst *inst, unsigned group);
166void brw_set_default_group(struct brw_codegen *p, unsigned group);
167void brw_set_default_compression_control(struct brw_codegen *p, enum brw_compression c);
168void brw_set_default_predicate_control(struct brw_codegen *p, enum brw_predicate pc);
169void brw_set_default_predicate_inverse(struct brw_codegen *p, bool predicate_inverse);
170void brw_set_default_flag_reg(struct brw_codegen *p, int reg, int subreg);
171void brw_set_default_acc_write_control(struct brw_codegen *p, unsigned value);
172void brw_set_default_swsb(struct brw_codegen *p, struct tgl_swsb value);
173
/* Code generator set-up. */
void brw_init_codegen(const struct intel_device_info *,
                      struct brw_codegen *p,
                      void *mem_ctx);

/* Opcode queries. */
bool brw_has_jip(const struct intel_device_info *devinfo, enum opcode opcode);
bool brw_has_uip(const struct intel_device_info *devinfo, enum opcode opcode);

/* Labels. */
const struct brw_label *brw_find_label(const struct brw_label *root,
                                       int offset);
void brw_create_label(struct brw_label **labels, int offset, void *mem_ctx);
const struct brw_label *
brw_label_assembly(const struct intel_device_info *devinfo,
                   const void *assembly, int start, int end,
                   void *mem_ctx);

/* Disassembly. */
int brw_disassemble_inst(FILE *file,
                         const struct intel_device_info *devinfo,
                         const struct brw_inst *inst,
                         bool is_compacted,
                         int offset,
                         const struct brw_label *root_label);
void brw_disassemble_with_labels(const struct intel_device_info *devinfo,
                                 const void *assembly,
                                 int start, int end,
                                 FILE *out);
void brw_disassemble(const struct intel_device_info *devinfo,
                     const void *assembly,
                     int start, int end,
                     const struct brw_label *root_label,
                     FILE *out);

/* Access to the generated program and its relocations. */
const struct brw_shader_reloc *brw_get_shader_relocs(struct brw_codegen *p,
                                                     unsigned *num_relocs);
const unsigned *brw_get_program(struct brw_codegen *p, unsigned *sz);

bool brw_try_override_assembly(struct brw_codegen *p,
                               int start_offset,
                               const char *identifier);
198
199void brw_realign(struct brw_codegen *p, unsigned align);
200int brw_append_data(struct brw_codegen *p, void *data,
201                    unsigned size, unsigned align);
202brw_inst *brw_next_insn(struct brw_codegen *p, unsigned opcode);
203void brw_add_reloc(struct brw_codegen *p, uint32_t id,
204                   enum brw_shader_reloc_type type,
205                   uint32_t offset, uint32_t delta);
206void brw_set_dest(struct brw_codegen *p, brw_inst *insn, struct brw_reg dest);
207void brw_set_src0(struct brw_codegen *p, brw_inst *insn, struct brw_reg reg);
208
209void gfx6_resolve_implied_move(struct brw_codegen *p,
210			       struct brw_reg *src,
211			       unsigned msg_reg_nr);
212
213/* Helpers for regular instructions:
214 */
215#define ALU1(OP)				\
216brw_inst *brw_##OP(struct brw_codegen *p,	\
217	      struct brw_reg dest,		\
218	      struct brw_reg src0);
219
220#define ALU2(OP)				\
221brw_inst *brw_##OP(struct brw_codegen *p,	\
222	      struct brw_reg dest,		\
223	      struct brw_reg src0,		\
224	      struct brw_reg src1);
225
226#define ALU3(OP)				\
227brw_inst *brw_##OP(struct brw_codegen *p,	\
228	      struct brw_reg dest,		\
229	      struct brw_reg src0,		\
230	      struct brw_reg src1,		\
231	      struct brw_reg src2);
232
233ALU1(MOV)
234ALU2(SEL)
235ALU1(NOT)
236ALU2(AND)
237ALU2(OR)
238ALU2(XOR)
239ALU2(SHR)
240ALU2(SHL)
241ALU1(DIM)
242ALU2(ASR)
243ALU2(ROL)
244ALU2(ROR)
245ALU3(CSEL)
246ALU1(F32TO16)
247ALU1(F16TO32)
248ALU2(ADD)
249ALU3(ADD3)
250ALU2(AVG)
251ALU2(MUL)
252ALU1(FRC)
253ALU1(RNDD)
254ALU1(RNDE)
255ALU1(RNDU)
256ALU1(RNDZ)
257ALU2(MAC)
258ALU2(MACH)
259ALU1(LZD)
260ALU2(DP4)
261ALU2(DPH)
262ALU2(DP3)
263ALU2(DP2)
264ALU3(DP4A)
265ALU2(LINE)
266ALU2(PLN)
267ALU3(MAD)
268ALU3(LRP)
269ALU1(BFREV)
270ALU3(BFE)
271ALU2(BFI1)
272ALU3(BFI2)
273ALU1(FBH)
274ALU1(FBL)
275ALU1(CBIT)
276ALU2(ADDC)
277ALU2(SUBB)
278
279#undef ALU1
280#undef ALU2
281#undef ALU3
282
283
284/* Helpers for SEND instruction:
285 */
286
287/**
288 * Construct a message descriptor immediate with the specified common
289 * descriptor controls.
290 */
291static inline uint32_t
292brw_message_desc(const struct intel_device_info *devinfo,
293                 unsigned msg_length,
294                 unsigned response_length,
295                 bool header_present)
296{
297   if (devinfo->ver >= 5) {
298      return (SET_BITS(msg_length, 28, 25) |
299              SET_BITS(response_length, 24, 20) |
300              SET_BITS(header_present, 19, 19));
301   } else {
302      return (SET_BITS(msg_length, 23, 20) |
303              SET_BITS(response_length, 19, 16));
304   }
305}
306
307static inline unsigned
308brw_message_desc_mlen(const struct intel_device_info *devinfo, uint32_t desc)
309{
310   if (devinfo->ver >= 5)
311      return GET_BITS(desc, 28, 25);
312   else
313      return GET_BITS(desc, 23, 20);
314}
315
316static inline unsigned
317brw_message_desc_rlen(const struct intel_device_info *devinfo, uint32_t desc)
318{
319   if (devinfo->ver >= 5)
320      return GET_BITS(desc, 24, 20);
321   else
322      return GET_BITS(desc, 19, 16);
323}
324
325static inline bool
326brw_message_desc_header_present(ASSERTED
327                                const struct intel_device_info *devinfo,
328                                uint32_t desc)
329{
330   assert(devinfo->ver >= 5);
331   return GET_BITS(desc, 19, 19);
332}
333
334static inline unsigned
335brw_message_ex_desc(UNUSED const struct intel_device_info *devinfo,
336                    unsigned ex_msg_length)
337{
338   return SET_BITS(ex_msg_length, 9, 6);
339}
340
341static inline unsigned
342brw_message_ex_desc_ex_mlen(UNUSED const struct intel_device_info *devinfo,
343                            uint32_t ex_desc)
344{
345   return GET_BITS(ex_desc, 9, 6);
346}
347
348static inline uint32_t
349brw_urb_desc(const struct intel_device_info *devinfo,
350             unsigned msg_type,
351             bool per_slot_offset_present,
352             bool channel_mask_present,
353             unsigned global_offset)
354{
355   if (devinfo->ver >= 8) {
356      return (SET_BITS(per_slot_offset_present, 17, 17) |
357              SET_BITS(channel_mask_present, 15, 15) |
358              SET_BITS(global_offset, 14, 4) |
359              SET_BITS(msg_type, 3, 0));
360   } else if (devinfo->ver >= 7) {
361      assert(!channel_mask_present);
362      return (SET_BITS(per_slot_offset_present, 16, 16) |
363              SET_BITS(global_offset, 13, 3) |
364              SET_BITS(msg_type, 3, 0));
365   } else {
366      unreachable("unhandled URB write generation");
367   }
368}
369
370static inline uint32_t
371brw_urb_desc_msg_type(ASSERTED const struct intel_device_info *devinfo,
372                      uint32_t desc)
373{
374   assert(devinfo->ver >= 7);
375   return GET_BITS(desc, 3, 0);
376}
377
378static inline uint32_t
379brw_urb_fence_desc(const struct intel_device_info *devinfo)
380{
381   assert(devinfo->has_lsc);
382   return brw_urb_desc(devinfo, GFX125_URB_OPCODE_FENCE, false, false, 0);
383}
384
385/**
386 * Construct a message descriptor immediate with the specified sampler
387 * function controls.
388 */
389static inline uint32_t
390brw_sampler_desc(const struct intel_device_info *devinfo,
391                 unsigned binding_table_index,
392                 unsigned sampler,
393                 unsigned msg_type,
394                 unsigned simd_mode,
395                 unsigned return_format)
396{
397   const unsigned desc = (SET_BITS(binding_table_index, 7, 0) |
398                          SET_BITS(sampler, 11, 8));
399   if (devinfo->ver >= 7)
400      return (desc | SET_BITS(msg_type, 16, 12) |
401              SET_BITS(simd_mode, 18, 17));
402   else if (devinfo->ver >= 5)
403      return (desc | SET_BITS(msg_type, 15, 12) |
404              SET_BITS(simd_mode, 17, 16));
405   else if (devinfo->is_g4x)
406      return desc | SET_BITS(msg_type, 15, 12);
407   else
408      return (desc | SET_BITS(return_format, 13, 12) |
409              SET_BITS(msg_type, 15, 14));
410}
411
412static inline unsigned
413brw_sampler_desc_binding_table_index(UNUSED
414                                     const struct intel_device_info *devinfo,
415                                     uint32_t desc)
416{
417   return GET_BITS(desc, 7, 0);
418}
419
420static inline unsigned
421brw_sampler_desc_sampler(UNUSED const struct intel_device_info *devinfo,
422                         uint32_t desc)
423{
424   return GET_BITS(desc, 11, 8);
425}
426
427static inline unsigned
428brw_sampler_desc_msg_type(const struct intel_device_info *devinfo, uint32_t desc)
429{
430   if (devinfo->ver >= 7)
431      return GET_BITS(desc, 16, 12);
432   else if (devinfo->ver >= 5 || devinfo->is_g4x)
433      return GET_BITS(desc, 15, 12);
434   else
435      return GET_BITS(desc, 15, 14);
436}
437
438static inline unsigned
439brw_sampler_desc_simd_mode(const struct intel_device_info *devinfo,
440                           uint32_t desc)
441{
442   assert(devinfo->ver >= 5);
443   if (devinfo->ver >= 7)
444      return GET_BITS(desc, 18, 17);
445   else
446      return GET_BITS(desc, 17, 16);
447}
448
449static  inline unsigned
450brw_sampler_desc_return_format(ASSERTED const struct intel_device_info *devinfo,
451                               uint32_t desc)
452{
453   assert(devinfo->ver == 4 && !devinfo->is_g4x);
454   return GET_BITS(desc, 13, 12);
455}
456
457/**
458 * Construct a message descriptor for the dataport
459 */
460static inline uint32_t
461brw_dp_desc(const struct intel_device_info *devinfo,
462            unsigned binding_table_index,
463            unsigned msg_type,
464            unsigned msg_control)
465{
466   /* Prior to gfx6, things are too inconsistent; use the dp_read/write_desc
467    * helpers instead.
468    */
469   assert(devinfo->ver >= 6);
470   const unsigned desc = SET_BITS(binding_table_index, 7, 0);
471   if (devinfo->ver >= 8) {
472      return (desc | SET_BITS(msg_control, 13, 8) |
473              SET_BITS(msg_type, 18, 14));
474   } else if (devinfo->ver >= 7) {
475      return (desc | SET_BITS(msg_control, 13, 8) |
476              SET_BITS(msg_type, 17, 14));
477   } else {
478      return (desc | SET_BITS(msg_control, 12, 8) |
479              SET_BITS(msg_type, 16, 13));
480   }
481}
482
483static inline unsigned
484brw_dp_desc_binding_table_index(UNUSED const struct intel_device_info *devinfo,
485                                uint32_t desc)
486{
487   return GET_BITS(desc, 7, 0);
488}
489
490static inline unsigned
491brw_dp_desc_msg_type(const struct intel_device_info *devinfo, uint32_t desc)
492{
493   assert(devinfo->ver >= 6);
494   if (devinfo->ver >= 8)
495      return GET_BITS(desc, 18, 14);
496   else if (devinfo->ver >= 7)
497      return GET_BITS(desc, 17, 14);
498   else
499      return GET_BITS(desc, 16, 13);
500}
501
502static inline unsigned
503brw_dp_desc_msg_control(const struct intel_device_info *devinfo, uint32_t desc)
504{
505   assert(devinfo->ver >= 6);
506   if (devinfo->ver >= 7)
507      return GET_BITS(desc, 13, 8);
508   else
509      return GET_BITS(desc, 12, 8);
510}
511
512/**
513 * Construct a message descriptor immediate with the specified dataport read
514 * function controls.
515 */
516static inline uint32_t
517brw_dp_read_desc(const struct intel_device_info *devinfo,
518                 unsigned binding_table_index,
519                 unsigned msg_control,
520                 unsigned msg_type,
521                 unsigned target_cache)
522{
523   if (devinfo->ver >= 6)
524      return brw_dp_desc(devinfo, binding_table_index, msg_type, msg_control);
525   else if (devinfo->ver >= 5 || devinfo->is_g4x)
526      return (SET_BITS(binding_table_index, 7, 0) |
527              SET_BITS(msg_control, 10, 8) |
528              SET_BITS(msg_type, 13, 11) |
529              SET_BITS(target_cache, 15, 14));
530   else
531      return (SET_BITS(binding_table_index, 7, 0) |
532              SET_BITS(msg_control, 11, 8) |
533              SET_BITS(msg_type, 13, 12) |
534              SET_BITS(target_cache, 15, 14));
535}
536
537static inline unsigned
538brw_dp_read_desc_msg_type(const struct intel_device_info *devinfo,
539                          uint32_t desc)
540{
541   if (devinfo->ver >= 6)
542      return brw_dp_desc_msg_type(devinfo, desc);
543   else if (devinfo->ver >= 5 || devinfo->is_g4x)
544      return GET_BITS(desc, 13, 11);
545   else
546      return GET_BITS(desc, 13, 12);
547}
548
549static inline unsigned
550brw_dp_read_desc_msg_control(const struct intel_device_info *devinfo,
551                             uint32_t desc)
552{
553   if (devinfo->ver >= 6)
554      return brw_dp_desc_msg_control(devinfo, desc);
555   else if (devinfo->ver >= 5 || devinfo->is_g4x)
556      return GET_BITS(desc, 10, 8);
557   else
558      return GET_BITS(desc, 11, 8);
559}
560
561/**
562 * Construct a message descriptor immediate with the specified dataport write
563 * function controls.
564 */
565static inline uint32_t
566brw_dp_write_desc(const struct intel_device_info *devinfo,
567                  unsigned binding_table_index,
568                  unsigned msg_control,
569                  unsigned msg_type,
570                  unsigned send_commit_msg)
571{
572   assert(devinfo->ver <= 6 || !send_commit_msg);
573   if (devinfo->ver >= 6) {
574      return brw_dp_desc(devinfo, binding_table_index, msg_type, msg_control) |
575             SET_BITS(send_commit_msg, 17, 17);
576   } else {
577      return (SET_BITS(binding_table_index, 7, 0) |
578              SET_BITS(msg_control, 11, 8) |
579              SET_BITS(msg_type, 14, 12) |
580              SET_BITS(send_commit_msg, 15, 15));
581   }
582}
583
584static inline unsigned
585brw_dp_write_desc_msg_type(const struct intel_device_info *devinfo,
586                           uint32_t desc)
587{
588   if (devinfo->ver >= 6)
589      return brw_dp_desc_msg_type(devinfo, desc);
590   else
591      return GET_BITS(desc, 14, 12);
592}
593
594static inline unsigned
595brw_dp_write_desc_msg_control(const struct intel_device_info *devinfo,
596                              uint32_t desc)
597{
598   if (devinfo->ver >= 6)
599      return brw_dp_desc_msg_control(devinfo, desc);
600   else
601      return GET_BITS(desc, 11, 8);
602}
603
604static inline bool
605brw_dp_write_desc_write_commit(const struct intel_device_info *devinfo,
606                               uint32_t desc)
607{
608   assert(devinfo->ver <= 6);
609   if (devinfo->ver >= 6)
610      return GET_BITS(desc, 17, 17);
611   else
612      return GET_BITS(desc, 15, 15);
613}
614
615/**
616 * Construct a message descriptor immediate with the specified dataport
617 * surface function controls.
618 */
619static inline uint32_t
620brw_dp_surface_desc(const struct intel_device_info *devinfo,
621                    unsigned msg_type,
622                    unsigned msg_control)
623{
624   assert(devinfo->ver >= 7);
625   /* We'll OR in the binding table index later */
626   return brw_dp_desc(devinfo, 0, msg_type, msg_control);
627}
628
629static inline uint32_t
630brw_dp_untyped_atomic_desc(const struct intel_device_info *devinfo,
631                           unsigned exec_size, /**< 0 for SIMD4x2 */
632                           unsigned atomic_op,
633                           bool response_expected)
634{
635   assert(exec_size <= 8 || exec_size == 16);
636
637   unsigned msg_type;
638   if (devinfo->verx10 >= 75) {
639      if (exec_size > 0) {
640         msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP;
641      } else {
642         msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2;
643      }
644   } else {
645      msg_type = GFX7_DATAPORT_DC_UNTYPED_ATOMIC_OP;
646   }
647
648   const unsigned msg_control =
649      SET_BITS(atomic_op, 3, 0) |
650      SET_BITS(0 < exec_size && exec_size <= 8, 4, 4) |
651      SET_BITS(response_expected, 5, 5);
652
653   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
654}
655
656static inline uint32_t
657brw_dp_untyped_atomic_float_desc(const struct intel_device_info *devinfo,
658                                 unsigned exec_size,
659                                 unsigned atomic_op,
660                                 bool response_expected)
661{
662   assert(exec_size <= 8 || exec_size == 16);
663   assert(devinfo->ver >= 9);
664
665   assert(exec_size > 0);
666   const unsigned msg_type = GFX9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP;
667
668   const unsigned msg_control =
669      SET_BITS(atomic_op, 1, 0) |
670      SET_BITS(exec_size <= 8, 4, 4) |
671      SET_BITS(response_expected, 5, 5);
672
673   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
674}
675
/**
 * Compute a 4-bit channel mask in which the lowest \p num_channels bits are
 * clear and the remaining bits are set (0xf for 0 channels, 0xe for 1, ...,
 * 0x0 for 4 or more).
 *
 * See also MDC_CMASK in the SKL PRM Vol 2d.
 *
 * The computation is done on unsigned values and counts of 4 or more return
 * 0 directly, so an oversized count can never trigger an out-of-range or
 * signed-overflowing shift.
 */
static inline unsigned
brw_mdc_cmask(unsigned num_channels)
{
   /* Every bit of the 4-bit mask is already cleared at this point. */
   if (num_channels >= 4)
      return 0;

   return 0xfu & (0xfu << num_channels);
}
682
/**
 * Return BITSET_MASK(num_channels).  Asserts that \p num_channels is in the
 * range 1..4.
 */
static inline unsigned
lsc_cmask(unsigned num_channels)
{
   assert(num_channels > 0 && num_channels <= 4);

   const unsigned mask = BITSET_MASK(num_channels);
   return mask;
}
689
690static inline uint32_t
691brw_dp_untyped_surface_rw_desc(const struct intel_device_info *devinfo,
692                               unsigned exec_size, /**< 0 for SIMD4x2 */
693                               unsigned num_channels,
694                               bool write)
695{
696   assert(exec_size <= 8 || exec_size == 16);
697
698   unsigned msg_type;
699   if (write) {
700      if (devinfo->verx10 >= 75) {
701         msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE;
702      } else {
703         msg_type = GFX7_DATAPORT_DC_UNTYPED_SURFACE_WRITE;
704      }
705   } else {
706      /* Read */
707      if (devinfo->verx10 >= 75) {
708         msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ;
709      } else {
710         msg_type = GFX7_DATAPORT_DC_UNTYPED_SURFACE_READ;
711      }
712   }
713
714   /* SIMD4x2 is only valid for read messages on IVB; use SIMD8 instead */
715   if (write && devinfo->verx10 == 70 && exec_size == 0)
716      exec_size = 8;
717
718   /* See also MDC_SM3 in the SKL PRM Vol 2d. */
719   const unsigned simd_mode = exec_size == 0 ? 0 : /* SIMD4x2 */
720                              exec_size <= 8 ? 2 : 1;
721
722   const unsigned msg_control =
723      SET_BITS(brw_mdc_cmask(num_channels), 3, 0) |
724      SET_BITS(simd_mode, 5, 4);
725
726   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
727}
728
729static inline unsigned
730brw_mdc_ds(unsigned bit_size)
731{
732   switch (bit_size) {
733   case 8:
734      return GFX7_BYTE_SCATTERED_DATA_ELEMENT_BYTE;
735   case 16:
736      return GFX7_BYTE_SCATTERED_DATA_ELEMENT_WORD;
737   case 32:
738      return GFX7_BYTE_SCATTERED_DATA_ELEMENT_DWORD;
739   default:
740      unreachable("Unsupported bit_size for byte scattered messages");
741   }
742}
743
744static inline uint32_t
745brw_dp_byte_scattered_rw_desc(const struct intel_device_info *devinfo,
746                              unsigned exec_size,
747                              unsigned bit_size,
748                              bool write)
749{
750   assert(exec_size <= 8 || exec_size == 16);
751
752   assert(devinfo->verx10 >= 75);
753   const unsigned msg_type =
754      write ? HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_WRITE :
755              HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_READ;
756
757   assert(exec_size > 0);
758   const unsigned msg_control =
759      SET_BITS(exec_size == 16, 0, 0) |
760      SET_BITS(brw_mdc_ds(bit_size), 3, 2);
761
762   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
763}
764
765static inline uint32_t
766brw_dp_dword_scattered_rw_desc(const struct intel_device_info *devinfo,
767                               unsigned exec_size,
768                               bool write)
769{
770   assert(exec_size == 8 || exec_size == 16);
771
772   unsigned msg_type;
773   if (write) {
774      if (devinfo->ver >= 6) {
775         msg_type = GFX6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE;
776      } else {
777         msg_type = BRW_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE;
778      }
779   } else {
780      if (devinfo->ver >= 7) {
781         msg_type = GFX7_DATAPORT_DC_DWORD_SCATTERED_READ;
782      } else if (devinfo->ver > 4 || devinfo->is_g4x) {
783         msg_type = G45_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ;
784      } else {
785         msg_type = BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ;
786      }
787   }
788
789   const unsigned msg_control =
790      SET_BITS(1, 1, 1) | /* Legacy SIMD Mode */
791      SET_BITS(exec_size == 16, 0, 0);
792
793   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
794}
795
796static inline uint32_t
797brw_dp_oword_block_rw_desc(const struct intel_device_info *devinfo,
798                           bool align_16B,
799                           unsigned num_dwords,
800                           bool write)
801{
802   /* Writes can only have addresses aligned by OWORDs (16 Bytes). */
803   assert(!write || align_16B);
804
805   const unsigned msg_type =
806      write ?     GFX7_DATAPORT_DC_OWORD_BLOCK_WRITE :
807      align_16B ? GFX7_DATAPORT_DC_OWORD_BLOCK_READ :
808                  GFX7_DATAPORT_DC_UNALIGNED_OWORD_BLOCK_READ;
809
810   const unsigned msg_control =
811      SET_BITS(BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_dwords), 2, 0);
812
813   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
814}
815
816static inline uint32_t
817brw_dp_a64_untyped_surface_rw_desc(const struct intel_device_info *devinfo,
818                                   unsigned exec_size, /**< 0 for SIMD4x2 */
819                                   unsigned num_channels,
820                                   bool write)
821{
822   assert(exec_size <= 8 || exec_size == 16);
823   assert(devinfo->ver >= 8);
824
825   unsigned msg_type =
826      write ? GFX8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_WRITE :
827              GFX8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_READ;
828
829   /* See also MDC_SM3 in the SKL PRM Vol 2d. */
830   const unsigned simd_mode = exec_size == 0 ? 0 : /* SIMD4x2 */
831                              exec_size <= 8 ? 2 : 1;
832
833   const unsigned msg_control =
834      SET_BITS(brw_mdc_cmask(num_channels), 3, 0) |
835      SET_BITS(simd_mode, 5, 4);
836
837   return brw_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
838                      msg_type, msg_control);
839}
840
841static inline uint32_t
842brw_dp_a64_oword_block_rw_desc(const struct intel_device_info *devinfo,
843                               bool align_16B,
844                               unsigned num_dwords,
845                               bool write)
846{
847   /* Writes can only have addresses aligned by OWORDs (16 Bytes). */
848   assert(!write || align_16B);
849
850   unsigned msg_type =
851      write ? GFX9_DATAPORT_DC_PORT1_A64_OWORD_BLOCK_WRITE :
852              GFX9_DATAPORT_DC_PORT1_A64_OWORD_BLOCK_READ;
853
854   unsigned msg_control =
855      SET_BITS(!align_16B, 4, 3) |
856      SET_BITS(BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_dwords), 2, 0);
857
858   return brw_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
859                      msg_type, msg_control);
860}
861
/**
 * Calculate the data size (see MDC_A64_DS in the "Structures" volume of the
 * Skylake PRM).
 *
 * Maps an element count of 1, 2, 4 or 8 to the encoding 0, 1, 2 or 3
 * respectively.  Any other count is treated as unreachable.
 */
static inline uint32_t
brw_mdc_a64_ds(unsigned elems)
{
   switch (elems) {
   case 1:  return 0;
   case 2:  return 1;
   case 4:  return 2;
   case 8:  return 3;
   default:
      unreachable("Unsupported element count for A64 scattered message");
   }
}
878
879static inline uint32_t
880brw_dp_a64_byte_scattered_rw_desc(const struct intel_device_info *devinfo,
881                                  unsigned exec_size, /**< 0 for SIMD4x2 */
882                                  unsigned bit_size,
883                                  bool write)
884{
885   assert(exec_size <= 8 || exec_size == 16);
886   assert(devinfo->ver >= 8);
887
888   unsigned msg_type =
889      write ? GFX8_DATAPORT_DC_PORT1_A64_SCATTERED_WRITE :
890              GFX9_DATAPORT_DC_PORT1_A64_SCATTERED_READ;
891
892   const unsigned msg_control =
893      SET_BITS(GFX8_A64_SCATTERED_SUBTYPE_BYTE, 1, 0) |
894      SET_BITS(brw_mdc_a64_ds(bit_size / 8), 3, 2) |
895      SET_BITS(exec_size == 16, 4, 4);
896
897   return brw_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
898                      msg_type, msg_control);
899}
900
901static inline uint32_t
902brw_dp_a64_untyped_atomic_desc(const struct intel_device_info *devinfo,
903                               ASSERTED unsigned exec_size, /**< 0 for SIMD4x2 */
904                               unsigned bit_size,
905                               unsigned atomic_op,
906                               bool response_expected)
907{
908   assert(exec_size == 8);
909   assert(devinfo->ver >= 8);
910   assert(bit_size == 16 || bit_size == 32 || bit_size == 64);
911   assert(devinfo->ver >= 12 || bit_size >= 32);
912
913   const unsigned msg_type = bit_size == 16 ?
914      GFX12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_INT_OP :
915      GFX8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP;
916
917   const unsigned msg_control =
918      SET_BITS(atomic_op, 3, 0) |
919      SET_BITS(bit_size == 64, 4, 4) |
920      SET_BITS(response_expected, 5, 5);
921
922   return brw_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
923                      msg_type, msg_control);
924}
925
926static inline uint32_t
927brw_dp_a64_untyped_atomic_float_desc(const struct intel_device_info *devinfo,
928                                     ASSERTED unsigned exec_size,
929                                     unsigned bit_size,
930                                     unsigned atomic_op,
931                                     bool response_expected)
932{
933   assert(exec_size == 8);
934   assert(devinfo->ver >= 9);
935   assert(bit_size == 16 || bit_size == 32);
936   assert(devinfo->ver >= 12 || bit_size == 32);
937
938   assert(exec_size > 0);
939   const unsigned msg_type = bit_size == 32 ?
940      GFX9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP :
941      GFX12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_FLOAT_OP;
942
943   const unsigned msg_control =
944      SET_BITS(atomic_op, 1, 0) |
945      SET_BITS(response_expected, 5, 5);
946
947   return brw_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
948                      msg_type, msg_control);
949}
950
951static inline uint32_t
952brw_dp_typed_atomic_desc(const struct intel_device_info *devinfo,
953                         unsigned exec_size,
954                         unsigned exec_group,
955                         unsigned atomic_op,
956                         bool response_expected)
957{
958   assert(exec_size > 0 || exec_group == 0);
959   assert(exec_group % 8 == 0);
960
961   unsigned msg_type;
962   if (devinfo->verx10 >= 75) {
963      if (exec_size == 0) {
964         msg_type = HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2;
965      } else {
966         msg_type = HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP;
967      }
968   } else {
969      /* SIMD4x2 typed surface R/W messages only exist on HSW+ */
970      assert(exec_size > 0);
971      msg_type = GFX7_DATAPORT_RC_TYPED_ATOMIC_OP;
972   }
973
974   const bool high_sample_mask = (exec_group / 8) % 2 == 1;
975
976   const unsigned msg_control =
977      SET_BITS(atomic_op, 3, 0) |
978      SET_BITS(high_sample_mask, 4, 4) |
979      SET_BITS(response_expected, 5, 5);
980
981   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
982}
983
984static inline uint32_t
985brw_dp_typed_surface_rw_desc(const struct intel_device_info *devinfo,
986                             unsigned exec_size,
987                             unsigned exec_group,
988                             unsigned num_channels,
989                             bool write)
990{
991   assert(exec_size > 0 || exec_group == 0);
992   assert(exec_group % 8 == 0);
993
994   /* Typed surface reads and writes don't support SIMD16 */
995   assert(exec_size <= 8);
996
997   unsigned msg_type;
998   if (write) {
999      if (devinfo->verx10 >= 75) {
1000         msg_type = HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE;
1001      } else {
1002         msg_type = GFX7_DATAPORT_RC_TYPED_SURFACE_WRITE;
1003      }
1004   } else {
1005      if (devinfo->verx10 >= 75) {
1006         msg_type = HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ;
1007      } else {
1008         msg_type = GFX7_DATAPORT_RC_TYPED_SURFACE_READ;
1009      }
1010   }
1011
1012   /* See also MDC_SG3 in the SKL PRM Vol 2d. */
1013   unsigned msg_control;
1014   if (devinfo->verx10 >= 75) {
1015      /* See also MDC_SG3 in the SKL PRM Vol 2d. */
1016      const unsigned slot_group = exec_size == 0 ? 0 : /* SIMD4x2 */
1017                                  1 + ((exec_group / 8) % 2);
1018
1019      msg_control =
1020         SET_BITS(brw_mdc_cmask(num_channels), 3, 0) |
1021         SET_BITS(slot_group, 5, 4);
1022   } else {
1023      /* SIMD4x2 typed surface R/W messages only exist on HSW+ */
1024      assert(exec_size > 0);
1025      const unsigned slot_group = ((exec_group / 8) % 2);
1026
1027      msg_control =
1028         SET_BITS(brw_mdc_cmask(num_channels), 3, 0) |
1029         SET_BITS(slot_group, 5, 5);
1030   }
1031
1032   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
1033}
1034
1035static inline uint32_t
1036brw_fb_desc(const struct intel_device_info *devinfo,
1037            unsigned binding_table_index,
1038            unsigned msg_type,
1039            unsigned msg_control)
1040{
1041   /* Prior to gen6, things are too inconsistent; use the fb_(read|write)_desc
1042    * helpers instead.
1043    */
1044   assert(devinfo->ver >= 6);
1045   const unsigned desc = SET_BITS(binding_table_index, 7, 0);
1046   if (devinfo->ver >= 7) {
1047      return (desc | SET_BITS(msg_control, 13, 8) |
1048              SET_BITS(msg_type, 17, 14));
1049   } else {
1050      return (desc | SET_BITS(msg_control, 12, 8) |
1051              SET_BITS(msg_type, 16, 13));
1052   }
1053}
1054
1055static inline unsigned
1056brw_fb_desc_binding_table_index(UNUSED const struct intel_device_info *devinfo,
1057                                uint32_t desc)
1058{
1059   return GET_BITS(desc, 7, 0);
1060}
1061
1062static inline uint32_t
1063brw_fb_desc_msg_control(const struct intel_device_info *devinfo, uint32_t desc)
1064{
1065   assert(devinfo->ver >= 6);
1066   if (devinfo->ver >= 7)
1067      return GET_BITS(desc, 13, 8);
1068   else
1069      return GET_BITS(desc, 12, 8);
1070}
1071
1072static inline unsigned
1073brw_fb_desc_msg_type(const struct intel_device_info *devinfo, uint32_t desc)
1074{
1075   assert(devinfo->ver >= 6);
1076   if (devinfo->ver >= 7)
1077      return GET_BITS(desc, 17, 14);
1078   else
1079      return GET_BITS(desc, 16, 13);
1080}
1081
1082static inline uint32_t
1083brw_fb_read_desc(const struct intel_device_info *devinfo,
1084                 unsigned binding_table_index,
1085                 unsigned msg_control,
1086                 unsigned exec_size,
1087                 bool per_sample)
1088{
1089   assert(devinfo->ver >= 9);
1090   assert(exec_size == 8 || exec_size == 16);
1091
1092   return brw_fb_desc(devinfo, binding_table_index,
1093                      GFX9_DATAPORT_RC_RENDER_TARGET_READ, msg_control) |
1094          SET_BITS(per_sample, 13, 13) |
1095          SET_BITS(exec_size == 8, 8, 8) /* Render Target Message Subtype */;
1096}
1097
1098static inline uint32_t
1099brw_fb_write_desc(const struct intel_device_info *devinfo,
1100                  unsigned binding_table_index,
1101                  unsigned msg_control,
1102                  bool last_render_target,
1103                  bool coarse_write)
1104{
1105   const unsigned msg_type =
1106      devinfo->ver >= 6 ?
1107      GFX6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE :
1108      BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
1109
1110   assert(devinfo->ver >= 10 || !coarse_write);
1111
1112   if (devinfo->ver >= 6) {
1113      return brw_fb_desc(devinfo, binding_table_index, msg_type, msg_control) |
1114             SET_BITS(last_render_target, 12, 12) |
1115             SET_BITS(coarse_write, 18, 18);
1116   } else {
1117      return (SET_BITS(binding_table_index, 7, 0) |
1118              SET_BITS(msg_control, 11, 8) |
1119              SET_BITS(last_render_target, 11, 11) |
1120              SET_BITS(msg_type, 14, 12));
1121   }
1122}
1123
1124static inline unsigned
1125brw_fb_write_desc_msg_type(const struct intel_device_info *devinfo,
1126                           uint32_t desc)
1127{
1128   if (devinfo->ver >= 6)
1129      return brw_fb_desc_msg_type(devinfo, desc);
1130   else
1131      return GET_BITS(desc, 14, 12);
1132}
1133
1134static inline unsigned
1135brw_fb_write_desc_msg_control(const struct intel_device_info *devinfo,
1136                              uint32_t desc)
1137{
1138   if (devinfo->ver >= 6)
1139      return brw_fb_desc_msg_control(devinfo, desc);
1140   else
1141      return GET_BITS(desc, 11, 8);
1142}
1143
1144static inline bool
1145brw_fb_write_desc_last_render_target(const struct intel_device_info *devinfo,
1146                                     uint32_t desc)
1147{
1148   if (devinfo->ver >= 6)
1149      return GET_BITS(desc, 12, 12);
1150   else
1151      return GET_BITS(desc, 11, 11);
1152}
1153
1154static inline bool
1155brw_fb_write_desc_write_commit(const struct intel_device_info *devinfo,
1156                               uint32_t desc)
1157{
1158   assert(devinfo->ver <= 6);
1159   if (devinfo->ver >= 6)
1160      return GET_BITS(desc, 17, 17);
1161   else
1162      return GET_BITS(desc, 15, 15);
1163}
1164
1165static inline bool
1166brw_fb_write_desc_coarse_write(const struct intel_device_info *devinfo,
1167                               uint32_t desc)
1168{
1169   assert(devinfo->ver >= 10);
1170   return GET_BITS(desc, 18, 18);
1171}
1172
1173static inline bool
1174lsc_opcode_has_cmask(enum lsc_opcode opcode)
1175{
1176   return opcode == LSC_OP_LOAD_CMASK || opcode == LSC_OP_STORE_CMASK;
1177}
1178
1179static inline bool
1180lsc_opcode_has_transpose(enum lsc_opcode opcode)
1181{
1182   return opcode == LSC_OP_LOAD || opcode == LSC_OP_STORE;
1183}
1184
1185static inline uint32_t
1186lsc_data_size_bytes(enum lsc_data_size data_size)
1187{
1188   switch (data_size) {
1189   case LSC_DATA_SIZE_D8:
1190      return 1;
1191   case LSC_DATA_SIZE_D16:
1192      return 2;
1193   case LSC_DATA_SIZE_D32:
1194   case LSC_DATA_SIZE_D8U32:
1195   case LSC_DATA_SIZE_D16U32:
1196   case LSC_DATA_SIZE_D16BF32:
1197      return 4;
1198   case LSC_DATA_SIZE_D64:
1199      return 8;
1200   default:
1201      unreachable("Unsupported data payload size.");
1202   }
1203}
1204
1205static inline uint32_t
1206lsc_addr_size_bytes(enum lsc_addr_size addr_size)
1207{
1208   switch (addr_size) {
1209   case LSC_ADDR_SIZE_A16: return 2;
1210   case LSC_ADDR_SIZE_A32: return 4;
1211   case LSC_ADDR_SIZE_A64: return 8;
1212   default:
1213      unreachable("Unsupported address size.");
1214   }
1215}
1216
1217static inline uint32_t
1218lsc_vector_length(enum lsc_vect_size vect_size)
1219{
1220   switch (vect_size) {
1221   case LSC_VECT_SIZE_V1: return 1;
1222   case LSC_VECT_SIZE_V2: return 2;
1223   case LSC_VECT_SIZE_V3: return 3;
1224   case LSC_VECT_SIZE_V4: return 4;
1225   case LSC_VECT_SIZE_V8: return 8;
1226   case LSC_VECT_SIZE_V16: return 16;
1227   case LSC_VECT_SIZE_V32: return 32;
1228   case LSC_VECT_SIZE_V64: return 64;
1229   default:
1230      unreachable("Unsupported size of vector");
1231   }
1232}
1233
1234static inline enum lsc_vect_size
1235lsc_vect_size(unsigned vect_size)
1236{
1237   switch(vect_size) {
1238   case 1:  return LSC_VECT_SIZE_V1;
1239   case 2:  return LSC_VECT_SIZE_V2;
1240   case 3:  return LSC_VECT_SIZE_V3;
1241   case 4:  return LSC_VECT_SIZE_V4;
1242   case 8:  return LSC_VECT_SIZE_V8;
1243   case 16: return LSC_VECT_SIZE_V16;
1244   case 32: return LSC_VECT_SIZE_V32;
1245   case 64: return LSC_VECT_SIZE_V64;
1246   default:
1247      unreachable("Unsupported vector size for dataport");
1248   }
1249}
1250
1251static inline uint32_t
1252lsc_msg_desc(UNUSED const struct intel_device_info *devinfo,
1253             enum lsc_opcode opcode, unsigned simd_size,
1254             enum lsc_addr_surface_type addr_type,
1255             enum lsc_addr_size addr_sz, unsigned num_coordinates,
1256             enum lsc_data_size data_sz, unsigned num_channels,
1257             bool transpose, unsigned cache_ctrl, bool has_dest)
1258{
1259   assert(devinfo->has_lsc);
1260
1261   unsigned dest_length = !has_dest ? 0 :
1262      DIV_ROUND_UP(lsc_data_size_bytes(data_sz) * num_channels * simd_size,
1263                   REG_SIZE);
1264
1265   unsigned src0_length =
1266      DIV_ROUND_UP(lsc_addr_size_bytes(addr_sz) * num_coordinates * simd_size,
1267                   REG_SIZE);
1268
1269   assert(!transpose || lsc_opcode_has_transpose(opcode));
1270
1271   unsigned msg_desc =
1272      SET_BITS(opcode, 5, 0) |
1273      SET_BITS(addr_sz, 8, 7) |
1274      SET_BITS(data_sz, 11, 9) |
1275      SET_BITS(transpose, 15, 15) |
1276      SET_BITS(cache_ctrl, 19, 17) |
1277      SET_BITS(dest_length, 24, 20) |
1278      SET_BITS(src0_length, 28, 25) |
1279      SET_BITS(addr_type, 30, 29);
1280
1281   if (lsc_opcode_has_cmask(opcode))
1282      msg_desc |= SET_BITS(lsc_cmask(num_channels), 15, 12);
1283   else
1284      msg_desc |= SET_BITS(lsc_vect_size(num_channels), 14, 12);
1285
1286   return msg_desc;
1287}
1288
1289static inline enum lsc_opcode
1290lsc_msg_desc_opcode(UNUSED const struct intel_device_info *devinfo,
1291                    uint32_t desc)
1292{
1293   assert(devinfo->has_lsc);
1294   return (enum lsc_opcode) GET_BITS(desc, 5, 0);
1295}
1296
1297static inline enum lsc_addr_size
1298lsc_msg_desc_addr_size(UNUSED const struct intel_device_info *devinfo,
1299                       uint32_t desc)
1300{
1301   assert(devinfo->has_lsc);
1302   return (enum lsc_addr_size) GET_BITS(desc, 8, 7);
1303}
1304
1305static inline enum lsc_data_size
1306lsc_msg_desc_data_size(UNUSED const struct intel_device_info *devinfo,
1307                       uint32_t desc)
1308{
1309   assert(devinfo->has_lsc);
1310   return (enum lsc_data_size) GET_BITS(desc, 11, 9);
1311}
1312
1313static inline enum lsc_vect_size
1314lsc_msg_desc_vect_size(UNUSED const struct intel_device_info *devinfo,
1315                       uint32_t desc)
1316{
1317   assert(devinfo->has_lsc);
1318   assert(!lsc_opcode_has_cmask(lsc_msg_desc_opcode(devinfo, desc)));
1319   return (enum lsc_vect_size) GET_BITS(desc, 14, 12);
1320}
1321
1322static inline enum lsc_cmask
1323lsc_msg_desc_cmask(UNUSED const struct intel_device_info *devinfo,
1324                   uint32_t desc)
1325{
1326   assert(devinfo->has_lsc);
1327   assert(lsc_opcode_has_cmask(lsc_msg_desc_opcode(devinfo, desc)));
1328   return (enum lsc_cmask) GET_BITS(desc, 15, 12);
1329}
1330
1331static inline bool
1332lsc_msg_desc_transpose(UNUSED const struct intel_device_info *devinfo,
1333                       uint32_t desc)
1334{
1335   assert(devinfo->has_lsc);
1336   return GET_BITS(desc, 15, 15);
1337}
1338
1339static inline unsigned
1340lsc_msg_desc_cache_ctrl(UNUSED const struct intel_device_info *devinfo,
1341                        uint32_t desc)
1342{
1343   assert(devinfo->has_lsc);
1344   return GET_BITS(desc, 19, 17);
1345}
1346
1347static inline unsigned
1348lsc_msg_desc_dest_len(const struct intel_device_info *devinfo,
1349                      uint32_t desc)
1350{
1351   assert(devinfo->has_lsc);
1352   return GET_BITS(desc, 24, 20);
1353}
1354
1355static inline unsigned
1356lsc_msg_desc_src0_len(const struct intel_device_info *devinfo,
1357                      uint32_t desc)
1358{
1359   assert(devinfo->has_lsc);
1360   return GET_BITS(desc, 28, 25);
1361}
1362
1363static inline enum lsc_addr_surface_type
1364lsc_msg_desc_addr_type(UNUSED const struct intel_device_info *devinfo,
1365                       uint32_t desc)
1366{
1367   assert(devinfo->has_lsc);
1368   return (enum lsc_addr_surface_type) GET_BITS(desc, 30, 29);
1369}
1370
1371static inline uint32_t
1372lsc_fence_msg_desc(UNUSED const struct intel_device_info *devinfo,
1373                   enum lsc_fence_scope scope,
1374                   enum lsc_flush_type flush_type,
1375                   bool route_to_lsc)
1376{
1377   assert(devinfo->has_lsc);
1378   return SET_BITS(LSC_OP_FENCE, 5, 0) |
1379          SET_BITS(LSC_ADDR_SIZE_A32, 8, 7) |
1380          SET_BITS(scope, 11, 9) |
1381          SET_BITS(flush_type, 14, 12) |
1382          SET_BITS(route_to_lsc, 18, 18) |
1383          SET_BITS(LSC_ADDR_SURFTYPE_FLAT, 30, 29);
1384}
1385
1386static inline enum lsc_fence_scope
1387lsc_fence_msg_desc_scope(UNUSED const struct intel_device_info *devinfo,
1388                         uint32_t desc)
1389{
1390   assert(devinfo->has_lsc);
1391   return (enum lsc_fence_scope) GET_BITS(desc, 11, 9);
1392}
1393
1394static inline enum lsc_flush_type
1395lsc_fence_msg_desc_flush_type(UNUSED const struct intel_device_info *devinfo,
1396                              uint32_t desc)
1397{
1398   assert(devinfo->has_lsc);
1399   return (enum lsc_flush_type) GET_BITS(desc, 14, 12);
1400}
1401
1402static inline enum lsc_backup_fence_routing
1403lsc_fence_msg_desc_backup_routing(UNUSED const struct intel_device_info *devinfo,
1404                                  uint32_t desc)
1405{
1406   assert(devinfo->has_lsc);
1407   return (enum lsc_backup_fence_routing) GET_BITS(desc, 18, 18);
1408}
1409
1410static inline uint32_t
1411lsc_bti_ex_desc(const struct intel_device_info *devinfo, unsigned bti)
1412{
1413   assert(devinfo->has_lsc);
1414   return SET_BITS(bti, 31, 24) |
1415          SET_BITS(0, 23, 12);  /* base offset */
1416}
1417
1418static inline unsigned
1419lsc_bti_ex_desc_base_offset(const struct intel_device_info *devinfo,
1420                            uint32_t ex_desc)
1421{
1422   assert(devinfo->has_lsc);
1423   return GET_BITS(ex_desc, 23, 12);
1424}
1425
1426static inline unsigned
1427lsc_bti_ex_desc_index(const struct intel_device_info *devinfo,
1428                      uint32_t ex_desc)
1429{
1430   assert(devinfo->has_lsc);
1431   return GET_BITS(ex_desc, 31, 24);
1432}
1433
1434static inline unsigned
1435lsc_flat_ex_desc_base_offset(const struct intel_device_info *devinfo,
1436                             uint32_t ex_desc)
1437{
1438   assert(devinfo->has_lsc);
1439   return GET_BITS(ex_desc, 31, 12);
1440}
1441
1442static inline uint32_t
1443lsc_bss_ex_desc(const struct intel_device_info *devinfo,
1444                unsigned surface_state_index)
1445{
1446   assert(devinfo->has_lsc);
1447   return SET_BITS(surface_state_index, 31, 6);
1448}
1449
1450static inline unsigned
1451lsc_bss_ex_desc_index(const struct intel_device_info *devinfo,
1452                      uint32_t ex_desc)
1453{
1454   assert(devinfo->has_lsc);
1455   return GET_BITS(ex_desc, 31, 6);
1456}
1457
/**
 * Encode an execution size as a one-bit SIMD mode: 0 for 8 channels,
 * 1 for 16.  Any other size trips the assertion.
 */
static inline uint32_t
brw_mdc_sm2(unsigned exec_size)
{
   assert(exec_size == 8 || exec_size == 16);

   if (exec_size > 8)
      return 1;

   return 0;
}
1464
/**
 * Decode a one-bit SIMD mode back to an execution size: 0 gives 8,
 * 1 gives 16.  Values above 1 trip the assertion.
 */
static inline uint32_t
brw_mdc_sm2_exec_size(uint32_t sm2)
{
   assert(sm2 <= 1);

   const uint32_t simd8 = 8;
   return simd8 << sm2;
}
1471
1472static inline uint32_t
1473brw_btd_spawn_desc(ASSERTED const struct intel_device_info *devinfo,
1474                   unsigned exec_size, unsigned msg_type)
1475{
1476   assert(devinfo->has_ray_tracing);
1477
1478   return SET_BITS(0, 19, 19) | /* No header */
1479          SET_BITS(msg_type, 17, 14) |
1480          SET_BITS(brw_mdc_sm2(exec_size), 8, 8);
1481}
1482
1483static inline uint32_t
1484brw_btd_spawn_msg_type(UNUSED const struct intel_device_info *devinfo,
1485                       uint32_t desc)
1486{
1487   return GET_BITS(desc, 17, 14);
1488}
1489
1490static inline uint32_t
1491brw_btd_spawn_exec_size(UNUSED const struct intel_device_info *devinfo,
1492                        uint32_t desc)
1493{
1494   return brw_mdc_sm2_exec_size(GET_BITS(desc, 8, 8));
1495}
1496
1497static inline uint32_t
1498brw_rt_trace_ray_desc(ASSERTED const struct intel_device_info *devinfo,
1499                      unsigned exec_size)
1500{
1501   assert(devinfo->has_ray_tracing);
1502
1503   return SET_BITS(0, 19, 19) | /* No header */
1504          SET_BITS(0, 17, 14) | /* Message type */
1505          SET_BITS(brw_mdc_sm2(exec_size), 8, 8);
1506}
1507
1508static inline uint32_t
1509brw_rt_trace_ray_desc_exec_size(UNUSED const struct intel_device_info *devinfo,
1510                                uint32_t desc)
1511{
1512   return brw_mdc_sm2_exec_size(GET_BITS(desc, 8, 8));
1513}
1514
1515/**
1516 * Construct a message descriptor immediate with the specified pixel
1517 * interpolator function controls.
1518 */
1519static inline uint32_t
1520brw_pixel_interp_desc(UNUSED const struct intel_device_info *devinfo,
1521                      unsigned msg_type,
1522                      bool noperspective,
1523                      bool coarse_pixel_rate,
1524                      unsigned simd_mode,
1525                      unsigned slot_group)
1526{
1527   assert(devinfo->ver >= 10 || !coarse_pixel_rate);
1528   return (SET_BITS(slot_group, 11, 11) |
1529           SET_BITS(msg_type, 13, 12) |
1530           SET_BITS(!!noperspective, 14, 14) |
1531           SET_BITS(coarse_pixel_rate, 15, 15) |
1532           SET_BITS(simd_mode, 16, 16));
1533}
1534
/* URB write emitter.  Declaration only; the definition is not visible from
 * this header.
 */
void
brw_urb_WRITE(struct brw_codegen *p, struct brw_reg dest,
              unsigned msg_reg_nr, struct brw_reg src0,
              enum brw_urb_write_flags flags, unsigned msg_length,
              unsigned response_length, unsigned offset, unsigned swizzle);
1544
/**
 * Send message to shared unit \p sfid with a possibly indirect descriptor \p
 * desc.  If \p desc is not an immediate it will be transparently loaded to an
 * address register using an OR instruction.
 */
void
brw_send_indirect_message(struct brw_codegen *p, unsigned sfid,
                          struct brw_reg dst, struct brw_reg payload,
                          struct brw_reg desc, unsigned desc_imm, bool eot);

/* Variant taking two payloads (\p payload0, \p payload1) and an extended
 * descriptor (\p ex_desc, \p ex_desc_imm) in addition to the regular one.
 * Declaration only; the definition is not visible from this header.
 */
void
brw_send_indirect_split_message(struct brw_codegen *p, unsigned sfid,
                                struct brw_reg dst,
                                struct brw_reg payload0,
                                struct brw_reg payload1,
                                struct brw_reg desc, unsigned desc_imm,
                                struct brw_reg ex_desc, unsigned ex_desc_imm,
                                bool eot);
1570
1571void brw_ff_sync(struct brw_codegen *p,
1572		   struct brw_reg dest,
1573		   unsigned msg_reg_nr,
1574		   struct brw_reg src0,
1575		   bool allocate,
1576		   unsigned response_length,
1577		   bool eot);
1578
1579void brw_svb_write(struct brw_codegen *p,
1580                   struct brw_reg dest,
1581                   unsigned msg_reg_nr,
1582                   struct brw_reg src0,
1583                   unsigned binding_table_index,
1584                   bool   send_commit_msg);
1585
1586brw_inst *brw_fb_WRITE(struct brw_codegen *p,
1587                       struct brw_reg payload,
1588                       struct brw_reg implied_header,
1589                       unsigned msg_control,
1590                       unsigned binding_table_index,
1591                       unsigned msg_length,
1592                       unsigned response_length,
1593                       bool eot,
1594                       bool last_render_target,
1595                       bool header_present);
1596
1597brw_inst *gfx9_fb_READ(struct brw_codegen *p,
1598                       struct brw_reg dst,
1599                       struct brw_reg payload,
1600                       unsigned binding_table_index,
1601                       unsigned msg_length,
1602                       unsigned response_length,
1603                       bool per_sample);
1604
/* Sampler message emitter.  Declaration only; the definition is not visible
 * from this header.
 */
void
brw_SAMPLE(struct brw_codegen *p, struct brw_reg dest, unsigned msg_reg_nr,
           struct brw_reg src0, unsigned binding_table_index,
           unsigned sampler, unsigned msg_type, unsigned response_length,
           unsigned msg_length, unsigned header_present, unsigned simd_mode,
           unsigned return_format);

/* Declaration only; presumably patches the sampler state pointer in
 * \p header according to \p sampler_index -- confirm against the definition.
 */
void
brw_adjust_sampler_state_pointer(struct brw_codegen *p,
                                 struct brw_reg header,
                                 struct brw_reg sampler_index);
1621
/* Math emitters.  The gfx4 form takes a message register number and a
 * precision; the gfx6 form takes two sources instead.  Declarations only;
 * the definitions are not visible from this header.
 */
void
gfx4_math(struct brw_codegen *p, struct brw_reg dest, unsigned function,
          unsigned msg_reg_nr, struct brw_reg src, unsigned precision);

void
gfx6_math(struct brw_codegen *p, struct brw_reg dest, unsigned function,
          struct brw_reg src0, struct brw_reg src1);
1634
/* OWord block, scratch and shader-time helpers.  Declarations only; the
 * definitions are not visible from this header.
 */
void
brw_oword_block_read(struct brw_codegen *p, struct brw_reg dest,
                     struct brw_reg mrf, uint32_t offset,
                     uint32_t bind_table_index);

unsigned
brw_scratch_surface_idx(const struct brw_codegen *p);

void
brw_oword_block_read_scratch(struct brw_codegen *p, struct brw_reg dest,
                             struct brw_reg mrf, int num_regs,
                             unsigned offset);

void
brw_oword_block_write_scratch(struct brw_codegen *p, struct brw_reg mrf,
                              int num_regs, unsigned offset);

void
gfx7_block_read_scratch(struct brw_codegen *p, struct brw_reg dest,
                        int num_regs, unsigned offset);

void
brw_shader_time_add(struct brw_codegen *p, struct brw_reg payload,
                    uint32_t surf_index);
1662
1663/**
1664 * Return the generation-specific jump distance scaling factor.
1665 *
1666 * Given the number of instructions to jump, we need to scale by
1667 * some number to obtain the actual jump distance to program in an
1668 * instruction.
1669 */
1670static inline unsigned
1671brw_jump_scale(const struct intel_device_info *devinfo)
1672{
1673   /* Broadwell measures jump targets in bytes. */
1674   if (devinfo->ver >= 8)
1675      return 16;
1676
1677   /* Ironlake and later measure jump targets in 64-bit data chunks (in order
1678    * (to support compaction), so each 128-bit instruction requires 2 chunks.
1679    */
1680   if (devinfo->ver >= 5)
1681      return 2;
1682
1683   /* Gfx4 simply uses the number of 128-bit instructions. */
1684   return 1;
1685}
1686
1687void brw_barrier(struct brw_codegen *p, struct brw_reg src);
1688
1689/* If/else/endif.  Works by manipulating the execution flags on each
1690 * channel.
1691 */
1692brw_inst *brw_IF(struct brw_codegen *p, unsigned execute_size);
1693brw_inst *gfx6_IF(struct brw_codegen *p, enum brw_conditional_mod conditional,
1694                  struct brw_reg src0, struct brw_reg src1);
1695
1696void brw_ELSE(struct brw_codegen *p);
1697void brw_ENDIF(struct brw_codegen *p);
1698
1699/* DO/WHILE loops:
1700 */
1701brw_inst *brw_DO(struct brw_codegen *p, unsigned execute_size);
1702
1703brw_inst *brw_WHILE(struct brw_codegen *p);
1704
1705brw_inst *brw_BREAK(struct brw_codegen *p);
1706brw_inst *brw_CONT(struct brw_codegen *p);
1707brw_inst *brw_HALT(struct brw_codegen *p);
1708
1709/* Forward jumps:
1710 */
1711void brw_land_fwd_jump(struct brw_codegen *p, int jmp_insn_idx);
1712
1713brw_inst *brw_JMPI(struct brw_codegen *p, struct brw_reg index,
1714                   unsigned predicate_control);
1715
1716void brw_NOP(struct brw_codegen *p);
1717
1718void brw_WAIT(struct brw_codegen *p);
1719
1720void brw_SYNC(struct brw_codegen *p, enum tgl_sync_function func);
1721
/* Comparison emitters.
 *
 * NOTE(review): the previous comment here read "there is never a
 * destination, execution size will be taken from src0", yet both
 * prototypes take a \p dest register -- confirm against the definitions.
 */
void
brw_CMP(struct brw_codegen *p, struct brw_reg dest, unsigned conditional,
        struct brw_reg src0, struct brw_reg src1);

void
brw_CMPN(struct brw_codegen *p, struct brw_reg dest, unsigned conditional,
         struct brw_reg src0, struct brw_reg src1);
1736
1737void
1738brw_untyped_atomic(struct brw_codegen *p,
1739                   struct brw_reg dst,
1740                   struct brw_reg payload,
1741                   struct brw_reg surface,
1742                   unsigned atomic_op,
1743                   unsigned msg_length,
1744                   bool response_expected,
1745                   bool header_present);
1746
1747void
1748brw_untyped_surface_read(struct brw_codegen *p,
1749                         struct brw_reg dst,
1750                         struct brw_reg payload,
1751                         struct brw_reg surface,
1752                         unsigned msg_length,
1753                         unsigned num_channels);
1754
1755void
1756brw_untyped_surface_write(struct brw_codegen *p,
1757                          struct brw_reg payload,
1758                          struct brw_reg surface,
1759                          unsigned msg_length,
1760                          unsigned num_channels,
1761                          bool header_present);
1762
1763void
1764brw_memory_fence(struct brw_codegen *p,
1765                 struct brw_reg dst,
1766                 struct brw_reg src,
1767                 enum opcode send_op,
1768                 enum brw_message_target sfid,
1769                 bool commit_enable,
1770                 unsigned bti);
1771
1772void
1773brw_pixel_interpolator_query(struct brw_codegen *p,
1774                             struct brw_reg dest,
1775                             struct brw_reg mrf,
1776                             bool noperspective,
1777                             bool coarse_pixel_rate,
1778                             unsigned mode,
1779                             struct brw_reg data,
1780                             unsigned msg_length,
1781                             unsigned response_length);
1782
1783void
1784brw_find_live_channel(struct brw_codegen *p,
1785                      struct brw_reg dst,
1786                      struct brw_reg mask);
1787
1788void
1789brw_broadcast(struct brw_codegen *p,
1790              struct brw_reg dst,
1791              struct brw_reg src,
1792              struct brw_reg idx);
1793
1794void
1795brw_float_controls_mode(struct brw_codegen *p,
1796                        unsigned mode, unsigned mask);
1797
1798void
1799brw_update_reloc_imm(const struct intel_device_info *devinfo,
1800                     brw_inst *inst,
1801                     uint32_t value);
1802
1803void
1804brw_MOV_reloc_imm(struct brw_codegen *p,
1805                  struct brw_reg dst,
1806                  enum brw_reg_type src_type,
1807                  uint32_t id);
1808
1809/***********************************************************************
1810 * brw_eu_util.c:
1811 */
1812
1813void brw_copy_indirect_to_indirect(struct brw_codegen *p,
1814				   struct brw_indirect dst_ptr,
1815				   struct brw_indirect src_ptr,
1816				   unsigned count);
1817
1818void brw_copy_from_indirect(struct brw_codegen *p,
1819			    struct brw_reg dst,
1820			    struct brw_indirect ptr,
1821			    unsigned count);
1822
1823void brw_copy4(struct brw_codegen *p,
1824	       struct brw_reg dst,
1825	       struct brw_reg src,
1826	       unsigned count);
1827
1828void brw_copy8(struct brw_codegen *p,
1829	       struct brw_reg dst,
1830	       struct brw_reg src,
1831	       unsigned count);
1832
1833void brw_math_invert( struct brw_codegen *p,
1834		      struct brw_reg dst,
1835		      struct brw_reg src);
1836
1837void brw_set_src1(struct brw_codegen *p, brw_inst *insn, struct brw_reg reg);
1838
1839void brw_set_desc_ex(struct brw_codegen *p, brw_inst *insn,
1840                     unsigned desc, unsigned ex_desc);
1841
1842static inline void
1843brw_set_desc(struct brw_codegen *p, brw_inst *insn, unsigned desc)
1844{
1845   brw_set_desc_ex(p, insn, desc, 0);
1846}
1847
1848void brw_set_uip_jip(struct brw_codegen *p, int start_offset);
1849
1850enum brw_conditional_mod brw_negate_cmod(enum brw_conditional_mod cmod);
1851enum brw_conditional_mod brw_swap_cmod(enum brw_conditional_mod cmod);
1852
1853/* brw_eu_compact.c */
1854void brw_compact_instructions(struct brw_codegen *p, int start_offset,
1855                              struct disasm_info *disasm);
1856void brw_uncompact_instruction(const struct intel_device_info *devinfo,
1857                               brw_inst *dst, brw_compact_inst *src);
1858bool brw_try_compact_instruction(const struct intel_device_info *devinfo,
1859                                 brw_compact_inst *dst, const brw_inst *src);
1860
1861void brw_debug_compact_uncompact(const struct intel_device_info *devinfo,
1862                                 brw_inst *orig, brw_inst *uncompacted);
1863
1864/* brw_eu_validate.c */
1865bool brw_validate_instruction(const struct intel_device_info *devinfo,
1866                              const brw_inst *inst, int offset,
1867                              struct disasm_info *disasm);
1868bool brw_validate_instructions(const struct intel_device_info *devinfo,
1869                               const void *assembly, int start_offset, int end_offset,
1870                               struct disasm_info *disasm);
1871
1872static inline int
1873next_offset(const struct intel_device_info *devinfo, void *store, int offset)
1874{
1875   brw_inst *insn = (brw_inst *)((char *)store + offset);
1876
1877   if (brw_inst_cmpt_control(devinfo, insn))
1878      return offset + 8;
1879   else
1880      return offset + 16;
1881}
1882
/**
 * Description of one opcode, as returned by brw_opcode_desc() and
 * brw_opcode_desc_from_hw().
 */
struct opcode_desc {
   /* IR-level opcode; brw_opcode_decode() casts it to enum opcode. */
   unsigned ir;
   /* Hardware encoding; this is what brw_opcode_encode() returns. */
   unsigned hw;
   /* Presumably the mnemonic -- confirm against the table definition. */
   const char *name;
   /* Source count; is_3src() compares it against 3. */
   int nsrc;
   /* Presumably the destination count -- confirm. */
   int ndst;
   /* NOTE(review): looks like the set of generations supporting the opcode
    * -- confirm the encoding against the table definition.
    */
   int gfx_vers;
};
1891
/* Look up an opcode description by IR opcode.  is_3src() treats a NULL
 * result as "no description".
 */
const struct opcode_desc *brw_opcode_desc(
   const struct intel_device_info *devinfo, enum opcode opcode);

/* Look up an opcode description by hardware encoding.  brw_opcode_decode()
 * treats a NULL result as an illegal opcode.
 */
const struct opcode_desc *brw_opcode_desc_from_hw(
   const struct intel_device_info *devinfo, unsigned hw);
1897
1898static inline unsigned
1899brw_opcode_encode(const struct intel_device_info *devinfo, enum opcode opcode)
1900{
1901   return brw_opcode_desc(devinfo, opcode)->hw;
1902}
1903
1904static inline enum opcode
1905brw_opcode_decode(const struct intel_device_info *devinfo, unsigned hw)
1906{
1907   const struct opcode_desc *desc = brw_opcode_desc_from_hw(devinfo, hw);
1908   return desc ? (enum opcode)desc->ir : BRW_OPCODE_ILLEGAL;
1909}
1910
1911static inline void
1912brw_inst_set_opcode(const struct intel_device_info *devinfo,
1913                    brw_inst *inst, enum opcode opcode)
1914{
1915   brw_inst_set_hw_opcode(devinfo, inst, brw_opcode_encode(devinfo, opcode));
1916}
1917
1918static inline enum opcode
1919brw_inst_opcode(const struct intel_device_info *devinfo, const brw_inst *inst)
1920{
1921   return brw_opcode_decode(devinfo, brw_inst_hw_opcode(devinfo, inst));
1922}
1923
1924static inline bool
1925is_3src(const struct intel_device_info *devinfo, enum opcode opcode)
1926{
1927   const struct opcode_desc *desc = brw_opcode_desc(devinfo, opcode);
1928   return desc && desc->nsrc == 3;
1929}
1930
/** Maximum SEND message length */
#define BRW_MAX_MSG_LENGTH 15

/** First MRF register used by spills: 21 when \p gen is 6, 13 otherwise */
#define FIRST_SPILL_MRF(gen) ((gen) == 6 ? 21 : 13)

/** First MRF register used by pull loads: 16 when \p gen is 6, 13 otherwise */
#define FIRST_PULL_LOAD_MRF(gen) ((gen) == 6 ? 16 : 13)
1939
1940#ifdef __cplusplus
1941}
1942#endif
1943
1944#endif
1945