brw_eu.h revision b8e80941
1/*
2 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28  * Authors:
29  *   Keith Whitwell <keithw@vmware.com>
30  */
31
32
33#ifndef BRW_EU_H
34#define BRW_EU_H
35
36#include <stdbool.h>
37#include <stdio.h>
38#include "brw_inst.h"
39#include "brw_eu_defines.h"
40#include "brw_reg.h"
41#include "brw_disasm_info.h"
42
43#ifdef __cplusplus
44extern "C" {
45#endif
46
/* Number of entries in the brw_codegen::stack instruction-state array. */
#define BRW_EU_MAX_INSN_STACK 5
48
49struct brw_insn_state {
50   /* One of BRW_EXECUTE_* */
51   unsigned exec_size:3;
52
53   /* Group in units of channels */
54   unsigned group:5;
55
56   /* Compression control on gen4-5 */
57   bool compressed:1;
58
59   /* One of BRW_MASK_* */
60   unsigned mask_control:1;
61
62   bool saturate:1;
63
64   /* One of BRW_ALIGN_* */
65   unsigned access_mode:1;
66
67   /* One of BRW_PREDICATE_* */
68   enum brw_predicate predicate:4;
69
70   bool pred_inv:1;
71
72   /* Flag subreg.  Bottom bit is subreg, top bit is reg */
73   unsigned flag_subreg:2;
74
75   bool acc_wr_control:1;
76};
77
78
/* Shorthand for a pointer to the most recently emitted instruction.  It lets
 * callers set bits on that instruction without creating a temporary variable
 * to hold the emitted instruction.
 *
 * The expansion refers to a variable named `p` (a struct brw_codegen *) that
 * must be in scope at the point of use, and it indexes store[nr_insn - 1], so
 * it is only meaningful once at least one instruction has been emitted.
 */
#define brw_last_inst (&p->store[p->nr_insn - 1])
84
85struct brw_codegen {
86   brw_inst *store;
87   int store_size;
88   unsigned nr_insn;
89   unsigned int next_insn_offset;
90
91   void *mem_ctx;
92
93   /* Allow clients to push/pop instruction state:
94    */
95   struct brw_insn_state stack[BRW_EU_MAX_INSN_STACK];
96   struct brw_insn_state *current;
97
98   /** Whether or not the user wants automatic exec sizes
99    *
100    * If true, codegen will try to automatically infer the exec size of an
101    * instruction from the width of the destination register.  If false, it
102    * will take whatever is set by brw_set_default_exec_size verbatim.
103    *
104    * This is set to true by default in brw_init_codegen.
105    */
106   bool automatic_exec_sizes;
107
108   bool single_program_flow;
109   const struct gen_device_info *devinfo;
110
111   /* Control flow stacks:
112    * - if_stack contains IF and ELSE instructions which must be patched
113    *   (and popped) once the matching ENDIF instruction is encountered.
114    *
115    *   Just store the instruction pointer(an index).
116    */
117   int *if_stack;
118   int if_stack_depth;
119   int if_stack_array_size;
120
121   /**
122    * loop_stack contains the instruction pointers of the starts of loops which
123    * must be patched (and popped) once the matching WHILE instruction is
124    * encountered.
125    */
126   int *loop_stack;
127   /**
128    * pre-gen6, the BREAK and CONT instructions had to tell how many IF/ENDIF
129    * blocks they were popping out of, to fix up the mask stack.  This tracks
130    * the IF/ENDIF nesting in each current nested loop level.
131    */
132   int *if_depth_in_loop;
133   int loop_stack_depth;
134   int loop_stack_array_size;
135};
136
137void brw_pop_insn_state( struct brw_codegen *p );
138void brw_push_insn_state( struct brw_codegen *p );
139unsigned brw_get_default_exec_size(struct brw_codegen *p);
140unsigned brw_get_default_group(struct brw_codegen *p);
141unsigned brw_get_default_access_mode(struct brw_codegen *p);
142void brw_set_default_exec_size(struct brw_codegen *p, unsigned value);
143void brw_set_default_mask_control( struct brw_codegen *p, unsigned value );
144void brw_set_default_saturate( struct brw_codegen *p, bool enable );
145void brw_set_default_access_mode( struct brw_codegen *p, unsigned access_mode );
146void brw_inst_set_compression(const struct gen_device_info *devinfo,
147                              brw_inst *inst, bool on);
148void brw_set_default_compression(struct brw_codegen *p, bool on);
149void brw_inst_set_group(const struct gen_device_info *devinfo,
150                        brw_inst *inst, unsigned group);
151void brw_set_default_group(struct brw_codegen *p, unsigned group);
152void brw_set_default_compression_control(struct brw_codegen *p, enum brw_compression c);
153void brw_set_default_predicate_control( struct brw_codegen *p, unsigned pc );
154void brw_set_default_predicate_inverse(struct brw_codegen *p, bool predicate_inverse);
155void brw_set_default_flag_reg(struct brw_codegen *p, int reg, int subreg);
156void brw_set_default_acc_write_control(struct brw_codegen *p, unsigned value);
157
/* Codegen set-up, disassembly and program retrieval entry points.  Only the
 * declarations live in this header.
 */
void brw_init_codegen(const struct gen_device_info *,
                      struct brw_codegen *p,
                      void *mem_ctx);
int brw_disassemble_inst(FILE *file,
                         const struct gen_device_info *devinfo,
                         const struct brw_inst *inst,
                         bool is_compacted);
void brw_disassemble(const struct gen_device_info *devinfo,
                     const void *assembly,
                     int start,
                     int end,
                     FILE *out);
const unsigned *brw_get_program(struct brw_codegen *p, unsigned *sz);
165
166brw_inst *brw_next_insn(struct brw_codegen *p, unsigned opcode);
167void brw_set_dest(struct brw_codegen *p, brw_inst *insn, struct brw_reg dest);
168void brw_set_src0(struct brw_codegen *p, brw_inst *insn, struct brw_reg reg);
169
/* Declaration only; \p src is passed by pointer, so the callee may update it
 * in place.
 */
void gen6_resolve_implied_move(struct brw_codegen *p,
                               struct brw_reg *src,
                               unsigned msg_reg_nr);
173
174/* Helpers for regular instructions:
175 */
176#define ALU1(OP)				\
177brw_inst *brw_##OP(struct brw_codegen *p,	\
178	      struct brw_reg dest,		\
179	      struct brw_reg src0);
180
181#define ALU2(OP)				\
182brw_inst *brw_##OP(struct brw_codegen *p,	\
183	      struct brw_reg dest,		\
184	      struct brw_reg src0,		\
185	      struct brw_reg src1);
186
187#define ALU3(OP)				\
188brw_inst *brw_##OP(struct brw_codegen *p,	\
189	      struct brw_reg dest,		\
190	      struct brw_reg src0,		\
191	      struct brw_reg src1,		\
192	      struct brw_reg src2);
193
194#define ROUND(OP) \
195void brw_##OP(struct brw_codegen *p, struct brw_reg dest, struct brw_reg src0);
196
197ALU1(MOV)
198ALU2(SEL)
199ALU1(NOT)
200ALU2(AND)
201ALU2(OR)
202ALU2(XOR)
203ALU2(SHR)
204ALU2(SHL)
205ALU1(DIM)
206ALU2(ASR)
207ALU3(CSEL)
208ALU1(F32TO16)
209ALU1(F16TO32)
210ALU2(ADD)
211ALU2(AVG)
212ALU2(MUL)
213ALU1(FRC)
214ALU1(RNDD)
215ALU2(MAC)
216ALU2(MACH)
217ALU1(LZD)
218ALU2(DP4)
219ALU2(DPH)
220ALU2(DP3)
221ALU2(DP2)
222ALU2(LINE)
223ALU2(PLN)
224ALU3(MAD)
225ALU3(LRP)
226ALU1(BFREV)
227ALU3(BFE)
228ALU2(BFI1)
229ALU3(BFI2)
230ALU1(FBH)
231ALU1(FBL)
232ALU1(CBIT)
233ALU2(ADDC)
234ALU2(SUBB)
235ALU2(MAC)
236
237ROUND(RNDZ)
238ROUND(RNDE)
239
240#undef ALU1
241#undef ALU2
242#undef ALU3
243#undef ROUND
244
245
246/* Helpers for SEND instruction:
247 */
248
249/**
250 * Construct a message descriptor immediate with the specified common
251 * descriptor controls.
252 */
253static inline uint32_t
254brw_message_desc(const struct gen_device_info *devinfo,
255                 unsigned msg_length,
256                 unsigned response_length,
257                 bool header_present)
258{
259   if (devinfo->gen >= 5) {
260      return (SET_BITS(msg_length, 28, 25) |
261              SET_BITS(response_length, 24, 20) |
262              SET_BITS(header_present, 19, 19));
263   } else {
264      return (SET_BITS(msg_length, 23, 20) |
265              SET_BITS(response_length, 19, 16));
266   }
267}
268
269static inline unsigned
270brw_message_desc_mlen(const struct gen_device_info *devinfo, uint32_t desc)
271{
272   if (devinfo->gen >= 5)
273      return GET_BITS(desc, 28, 25);
274   else
275      return GET_BITS(desc, 23, 20);
276}
277
278static inline unsigned
279brw_message_desc_rlen(const struct gen_device_info *devinfo, uint32_t desc)
280{
281   if (devinfo->gen >= 5)
282      return GET_BITS(desc, 24, 20);
283   else
284      return GET_BITS(desc, 19, 16);
285}
286
287static inline bool
288brw_message_desc_header_present(MAYBE_UNUSED const struct gen_device_info *devinfo,
289                                uint32_t desc)
290{
291   assert(devinfo->gen >= 5);
292   return GET_BITS(desc, 19, 19);
293}
294
295static inline unsigned
296brw_message_ex_desc(UNUSED const struct gen_device_info *devinfo,
297                    unsigned ex_msg_length)
298{
299   return SET_BITS(ex_msg_length, 9, 6);
300}
301
302static inline unsigned
303brw_message_ex_desc_ex_mlen(UNUSED const struct gen_device_info *devinfo,
304                            uint32_t ex_desc)
305{
306   return GET_BITS(ex_desc, 9, 6);
307}
308
309/**
310 * Construct a message descriptor immediate with the specified sampler
311 * function controls.
312 */
313static inline uint32_t
314brw_sampler_desc(const struct gen_device_info *devinfo,
315                 unsigned binding_table_index,
316                 unsigned sampler,
317                 unsigned msg_type,
318                 unsigned simd_mode,
319                 unsigned return_format)
320{
321   const unsigned desc = (SET_BITS(binding_table_index, 7, 0) |
322                          SET_BITS(sampler, 11, 8));
323   if (devinfo->gen >= 7)
324      return (desc | SET_BITS(msg_type, 16, 12) |
325              SET_BITS(simd_mode, 18, 17));
326   else if (devinfo->gen >= 5)
327      return (desc | SET_BITS(msg_type, 15, 12) |
328              SET_BITS(simd_mode, 17, 16));
329   else if (devinfo->is_g4x)
330      return desc | SET_BITS(msg_type, 15, 12);
331   else
332      return (desc | SET_BITS(return_format, 13, 12) |
333              SET_BITS(msg_type, 15, 14));
334}
335
336static inline unsigned
337brw_sampler_desc_binding_table_index(UNUSED const struct gen_device_info *devinfo,
338                                     uint32_t desc)
339{
340   return GET_BITS(desc, 7, 0);
341}
342
343static inline unsigned
344brw_sampler_desc_sampler(UNUSED const struct gen_device_info *devinfo, uint32_t desc)
345{
346   return GET_BITS(desc, 11, 8);
347}
348
349static inline unsigned
350brw_sampler_desc_msg_type(const struct gen_device_info *devinfo, uint32_t desc)
351{
352   if (devinfo->gen >= 7)
353      return GET_BITS(desc, 16, 12);
354   else if (devinfo->gen >= 5 || devinfo->is_g4x)
355      return GET_BITS(desc, 15, 12);
356   else
357      return GET_BITS(desc, 15, 14);
358}
359
360static inline unsigned
361brw_sampler_desc_simd_mode(const struct gen_device_info *devinfo, uint32_t desc)
362{
363   assert(devinfo->gen >= 5);
364   if (devinfo->gen >= 7)
365      return GET_BITS(desc, 18, 17);
366   else
367      return GET_BITS(desc, 17, 16);
368}
369
370static  inline unsigned
371brw_sampler_desc_return_format(MAYBE_UNUSED const struct gen_device_info *devinfo,
372                               uint32_t desc)
373{
374   assert(devinfo->gen == 4 && !devinfo->is_g4x);
375   return GET_BITS(desc, 13, 12);
376}
377
378/**
379 * Construct a message descriptor for the dataport
380 */
381static inline uint32_t
382brw_dp_desc(const struct gen_device_info *devinfo,
383            unsigned binding_table_index,
384            unsigned msg_type,
385            unsigned msg_control)
386{
387   /* Prior to gen6, things are too inconsistent; use the dp_read/write_desc
388    * helpers instead.
389    */
390   assert(devinfo->gen >= 6);
391   const unsigned desc = SET_BITS(binding_table_index, 7, 0);
392   if (devinfo->gen >= 8) {
393      return (desc | SET_BITS(msg_control, 13, 8) |
394              SET_BITS(msg_type, 18, 14));
395   } else if (devinfo->gen >= 7) {
396      return (desc | SET_BITS(msg_control, 13, 8) |
397              SET_BITS(msg_type, 17, 14));
398   } else {
399      return (desc | SET_BITS(msg_control, 12, 8) |
400              SET_BITS(msg_type, 16, 13));
401   }
402}
403
404static inline unsigned
405brw_dp_desc_binding_table_index(UNUSED const struct gen_device_info *devinfo,
406                                uint32_t desc)
407{
408   return GET_BITS(desc, 7, 0);
409}
410
411static inline unsigned
412brw_dp_desc_msg_type(const struct gen_device_info *devinfo, uint32_t desc)
413{
414   assert(devinfo->gen >= 6);
415   if (devinfo->gen >= 8)
416      return GET_BITS(desc, 18, 14);
417   else if (devinfo->gen >= 7)
418      return GET_BITS(desc, 17, 14);
419   else
420      return GET_BITS(desc, 16, 13);
421}
422
423static inline unsigned
424brw_dp_desc_msg_control(const struct gen_device_info *devinfo, uint32_t desc)
425{
426   assert(devinfo->gen >= 6);
427   if (devinfo->gen >= 7)
428      return GET_BITS(desc, 13, 8);
429   else
430      return GET_BITS(desc, 12, 8);
431}
432
433/**
434 * Construct a message descriptor immediate with the specified dataport read
435 * function controls.
436 */
437static inline uint32_t
438brw_dp_read_desc(const struct gen_device_info *devinfo,
439                 unsigned binding_table_index,
440                 unsigned msg_control,
441                 unsigned msg_type,
442                 unsigned target_cache)
443{
444   if (devinfo->gen >= 6)
445      return brw_dp_desc(devinfo, binding_table_index, msg_type, msg_control);
446   else if (devinfo->gen >= 5 || devinfo->is_g4x)
447      return (SET_BITS(binding_table_index, 7, 0) |
448              SET_BITS(msg_control, 10, 8) |
449              SET_BITS(msg_type, 13, 11) |
450              SET_BITS(target_cache, 15, 14));
451   else
452      return (SET_BITS(binding_table_index, 7, 0) |
453              SET_BITS(msg_control, 11, 8) |
454              SET_BITS(msg_type, 13, 12) |
455              SET_BITS(target_cache, 15, 14));
456}
457
458static inline unsigned
459brw_dp_read_desc_msg_type(const struct gen_device_info *devinfo, uint32_t desc)
460{
461   if (devinfo->gen >= 6)
462      return brw_dp_desc_msg_type(devinfo, desc);
463   else if (devinfo->gen >= 5 || devinfo->is_g4x)
464      return GET_BITS(desc, 13, 11);
465   else
466      return GET_BITS(desc, 13, 12);
467}
468
469static inline unsigned
470brw_dp_read_desc_msg_control(const struct gen_device_info *devinfo,
471                             uint32_t desc)
472{
473   if (devinfo->gen >= 6)
474      return brw_dp_desc_msg_control(devinfo, desc);
475   else if (devinfo->gen >= 5 || devinfo->is_g4x)
476      return GET_BITS(desc, 10, 8);
477   else
478      return GET_BITS(desc, 11, 8);
479}
480
481/**
482 * Construct a message descriptor immediate with the specified dataport write
483 * function controls.
484 */
485static inline uint32_t
486brw_dp_write_desc(const struct gen_device_info *devinfo,
487                  unsigned binding_table_index,
488                  unsigned msg_control,
489                  unsigned msg_type,
490                  unsigned last_render_target,
491                  unsigned send_commit_msg)
492{
493   assert(devinfo->gen <= 6 || !send_commit_msg);
494   if (devinfo->gen >= 6)
495      return brw_dp_desc(devinfo, binding_table_index, msg_type, msg_control) |
496             SET_BITS(last_render_target, 12, 12) |
497             SET_BITS(send_commit_msg, 17, 17);
498   else
499      return (SET_BITS(binding_table_index, 7, 0) |
500              SET_BITS(msg_control, 11, 8) |
501              SET_BITS(last_render_target, 11, 11) |
502              SET_BITS(msg_type, 14, 12) |
503              SET_BITS(send_commit_msg, 15, 15));
504}
505
506static inline unsigned
507brw_dp_write_desc_msg_type(const struct gen_device_info *devinfo,
508                           uint32_t desc)
509{
510   if (devinfo->gen >= 6)
511      return brw_dp_desc_msg_type(devinfo, desc);
512   else
513      return GET_BITS(desc, 14, 12);
514}
515
516static inline unsigned
517brw_dp_write_desc_msg_control(const struct gen_device_info *devinfo,
518                              uint32_t desc)
519{
520   if (devinfo->gen >= 6)
521      return brw_dp_desc_msg_control(devinfo, desc);
522   else
523      return GET_BITS(desc, 11, 8);
524}
525
526static inline bool
527brw_dp_write_desc_last_render_target(const struct gen_device_info *devinfo,
528                                     uint32_t desc)
529{
530   if (devinfo->gen >= 6)
531      return GET_BITS(desc, 12, 12);
532   else
533      return GET_BITS(desc, 11, 11);
534}
535
536static inline bool
537brw_dp_write_desc_write_commit(const struct gen_device_info *devinfo,
538                               uint32_t desc)
539{
540   assert(devinfo->gen <= 6);
541   if (devinfo->gen >= 6)
542      return GET_BITS(desc, 17, 17);
543   else
544      return GET_BITS(desc, 15, 15);
545}
546
547/**
548 * Construct a message descriptor immediate with the specified dataport
549 * surface function controls.
550 */
551static inline uint32_t
552brw_dp_surface_desc(const struct gen_device_info *devinfo,
553                    unsigned msg_type,
554                    unsigned msg_control)
555{
556   assert(devinfo->gen >= 7);
557   /* We'll OR in the binding table index later */
558   return brw_dp_desc(devinfo, 0, msg_type, msg_control);
559}
560
561static inline uint32_t
562brw_dp_untyped_atomic_desc(const struct gen_device_info *devinfo,
563                           unsigned exec_size, /**< 0 for SIMD4x2 */
564                           unsigned atomic_op,
565                           bool response_expected)
566{
567   assert(exec_size <= 8 || exec_size == 16);
568
569   unsigned msg_type;
570   if (devinfo->gen >= 8 || devinfo->is_haswell) {
571      if (exec_size > 0) {
572         msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP;
573      } else {
574         msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2;
575      }
576   } else {
577      msg_type = GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP;
578   }
579
580   const unsigned msg_control =
581      SET_BITS(atomic_op, 3, 0) |
582      SET_BITS(0 < exec_size && exec_size <= 8, 4, 4) |
583      SET_BITS(response_expected, 5, 5);
584
585   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
586}
587
588static inline uint32_t
589brw_dp_untyped_atomic_float_desc(const struct gen_device_info *devinfo,
590                                 unsigned exec_size,
591                                 unsigned atomic_op,
592                                 bool response_expected)
593{
594   assert(exec_size <= 8 || exec_size == 16);
595   assert(devinfo->gen >= 9);
596
597   assert(exec_size > 0);
598   const unsigned msg_type = GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP;
599
600   const unsigned msg_control =
601      SET_BITS(atomic_op, 1, 0) |
602      SET_BITS(exec_size <= 8, 4, 4) |
603      SET_BITS(response_expected, 5, 5);
604
605   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
606}
607
/**
 * Compute the 4-bit channel mask used by the surface message descriptors.
 *
 * The result has its low \p num_channels bits clear and the remaining bits
 * of the nibble set, e.g. 0 -> 0xf, 1 -> 0xe, 3 -> 0x8, and 0 for 4 or more.
 * See also MDC_CMASK in the SKL PRM Vol 2d.
 *
 * \param num_channels  Number of low mask bits to clear.
 * \return The mask, always within 0x0..0xf.
 */
static inline unsigned
brw_mdc_cmask(unsigned num_channels)
{
   /* Every bit of the nibble is shifted out from 4 channels onwards.
    * Returning early also keeps the shift count small, since shifting by the
    * type width or more (or into the sign bit of an int) is undefined.
    */
   if (num_channels >= 4)
      return 0;

   return 0xfu & (0xfu << num_channels);
}
614
615static inline uint32_t
616brw_dp_untyped_surface_rw_desc(const struct gen_device_info *devinfo,
617                               unsigned exec_size, /**< 0 for SIMD4x2 */
618                               unsigned num_channels,
619                               bool write)
620{
621   assert(exec_size <= 8 || exec_size == 16);
622
623   unsigned msg_type;
624   if (write) {
625      if (devinfo->gen >= 8 || devinfo->is_haswell) {
626         msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE;
627      } else {
628         msg_type = GEN7_DATAPORT_DC_UNTYPED_SURFACE_WRITE;
629      }
630   } else {
631      /* Read */
632      if (devinfo->gen >= 8 || devinfo->is_haswell) {
633         msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ;
634      } else {
635         msg_type = GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ;
636      }
637   }
638
639   /* SIMD4x2 is only valid for read messages on IVB; use SIMD8 instead */
640   if (write && devinfo->gen == 7 && !devinfo->is_haswell && exec_size == 0)
641      exec_size = 8;
642
643   /* See also MDC_SM3 in the SKL PRM Vol 2d. */
644   const unsigned simd_mode = exec_size == 0 ? 0 : /* SIMD4x2 */
645                              exec_size <= 8 ? 2 : 1;
646
647   const unsigned msg_control =
648      SET_BITS(brw_mdc_cmask(num_channels), 3, 0) |
649      SET_BITS(simd_mode, 5, 4);
650
651   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
652}
653
654static inline unsigned
655brw_mdc_ds(unsigned bit_size)
656{
657   switch (bit_size) {
658   case 8:
659      return GEN7_BYTE_SCATTERED_DATA_ELEMENT_BYTE;
660   case 16:
661      return GEN7_BYTE_SCATTERED_DATA_ELEMENT_WORD;
662   case 32:
663      return GEN7_BYTE_SCATTERED_DATA_ELEMENT_DWORD;
664   default:
665      unreachable("Unsupported bit_size for byte scattered messages");
666   }
667}
668
669static inline uint32_t
670brw_dp_byte_scattered_rw_desc(const struct gen_device_info *devinfo,
671                              unsigned exec_size,
672                              unsigned bit_size,
673                              bool write)
674{
675   assert(exec_size <= 8 || exec_size == 16);
676
677   assert(devinfo->gen > 7 || devinfo->is_haswell);
678   const unsigned msg_type =
679      write ? HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_WRITE :
680              HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_READ;
681
682   assert(exec_size > 0);
683   const unsigned msg_control =
684      SET_BITS(exec_size == 16, 0, 0) |
685      SET_BITS(brw_mdc_ds(bit_size), 3, 2);
686
687   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
688}
689
690static inline uint32_t
691brw_dp_a64_untyped_surface_rw_desc(const struct gen_device_info *devinfo,
692                                   unsigned exec_size, /**< 0 for SIMD4x2 */
693                                   unsigned num_channels,
694                                   bool write)
695{
696   assert(exec_size <= 8 || exec_size == 16);
697   assert(devinfo->gen >= 8);
698
699   unsigned msg_type =
700      write ? GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_WRITE :
701              GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_READ;
702
703   /* See also MDC_SM3 in the SKL PRM Vol 2d. */
704   const unsigned simd_mode = exec_size == 0 ? 0 : /* SIMD4x2 */
705                              exec_size <= 8 ? 2 : 1;
706
707   const unsigned msg_control =
708      SET_BITS(brw_mdc_cmask(num_channels), 3, 0) |
709      SET_BITS(simd_mode, 5, 4);
710
711   return brw_dp_desc(devinfo, BRW_BTI_STATELESS, msg_type, msg_control);
712}
713
/**
 * Calculate the data size (see MDC_A64_DS in the "Structures" volume of the
 * Skylake PRM).
 *
 * \param elems  Element count; must be 1, 2, 4 or 8.
 * \return 0, 1, 2 or 3 respectively.  Any other count hits unreachable().
 */
static inline uint32_t
brw_mdc_a64_ds(unsigned elems)
{
   switch (elems) {
   case 1:  return 0;
   case 2:  return 1;
   case 4:  return 2;
   case 8:  return 3;
   default:
      unreachable("Unsupported element count for A64 scattered message");
   }
}
730
731static inline uint32_t
732brw_dp_a64_byte_scattered_rw_desc(const struct gen_device_info *devinfo,
733                                  unsigned exec_size, /**< 0 for SIMD4x2 */
734                                  unsigned bit_size,
735                                  bool write)
736{
737   assert(exec_size <= 8 || exec_size == 16);
738   assert(devinfo->gen >= 8);
739
740   unsigned msg_type =
741      write ? GEN8_DATAPORT_DC_PORT1_A64_SCATTERED_WRITE :
742              GEN9_DATAPORT_DC_PORT1_A64_SCATTERED_READ;
743
744   const unsigned msg_control =
745      SET_BITS(GEN8_A64_SCATTERED_SUBTYPE_BYTE, 1, 0) |
746      SET_BITS(brw_mdc_a64_ds(bit_size / 8), 3, 2) |
747      SET_BITS(exec_size == 16, 4, 4);
748
749   return brw_dp_desc(devinfo, BRW_BTI_STATELESS, msg_type, msg_control);
750}
751
752static inline uint32_t
753brw_dp_a64_untyped_atomic_desc(const struct gen_device_info *devinfo,
754                               MAYBE_UNUSED unsigned exec_size, /**< 0 for SIMD4x2 */
755                               unsigned bit_size,
756                               unsigned atomic_op,
757                               bool response_expected)
758{
759   assert(exec_size == 8);
760   assert(devinfo->gen >= 8);
761   assert(bit_size == 32 || bit_size == 64);
762
763   const unsigned msg_type = GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP;
764
765   const unsigned msg_control =
766      SET_BITS(atomic_op, 3, 0) |
767      SET_BITS(bit_size == 64, 4, 4) |
768      SET_BITS(response_expected, 5, 5);
769
770   return brw_dp_desc(devinfo, BRW_BTI_STATELESS, msg_type, msg_control);
771}
772
773static inline uint32_t
774brw_dp_a64_untyped_atomic_float_desc(const struct gen_device_info *devinfo,
775                                     MAYBE_UNUSED unsigned exec_size,
776                                     unsigned atomic_op,
777                                     bool response_expected)
778{
779   assert(exec_size == 8);
780   assert(devinfo->gen >= 9);
781
782   assert(exec_size > 0);
783   const unsigned msg_type = GEN9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP;
784
785   const unsigned msg_control =
786      SET_BITS(atomic_op, 1, 0) |
787      SET_BITS(response_expected, 5, 5);
788
789   return brw_dp_desc(devinfo, BRW_BTI_STATELESS, msg_type, msg_control);
790}
791
792static inline uint32_t
793brw_dp_typed_atomic_desc(const struct gen_device_info *devinfo,
794                         unsigned exec_size,
795                         unsigned exec_group,
796                         unsigned atomic_op,
797                         bool response_expected)
798{
799   assert(exec_size > 0 || exec_group == 0);
800   assert(exec_group % 8 == 0);
801
802   unsigned msg_type;
803   if (devinfo->gen >= 8 || devinfo->is_haswell) {
804      if (exec_size == 0) {
805         msg_type = HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2;
806      } else {
807         msg_type = HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP;
808      }
809   } else {
810      /* SIMD4x2 typed surface R/W messages only exist on HSW+ */
811      assert(exec_size > 0);
812      msg_type = GEN7_DATAPORT_RC_TYPED_ATOMIC_OP;
813   }
814
815   const bool high_sample_mask = (exec_group / 8) % 2 == 1;
816
817   const unsigned msg_control =
818      SET_BITS(atomic_op, 3, 0) |
819      SET_BITS(high_sample_mask, 4, 4) |
820      SET_BITS(response_expected, 5, 5);
821
822   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
823}
824
825static inline uint32_t
826brw_dp_typed_surface_rw_desc(const struct gen_device_info *devinfo,
827                             unsigned exec_size,
828                             unsigned exec_group,
829                             unsigned num_channels,
830                             bool write)
831{
832   assert(exec_size > 0 || exec_group == 0);
833   assert(exec_group % 8 == 0);
834
835   /* Typed surface reads and writes don't support SIMD16 */
836   assert(exec_size <= 8);
837
838   unsigned msg_type;
839   if (write) {
840      if (devinfo->gen >= 8 || devinfo->is_haswell) {
841         msg_type = HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE;
842      } else {
843         msg_type = GEN7_DATAPORT_RC_TYPED_SURFACE_WRITE;
844      }
845   } else {
846      if (devinfo->gen >= 8 || devinfo->is_haswell) {
847         msg_type = HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ;
848      } else {
849         msg_type = GEN7_DATAPORT_RC_TYPED_SURFACE_READ;
850      }
851   }
852
853   /* See also MDC_SG3 in the SKL PRM Vol 2d. */
854   unsigned msg_control;
855   if (devinfo->gen >= 8 || devinfo->is_haswell) {
856      /* See also MDC_SG3 in the SKL PRM Vol 2d. */
857      const unsigned slot_group = exec_size == 0 ? 0 : /* SIMD4x2 */
858                                  1 + ((exec_group / 8) % 2);
859
860      msg_control =
861         SET_BITS(brw_mdc_cmask(num_channels), 3, 0) |
862         SET_BITS(slot_group, 5, 4);
863   } else {
864      /* SIMD4x2 typed surface R/W messages only exist on HSW+ */
865      assert(exec_size > 0);
866      const unsigned slot_group = ((exec_group / 8) % 2);
867
868      msg_control =
869         SET_BITS(brw_mdc_cmask(num_channels), 3, 0) |
870         SET_BITS(slot_group, 5, 5);
871   }
872
873   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
874}
875
876/**
877 * Construct a message descriptor immediate with the specified pixel
878 * interpolator function controls.
879 */
880static inline uint32_t
881brw_pixel_interp_desc(UNUSED const struct gen_device_info *devinfo,
882                      unsigned msg_type,
883                      bool noperspective,
884                      unsigned simd_mode,
885                      unsigned slot_group)
886{
887   return (SET_BITS(slot_group, 11, 11) |
888           SET_BITS(msg_type, 13, 12) |
889           SET_BITS(!!noperspective, 14, 14) |
890           SET_BITS(simd_mode, 16, 16));
891}
892
/* URB write message emission.  Declaration only. */
void brw_urb_WRITE(struct brw_codegen *p,
                   struct brw_reg dest,
                   unsigned msg_reg_nr,
                   struct brw_reg src0,
                   enum brw_urb_write_flags flags,
                   unsigned msg_length,
                   unsigned response_length,
                   unsigned offset,
                   unsigned swizzle);
902
/**
 * Send a message to the shared unit \p sfid using the descriptor \p desc,
 * which may be indirect.  When \p desc is not an immediate, it is loaded
 * transparently into an address register with an OR instruction.
 */
void brw_send_indirect_message(struct brw_codegen *p,
                               unsigned sfid,
                               struct brw_reg dst,
                               struct brw_reg payload,
                               struct brw_reg desc,
                               unsigned desc_imm,
                               bool eot);
916
/* Variant of the indirect send that takes two payload registers and an
 * additional extended descriptor (register plus immediate).  Declaration
 * only.
 */
void brw_send_indirect_split_message(struct brw_codegen *p,
                                     unsigned sfid,
                                     struct brw_reg dst,
                                     struct brw_reg payload0,
                                     struct brw_reg payload1,
                                     struct brw_reg desc,
                                     unsigned desc_imm,
                                     struct brw_reg ex_desc,
                                     unsigned ex_desc_imm,
                                     bool eot);
928
/* FF_SYNC and streamed vertex buffer write emission.  Declarations only. */
void brw_ff_sync(struct brw_codegen *p,
                 struct brw_reg dest,
                 unsigned msg_reg_nr,
                 struct brw_reg src0,
                 bool allocate,
                 unsigned response_length,
                 bool eot);

void brw_svb_write(struct brw_codegen *p,
                   struct brw_reg dest,
                   unsigned msg_reg_nr,
                   struct brw_reg src0,
                   unsigned binding_table_index,
                   bool send_commit_msg);
943
944brw_inst *brw_fb_WRITE(struct brw_codegen *p,
945                       struct brw_reg payload,
946                       struct brw_reg implied_header,
947                       unsigned msg_control,
948                       unsigned binding_table_index,
949                       unsigned msg_length,
950                       unsigned response_length,
951                       bool eot,
952                       bool last_render_target,
953                       bool header_present);
954
955brw_inst *gen9_fb_READ(struct brw_codegen *p,
956                       struct brw_reg dst,
957                       struct brw_reg payload,
958                       unsigned binding_table_index,
959                       unsigned msg_length,
960                       unsigned response_length,
961                       bool per_sample);
962
/**
 * Prototype of brw_SAMPLE(); the definition is not part of this section.
 *
 * NOTE(review): all message parameters are plain unsigned values here
 * (including \p header_present); their valid ranges and encodings are not
 * visible from this header -- confirm against the implementation.
 */
void brw_SAMPLE(struct brw_codegen *p,
                struct brw_reg dest,
                unsigned msg_reg_nr,
                struct brw_reg src0,
                unsigned binding_table_index,
                unsigned sampler,
                unsigned msg_type,
                unsigned response_length,
                unsigned msg_length,
                unsigned header_present,
                unsigned simd_mode,
                unsigned return_format);

/**
 * Prototype of brw_adjust_sampler_state_pointer(); defined elsewhere.
 *
 * NOTE(review): presumably updates the sampler state pointer held in
 * \p header according to \p sampler_index -- TODO confirm.
 */
void brw_adjust_sampler_state_pointer(struct brw_codegen *p,
                                      struct brw_reg header,
                                      struct brw_reg sampler_index);

/**
 * Prototype of gen4_math(); the definition is not part of this section.
 *
 * Takes a single source register plus a message register number and a
 * precision selector, in contrast to gen6_math() which takes two sources.
 * NOTE(review): the encoding of \p function and \p precision is not visible
 * here -- confirm against the implementation.
 */
void gen4_math(struct brw_codegen *p,
               struct brw_reg dest,
               unsigned function,
               unsigned msg_reg_nr,
               struct brw_reg src,
               unsigned precision);

/**
 * Prototype of gen6_math(); the definition is not part of this section.
 *
 * Takes two source registers and no message register number, in contrast to
 * gen4_math().  NOTE(review): the encoding of \p function is not visible
 * here -- confirm against the implementation.
 */
void gen6_math(struct brw_codegen *p,
               struct brw_reg dest,
               unsigned function,
               struct brw_reg src0,
               struct brw_reg src1);

/**
 * Prototype of brw_oword_block_read(); defined elsewhere.
 *
 * NOTE(review): units of \p offset and the role of \p mrf are not documented
 * in this header -- TODO confirm against the implementation.
 */
void brw_oword_block_read(struct brw_codegen *p,
                          struct brw_reg dest,
                          struct brw_reg mrf,
                          uint32_t offset,
                          uint32_t bind_table_index);

/**
 * Prototype of brw_scratch_surface_idx(); defined elsewhere.  Takes the
 * codegen context read-only and returns an unsigned index.
 */
unsigned brw_scratch_surface_idx(const struct brw_codegen *p);

/**
 * Prototype of brw_oword_block_read_scratch(); defined elsewhere.
 *
 * NOTE(review): \p num_regs is a signed int and \p offset an unsigned value;
 * their units and valid ranges are not visible here -- TODO confirm.
 */
void brw_oword_block_read_scratch(struct brw_codegen *p,
                                  struct brw_reg dest,
                                  struct brw_reg mrf,
                                  int num_regs,
                                  unsigned offset);

/**
 * Prototype of brw_oword_block_write_scratch(); defined elsewhere.
 *
 * NOTE(review): no separate source register appears in the signature, so the
 * data to write presumably travels via \p mrf -- TODO confirm.
 */
void brw_oword_block_write_scratch(struct brw_codegen *p,
                                   struct brw_reg mrf,
                                   int num_regs,
                                   unsigned offset);

/**
 * Prototype of gen7_block_read_scratch(); defined elsewhere.  Unlike
 * brw_oword_block_read_scratch() its signature has no \c mrf parameter.
 */
void gen7_block_read_scratch(struct brw_codegen *p,
                             struct brw_reg dest,
                             int num_regs,
                             unsigned offset);

/**
 * Prototype of brw_shader_time_add(); defined elsewhere.
 *
 * NOTE(review): \p surf_index presumably selects the surface receiving the
 * data in \p payload -- TODO confirm.
 */
void brw_shader_time_add(struct brw_codegen *p,
                         struct brw_reg payload,
                         uint32_t surf_index);

1021/**
1022 * Return the generation-specific jump distance scaling factor.
1023 *
1024 * Given the number of instructions to jump, we need to scale by
1025 * some number to obtain the actual jump distance to program in an
1026 * instruction.
1027 */
1028static inline unsigned
1029brw_jump_scale(const struct gen_device_info *devinfo)
1030{
1031   /* Broadwell measures jump targets in bytes. */
1032   if (devinfo->gen >= 8)
1033      return 16;
1034
1035   /* Ironlake and later measure jump targets in 64-bit data chunks (in order
1036    * (to support compaction), so each 128-bit instruction requires 2 chunks.
1037    */
1038   if (devinfo->gen >= 5)
1039      return 2;
1040
1041   /* Gen4 simply uses the number of 128-bit instructions. */
1042   return 1;
1043}
1044
/** Prototype of brw_barrier(); defined elsewhere.  Takes one source register. */
void brw_barrier(struct brw_codegen *p, struct brw_reg src);

1047/* If/else/endif.  Works by manipulating the execution flags on each
1048 * channel.
1049 */
1050brw_inst *brw_IF(struct brw_codegen *p, unsigned execute_size);
1051brw_inst *gen6_IF(struct brw_codegen *p, enum brw_conditional_mod conditional,
1052                  struct brw_reg src0, struct brw_reg src1);
1053
1054void brw_ELSE(struct brw_codegen *p);
1055void brw_ENDIF(struct brw_codegen *p);
1056
1057/* DO/WHILE loops:
1058 */
1059brw_inst *brw_DO(struct brw_codegen *p, unsigned execute_size);
1060
1061brw_inst *brw_WHILE(struct brw_codegen *p);
1062
1063brw_inst *brw_BREAK(struct brw_codegen *p);
1064brw_inst *brw_CONT(struct brw_codegen *p);
1065brw_inst *gen6_HALT(struct brw_codegen *p);
1066
1067/* Forward jumps:
1068 */
1069void brw_land_fwd_jump(struct brw_codegen *p, int jmp_insn_idx);
1070
1071brw_inst *brw_JMPI(struct brw_codegen *p, struct brw_reg index,
1072                   unsigned predicate_control);
1073
/** Prototype of brw_NOP(); defined elsewhere.  Takes only the codegen context. */
void brw_NOP(struct brw_codegen *p);

/** Prototype of brw_WAIT(); defined elsewhere.  Takes only the codegen context. */
void brw_WAIT(struct brw_codegen *p);

/* Special case: there is never a destination, execution size will be
 * taken from src0.
 *
 * NOTE(review): \p conditional is a plain unsigned here; it presumably holds
 * an enum brw_conditional_mod value -- TODO confirm.
 */
void brw_CMP(struct brw_codegen *p,
             struct brw_reg dest,
             unsigned conditional,
             struct brw_reg src0,
             struct brw_reg src1);

/**
 * Prototype of brw_untyped_atomic(); defined elsewhere.
 *
 * NOTE(review): the encoding of \p atomic_op and the effect of
 * \p response_expected / \p header_present are not visible from this header
 * -- confirm against the implementation.
 */
void
brw_untyped_atomic(struct brw_codegen *p,
                   struct brw_reg dst,
                   struct brw_reg payload,
                   struct brw_reg surface,
                   unsigned atomic_op,
                   unsigned msg_length,
                   bool response_expected,
                   bool header_present);

/**
 * Prototype of brw_untyped_surface_read(); defined elsewhere.
 *
 * The surface is passed as a register (\p surface), not as an integer index.
 * NOTE(review): valid values of \p num_channels are not visible here.
 */
void
brw_untyped_surface_read(struct brw_codegen *p,
                         struct brw_reg dst,
                         struct brw_reg payload,
                         struct brw_reg surface,
                         unsigned msg_length,
                         unsigned num_channels);

/**
 * Prototype of brw_untyped_surface_write(); defined elsewhere.
 *
 * Mirrors brw_untyped_surface_read() without a destination register and with
 * an extra \p header_present flag.  NOTE(review): semantics of the flag are
 * not visible here -- TODO confirm.
 */
void
brw_untyped_surface_write(struct brw_codegen *p,
                          struct brw_reg payload,
                          struct brw_reg surface,
                          unsigned msg_length,
                          unsigned num_channels,
                          bool header_present);

/**
 * Prototype of brw_memory_fence(); defined elsewhere.
 *
 * NOTE(review): \p send_op is an enum opcode (presumably the SEND-type opcode
 * to emit) and \p stall a boolean; the precise behaviour of each is not
 * visible from this header -- TODO confirm.
 */
void
brw_memory_fence(struct brw_codegen *p,
                 struct brw_reg dst,
                 struct brw_reg src,
                 enum opcode send_op,
                 bool stall);

/**
 * Prototype of brw_pixel_interpolator_query(); defined elsewhere.
 *
 * NOTE(review): the encoding of \p mode and the contents expected in
 * \p data are not visible from this header -- confirm against the
 * implementation.
 */
void
brw_pixel_interpolator_query(struct brw_codegen *p,
                             struct brw_reg dest,
                             struct brw_reg mrf,
                             bool noperspective,
                             unsigned mode,
                             struct brw_reg data,
                             unsigned msg_length,
                             unsigned response_length);

/**
 * Prototype of brw_find_live_channel(); defined elsewhere.
 *
 * NOTE(review): presumably writes to \p dst the index of a live channel,
 * restricted by \p mask -- TODO confirm.
 */
void
brw_find_live_channel(struct brw_codegen *p,
                      struct brw_reg dst,
                      struct brw_reg mask);

/**
 * Prototype of brw_broadcast(); defined elsewhere.
 *
 * NOTE(review): presumably copies the component of \p src selected by
 * \p idx into \p dst -- TODO confirm.
 */
void
brw_broadcast(struct brw_codegen *p,
              struct brw_reg dst,
              struct brw_reg src,
              struct brw_reg idx);

/**
 * Prototype of brw_rounding_mode(); defined elsewhere.  Takes the rounding
 * mode as an enum brw_rnd_mode value.
 */
void
brw_rounding_mode(struct brw_codegen *p,
                  enum brw_rnd_mode mode);

/***********************************************************************
 * brw_eu_util.c:
 */

/**
 * Prototype of brw_copy_indirect_to_indirect(); defined elsewhere.  Both
 * endpoints are given as struct brw_indirect values.
 *
 * NOTE(review): the unit of \p count is not visible from this header.
 */
void brw_copy_indirect_to_indirect(struct brw_codegen *p,
                                   struct brw_indirect dst_ptr,
                                   struct brw_indirect src_ptr,
                                   unsigned count);

/**
 * Prototype of brw_copy_from_indirect(); defined elsewhere.  The source is a
 * struct brw_indirect, the destination a register.
 *
 * NOTE(review): the unit of \p count is not visible from this header.
 */
void brw_copy_from_indirect(struct brw_codegen *p,
                            struct brw_reg dst,
                            struct brw_indirect ptr,
                            unsigned count);

/**
 * Prototype of brw_copy4(); defined elsewhere.
 *
 * NOTE(review): same signature as brw_copy8(); how the two differ and the
 * unit of \p count are not visible here -- TODO confirm.
 */
void brw_copy4(struct brw_codegen *p,
               struct brw_reg dst,
               struct brw_reg src,
               unsigned count);

/**
 * Prototype of brw_copy8(); defined elsewhere.
 *
 * NOTE(review): same signature as brw_copy4(); how the two differ and the
 * unit of \p count are not visible here -- TODO confirm.
 */
void brw_copy8(struct brw_codegen *p,
               struct brw_reg dst,
               struct brw_reg src,
               unsigned count);

/**
 * Prototype of brw_math_invert(); defined elsewhere.
 *
 * NOTE(review): presumably stores an inverse of \p src in \p dst -- TODO
 * confirm against the implementation.
 */
void brw_math_invert(struct brw_codegen *p,
                     struct brw_reg dst,
                     struct brw_reg src);

1173void brw_set_src1(struct brw_codegen *p, brw_inst *insn, struct brw_reg reg);
1174
1175void brw_set_desc_ex(struct brw_codegen *p, brw_inst *insn,
1176                     unsigned desc, unsigned ex_desc);
1177
1178static inline void
1179brw_set_desc(struct brw_codegen *p, brw_inst *insn, unsigned desc)
1180{
1181   brw_set_desc_ex(p, insn, desc, 0);
1182}
1183
/**
 * Prototype of brw_set_uip_jip(); defined elsewhere.
 *
 * NOTE(review): \p start_offset is a signed int whose unit is not documented
 * here -- TODO confirm.
 */
void brw_set_uip_jip(struct brw_codegen *p, int start_offset);

/* Conditional-modifier helpers, defined elsewhere.  Both accept the modifier
 * as a raw uint32_t and return an enum brw_conditional_mod.
 */
enum brw_conditional_mod brw_negate_cmod(uint32_t cmod);
enum brw_conditional_mod brw_swap_cmod(uint32_t cmod);

1189/* brw_eu_compact.c */
1190void brw_init_compaction_tables(const struct gen_device_info *devinfo);
1191void brw_compact_instructions(struct brw_codegen *p, int start_offset,
1192                              struct disasm_info *disasm);
1193void brw_uncompact_instruction(const struct gen_device_info *devinfo,
1194                               brw_inst *dst, brw_compact_inst *src);
1195bool brw_try_compact_instruction(const struct gen_device_info *devinfo,
1196                                 brw_compact_inst *dst, const brw_inst *src);
1197
1198void brw_debug_compact_uncompact(const struct gen_device_info *devinfo,
1199                                 brw_inst *orig, brw_inst *uncompacted);
1200
/* brw_eu_validate.c
 *
 * Prototype only.  The assembly buffer is taken read-only and the range to
 * check is given by \p start_offset / \p end_offset; returns a bool.
 * NOTE(review): presumably true means "valid" and the offsets are in bytes
 * -- TODO confirm against the implementation.
 */
bool brw_validate_instructions(const struct gen_device_info *devinfo,
                               const void *assembly, int start_offset, int end_offset,
                               struct disasm_info *disasm);

1206static inline int
1207next_offset(const struct gen_device_info *devinfo, void *store, int offset)
1208{
1209   brw_inst *insn = (brw_inst *)((char *)store + offset);
1210
1211   if (brw_inst_cmpt_control(devinfo, insn))
1212      return offset + 8;
1213   else
1214      return offset + 16;
1215}
1216
/**
 * Description of one opcode: either a leaf entry (mnemonic plus source and
 * destination counts) or a link to a secondary table, selected by \c gens.
 */
struct opcode_desc {
   /* The union is an implementation detail used by brw_opcode_desc() to handle
    * opcodes that have been reused for different instructions across hardware
    * generations.
    *
    * The gens field acts as a tag. If it is non-zero, name points to a string
    * containing the instruction mnemonic. If it is zero, the table field is
    * valid and either points to a secondary opcode_desc table with 'size'
    * elements or is NULL and no such instruction exists for the opcode.
    */
   union {
      /* Valid when gens != 0. */
      struct {
         char    *name;   /* instruction mnemonic */
         int      nsrc;   /* number of sources (is_3src() tests this for 3) */
      };
      /* Valid when gens == 0. */
      struct {
         const struct opcode_desc *table;   /* secondary table, or NULL */
         unsigned size;                     /* element count of 'table' */
      };
   };
   int      ndst;   /* NOTE(review): presumably the number of destinations */
   int      gens;   /* tag selecting the active union member, see above */
};

/**
 * Prototype of brw_opcode_desc(); defined elsewhere.  Returns a read-only
 * opcode_desc pointer for \p opcode on the given device; callers such as
 * is_3src() treat a NULL result as "no descriptor".
 */
const struct opcode_desc *
brw_opcode_desc(const struct gen_device_info *devinfo, enum opcode opcode);

1244static inline bool
1245is_3src(const struct gen_device_info *devinfo, enum opcode opcode)
1246{
1247   const struct opcode_desc *desc = brw_opcode_desc(devinfo, opcode);
1248   return desc && desc->nsrc == 3;
1249}
1250
/** Maximum SEND message length */
#define BRW_MAX_MSG_LENGTH 15

/** First MRF register used by spills: 21 when \p gen is 6, 13 otherwise. */
#define FIRST_SPILL_MRF(gen) ((gen) == 6 ? 21 : 13)

/** First MRF register used by pull loads: 16 when \p gen is 6, 13 otherwise. */
#define FIRST_PULL_LOAD_MRF(gen) ((gen) == 6 ? 16 : 13)

1260#ifdef __cplusplus
1261}
1262#endif
1263
1264#endif
1265