brw_eu.cpp revision 7ec681f3
/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */

#include <sys/stat.h>
#include <fcntl.h>

#include "brw_eu_defines.h"
#include "brw_eu.h"
#include "brw_shader.h"
#include "brw_gfx_ver_enum.h"
#include "dev/intel_debug.h"

#include "util/ralloc.h"

/* Returns a conditional modifier that negates the condition. */
enum brw_conditional_mod
brw_negate_cmod(enum brw_conditional_mod cmod)
{
   switch (cmod) {
   case BRW_CONDITIONAL_Z:
      return BRW_CONDITIONAL_NZ;
   case BRW_CONDITIONAL_NZ:
      return BRW_CONDITIONAL_Z;
   case BRW_CONDITIONAL_G:
      return BRW_CONDITIONAL_LE;
   case BRW_CONDITIONAL_GE:
      return BRW_CONDITIONAL_L;
   case BRW_CONDITIONAL_L:
      return BRW_CONDITIONAL_GE;
   case BRW_CONDITIONAL_LE:
      return BRW_CONDITIONAL_G;
   default:
      unreachable("Can't negate this cmod");
   }
}

/* Returns the corresponding conditional mod for swapping src0 and
 * src1 in e.g. CMP.
 */
enum brw_conditional_mod
brw_swap_cmod(enum brw_conditional_mod cmod)
{
   switch (cmod) {
   case BRW_CONDITIONAL_Z:
   case BRW_CONDITIONAL_NZ:
      return cmod;
   case BRW_CONDITIONAL_G:
      return BRW_CONDITIONAL_L;
   case BRW_CONDITIONAL_GE:
      return BRW_CONDITIONAL_LE;
   case BRW_CONDITIONAL_L:
      return BRW_CONDITIONAL_G;
   case BRW_CONDITIONAL_LE:
      return BRW_CONDITIONAL_GE;
   default:
      return BRW_CONDITIONAL_NONE;
   }
}
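
/* Illustrative example (editor's note, not part of the original source):
 * swapping the two sources of a comparison calls for brw_swap_cmod(), not
 * brw_negate_cmod().  For instance, "cmp.g f0 src0 src1" is equivalent to
 * "cmp.l f0 src1 src0", whereas negating the condition of "cmp.g" yields
 * "cmp.le" with the operands left in place.
 */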

/**
 * Get the least significant bit offset of the i+1-th component of immediate
 * type \p type.  For \p i equal to the two's complement of j, return the
 * offset of the j-th component starting from the end of the vector.  For
 * scalar register types return zero.
 */
static unsigned
imm_shift(enum brw_reg_type type, unsigned i)
{
   assert(type != BRW_REGISTER_TYPE_UV && type != BRW_REGISTER_TYPE_V &&
          "Not implemented.");

   if (type == BRW_REGISTER_TYPE_VF)
      return 8 * (i & 3);
   else
      return 0;
}

/**
 * Swizzle an arbitrary immediate \p x of the given type according to the
 * permutation specified as \p swz.
 */
uint32_t
brw_swizzle_immediate(enum brw_reg_type type, uint32_t x, unsigned swz)
{
   if (imm_shift(type, 1)) {
      const unsigned n = 32 / imm_shift(type, 1);
      uint32_t y = 0;

      for (unsigned i = 0; i < n; i++) {
         /* Shift the specified component all the way to the right and left to
          * discard any undesired L/MSBs, then shift it right into component i.
          */
         y |= x >> imm_shift(type, (i & ~3) + BRW_GET_SWZ(swz, i & 3))
                << imm_shift(type, ~0u)
                >> imm_shift(type, ~0u - i);
      }

      return y;
   } else {
      return x;
   }
}
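
/* Illustrative example (editor's note, not part of the original source): a
 * packed VF immediate holds four 8-bit components in bytes 0..3, so
 *
 *    brw_swizzle_immediate(BRW_REGISTER_TYPE_VF, 0x04030201, BRW_SWIZZLE_XXXX)
 *
 * replicates the first component and returns 0x01010101, while immediates of
 * scalar register types are returned unchanged.
 */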

unsigned
brw_get_default_exec_size(struct brw_codegen *p)
{
   return p->current->exec_size;
}

unsigned
brw_get_default_group(struct brw_codegen *p)
{
   return p->current->group;
}

unsigned
brw_get_default_access_mode(struct brw_codegen *p)
{
   return p->current->access_mode;
}

tgl_swsb
brw_get_default_swsb(struct brw_codegen *p)
{
   return p->current->swsb;
}

void
brw_set_default_exec_size(struct brw_codegen *p, unsigned value)
{
   p->current->exec_size = value;
}

void brw_set_default_predicate_control(struct brw_codegen *p, enum brw_predicate pc)
{
   p->current->predicate = pc;
}

void brw_set_default_predicate_inverse(struct brw_codegen *p, bool predicate_inverse)
{
   p->current->pred_inv = predicate_inverse;
}

void brw_set_default_flag_reg(struct brw_codegen *p, int reg, int subreg)
{
   assert(subreg < 2);
   p->current->flag_subreg = reg * 2 + subreg;
}

void brw_set_default_access_mode( struct brw_codegen *p, unsigned access_mode )
{
   p->current->access_mode = access_mode;
}

void
brw_set_default_compression_control(struct brw_codegen *p,
                                    enum brw_compression compression_control)
{
   switch (compression_control) {
   case BRW_COMPRESSION_NONE:
      /* This is the "use the first set of bits of dmask/vmask/arf
       * according to execsize" option.
       */
      p->current->group = 0;
      break;
   case BRW_COMPRESSION_2NDHALF:
      /* For SIMD8, this is "use the second set of 8 bits." */
      p->current->group = 8;
      break;
   case BRW_COMPRESSION_COMPRESSED:
      /* For SIMD16 instruction compression, use the first set of 16 bits
       * since we don't do SIMD32 dispatch.
       */
      p->current->group = 0;
      break;
   default:
      unreachable("not reached");
   }

   if (p->devinfo->ver <= 6) {
      p->current->compressed =
         (compression_control == BRW_COMPRESSION_COMPRESSED);
   }
}

/**
 * Enable or disable instruction compression on the given instruction leaving
 * the currently selected channel enable group untouched.
 */
void
brw_inst_set_compression(const struct intel_device_info *devinfo,
                         brw_inst *inst, bool on)
{
   if (devinfo->ver >= 6) {
      /* No-op, the EU will figure out for us whether the instruction needs to
       * be compressed.
       */
   } else {
      /* The channel group and compression controls are non-orthogonal: there
       * are two possible representations for uncompressed instructions, and we
       * may need to preserve the current one to avoid changing the selected
       * channel group inadvertently.
       */
      if (on)
         brw_inst_set_qtr_control(devinfo, inst, BRW_COMPRESSION_COMPRESSED);
      else if (brw_inst_qtr_control(devinfo, inst)
               == BRW_COMPRESSION_COMPRESSED)
         brw_inst_set_qtr_control(devinfo, inst, BRW_COMPRESSION_NONE);
   }
}

void
brw_set_default_compression(struct brw_codegen *p, bool on)
{
   p->current->compressed = on;
}

/**
 * Apply the range of channel enable signals given by
 * [group, group + exec_size) to the instruction passed as argument.
 */
void
brw_inst_set_group(const struct intel_device_info *devinfo,
                   brw_inst *inst, unsigned group)
{
   if (devinfo->ver >= 7) {
      assert(group % 4 == 0 && group < 32);
      brw_inst_set_qtr_control(devinfo, inst, group / 8);
      brw_inst_set_nib_control(devinfo, inst, (group / 4) % 2);

   } else if (devinfo->ver == 6) {
      assert(group % 8 == 0 && group < 32);
      brw_inst_set_qtr_control(devinfo, inst, group / 8);

   } else {
      assert(group % 8 == 0 && group < 16);
      /* The channel group and compression controls are non-orthogonal: there
       * are two possible representations for group zero, and we may need to
       * preserve the current one to avoid changing the selected compression
       * enable inadvertently.
       */
      if (group == 8)
         brw_inst_set_qtr_control(devinfo, inst, BRW_COMPRESSION_2NDHALF);
      else if (brw_inst_qtr_control(devinfo, inst) == BRW_COMPRESSION_2NDHALF)
         brw_inst_set_qtr_control(devinfo, inst, BRW_COMPRESSION_NONE);
   }
}
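
/* For example (editor's note, illustrative): a group of 8 combined with an
 * execution size of 8 makes the instruction operate on channels 8..15, i.e.
 * the second quarter (2Q) of a SIMD32 dispatch on Gfx6 and later.
 */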

void
brw_set_default_group(struct brw_codegen *p, unsigned group)
{
   p->current->group = group;
}

void brw_set_default_mask_control( struct brw_codegen *p, unsigned value )
{
   p->current->mask_control = value;
}

void brw_set_default_saturate( struct brw_codegen *p, bool enable )
{
   p->current->saturate = enable;
}

void brw_set_default_acc_write_control(struct brw_codegen *p, unsigned value)
{
   p->current->acc_wr_control = value;
}

void brw_set_default_swsb(struct brw_codegen *p, tgl_swsb value)
{
   p->current->swsb = value;
}

void brw_push_insn_state( struct brw_codegen *p )
{
   assert(p->current != &p->stack[BRW_EU_MAX_INSN_STACK-1]);
   *(p->current + 1) = *p->current;
   p->current++;
}

void brw_pop_insn_state( struct brw_codegen *p )
{
   assert(p->current != p->stack);
   p->current--;
}
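
/* Typical usage pattern (editor's sketch, not from the original source): save
 * the current defaults, emit instructions with temporarily overridden state,
 * then restore:
 *
 *    brw_push_insn_state(p);
 *    brw_set_default_mask_control(p, BRW_MASK_DISABLE);
 *    brw_set_default_exec_size(p, BRW_EXECUTE_1);
 *    ... emit scalar setup code ...
 *    brw_pop_insn_state(p);
 *
 * The state stack holds at most BRW_EU_MAX_INSN_STACK entries, as asserted
 * above.
 */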


/***********************************************************************
 */
void
brw_init_codegen(const struct intel_device_info *devinfo,
                 struct brw_codegen *p, void *mem_ctx)
{
   memset(p, 0, sizeof(*p));

   p->devinfo = devinfo;
   p->automatic_exec_sizes = true;
   /*
    * Start with an instruction store of 1024 instructions; if that turns out
    * not to be enough, brw_next_insn() will keep doubling the store size
    * until we run out of memory.
    */
   p->store_size = 1024;
   p->store = rzalloc_array(mem_ctx, brw_inst, p->store_size);
   p->nr_insn = 0;
   p->current = p->stack;
   memset(p->current, 0, sizeof(p->current[0]));

   p->mem_ctx = mem_ctx;

   /* Set some sensible defaults.
    */
   brw_set_default_exec_size(p, BRW_EXECUTE_8);
   brw_set_default_mask_control(p, BRW_MASK_ENABLE); /* respect the per-channel execution mask */
   brw_set_default_saturate(p, 0);
   brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);

   /* Set up control flow stack */
   p->if_stack_depth = 0;
   p->if_stack_array_size = 16;
   p->if_stack = rzalloc_array(mem_ctx, int, p->if_stack_array_size);

   p->loop_stack_depth = 0;
   p->loop_stack_array_size = 16;
   p->loop_stack = rzalloc_array(mem_ctx, int, p->loop_stack_array_size);
   p->if_depth_in_loop = rzalloc_array(mem_ctx, int, p->loop_stack_array_size);
}
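
/* Minimal lifecycle sketch (editor's note, for illustration only): a caller
 * allocates a ralloc context, initializes the codegen object, emits
 * instructions through the brw_eu emit helpers, and then retrieves the
 * finished binary:
 *
 *    void *mem_ctx = ralloc_context(NULL);
 *    struct brw_codegen p;
 *    brw_init_codegen(devinfo, &p, mem_ctx);
 *    brw_NOP(&p);
 *    unsigned size;
 *    const unsigned *program = brw_get_program(&p, &size);
 *    ... upload program/size, then ralloc_free(mem_ctx) ...
 */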


const unsigned *brw_get_program( struct brw_codegen *p,
                                 unsigned *sz )
{
   *sz = p->next_insn_offset;
   return (const unsigned *)p->store;
}

const brw_shader_reloc *
brw_get_shader_relocs(struct brw_codegen *p, unsigned *num_relocs)
{
   *num_relocs = p->num_relocs;
   return p->relocs;
}

bool brw_try_override_assembly(struct brw_codegen *p, int start_offset,
                               const char *identifier)
{
   const char *read_path = getenv("INTEL_SHADER_ASM_READ_PATH");
   if (!read_path) {
      return false;
   }

   char *name = ralloc_asprintf(NULL, "%s/%s.bin", read_path, identifier);

   int fd = open(name, O_RDONLY);
   ralloc_free(name);

   if (fd == -1) {
      return false;
   }

   struct stat sb;
   if (fstat(fd, &sb) != 0 || (!S_ISREG(sb.st_mode))) {
      close(fd);
      return false;
   }

   p->nr_insn -= (p->next_insn_offset - start_offset) / sizeof(brw_inst);
   p->nr_insn += sb.st_size / sizeof(brw_inst);

   p->next_insn_offset = start_offset + sb.st_size;
   p->store_size = (start_offset + sb.st_size) / sizeof(brw_inst);
   p->store = (brw_inst *)reralloc_size(p->mem_ctx, p->store, p->next_insn_offset);
   assert(p->store);

   ssize_t ret = read(fd, (char *)p->store + start_offset, sb.st_size);
   close(fd);
   if (ret != sb.st_size) {
      return false;
   }

   ASSERTED bool valid =
      brw_validate_instructions(p->devinfo, p->store,
                                start_offset, p->next_insn_offset,
                                NULL);
   assert(valid);

   return true;
}
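
/* Usage note (editor's sketch, names below are hypothetical): pointing the
 * INTEL_SHADER_ASM_READ_PATH environment variable at a directory that
 * contains pre-assembled "<identifier>.bin" files makes this function replace
 * the assembly emitted since start_offset with the file contents, e.g.
 *
 *    INTEL_SHADER_ASM_READ_PATH=/tmp/shaders ./my_app
 *
 * where /tmp/shaders and my_app are placeholders for illustration.
 */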

const struct brw_label *
brw_find_label(const struct brw_label *root, int offset)
{
   const struct brw_label *curr = root;

   if (curr != NULL)
   {
      do {
         if (curr->offset == offset)
            return curr;

         curr = curr->next;
      } while (curr != NULL);
   }

   return curr;
}

void
brw_create_label(struct brw_label **labels, int offset, void *mem_ctx)
{
   if (*labels != NULL) {
      struct brw_label *curr = *labels;
      struct brw_label *prev;

      do {
         prev = curr;

         if (curr->offset == offset)
            return;

         curr = curr->next;
      } while (curr != NULL);

      curr = ralloc(mem_ctx, struct brw_label);
      curr->offset = offset;
      curr->number = prev->number + 1;
      curr->next = NULL;
      prev->next = curr;
   } else {
      struct brw_label *root = ralloc(mem_ctx, struct brw_label);
      root->number = 0;
      root->offset = offset;
      root->next = NULL;
      *labels = root;
   }
}

const struct brw_label *
brw_label_assembly(const struct intel_device_info *devinfo,
                   const void *assembly, int start, int end, void *mem_ctx)
{
   struct brw_label *root_label = NULL;

   int to_bytes_scale = sizeof(brw_inst) / brw_jump_scale(devinfo);

   for (int offset = start; offset < end;) {
      const brw_inst *inst = (const brw_inst *) ((const char *) assembly + offset);
      brw_inst uncompacted;

      bool is_compact = brw_inst_cmpt_control(devinfo, inst);

      if (is_compact) {
         brw_compact_inst *compacted = (brw_compact_inst *)inst;
         brw_uncompact_instruction(devinfo, &uncompacted, compacted);
         inst = &uncompacted;
      }

      if (brw_has_uip(devinfo, brw_inst_opcode(devinfo, inst))) {
         /* Instructions that have UIP also have JIP. */
         brw_create_label(&root_label,
            offset + brw_inst_uip(devinfo, inst) * to_bytes_scale, mem_ctx);
         brw_create_label(&root_label,
            offset + brw_inst_jip(devinfo, inst) * to_bytes_scale, mem_ctx);
      } else if (brw_has_jip(devinfo, brw_inst_opcode(devinfo, inst))) {
         int jip;
         if (devinfo->ver >= 7) {
            jip = brw_inst_jip(devinfo, inst);
         } else {
            jip = brw_inst_gfx6_jump_count(devinfo, inst);
         }

         brw_create_label(&root_label, offset + jip * to_bytes_scale, mem_ctx);
      }

      if (is_compact) {
         offset += sizeof(brw_compact_inst);
      } else {
         offset += sizeof(brw_inst);
      }
   }

   return root_label;
}

void
brw_disassemble_with_labels(const struct intel_device_info *devinfo,
                            const void *assembly, int start, int end, FILE *out)
{
   void *mem_ctx = ralloc_context(NULL);
   const struct brw_label *root_label =
      brw_label_assembly(devinfo, assembly, start, end, mem_ctx);

   brw_disassemble(devinfo, assembly, start, end, root_label, out);

   ralloc_free(mem_ctx);
}

void
brw_disassemble(const struct intel_device_info *devinfo,
                const void *assembly, int start, int end,
                const struct brw_label *root_label, FILE *out)
{
   bool dump_hex = INTEL_DEBUG(DEBUG_HEX);

   for (int offset = start; offset < end;) {
      const brw_inst *insn = (const brw_inst *)((char *)assembly + offset);
      brw_inst uncompacted;

      if (root_label != NULL) {
        const struct brw_label *label = brw_find_label(root_label, offset);
        if (label != NULL) {
           fprintf(out, "\nLABEL%d:\n", label->number);
        }
      }

      bool compacted = brw_inst_cmpt_control(devinfo, insn);
      if (0)
         fprintf(out, "0x%08x: ", offset);

      if (compacted) {
         brw_compact_inst *compacted = (brw_compact_inst *)insn;
         if (dump_hex) {
            unsigned char * insn_ptr = ((unsigned char *)&insn[0]);
            const unsigned int blank_spaces = 24;
            for (int i = 0 ; i < 8; i = i + 4) {
               fprintf(out, "%02x %02x %02x %02x ",
                       insn_ptr[i],
                       insn_ptr[i + 1],
                       insn_ptr[i + 2],
                       insn_ptr[i + 3]);
            }
            /* Keep the hex output of compacted instructions vertically
             * aligned with that of uncompacted instructions.
             */
            fprintf(out, "%*c", blank_spaces, ' ');
         }

         brw_uncompact_instruction(devinfo, &uncompacted, compacted);
         insn = &uncompacted;
      } else {
         if (dump_hex) {
            unsigned char * insn_ptr = ((unsigned char *)&insn[0]);
            for (int i = 0 ; i < 16; i = i + 4) {
               fprintf(out, "%02x %02x %02x %02x ",
                       insn_ptr[i],
                       insn_ptr[i + 1],
                       insn_ptr[i + 2],
                       insn_ptr[i + 3]);
            }
         }
      }

      brw_disassemble_inst(out, devinfo, insn, compacted, offset, root_label);

      if (compacted) {
         offset += sizeof(brw_compact_inst);
      } else {
         offset += sizeof(brw_inst);
      }
   }
}

static const struct opcode_desc opcode_descs[] = {
   /* IR,                 HW,  name,      nsrc, ndst, gfx_vers */
   { BRW_OPCODE_ILLEGAL,  0,   "illegal", 0,    0,    GFX_ALL },
   { BRW_OPCODE_SYNC,     1,   "sync",    1,    0,    GFX_GE(GFX12) },
   { BRW_OPCODE_MOV,      1,   "mov",     1,    1,    GFX_LT(GFX12) },
   { BRW_OPCODE_MOV,      97,  "mov",     1,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_SEL,      2,   "sel",     2,    1,    GFX_LT(GFX12) },
   { BRW_OPCODE_SEL,      98,  "sel",     2,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_MOVI,     3,   "movi",    2,    1,    GFX_GE(GFX45) & GFX_LT(GFX12) },
   { BRW_OPCODE_MOVI,     99,  "movi",    2,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_NOT,      4,   "not",     1,    1,    GFX_LT(GFX12) },
   { BRW_OPCODE_NOT,      100, "not",     1,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_AND,      5,   "and",     2,    1,    GFX_LT(GFX12) },
   { BRW_OPCODE_AND,      101, "and",     2,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_OR,       6,   "or",      2,    1,    GFX_LT(GFX12) },
   { BRW_OPCODE_OR,       102, "or",      2,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_XOR,      7,   "xor",     2,    1,    GFX_LT(GFX12) },
   { BRW_OPCODE_XOR,      103, "xor",     2,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_SHR,      8,   "shr",     2,    1,    GFX_LT(GFX12) },
   { BRW_OPCODE_SHR,      104, "shr",     2,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_SHL,      9,   "shl",     2,    1,    GFX_LT(GFX12) },
   { BRW_OPCODE_SHL,      105, "shl",     2,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_DIM,      10,  "dim",     1,    1,    GFX75 },
   { BRW_OPCODE_SMOV,     10,  "smov",    0,    0,    GFX_GE(GFX8) & GFX_LT(GFX12) },
   { BRW_OPCODE_SMOV,     106, "smov",    0,    0,    GFX_GE(GFX12) },
   { BRW_OPCODE_ASR,      12,  "asr",     2,    1,    GFX_LT(GFX12) },
   { BRW_OPCODE_ASR,      108, "asr",     2,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_ROR,      14,  "ror",     2,    1,    GFX11 },
   { BRW_OPCODE_ROR,      110, "ror",     2,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_ROL,      15,  "rol",     2,    1,    GFX11 },
   { BRW_OPCODE_ROL,      111, "rol",     2,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_CMP,      16,  "cmp",     2,    1,    GFX_LT(GFX12) },
   { BRW_OPCODE_CMP,      112, "cmp",     2,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_CMPN,     17,  "cmpn",    2,    1,    GFX_LT(GFX12) },
   { BRW_OPCODE_CMPN,     113, "cmpn",    2,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_CSEL,     18,  "csel",    3,    1,    GFX_GE(GFX8) & GFX_LT(GFX12) },
   { BRW_OPCODE_CSEL,     114, "csel",    3,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_F32TO16,  19,  "f32to16", 1,    1,    GFX7 | GFX75 },
   { BRW_OPCODE_F16TO32,  20,  "f16to32", 1,    1,    GFX7 | GFX75 },
   { BRW_OPCODE_BFREV,    23,  "bfrev",   1,    1,    GFX_GE(GFX7) & GFX_LT(GFX12) },
   { BRW_OPCODE_BFREV,    119, "bfrev",   1,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_BFE,      24,  "bfe",     3,    1,    GFX_GE(GFX7) & GFX_LT(GFX12) },
   { BRW_OPCODE_BFE,      120, "bfe",     3,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_BFI1,     25,  "bfi1",    2,    1,    GFX_GE(GFX7) & GFX_LT(GFX12) },
   { BRW_OPCODE_BFI1,     121, "bfi1",    2,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_BFI2,     26,  "bfi2",    3,    1,    GFX_GE(GFX7) & GFX_LT(GFX12) },
   { BRW_OPCODE_BFI2,     122, "bfi2",    3,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_JMPI,     32,  "jmpi",    0,    0,    GFX_ALL },
   { BRW_OPCODE_BRD,      33,  "brd",     0,    0,    GFX_GE(GFX7) },
   { BRW_OPCODE_IF,       34,  "if",      0,    0,    GFX_ALL },
   { BRW_OPCODE_IFF,      35,  "iff",     0,    0,    GFX_LE(GFX5) },
   { BRW_OPCODE_BRC,      35,  "brc",     0,    0,    GFX_GE(GFX7) },
   { BRW_OPCODE_ELSE,     36,  "else",    0,    0,    GFX_ALL },
   { BRW_OPCODE_ENDIF,    37,  "endif",   0,    0,    GFX_ALL },
   { BRW_OPCODE_DO,       38,  "do",      0,    0,    GFX_LE(GFX5) },
   { BRW_OPCODE_CASE,     38,  "case",    0,    0,    GFX6 },
   { BRW_OPCODE_WHILE,    39,  "while",   0,    0,    GFX_ALL },
   { BRW_OPCODE_BREAK,    40,  "break",   0,    0,    GFX_ALL },
   { BRW_OPCODE_CONTINUE, 41,  "cont",    0,    0,    GFX_ALL },
   { BRW_OPCODE_HALT,     42,  "halt",    0,    0,    GFX_ALL },
   { BRW_OPCODE_CALLA,    43,  "calla",   0,    0,    GFX_GE(GFX75) },
   { BRW_OPCODE_MSAVE,    44,  "msave",   0,    0,    GFX_LE(GFX5) },
   { BRW_OPCODE_CALL,     44,  "call",    0,    0,    GFX_GE(GFX6) },
   { BRW_OPCODE_MREST,    45,  "mrest",   0,    0,    GFX_LE(GFX5) },
   { BRW_OPCODE_RET,      45,  "ret",     0,    0,    GFX_GE(GFX6) },
   { BRW_OPCODE_PUSH,     46,  "push",    0,    0,    GFX_LE(GFX5) },
   { BRW_OPCODE_FORK,     46,  "fork",    0,    0,    GFX6 },
   { BRW_OPCODE_GOTO,     46,  "goto",    0,    0,    GFX_GE(GFX8) },
   { BRW_OPCODE_POP,      47,  "pop",     2,    0,    GFX_LE(GFX5) },
   { BRW_OPCODE_WAIT,     48,  "wait",    0,    1,    GFX_LT(GFX12) },
   { BRW_OPCODE_SEND,     49,  "send",    1,    1,    GFX_LT(GFX12) },
   { BRW_OPCODE_SENDC,    50,  "sendc",   1,    1,    GFX_LT(GFX12) },
   { BRW_OPCODE_SEND,     49,  "send",    2,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_SENDC,    50,  "sendc",   2,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_SENDS,    51,  "sends",   2,    1,    GFX_GE(GFX9) & GFX_LT(GFX12) },
   { BRW_OPCODE_SENDSC,   52,  "sendsc",  2,    1,    GFX_GE(GFX9) & GFX_LT(GFX12) },
   { BRW_OPCODE_MATH,     56,  "math",    2,    1,    GFX_GE(GFX6) },
   { BRW_OPCODE_ADD,      64,  "add",     2,    1,    GFX_ALL },
   { BRW_OPCODE_MUL,      65,  "mul",     2,    1,    GFX_ALL },
   { BRW_OPCODE_AVG,      66,  "avg",     2,    1,    GFX_ALL },
   { BRW_OPCODE_FRC,      67,  "frc",     1,    1,    GFX_ALL },
   { BRW_OPCODE_RNDU,     68,  "rndu",    1,    1,    GFX_ALL },
   { BRW_OPCODE_RNDD,     69,  "rndd",    1,    1,    GFX_ALL },
   { BRW_OPCODE_RNDE,     70,  "rnde",    1,    1,    GFX_ALL },
   { BRW_OPCODE_RNDZ,     71,  "rndz",    1,    1,    GFX_ALL },
   { BRW_OPCODE_MAC,      72,  "mac",     2,    1,    GFX_ALL },
   { BRW_OPCODE_MACH,     73,  "mach",    2,    1,    GFX_ALL },
   { BRW_OPCODE_LZD,      74,  "lzd",     1,    1,    GFX_ALL },
   { BRW_OPCODE_FBH,      75,  "fbh",     1,    1,    GFX_GE(GFX7) },
   { BRW_OPCODE_FBL,      76,  "fbl",     1,    1,    GFX_GE(GFX7) },
   { BRW_OPCODE_CBIT,     77,  "cbit",    1,    1,    GFX_GE(GFX7) },
   { BRW_OPCODE_ADDC,     78,  "addc",    2,    1,    GFX_GE(GFX7) },
   { BRW_OPCODE_SUBB,     79,  "subb",    2,    1,    GFX_GE(GFX7) },
   { BRW_OPCODE_SAD2,     80,  "sad2",    2,    1,    GFX_ALL },
   { BRW_OPCODE_SADA2,    81,  "sada2",   2,    1,    GFX_ALL },
   { BRW_OPCODE_ADD3,     82,  "add3",    3,    1,    GFX_GE(GFX125) },
   { BRW_OPCODE_DP4,      84,  "dp4",     2,    1,    GFX_LT(GFX11) },
   { BRW_OPCODE_DPH,      85,  "dph",     2,    1,    GFX_LT(GFX11) },
   { BRW_OPCODE_DP3,      86,  "dp3",     2,    1,    GFX_LT(GFX11) },
   { BRW_OPCODE_DP2,      87,  "dp2",     2,    1,    GFX_LT(GFX11) },
   { BRW_OPCODE_DP4A,     88,  "dp4a",    3,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_LINE,     89,  "line",    2,    1,    GFX_LE(GFX10) },
   { BRW_OPCODE_PLN,      90,  "pln",     2,    1,    GFX_GE(GFX45) & GFX_LE(GFX10) },
   { BRW_OPCODE_MAD,      91,  "mad",     3,    1,    GFX_GE(GFX6) },
   { BRW_OPCODE_LRP,      92,  "lrp",     3,    1,    GFX_GE(GFX6) & GFX_LE(GFX10) },
   { BRW_OPCODE_MADM,     93,  "madm",    3,    1,    GFX_GE(GFX8) },
   { BRW_OPCODE_NENOP,    125, "nenop",   0,    0,    GFX45 },
   { BRW_OPCODE_NOP,      126, "nop",     0,    0,    GFX_LT(GFX12) },
   { BRW_OPCODE_NOP,      96,  "nop",     0,    0,    GFX_GE(GFX12) }
};

/**
 * Look up the opcode_descs[] entry with \p key member matching \p k which is
 * supported by the device specified by \p devinfo, or NULL if there is no
 * matching entry.
 *
 * This is implemented by using an index data structure (storage for which is
 * provided by the caller as \p index_ver and \p index_descs) in order to
 * provide efficient constant-time look-up.
 */
static const opcode_desc *
lookup_opcode_desc(gfx_ver *index_ver,
                   const opcode_desc **index_descs,
                   unsigned index_size,
                   unsigned opcode_desc::*key,
                   const intel_device_info *devinfo,
                   unsigned k)
{
   if (*index_ver != gfx_ver_from_devinfo(devinfo)) {
      *index_ver = gfx_ver_from_devinfo(devinfo);

      for (unsigned l = 0; l < index_size; l++)
         index_descs[l] = NULL;

      for (unsigned i = 0; i < ARRAY_SIZE(opcode_descs); i++) {
         if (opcode_descs[i].gfx_vers & *index_ver) {
            const unsigned l = opcode_descs[i].*key;
            assert(l < index_size && !index_descs[l]);
            index_descs[l] = &opcode_descs[i];
         }
      }
   }

   if (k < index_size)
      return index_descs[k];
   else
      return NULL;
}

/**
 * Return the matching opcode_desc for the specified IR opcode and hardware
 * generation, or NULL if the opcode is not supported by the device.
 */
const struct opcode_desc *
brw_opcode_desc(const struct intel_device_info *devinfo, enum opcode opcode)
{
   static __thread gfx_ver index_ver = {};
   static __thread const opcode_desc *index_descs[NUM_BRW_OPCODES];
   return lookup_opcode_desc(&index_ver, index_descs, ARRAY_SIZE(index_descs),
                             &opcode_desc::ir, devinfo, opcode);
}

/**
 * Return the matching opcode_desc for the specified HW opcode and hardware
 * generation, or NULL if the opcode is not supported by the device.
 */
const struct opcode_desc *
brw_opcode_desc_from_hw(const struct intel_device_info *devinfo, unsigned hw)
{
   static __thread gfx_ver index_ver = {};
   static __thread const opcode_desc *index_descs[128];
   return lookup_opcode_desc(&index_ver, index_descs, ARRAY_SIZE(index_descs),
                             &opcode_desc::hw, devinfo, hw);
}
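
/* Illustrative example (editor's note, not part of the original source):
 * callers can use these lookups to query opcode availability and properties
 * for the current device, e.g.
 *
 *    const struct opcode_desc *desc = brw_opcode_desc(devinfo, BRW_OPCODE_ADD3);
 *    if (desc == NULL) {
 *       ... ADD3 is not available on this hardware generation ...
 *    } else {
 *       ... per the table above, desc->nsrc == 3 and desc->ndst == 1 ...
 *    }
 */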
766