1/*
2 * Copyright © 2015-2019 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24/** @file brw_eu_validate.c
25 *
26 * This file implements a pass that validates shader assembly.
27 *
28 * The restrictions implemented herein are intended to verify that instructions
29 * in shader assembly do not violate restrictions documented in the graphics
30 * programming reference manuals.
31 *
32 * The restrictions are difficult for humans to quickly verify due to their
33 * complexity and abundance.
34 *
35 * It is critical that this code is thoroughly unit tested because false
36 * results will lead developers astray, which is worse than having no validator
37 * at all. Functional changes to this file without corresponding unit tests (in
38 * test_eu_validate.cpp) will be rejected.
39 */
40
41#include <stdlib.h>
42#include "brw_eu.h"
43
/* We're going to do lots of string concatenation, so this should help. */
struct string {
   char *str;   /* heap-allocated, NUL-terminated buffer (NULL when empty) */
   size_t len;  /* length in bytes, excluding the NUL terminator */
};

/**
 * Appends \p src to \p dest, growing dest's buffer as needed.
 *
 * Uses a temporary for the realloc result so that dest->str is not lost
 * (leaked) if the allocation fails; on failure dest is left unchanged.
 */
static void
cat(struct string *dest, const struct string src)
{
   char *str = realloc(dest->str, dest->len + src.len + 1);
   if (str == NULL)
      return;
   memcpy(str + dest->len, src.str, src.len);
   str[dest->len + src.len] = '\0';
   dest->str = str;
   dest->len = dest->len + src.len;
}
#define CAT(dest, src) cat(&dest, (struct string){src, strlen(src)})
59
60static bool
61contains(const struct string haystack, const struct string needle)
62{
63   return haystack.str && memmem(haystack.str, haystack.len,
64                                 needle.str, needle.len) != NULL;
65}
66#define CONTAINS(haystack, needle) \
67   contains(haystack, (struct string){needle, strlen(needle)})
68
#define error(str)   "\tERROR: " str "\n"
#define ERROR_INDENT "\t       "

/* Append a diagnostic to error_msg, but only if that exact message is not
 * already present, so each distinct error is reported once per instruction.
 */
#define ERROR(msg) ERROR_IF(true, msg)
#define ERROR_IF(cond, msg)                             \
   do {                                                 \
      if ((cond) && !CONTAINS(error_msg, error(msg))) { \
         CAT(error_msg, error(msg));                    \
      }                                                 \
   } while(0)

/* Run a checker function and fold any message it produced into error_msg. */
#define CHECK(func, args...)                             \
   do {                                                  \
      struct string __msg = func(devinfo, inst, ##args); \
      if (__msg.str) {                                   \
         cat(&error_msg, __msg);                         \
         free(__msg.str);                                \
      }                                                  \
   } while (0)

/* Decode the region encodings: stride 0 stays 0, otherwise 2^(n-1); width
 * is stored as log2.  Every use of the argument is parenthesized so that
 * expression arguments (e.g. STRIDE(a & b)) parse correctly.
 */
#define STRIDE(stride) ((stride) != 0 ? 1 << ((stride) - 1) : 0)
#define WIDTH(width)   (1 << (width))
91
92static bool
93inst_is_send(const struct intel_device_info *devinfo, const brw_inst *inst)
94{
95   switch (brw_inst_opcode(devinfo, inst)) {
96   case BRW_OPCODE_SEND:
97   case BRW_OPCODE_SENDC:
98   case BRW_OPCODE_SENDS:
99   case BRW_OPCODE_SENDSC:
100      return true;
101   default:
102      return false;
103   }
104}
105
106static bool
107inst_is_split_send(const struct intel_device_info *devinfo,
108                   const brw_inst *inst)
109{
110   if (devinfo->ver >= 12) {
111      return inst_is_send(devinfo, inst);
112   } else {
113      switch (brw_inst_opcode(devinfo, inst)) {
114      case BRW_OPCODE_SENDS:
115      case BRW_OPCODE_SENDSC:
116         return true;
117      default:
118         return false;
119      }
120   }
121}
122
123static unsigned
124signed_type(unsigned type)
125{
126   switch (type) {
127   case BRW_REGISTER_TYPE_UD: return BRW_REGISTER_TYPE_D;
128   case BRW_REGISTER_TYPE_UW: return BRW_REGISTER_TYPE_W;
129   case BRW_REGISTER_TYPE_UB: return BRW_REGISTER_TYPE_B;
130   case BRW_REGISTER_TYPE_UQ: return BRW_REGISTER_TYPE_Q;
131   default:                   return type;
132   }
133}
134
135static enum brw_reg_type
136inst_dst_type(const struct intel_device_info *devinfo, const brw_inst *inst)
137{
138   return (devinfo->ver < 12 || !inst_is_send(devinfo, inst)) ?
139      brw_inst_dst_type(devinfo, inst) : BRW_REGISTER_TYPE_D;
140}
141
142static bool
143inst_is_raw_move(const struct intel_device_info *devinfo, const brw_inst *inst)
144{
145   unsigned dst_type = signed_type(inst_dst_type(devinfo, inst));
146   unsigned src_type = signed_type(brw_inst_src0_type(devinfo, inst));
147
148   if (brw_inst_src0_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE) {
149      /* FIXME: not strictly true */
150      if (brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_VF ||
151          brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_UV ||
152          brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_V) {
153         return false;
154      }
155   } else if (brw_inst_src0_negate(devinfo, inst) ||
156              brw_inst_src0_abs(devinfo, inst)) {
157      return false;
158   }
159
160   return brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MOV &&
161          brw_inst_saturate(devinfo, inst) == 0 &&
162          dst_type == src_type;
163}
164
165static bool
166dst_is_null(const struct intel_device_info *devinfo, const brw_inst *inst)
167{
168   return brw_inst_dst_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
169          brw_inst_dst_da_reg_nr(devinfo, inst) == BRW_ARF_NULL;
170}
171
172static bool
173src0_is_null(const struct intel_device_info *devinfo, const brw_inst *inst)
174{
175   return brw_inst_src0_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT &&
176          brw_inst_src0_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
177          brw_inst_src0_da_reg_nr(devinfo, inst) == BRW_ARF_NULL;
178}
179
180static bool
181src1_is_null(const struct intel_device_info *devinfo, const brw_inst *inst)
182{
183   return brw_inst_src1_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
184          brw_inst_src1_da_reg_nr(devinfo, inst) == BRW_ARF_NULL;
185}
186
187static bool
188src0_is_acc(const struct intel_device_info *devinfo, const brw_inst *inst)
189{
190   return brw_inst_src0_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
191          (brw_inst_src0_da_reg_nr(devinfo, inst) & 0xF0) == BRW_ARF_ACCUMULATOR;
192}
193
194static bool
195src1_is_acc(const struct intel_device_info *devinfo, const brw_inst *inst)
196{
197   return brw_inst_src1_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
198          (brw_inst_src1_da_reg_nr(devinfo, inst) & 0xF0) == BRW_ARF_ACCUMULATOR;
199}
200
201static bool
202src0_has_scalar_region(const struct intel_device_info *devinfo,
203                       const brw_inst *inst)
204{
205   return brw_inst_src0_vstride(devinfo, inst) == BRW_VERTICAL_STRIDE_0 &&
206          brw_inst_src0_width(devinfo, inst) == BRW_WIDTH_1 &&
207          brw_inst_src0_hstride(devinfo, inst) == BRW_HORIZONTAL_STRIDE_0;
208}
209
210static bool
211src1_has_scalar_region(const struct intel_device_info *devinfo,
212                       const brw_inst *inst)
213{
214   return brw_inst_src1_vstride(devinfo, inst) == BRW_VERTICAL_STRIDE_0 &&
215          brw_inst_src1_width(devinfo, inst) == BRW_WIDTH_1 &&
216          brw_inst_src1_hstride(devinfo, inst) == BRW_HORIZONTAL_STRIDE_0;
217}
218
/**
 * Returns the number of source operands for \p inst.
 *
 * Most opcodes take the count straight from the opcode descriptor table.
 * MATH depends on which math function is selected, and pre-Gfx6 SEND is
 * special-cased for the implicit GRF-to-MRF move.
 */
static unsigned
num_sources_from_inst(const struct intel_device_info *devinfo,
                      const brw_inst *inst)
{
   /* NOTE(review): assumes brw_opcode_desc() returns non-NULL for every
    * opcode that reaches the else branch below, where desc is dereferenced
    * without a check — confirm invalid opcodes are rejected before this.
    */
   const struct opcode_desc *desc =
      brw_opcode_desc(devinfo, brw_inst_opcode(devinfo, inst));
   unsigned math_function;

   if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MATH) {
      math_function = brw_inst_math_function(devinfo, inst);
   } else if (devinfo->ver < 6 &&
              brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND) {
      if (brw_inst_sfid(devinfo, inst) == BRW_SFID_MATH) {
         /* src1 must be a descriptor (including the information to determine
          * that the SEND is doing an extended math operation), but src0 can
          * actually be null since it serves as the source of the implicit GRF
          * to MRF move.
          *
          * If we stop using that functionality, we'll have to revisit this.
          */
         return 2;
      } else {
         /* Send instructions are allowed to have null sources since they use
          * the base_mrf field to specify which message register source.
          */
         return 0;
      }
   } else {
      assert(desc->nsrc < 4);
      return desc->nsrc;
   }

   /* Map the MATH function to its one- or two-source form. */
   switch (math_function) {
   case BRW_MATH_FUNCTION_INV:
   case BRW_MATH_FUNCTION_LOG:
   case BRW_MATH_FUNCTION_EXP:
   case BRW_MATH_FUNCTION_SQRT:
   case BRW_MATH_FUNCTION_RSQ:
   case BRW_MATH_FUNCTION_SIN:
   case BRW_MATH_FUNCTION_COS:
   case BRW_MATH_FUNCTION_SINCOS:
   case GFX8_MATH_FUNCTION_INVM:
   case GFX8_MATH_FUNCTION_RSQRTM:
      return 1;
   case BRW_MATH_FUNCTION_FDIV:
   case BRW_MATH_FUNCTION_POW:
   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT:
   case BRW_MATH_FUNCTION_INT_DIV_REMAINDER:
      return 2;
   default:
      unreachable("not reached");
   }
}
273
/**
 * Checks for invalid field encodings: execution size, register files, and
 * register types.  Returns early once a bad register file is found, since
 * the type checks that follow would not be meaningful.
 */
static struct string
invalid_values(const struct intel_device_info *devinfo, const brw_inst *inst)
{
   unsigned num_sources = num_sources_from_inst(devinfo, inst);
   struct string error_msg = { .str = NULL, .len = 0 };

   /* Only the power-of-two execution sizes 1..32 are valid encodings. */
   switch ((enum brw_execution_size) brw_inst_exec_size(devinfo, inst)) {
   case BRW_EXECUTE_1:
   case BRW_EXECUTE_2:
   case BRW_EXECUTE_4:
   case BRW_EXECUTE_8:
   case BRW_EXECUTE_16:
   case BRW_EXECUTE_32:
      break;
   default:
      ERROR("invalid execution size");
      break;
   }

   /* The remaining checks do not apply to sends. */
   if (inst_is_send(devinfo, inst))
      return error_msg;

   if (num_sources == 3) {
      /* Nothing to test:
       *    No 3-src instructions on Gfx4-5
       *    No reg file bits on Gfx6-10 (align16)
       *    No invalid encodings on Gfx10-12 (align1)
       */
   } else {
      if (devinfo->ver > 6) {
         /* The MRF register file is not a valid encoding on Gfx7+. */
         ERROR_IF(brw_inst_dst_reg_file(devinfo, inst) == MRF ||
                  (num_sources > 0 &&
                   brw_inst_src0_reg_file(devinfo, inst) == MRF) ||
                  (num_sources > 1 &&
                   brw_inst_src1_reg_file(devinfo, inst) == MRF),
                  "invalid register file encoding");
      }
   }

   /* Don't attempt the type checks when the register file is already bad. */
   if (error_msg.str)
      return error_msg;

   if (num_sources == 3) {
      if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
         if (devinfo->ver >= 10) {
            ERROR_IF(brw_inst_3src_a1_dst_type (devinfo, inst) == INVALID_REG_TYPE ||
                     brw_inst_3src_a1_src0_type(devinfo, inst) == INVALID_REG_TYPE ||
                     brw_inst_3src_a1_src1_type(devinfo, inst) == INVALID_REG_TYPE ||
                     brw_inst_3src_a1_src2_type(devinfo, inst) == INVALID_REG_TYPE,
                     "invalid register type encoding");
         } else {
            ERROR("Align1 mode not allowed on Gen < 10");
         }
      } else {
         ERROR_IF(brw_inst_3src_a16_dst_type(devinfo, inst) == INVALID_REG_TYPE ||
                  brw_inst_3src_a16_src_type(devinfo, inst) == INVALID_REG_TYPE,
                  "invalid register type encoding");
      }
   } else {
      ERROR_IF(brw_inst_dst_type (devinfo, inst) == INVALID_REG_TYPE ||
               (num_sources > 0 &&
                brw_inst_src0_type(devinfo, inst) == INVALID_REG_TYPE) ||
               (num_sources > 1 &&
                brw_inst_src1_type(devinfo, inst) == INVALID_REG_TYPE),
               "invalid register type encoding");
   }

   return error_msg;
}
343
344static struct string
345sources_not_null(const struct intel_device_info *devinfo,
346                 const brw_inst *inst)
347{
348   unsigned num_sources = num_sources_from_inst(devinfo, inst);
349   struct string error_msg = { .str = NULL, .len = 0 };
350
351   /* Nothing to test. 3-src instructions can only have GRF sources, and
352    * there's no bit to control the file.
353    */
354   if (num_sources == 3)
355      return (struct string){};
356
357   /* Nothing to test.  Split sends can only encode a file in sources that are
358    * allowed to be NULL.
359    */
360   if (inst_is_split_send(devinfo, inst))
361      return (struct string){};
362
363   if (num_sources >= 1 && brw_inst_opcode(devinfo, inst) != BRW_OPCODE_SYNC)
364      ERROR_IF(src0_is_null(devinfo, inst), "src0 is null");
365
366   if (num_sources == 2)
367      ERROR_IF(src1_is_null(devinfo, inst), "src1 is null");
368
369   return error_msg;
370}
371
372static struct string
373alignment_supported(const struct intel_device_info *devinfo,
374                    const brw_inst *inst)
375{
376   struct string error_msg = { .str = NULL, .len = 0 };
377
378   ERROR_IF(devinfo->ver >= 11 && brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16,
379            "Align16 not supported");
380
381   return error_msg;
382}
383
384static bool
385inst_uses_src_acc(const struct intel_device_info *devinfo, const brw_inst *inst)
386{
387   /* Check instructions that use implicit accumulator sources */
388   switch (brw_inst_opcode(devinfo, inst)) {
389   case BRW_OPCODE_MAC:
390   case BRW_OPCODE_MACH:
391   case BRW_OPCODE_SADA2:
392      return true;
393   default:
394      break;
395   }
396
397   /* FIXME: support 3-src instructions */
398   unsigned num_sources = num_sources_from_inst(devinfo, inst);
399   assert(num_sources < 3);
400
401   return src0_is_acc(devinfo, inst) || (num_sources > 1 && src1_is_acc(devinfo, inst));
402}
403
/**
 * Checks SEND-specific restrictions: allowed source register files, payload
 * overlap for split sends, and the g112-g127 requirement for EOT payloads.
 */
static struct string
send_restrictions(const struct intel_device_info *devinfo,
                  const brw_inst *inst)
{
   struct string error_msg = { .str = NULL, .len = 0 };

   if (inst_is_split_send(devinfo, inst)) {
      /* src1 may only be a GRF or the null register. */
      ERROR_IF(brw_inst_send_src1_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
               brw_inst_send_src1_reg_nr(devinfo, inst) != BRW_ARF_NULL,
               "src1 of split send must be a GRF or NULL");

      /* End-of-thread payloads must live in g112-g127. */
      ERROR_IF(brw_inst_eot(devinfo, inst) &&
               brw_inst_src0_da_reg_nr(devinfo, inst) < 112,
               "send with EOT must use g112-g127");
      ERROR_IF(brw_inst_eot(devinfo, inst) &&
               brw_inst_send_src1_reg_file(devinfo, inst) == BRW_GENERAL_REGISTER_FILE &&
               brw_inst_send_src1_reg_nr(devinfo, inst) < 112,
               "send with EOT must use g112-g127");

      if (brw_inst_send_src1_reg_file(devinfo, inst) == BRW_GENERAL_REGISTER_FILE) {
         /* Assume minimums if we don't know */
         unsigned mlen = 1;
         if (!brw_inst_send_sel_reg32_desc(devinfo, inst)) {
            const uint32_t desc = brw_inst_send_desc(devinfo, inst);
            mlen = brw_message_desc_mlen(devinfo, desc);
         }

         unsigned ex_mlen = 1;
         if (!brw_inst_send_sel_reg32_ex_desc(devinfo, inst)) {
            const uint32_t ex_desc = brw_inst_sends_ex_desc(devinfo, inst);
            ex_mlen = brw_message_ex_desc_ex_mlen(devinfo, ex_desc);
         }
         /* The payload ranges [src0, src0+mlen) and [src1, src1+ex_mlen)
          * must not intersect.
          */
         const unsigned src0_reg_nr = brw_inst_src0_da_reg_nr(devinfo, inst);
         const unsigned src1_reg_nr = brw_inst_send_src1_reg_nr(devinfo, inst);
         ERROR_IF((src0_reg_nr <= src1_reg_nr &&
                   src1_reg_nr < src0_reg_nr + mlen) ||
                  (src1_reg_nr <= src0_reg_nr &&
                   src0_reg_nr < src1_reg_nr + ex_mlen),
                   "split send payloads must not overlap");
      }
   } else if (inst_is_send(devinfo, inst)) {
      ERROR_IF(brw_inst_src0_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT,
               "send must use direct addressing");

      if (devinfo->ver >= 7) {
         ERROR_IF(brw_inst_send_src0_reg_file(devinfo, inst) != BRW_GENERAL_REGISTER_FILE,
                  "send from non-GRF");
         ERROR_IF(brw_inst_eot(devinfo, inst) &&
                  brw_inst_src0_da_reg_nr(devinfo, inst) < 112,
                  "send with EOT must use g112-g127");
      }

      if (devinfo->ver >= 8) {
         /* Flag return payloads that would extend past r127 while the source
          * payload overlaps the destination.
          */
         ERROR_IF(!dst_is_null(devinfo, inst) &&
                  (brw_inst_dst_da_reg_nr(devinfo, inst) +
                   brw_inst_rlen(devinfo, inst) > 127) &&
                  (brw_inst_src0_da_reg_nr(devinfo, inst) +
                   brw_inst_mlen(devinfo, inst) >
                   brw_inst_dst_da_reg_nr(devinfo, inst)),
                  "r127 must not be used for return address when there is "
                  "a src and dest overlap");
      }
   }

   return error_msg;
}
470
471static bool
472is_unsupported_inst(const struct intel_device_info *devinfo,
473                    const brw_inst *inst)
474{
475   return brw_inst_opcode(devinfo, inst) == BRW_OPCODE_ILLEGAL;
476}
477
478/**
479 * Returns whether a combination of two types would qualify as mixed float
480 * operation mode
481 */
482static inline bool
483types_are_mixed_float(enum brw_reg_type t0, enum brw_reg_type t1)
484{
485   return (t0 == BRW_REGISTER_TYPE_F && t1 == BRW_REGISTER_TYPE_HF) ||
486          (t1 == BRW_REGISTER_TYPE_F && t0 == BRW_REGISTER_TYPE_HF);
487}
488
/**
 * Returns the type that takes part in execution for an operand of the given
 * register type: float types map to themselves (VF executes as F), and each
 * integer type collapses onto the signed type of its execution width.
 */
static enum brw_reg_type
execution_type_for_type(enum brw_reg_type type)
{
   switch (type) {
   case BRW_REGISTER_TYPE_NF:
   case BRW_REGISTER_TYPE_DF:
   case BRW_REGISTER_TYPE_F:
   case BRW_REGISTER_TYPE_HF:
      return type;

   case BRW_REGISTER_TYPE_VF:
      return BRW_REGISTER_TYPE_F;

   case BRW_REGISTER_TYPE_Q:
   case BRW_REGISTER_TYPE_UQ:
      return BRW_REGISTER_TYPE_Q;

   case BRW_REGISTER_TYPE_D:
   case BRW_REGISTER_TYPE_UD:
      return BRW_REGISTER_TYPE_D;

   /* Sub-word types (including the packed vector immediates) execute as W. */
   case BRW_REGISTER_TYPE_W:
   case BRW_REGISTER_TYPE_UW:
   case BRW_REGISTER_TYPE_B:
   case BRW_REGISTER_TYPE_UB:
   case BRW_REGISTER_TYPE_V:
   case BRW_REGISTER_TYPE_UV:
      return BRW_REGISTER_TYPE_W;
   }
   /* The switch above is intentionally exhaustive (no default) so the
    * compiler warns when a new register type is added.
    */
   unreachable("not reached");
}
520
/**
 * Returns the execution type of an instruction \p inst
 *
 * Derived from the source execution types; when the sources disagree, the
 * checks below pick the common type in a fixed priority order.  The
 * destination type only participates for mixed F/HF instructions.
 */
static enum brw_reg_type
execution_type(const struct intel_device_info *devinfo, const brw_inst *inst)
{
   unsigned num_sources = num_sources_from_inst(devinfo, inst);
   enum brw_reg_type src0_exec_type, src1_exec_type;

   /* Execution data type is independent of destination data type, except in
    * mixed F/HF instructions.
    */
   enum brw_reg_type dst_exec_type = inst_dst_type(devinfo, inst);

   src0_exec_type = execution_type_for_type(brw_inst_src0_type(devinfo, inst));
   if (num_sources == 1) {
      /* A lone HF source defers to the destination type (mixed-float case). */
      if (src0_exec_type == BRW_REGISTER_TYPE_HF)
         return dst_exec_type;
      return src0_exec_type;
   }

   src1_exec_type = execution_type_for_type(brw_inst_src1_type(devinfo, inst));
   /* Any mixed F/HF combination executes as full float. */
   if (types_are_mixed_float(src0_exec_type, src1_exec_type) ||
       types_are_mixed_float(src0_exec_type, dst_exec_type) ||
       types_are_mixed_float(src1_exec_type, dst_exec_type)) {
      return BRW_REGISTER_TYPE_F;
   }

   if (src0_exec_type == src1_exec_type)
      return src0_exec_type;

   if (src0_exec_type == BRW_REGISTER_TYPE_NF ||
       src1_exec_type == BRW_REGISTER_TYPE_NF)
      return BRW_REGISTER_TYPE_NF;

   /* Mixed operand types where one is float is float on Gen < 6
    * (and not allowed on later platforms)
    */
   if (devinfo->ver < 6 &&
       (src0_exec_type == BRW_REGISTER_TYPE_F ||
        src1_exec_type == BRW_REGISTER_TYPE_F))
      return BRW_REGISTER_TYPE_F;

   /* Otherwise the wider of the two integer types wins, then DF. */
   if (src0_exec_type == BRW_REGISTER_TYPE_Q ||
       src1_exec_type == BRW_REGISTER_TYPE_Q)
      return BRW_REGISTER_TYPE_Q;

   if (src0_exec_type == BRW_REGISTER_TYPE_D ||
       src1_exec_type == BRW_REGISTER_TYPE_D)
      return BRW_REGISTER_TYPE_D;

   if (src0_exec_type == BRW_REGISTER_TYPE_W ||
       src1_exec_type == BRW_REGISTER_TYPE_W)
      return BRW_REGISTER_TYPE_W;

   if (src0_exec_type == BRW_REGISTER_TYPE_DF ||
       src1_exec_type == BRW_REGISTER_TYPE_DF)
      return BRW_REGISTER_TYPE_DF;

   unreachable("not reached");
}
582
/**
 * Returns whether a region is packed
 *
 * A region is packed if its elements are adjacent in memory, with no
 * intervening space, no overlap, and no replicated values.  This requires
 * vstride == width, with hstride == 1 (or hstride == 0 when width is 1).
 */
static bool
is_packed(unsigned vstride, unsigned width, unsigned hstride)
{
   if (vstride != width)
      return false;

   return vstride == 1 ? hstride == 0 : hstride == 1;
}
602
603/**
604 * Returns whether an instruction is an explicit or implicit conversion
605 * to/from half-float.
606 */
607static bool
608is_half_float_conversion(const struct intel_device_info *devinfo,
609                         const brw_inst *inst)
610{
611   enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);
612
613   unsigned num_sources = num_sources_from_inst(devinfo, inst);
614   enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
615
616   if (dst_type != src0_type &&
617       (dst_type == BRW_REGISTER_TYPE_HF || src0_type == BRW_REGISTER_TYPE_HF)) {
618      return true;
619   } else if (num_sources > 1) {
620      enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst);
621      return dst_type != src1_type &&
622            (dst_type == BRW_REGISTER_TYPE_HF ||
623             src1_type == BRW_REGISTER_TYPE_HF);
624   }
625
626   return false;
627}
628
629/*
630 * Returns whether an instruction is using mixed float operation mode
631 */
632static bool
633is_mixed_float(const struct intel_device_info *devinfo, const brw_inst *inst)
634{
635   if (devinfo->ver < 8)
636      return false;
637
638   if (inst_is_send(devinfo, inst))
639      return false;
640
641   unsigned opcode = brw_inst_opcode(devinfo, inst);
642   const struct opcode_desc *desc = brw_opcode_desc(devinfo, opcode);
643   if (desc->ndst == 0)
644      return false;
645
646   /* FIXME: support 3-src instructions */
647   unsigned num_sources = num_sources_from_inst(devinfo, inst);
648   assert(num_sources < 3);
649
650   enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);
651   enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
652
653   if (num_sources == 1)
654      return types_are_mixed_float(src0_type, dst_type);
655
656   enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst);
657
658   return types_are_mixed_float(src0_type, src1_type) ||
659          types_are_mixed_float(src0_type, dst_type) ||
660          types_are_mixed_float(src1_type, dst_type);
661}
662
663/**
664 * Returns whether an instruction is an explicit or implicit conversion
665 * to/from byte.
666 */
667static bool
668is_byte_conversion(const struct intel_device_info *devinfo,
669                   const brw_inst *inst)
670{
671   enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);
672
673   unsigned num_sources = num_sources_from_inst(devinfo, inst);
674   enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
675
676   if (dst_type != src0_type &&
677       (type_sz(dst_type) == 1 || type_sz(src0_type) == 1)) {
678      return true;
679   } else if (num_sources > 1) {
680      enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst);
681      return dst_type != src1_type &&
682            (type_sz(dst_type) == 1 || type_sz(src1_type) == 1);
683   }
684
685   return false;
686}
687
688/**
689 * Checks restrictions listed in "General Restrictions Based on Operand Types"
690 * in the "Register Region Restrictions" section.
691 */
692static struct string
693general_restrictions_based_on_operand_types(const struct intel_device_info *devinfo,
694                                            const brw_inst *inst)
695{
696   const struct opcode_desc *desc =
697      brw_opcode_desc(devinfo, brw_inst_opcode(devinfo, inst));
698   unsigned num_sources = num_sources_from_inst(devinfo, inst);
699   unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst);
700   struct string error_msg = { .str = NULL, .len = 0 };
701
702   if (inst_is_send(devinfo, inst))
703      return error_msg;
704
705   if (devinfo->ver >= 11) {
706      if (num_sources == 3) {
707         ERROR_IF(brw_reg_type_to_size(brw_inst_3src_a1_src1_type(devinfo, inst)) == 1 ||
708                  brw_reg_type_to_size(brw_inst_3src_a1_src2_type(devinfo, inst)) == 1,
709                  "Byte data type is not supported for src1/2 register regioning. This includes "
710                  "byte broadcast as well.");
711      }
712      if (num_sources == 2) {
713         ERROR_IF(brw_reg_type_to_size(brw_inst_src1_type(devinfo, inst)) == 1,
714                  "Byte data type is not supported for src1 register regioning. This includes "
715                  "byte broadcast as well.");
716      }
717   }
718
719   if (num_sources == 3)
720      return error_msg;
721
722   if (exec_size == 1)
723      return error_msg;
724
725   if (desc->ndst == 0)
726      return error_msg;
727
728   /* The PRMs say:
729    *
730    *    Where n is the largest element size in bytes for any source or
731    *    destination operand type, ExecSize * n must be <= 64.
732    *
733    * But we do not attempt to enforce it, because it is implied by other
734    * rules:
735    *
736    *    - that the destination stride must match the execution data type
737    *    - sources may not span more than two adjacent GRF registers
738    *    - destination may not span more than two adjacent GRF registers
739    *
740    * In fact, checking it would weaken testing of the other rules.
741    */
742
743   unsigned dst_stride = STRIDE(brw_inst_dst_hstride(devinfo, inst));
744   enum brw_reg_type dst_type = inst_dst_type(devinfo, inst);
745   bool dst_type_is_byte =
746      inst_dst_type(devinfo, inst) == BRW_REGISTER_TYPE_B ||
747      inst_dst_type(devinfo, inst) == BRW_REGISTER_TYPE_UB;
748
749   if (dst_type_is_byte) {
750      if (is_packed(exec_size * dst_stride, exec_size, dst_stride)) {
751         if (!inst_is_raw_move(devinfo, inst))
752            ERROR("Only raw MOV supports a packed-byte destination");
753         return error_msg;
754      }
755   }
756
757   unsigned exec_type = execution_type(devinfo, inst);
758   unsigned exec_type_size = brw_reg_type_to_size(exec_type);
759   unsigned dst_type_size = brw_reg_type_to_size(dst_type);
760
761   /* On IVB/BYT, region parameters and execution size for DF are in terms of
762    * 32-bit elements, so they are doubled. For evaluating the validity of an
763    * instruction, we halve them.
764    */
765   if (devinfo->verx10 == 70 &&
766       exec_type_size == 8 && dst_type_size == 4)
767      dst_type_size = 8;
768
769   if (is_byte_conversion(devinfo, inst)) {
770      /* From the BDW+ PRM, Volume 2a, Command Reference, Instructions - MOV:
771       *
772       *    "There is no direct conversion from B/UB to DF or DF to B/UB.
773       *     There is no direct conversion from B/UB to Q/UQ or Q/UQ to B/UB."
774       *
775       * Even if these restrictions are listed for the MOV instruction, we
776       * validate this more generally, since there is the possibility
777       * of implicit conversions from other instructions.
778       */
779      enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
780      enum brw_reg_type src1_type = num_sources > 1 ?
781                                    brw_inst_src1_type(devinfo, inst) : 0;
782
783      ERROR_IF(type_sz(dst_type) == 1 &&
784               (type_sz(src0_type) == 8 ||
785                (num_sources > 1 && type_sz(src1_type) == 8)),
786               "There are no direct conversions between 64-bit types and B/UB");
787
788      ERROR_IF(type_sz(dst_type) == 8 &&
789               (type_sz(src0_type) == 1 ||
790                (num_sources > 1 && type_sz(src1_type) == 1)),
791               "There are no direct conversions between 64-bit types and B/UB");
792   }
793
794   if (is_half_float_conversion(devinfo, inst)) {
795      /**
796       * A helper to validate used in the validation of the following restriction
797       * from the BDW+ PRM, Volume 2a, Command Reference, Instructions - MOV:
798       *
799       *    "There is no direct conversion from HF to DF or DF to HF.
800       *     There is no direct conversion from HF to Q/UQ or Q/UQ to HF."
801       *
802       * Even if these restrictions are listed for the MOV instruction, we
803       * validate this more generally, since there is the possibility
804       * of implicit conversions from other instructions, such us implicit
805       * conversion from integer to HF with the ADD instruction in SKL+.
806       */
807      enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
808      enum brw_reg_type src1_type = num_sources > 1 ?
809                                    brw_inst_src1_type(devinfo, inst) : 0;
810      ERROR_IF(dst_type == BRW_REGISTER_TYPE_HF &&
811               (type_sz(src0_type) == 8 ||
812                (num_sources > 1 && type_sz(src1_type) == 8)),
813               "There are no direct conversions between 64-bit types and HF");
814
815      ERROR_IF(type_sz(dst_type) == 8 &&
816               (src0_type == BRW_REGISTER_TYPE_HF ||
817                (num_sources > 1 && src1_type == BRW_REGISTER_TYPE_HF)),
818               "There are no direct conversions between 64-bit types and HF");
819
820      /* From the BDW+ PRM:
821       *
822       *   "Conversion between Integer and HF (Half Float) must be
823       *    DWord-aligned and strided by a DWord on the destination."
824       *
825       * Also, the above restrictions seems to be expanded on CHV and SKL+ by:
826       *
827       *   "There is a relaxed alignment rule for word destinations. When
828       *    the destination type is word (UW, W, HF), destination data types
829       *    can be aligned to either the lowest word or the second lowest
830       *    word of the execution channel. This means the destination data
831       *    words can be either all in the even word locations or all in the
832       *    odd word locations."
833       *
834       * We do not implement the second rule as is though, since empirical
835       * testing shows inconsistencies:
836       *   - It suggests that packed 16-bit is not allowed, which is not true.
837       *   - It suggests that conversions from Q/DF to W (which need to be
838       *     64-bit aligned on the destination) are not possible, which is
839       *     not true.
840       *
841       * So from this rule we only validate the implication that conversions
842       * from F to HF need to be DWord strided (except in Align1 mixed
843       * float mode where packed fp16 destination is allowed so long as the
844       * destination is oword-aligned).
845       *
846       * Finally, we only validate this for Align1 because Align16 always
847       * requires packed destinations, so these restrictions can't possibly
848       * apply to Align16 mode.
849       */
850      if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
851         if ((dst_type == BRW_REGISTER_TYPE_HF &&
852              (brw_reg_type_is_integer(src0_type) ||
853               (num_sources > 1 && brw_reg_type_is_integer(src1_type)))) ||
854             (brw_reg_type_is_integer(dst_type) &&
855              (src0_type == BRW_REGISTER_TYPE_HF ||
856               (num_sources > 1 && src1_type == BRW_REGISTER_TYPE_HF)))) {
857            ERROR_IF(dst_stride * dst_type_size != 4,
858                     "Conversions between integer and half-float must be "
859                     "strided by a DWord on the destination");
860
861            unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
862            ERROR_IF(subreg % 4 != 0,
863                     "Conversions between integer and half-float must be "
864                     "aligned to a DWord on the destination");
865         } else if ((devinfo->is_cherryview || devinfo->ver >= 9) &&
866                    dst_type == BRW_REGISTER_TYPE_HF) {
867            unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
868            ERROR_IF(dst_stride != 2 &&
869                     !(is_mixed_float(devinfo, inst) &&
870                       dst_stride == 1 && subreg % 16 == 0),
871                     "Conversions to HF must have either all words in even "
872                     "word locations or all words in odd word locations or "
873                     "be mixed-float with Oword-aligned packed destination");
874         }
875      }
876   }
877
878   /* There are special regioning rules for mixed-float mode in CHV and SKL that
879    * override the general rule for the ratio of sizes of the destination type
880    * and the execution type. We will add validation for those in a later patch.
881    */
882   bool validate_dst_size_and_exec_size_ratio =
883      !is_mixed_float(devinfo, inst) ||
884      !(devinfo->is_cherryview || devinfo->ver >= 9);
885
886   if (validate_dst_size_and_exec_size_ratio &&
887       exec_type_size > dst_type_size) {
888      if (!(dst_type_is_byte && inst_is_raw_move(devinfo, inst))) {
889         ERROR_IF(dst_stride * dst_type_size != exec_type_size,
890                  "Destination stride must be equal to the ratio of the sizes "
891                  "of the execution data type to the destination type");
892      }
893
894      unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
895
896      if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1 &&
897          brw_inst_dst_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT) {
898         /* The i965 PRM says:
899          *
900          *    Implementation Restriction: The relaxed alignment rule for byte
901          *    destination (#10.5) is not supported.
902          */
903         if ((devinfo->ver > 4 || devinfo->is_g4x) && dst_type_is_byte) {
904            ERROR_IF(subreg % exec_type_size != 0 &&
905                     subreg % exec_type_size != 1,
906                     "Destination subreg must be aligned to the size of the "
907                     "execution data type (or to the next lowest byte for byte "
908                     "destinations)");
909         } else {
910            ERROR_IF(subreg % exec_type_size != 0,
911                     "Destination subreg must be aligned to the size of the "
912                     "execution data type");
913         }
914      }
915   }
916
917   return error_msg;
918}
919
920/**
921 * Checks restrictions listed in "General Restrictions on Regioning Parameters"
922 * in the "Register Region Restrictions" section.
923 */
static struct string
general_restrictions_on_region_parameters(const struct intel_device_info *devinfo,
                                          const brw_inst *inst)
{
   const struct opcode_desc *desc =
      brw_opcode_desc(devinfo, brw_inst_opcode(devinfo, inst));
   unsigned num_sources = num_sources_from_inst(devinfo, inst);
   unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst);
   struct string error_msg = { .str = NULL, .len = 0 };

   /* 3-src instructions use a different regioning encoding that is not
    * covered by these rules, so there is nothing to check here.
    */
   if (num_sources == 3)
      return (struct string){};

   /* Split sends don't have the bits in the instruction to encode regions so
    * there's nothing to check.
    */
   if (inst_is_split_send(devinfo, inst))
      return (struct string){};

   if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16) {
      /* Align16 has no horizontal-stride choice on the destination: it must
       * be 1. Sources are restricted to a small set of vertical strides
       * (HSW and later additionally allow 2); immediates have no region.
       */
      if (desc->ndst != 0 && !dst_is_null(devinfo, inst))
         ERROR_IF(brw_inst_dst_hstride(devinfo, inst) != BRW_HORIZONTAL_STRIDE_1,
                  "Destination Horizontal Stride must be 1");

      if (num_sources >= 1) {
         if (devinfo->verx10 >= 75) {
            ERROR_IF(brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE &&
                     brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 &&
                     brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_2 &&
                     brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
                     "In Align16 mode, only VertStride of 0, 2, or 4 is allowed");
         } else {
            ERROR_IF(brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE &&
                     brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 &&
                     brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
                     "In Align16 mode, only VertStride of 0 or 4 is allowed");
         }
      }

      if (num_sources == 2) {
         if (devinfo->verx10 >= 75) {
            ERROR_IF(brw_inst_src1_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE &&
                     brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 &&
                     brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_2 &&
                     brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
                     "In Align16 mode, only VertStride of 0, 2, or 4 is allowed");
         } else {
            ERROR_IF(brw_inst_src1_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE &&
                     brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 &&
                     brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
                     "In Align16 mode, only VertStride of 0 or 4 is allowed");
         }
      }

      return error_msg;
   }

   /* Align1 mode: validate the full <vstride; width, hstride> region of each
    * source against the general regioning rules. Note that DO_SRC 'continue's
    * past immediate sources, which carry no region.
    */
   for (unsigned i = 0; i < num_sources; i++) {
      unsigned vstride, width, hstride, element_size, subreg;
      enum brw_reg_type type;

#define DO_SRC(n)                                                              \
      if (brw_inst_src ## n ## _reg_file(devinfo, inst) ==                     \
          BRW_IMMEDIATE_VALUE)                                                 \
         continue;                                                             \
                                                                               \
      vstride = STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst));          \
      width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst));               \
      hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst));          \
      type = brw_inst_src ## n ## _type(devinfo, inst);                        \
      element_size = brw_reg_type_to_size(type);                               \
      subreg = brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst)

      if (i == 0) {
         DO_SRC(0);
      } else {
         DO_SRC(1);
      }
#undef DO_SRC

      /* On IVB/BYT, region parameters and execution size for DF are in terms of
       * 32-bit elements, so they are doubled. For evaluating the validity of an
       * instruction, we halve them.
       */
      if (devinfo->verx10 == 70 &&
          element_size == 8)
         element_size = 4;

      /* ExecSize must be greater than or equal to Width. */
      ERROR_IF(exec_size < width, "ExecSize must be greater than or equal "
                                  "to Width");

      /* If ExecSize = Width and HorzStride ≠ 0,
       * VertStride must be set to Width * HorzStride.
       */
      if (exec_size == width && hstride != 0) {
         ERROR_IF(vstride != width * hstride,
                  "If ExecSize = Width and HorzStride ≠ 0, "
                  "VertStride must be set to Width * HorzStride");
      }

      /* If Width = 1, HorzStride must be 0 regardless of the values of
       * ExecSize and VertStride.
       */
      if (width == 1) {
         ERROR_IF(hstride != 0,
                  "If Width = 1, HorzStride must be 0 regardless "
                  "of the values of ExecSize and VertStride");
      }

      /* If ExecSize = Width = 1, both VertStride and HorzStride must be 0. */
      if (exec_size == 1 && width == 1) {
         ERROR_IF(vstride != 0 || hstride != 0,
                  "If ExecSize = Width = 1, both VertStride "
                  "and HorzStride must be 0");
      }

      /* If VertStride = HorzStride = 0, Width must be 1 regardless of the
       * value of ExecSize.
       */
      if (vstride == 0 && hstride == 0) {
         ERROR_IF(width != 1,
                  "If VertStride = HorzStride = 0, Width must be "
                  "1 regardless of the value of ExecSize");
      }

      /* VertStride must be used to cross GRF register boundaries. This rule
       * implies that elements within a 'Width' cannot cross GRF boundaries.
       *
       * Simulate the byte footprint of each row: 'mask' has one bit per byte
       * of the element, and the 64-bit accumulator covers a 2-GRF (64-byte)
       * window. A row that sets bits in both 32-byte halves has crossed a
       * GRF boundary with its horizontal stride, which is illegal.
       */
      const uint64_t mask = (1ULL << element_size) - 1;
      unsigned rowbase = subreg;

      for (int y = 0; y < exec_size / width; y++) {
         uint64_t access_mask = 0;
         unsigned offset = rowbase;

         for (int x = 0; x < width; x++) {
            access_mask |= mask << (offset % 64);
            offset += hstride * element_size;
         }

         rowbase += vstride * element_size;

         if ((uint32_t)access_mask != 0 && (access_mask >> 32) != 0) {
            ERROR("VertStride must be used to cross GRF register boundaries");
            break;
         }
      }
   }

   /* Dst.HorzStride must not be 0. */
   if (desc->ndst != 0 && !dst_is_null(devinfo, inst)) {
      ERROR_IF(brw_inst_dst_hstride(devinfo, inst) == BRW_HORIZONTAL_STRIDE_0,
               "Destination Horizontal Stride must not be 0");
   }

   return error_msg;
}
1082
1083static struct string
1084special_restrictions_for_mixed_float_mode(const struct intel_device_info *devinfo,
1085                                          const brw_inst *inst)
1086{
1087   struct string error_msg = { .str = NULL, .len = 0 };
1088
1089   const unsigned opcode = brw_inst_opcode(devinfo, inst);
1090   const unsigned num_sources = num_sources_from_inst(devinfo, inst);
1091   if (num_sources >= 3)
1092      return error_msg;
1093
1094   if (!is_mixed_float(devinfo, inst))
1095      return error_msg;
1096
1097   unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst);
1098   bool is_align16 = brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16;
1099
1100   enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
1101   enum brw_reg_type src1_type = num_sources > 1 ?
1102                                 brw_inst_src1_type(devinfo, inst) : 0;
1103   enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);
1104
1105   unsigned dst_stride = STRIDE(brw_inst_dst_hstride(devinfo, inst));
1106   bool dst_is_packed = is_packed(exec_size * dst_stride, exec_size, dst_stride);
1107
1108   /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1109    * Float Operations:
1110    *
1111    *    "Indirect addressing on source is not supported when source and
1112    *     destination data types are mixed float."
1113    */
1114   ERROR_IF(brw_inst_src0_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT ||
1115            (num_sources > 1 &&
1116             brw_inst_src1_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT),
1117            "Indirect addressing on source is not supported when source and "
1118            "destination data types are mixed float");
1119
1120   /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1121    * Float Operations:
1122    *
1123    *    "No SIMD16 in mixed mode when destination is f32. Instruction
1124    *     execution size must be no more than 8."
1125    */
1126   ERROR_IF(exec_size > 8 && dst_type == BRW_REGISTER_TYPE_F,
1127            "Mixed float mode with 32-bit float destination is limited "
1128            "to SIMD8");
1129
1130   if (is_align16) {
1131      /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1132       * Float Operations:
1133       *
1134       *   "In Align16 mode, when half float and float data types are mixed
1135       *    between source operands OR between source and destination operands,
1136       *    the register content are assumed to be packed."
1137       *
1138       * Since Align16 doesn't have a concept of horizontal stride (or width),
1139       * it means that vertical stride must always be 4, since 0 and 2 would
1140       * lead to replicated data, and any other value is disallowed in Align16.
1141       */
1142      ERROR_IF(brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
1143               "Align16 mixed float mode assumes packed data (vstride must be 4");
1144
1145      ERROR_IF(num_sources >= 2 &&
1146               brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
1147               "Align16 mixed float mode assumes packed data (vstride must be 4");
1148
1149      /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1150       * Float Operations:
1151       *
1152       *   "For Align16 mixed mode, both input and output packed f16 data
1153       *    must be oword aligned, no oword crossing in packed f16."
1154       *
1155       * The previous rule requires that Align16 operands are always packed,
1156       * and since there is only one bit for Align16 subnr, which represents
1157       * offsets 0B and 16B, this rule is always enforced and we don't need to
1158       * validate it.
1159       */
1160
1161      /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1162       * Float Operations:
1163       *
1164       *    "No SIMD16 in mixed mode when destination is packed f16 for both
1165       *     Align1 and Align16."
1166       *
1167       * And:
1168       *
1169       *   "In Align16 mode, when half float and float data types are mixed
1170       *    between source operands OR between source and destination operands,
1171       *    the register content are assumed to be packed."
1172       *
1173       * Which implies that SIMD16 is not available in Align16. This is further
1174       * confirmed by:
1175       *
1176       *    "For Align16 mixed mode, both input and output packed f16 data
1177       *     must be oword aligned, no oword crossing in packed f16"
1178       *
1179       * Since oword-aligned packed f16 data would cross oword boundaries when
1180       * the execution size is larger than 8.
1181       */
1182      ERROR_IF(exec_size > 8, "Align16 mixed float mode is limited to SIMD8");
1183
1184      /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1185       * Float Operations:
1186       *
1187       *    "No accumulator read access for Align16 mixed float."
1188       */
1189      ERROR_IF(inst_uses_src_acc(devinfo, inst),
1190               "No accumulator read access for Align16 mixed float");
1191   } else {
1192      assert(!is_align16);
1193
1194      /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1195       * Float Operations:
1196       *
1197       *    "No SIMD16 in mixed mode when destination is packed f16 for both
1198       *     Align1 and Align16."
1199       */
1200      ERROR_IF(exec_size > 8 && dst_is_packed &&
1201               dst_type == BRW_REGISTER_TYPE_HF,
1202               "Align1 mixed float mode is limited to SIMD8 when destination "
1203               "is packed half-float");
1204
1205      /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1206       * Float Operations:
1207       *
1208       *    "Math operations for mixed mode:
1209       *     - In Align1, f16 inputs need to be strided"
1210       */
1211      if (opcode == BRW_OPCODE_MATH) {
1212         if (src0_type == BRW_REGISTER_TYPE_HF) {
1213            ERROR_IF(STRIDE(brw_inst_src0_hstride(devinfo, inst)) <= 1,
1214                     "Align1 mixed mode math needs strided half-float inputs");
1215         }
1216
1217         if (num_sources >= 2 && src1_type == BRW_REGISTER_TYPE_HF) {
1218            ERROR_IF(STRIDE(brw_inst_src1_hstride(devinfo, inst)) <= 1,
1219                     "Align1 mixed mode math needs strided half-float inputs");
1220         }
1221      }
1222
1223      if (dst_type == BRW_REGISTER_TYPE_HF && dst_stride == 1) {
1224         /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1225          * Float Operations:
1226          *
1227          *    "In Align1, destination stride can be smaller than execution
1228          *     type. When destination is stride of 1, 16 bit packed data is
1229          *     updated on the destination. However, output packed f16 data
1230          *     must be oword aligned, no oword crossing in packed f16."
1231          *
1232          * The requirement of not crossing oword boundaries for 16-bit oword
1233          * aligned data means that execution size is limited to 8.
1234          */
1235         unsigned subreg;
1236         if (brw_inst_dst_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT)
1237            subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
1238         else
1239            subreg = brw_inst_dst_ia_subreg_nr(devinfo, inst);
1240         ERROR_IF(subreg % 16 != 0,
1241                  "Align1 mixed mode packed half-float output must be "
1242                  "oword aligned");
1243         ERROR_IF(exec_size > 8,
1244                  "Align1 mixed mode packed half-float output must not "
1245                  "cross oword boundaries (max exec size is 8)");
1246
1247         /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1248          * Float Operations:
1249          *
1250          *    "When source is float or half float from accumulator register and
1251          *     destination is half float with a stride of 1, the source must
1252          *     register aligned. i.e., source must have offset zero."
1253          *
1254          * Align16 mixed float mode doesn't allow accumulator access on sources,
1255          * so we only need to check this for Align1.
1256          */
1257         if (src0_is_acc(devinfo, inst) &&
1258             (src0_type == BRW_REGISTER_TYPE_F ||
1259              src0_type == BRW_REGISTER_TYPE_HF)) {
1260            ERROR_IF(brw_inst_src0_da1_subreg_nr(devinfo, inst) != 0,
1261                     "Mixed float mode requires register-aligned accumulator "
1262                     "source reads when destination is packed half-float");
1263
1264         }
1265
1266         if (num_sources > 1 &&
1267             src1_is_acc(devinfo, inst) &&
1268             (src1_type == BRW_REGISTER_TYPE_F ||
1269              src1_type == BRW_REGISTER_TYPE_HF)) {
1270            ERROR_IF(brw_inst_src1_da1_subreg_nr(devinfo, inst) != 0,
1271                     "Mixed float mode requires register-aligned accumulator "
1272                     "source reads when destination is packed half-float");
1273         }
1274      }
1275
1276      /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1277       * Float Operations:
1278       *
1279       *    "No swizzle is allowed when an accumulator is used as an implicit
1280       *     source or an explicit source in an instruction. i.e. when
1281       *     destination is half float with an implicit accumulator source,
1282       *     destination stride needs to be 2."
1283       *
1284       * FIXME: it is not quite clear what the first sentence actually means
1285       *        or its link to the implication described after it, so we only
1286       *        validate the explicit implication, which is clearly described.
1287       */
1288      if (dst_type == BRW_REGISTER_TYPE_HF &&
1289          inst_uses_src_acc(devinfo, inst)) {
1290         ERROR_IF(dst_stride != 2,
1291                  "Mixed float mode with implicit/explicit accumulator "
1292                  "source and half-float destination requires a stride "
1293                  "of 2 on the destination");
1294      }
1295   }
1296
1297   return error_msg;
1298}
1299
1300/**
1301 * Creates an \p access_mask for an \p exec_size, \p element_size, and a region
1302 *
1303 * An \p access_mask is a 32-element array of uint64_t, where each uint64_t is
1304 * a bitmask of bytes accessed by the region.
1305 *
1306 * For instance the access mask of the source gX.1<4,2,2>F in an exec_size = 4
1307 * instruction would be
1308 *
1309 *    access_mask[0] = 0x00000000000000F0
1310 *    access_mask[1] = 0x000000000000F000
1311 *    access_mask[2] = 0x0000000000F00000
1312 *    access_mask[3] = 0x00000000F0000000
1313 *    access_mask[4-31] = 0
1314 *
1315 * because the first execution channel accesses bytes 7-4 and the second
1316 * execution channel accesses bytes 15-12, etc.
1317 */
static void
align1_access_mask(uint64_t access_mask[static 32],
                   unsigned exec_size, unsigned element_size, unsigned subreg,
                   unsigned vstride, unsigned width, unsigned hstride)
{
   /* One bit per byte of the element; shifted by the channel's byte offset
    * (mod 64) it marks the bytes touched within a two-register window.
    */
   const uint64_t element_bits = (1ULL << element_size) - 1;
   const unsigned num_rows = exec_size / width;
   unsigned channel = 0;

   for (unsigned row = 0; row < num_rows; row++) {
      /* Each row starts vstride elements after the previous one. */
      unsigned byte_offset = subreg + row * vstride * element_size;

      for (unsigned col = 0; col < width; col++) {
         access_mask[channel++] = element_bits << (byte_offset % 64);
         byte_offset += hstride * element_size;
      }
   }

   assert(channel == 0 || channel == exec_size);
}
1340
1341/**
1342 * Returns the number of registers accessed according to the \p access_mask
1343 */
static int
registers_read(const uint64_t access_mask[static 32])
{
   /* Bits 32..63 represent bytes in the second GRF, so any channel with
    * such a bit set reads two registers; otherwise the count is one if
    * anything was accessed at all.
    */
   int seen_access = 0;

   for (unsigned i = 0; i < 32; i++) {
      const uint64_t bytes = access_mask[i];

      if (bytes >> 32)
         return 2;
      if (bytes)
         seen_access = 1;
   }

   return seen_access;
}
1359
1360/**
1361 * Checks restrictions listed in "Region Alignment Rules" in the "Register
1362 * Region Restrictions" section.
1363 */
1364static struct string
1365region_alignment_rules(const struct intel_device_info *devinfo,
1366                       const brw_inst *inst)
1367{
1368   const struct opcode_desc *desc =
1369      brw_opcode_desc(devinfo, brw_inst_opcode(devinfo, inst));
1370   unsigned num_sources = num_sources_from_inst(devinfo, inst);
1371   unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst);
1372   uint64_t dst_access_mask[32], src0_access_mask[32], src1_access_mask[32];
1373   struct string error_msg = { .str = NULL, .len = 0 };
1374
1375   if (num_sources == 3)
1376      return (struct string){};
1377
1378   if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16)
1379      return (struct string){};
1380
1381   if (inst_is_send(devinfo, inst))
1382      return (struct string){};
1383
1384   memset(dst_access_mask, 0, sizeof(dst_access_mask));
1385   memset(src0_access_mask, 0, sizeof(src0_access_mask));
1386   memset(src1_access_mask, 0, sizeof(src1_access_mask));
1387
1388   for (unsigned i = 0; i < num_sources; i++) {
1389      unsigned vstride, width, hstride, element_size, subreg;
1390      enum brw_reg_type type;
1391
1392      /* In Direct Addressing mode, a source cannot span more than 2 adjacent
1393       * GRF registers.
1394       */
1395
1396#define DO_SRC(n)                                                              \
1397      if (brw_inst_src ## n ## _address_mode(devinfo, inst) !=                 \
1398          BRW_ADDRESS_DIRECT)                                                  \
1399         continue;                                                             \
1400                                                                               \
1401      if (brw_inst_src ## n ## _reg_file(devinfo, inst) ==                     \
1402          BRW_IMMEDIATE_VALUE)                                                 \
1403         continue;                                                             \
1404                                                                               \
1405      vstride = STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst));          \
1406      width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst));               \
1407      hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst));          \
1408      type = brw_inst_src ## n ## _type(devinfo, inst);                        \
1409      element_size = brw_reg_type_to_size(type);                               \
1410      subreg = brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst);             \
1411      align1_access_mask(src ## n ## _access_mask,                             \
1412                         exec_size, element_size, subreg,                      \
1413                         vstride, width, hstride)
1414
1415      if (i == 0) {
1416         DO_SRC(0);
1417      } else {
1418         DO_SRC(1);
1419      }
1420#undef DO_SRC
1421
1422      unsigned num_vstride = exec_size / width;
1423      unsigned num_hstride = width;
1424      unsigned vstride_elements = (num_vstride - 1) * vstride;
1425      unsigned hstride_elements = (num_hstride - 1) * hstride;
1426      unsigned offset = (vstride_elements + hstride_elements) * element_size +
1427                        subreg;
1428      ERROR_IF(offset >= 64,
1429               "A source cannot span more than 2 adjacent GRF registers");
1430   }
1431
1432   if (desc->ndst == 0 || dst_is_null(devinfo, inst))
1433      return error_msg;
1434
1435   unsigned stride = STRIDE(brw_inst_dst_hstride(devinfo, inst));
1436   enum brw_reg_type dst_type = inst_dst_type(devinfo, inst);
1437   unsigned element_size = brw_reg_type_to_size(dst_type);
1438   unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
1439   unsigned offset = ((exec_size - 1) * stride * element_size) + subreg;
1440   ERROR_IF(offset >= 64,
1441            "A destination cannot span more than 2 adjacent GRF registers");
1442
1443   if (error_msg.str)
1444      return error_msg;
1445
1446   /* On IVB/BYT, region parameters and execution size for DF are in terms of
1447    * 32-bit elements, so they are doubled. For evaluating the validity of an
1448    * instruction, we halve them.
1449    */
1450   if (devinfo->verx10 == 70 &&
1451       element_size == 8)
1452      element_size = 4;
1453
1454   align1_access_mask(dst_access_mask, exec_size, element_size, subreg,
1455                      exec_size == 1 ? 0 : exec_size * stride,
1456                      exec_size == 1 ? 1 : exec_size,
1457                      exec_size == 1 ? 0 : stride);
1458
1459   unsigned dst_regs = registers_read(dst_access_mask);
1460   unsigned src0_regs = registers_read(src0_access_mask);
1461   unsigned src1_regs = registers_read(src1_access_mask);
1462
1463   /* The SNB, IVB, HSW, BDW, and CHV PRMs say:
1464    *
1465    *    When an instruction has a source region spanning two registers and a
1466    *    destination region contained in one register, the number of elements
1467    *    must be the same between two sources and one of the following must be
1468    *    true:
1469    *
1470    *       1. The destination region is entirely contained in the lower OWord
1471    *          of a register.
1472    *       2. The destination region is entirely contained in the upper OWord
1473    *          of a register.
1474    *       3. The destination elements are evenly split between the two OWords
1475    *          of a register.
1476    */
1477   if (devinfo->ver <= 8) {
1478      if (dst_regs == 1 && (src0_regs == 2 || src1_regs == 2)) {
1479         unsigned upper_oword_writes = 0, lower_oword_writes = 0;
1480
1481         for (unsigned i = 0; i < exec_size; i++) {
1482            if (dst_access_mask[i] > 0x0000FFFF) {
1483               upper_oword_writes++;
1484            } else {
1485               assert(dst_access_mask[i] != 0);
1486               lower_oword_writes++;
1487            }
1488         }
1489
1490         ERROR_IF(lower_oword_writes != 0 &&
1491                  upper_oword_writes != 0 &&
1492                  upper_oword_writes != lower_oword_writes,
1493                  "Writes must be to only one OWord or "
1494                  "evenly split between OWords");
1495      }
1496   }
1497
1498   /* The IVB and HSW PRMs say:
1499    *
1500    *    When an instruction has a source region that spans two registers and
1501    *    the destination spans two registers, the destination elements must be
1502    *    evenly split between the two registers [...]
1503    *
1504    * The SNB PRM contains similar wording (but written in a much more
1505    * confusing manner).
1506    *
1507    * The BDW PRM says:
1508    *
1509    *    When destination spans two registers, the source may be one or two
1510    *    registers. The destination elements must be evenly split between the
1511    *    two registers.
1512    *
1513    * The SKL PRM says:
1514    *
1515    *    When destination of MATH instruction spans two registers, the
1516    *    destination elements must be evenly split between the two registers.
1517    *
1518    * It is not known whether this restriction applies to KBL other Gens after
1519    * SKL.
1520    */
1521   if (devinfo->ver <= 8 ||
1522       brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MATH) {
1523
1524      /* Nothing explicitly states that on Gen < 8 elements must be evenly
1525       * split between two destination registers in the two exceptional
1526       * source-region-spans-one-register cases, but since Broadwell requires
1527       * evenly split writes regardless of source region, we assume that it was
1528       * an oversight and require it.
1529       */
1530      if (dst_regs == 2) {
1531         unsigned upper_reg_writes = 0, lower_reg_writes = 0;
1532
1533         for (unsigned i = 0; i < exec_size; i++) {
1534            if (dst_access_mask[i] > 0xFFFFFFFF) {
1535               upper_reg_writes++;
1536            } else {
1537               assert(dst_access_mask[i] != 0);
1538               lower_reg_writes++;
1539            }
1540         }
1541
1542         ERROR_IF(upper_reg_writes != lower_reg_writes,
1543                  "Writes must be evenly split between the two "
1544                  "destination registers");
1545      }
1546   }
1547
1548   /* The IVB and HSW PRMs say:
1549    *
1550    *    When an instruction has a source region that spans two registers and
1551    *    the destination spans two registers, the destination elements must be
1552    *    evenly split between the two registers and each destination register
1553    *    must be entirely derived from one source register.
1554    *
1555    *    Note: In such cases, the regioning parameters must ensure that the
1556    *    offset from the two source registers is the same.
1557    *
1558    * The SNB PRM contains similar wording (but written in a much more
1559    * confusing manner).
1560    *
1561    * There are effectively three rules stated here:
1562    *
1563    *    For an instruction with a source and a destination spanning two
1564    *    registers,
1565    *
1566    *       (1) destination elements must be evenly split between the two
1567    *           registers
1568    *       (2) all destination elements in a register must be derived
1569    *           from one source register
1570    *       (3) the offset (i.e. the starting location in each of the two
1571    *           registers spanned by a region) must be the same in the two
1572    *           registers spanned by a region
1573    *
1574    * It is impossible to violate rule (1) without violating (2) or (3), so we
1575    * do not attempt to validate it.
1576    */
1577   if (devinfo->ver <= 7 && dst_regs == 2) {
1578      for (unsigned i = 0; i < num_sources; i++) {
1579#define DO_SRC(n)                                                             \
1580         if (src ## n ## _regs <= 1)                                          \
1581            continue;                                                         \
1582                                                                              \
1583         for (unsigned i = 0; i < exec_size; i++) {                           \
1584            if ((dst_access_mask[i] > 0xFFFFFFFF) !=                          \
1585                (src ## n ## _access_mask[i] > 0xFFFFFFFF)) {                 \
1586               ERROR("Each destination register must be entirely derived "    \
1587                     "from one source register");                             \
1588               break;                                                         \
1589            }                                                                 \
1590         }                                                                    \
1591                                                                              \
1592         unsigned offset_0 =                                                  \
1593            brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst);               \
1594         unsigned offset_1 = offset_0;                                        \
1595                                                                              \
1596         for (unsigned i = 0; i < exec_size; i++) {                           \
1597            if (src ## n ## _access_mask[i] > 0xFFFFFFFF) {                   \
1598               offset_1 = __builtin_ctzll(src ## n ## _access_mask[i]) - 32;  \
1599               break;                                                         \
1600            }                                                                 \
1601         }                                                                    \
1602                                                                              \
1603         ERROR_IF(num_sources == 2 && offset_0 != offset_1,                   \
1604                  "The offset from the two source registers "                 \
1605                  "must be the same")
1606
1607         if (i == 0) {
1608            DO_SRC(0);
1609         } else {
1610            DO_SRC(1);
1611         }
1612#undef DO_SRC
1613      }
1614   }
1615
1616   /* The IVB and HSW PRMs say:
1617    *
1618    *    When destination spans two registers, the source MUST span two
1619    *    registers. The exception to the above rule:
1620    *        1. When source is scalar, the source registers are not
1621    *           incremented.
    *        2. When source is packed integer Word and destination is packed
    *           integer DWord, the source register is not incremented but the
    *           source sub register is incremented.
1625    *
1626    * The SNB PRM does not contain this rule, but the internal documentation
1627    * indicates that it applies to SNB as well. We assume that the rule applies
1628    * to Gen <= 5 although their PRMs do not state it.
1629    *
1630    * While the documentation explicitly says in exception (2) that the
1631    * destination must be an integer DWord, the hardware allows at least a
1632    * float destination type as well. We emit such instructions from
1633    *
1634    *    fs_visitor::emit_interpolation_setup_gfx6
1635    *    fs_visitor::emit_fragcoord_interpolation
1636    *
1637    * and have for years with no ill effects.
1638    *
1639    * Additionally the simulator source code indicates that the real condition
1640    * is that the size of the destination type is 4 bytes.
1641    */
1642   if (devinfo->ver <= 7 && dst_regs == 2) {
1643      enum brw_reg_type dst_type = inst_dst_type(devinfo, inst);
1644      bool dst_is_packed_dword =
1645         is_packed(exec_size * stride, exec_size, stride) &&
1646         brw_reg_type_to_size(dst_type) == 4;
1647
1648      for (unsigned i = 0; i < num_sources; i++) {
1649#define DO_SRC(n)                                                                  \
1650         unsigned vstride, width, hstride;                                         \
1651         vstride = STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst));           \
1652         width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst));                \
1653         hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst));           \
1654         bool src ## n ## _is_packed_word =                                        \
1655            is_packed(vstride, width, hstride) &&                                  \
1656            (brw_inst_src ## n ## _type(devinfo, inst) == BRW_REGISTER_TYPE_W ||   \
1657             brw_inst_src ## n ## _type(devinfo, inst) == BRW_REGISTER_TYPE_UW);   \
1658                                                                                   \
1659         ERROR_IF(src ## n ## _regs == 1 &&                                        \
1660                  !src ## n ## _has_scalar_region(devinfo, inst) &&                \
1661                  !(dst_is_packed_dword && src ## n ## _is_packed_word),           \
1662                  "When the destination spans two registers, the source must "     \
1663                  "span two registers\n" ERROR_INDENT "(exceptions for scalar "    \
1664                  "source and packed-word to packed-dword expansion)")
1665
1666         if (i == 0) {
1667            DO_SRC(0);
1668         } else {
1669            DO_SRC(1);
1670         }
1671#undef DO_SRC
1672      }
1673   }
1674
1675   return error_msg;
1676}
1677
1678static struct string
1679vector_immediate_restrictions(const struct intel_device_info *devinfo,
1680                              const brw_inst *inst)
1681{
1682   unsigned num_sources = num_sources_from_inst(devinfo, inst);
1683   struct string error_msg = { .str = NULL, .len = 0 };
1684
1685   if (num_sources == 3 || num_sources == 0)
1686      return (struct string){};
1687
1688   unsigned file = num_sources == 1 ?
1689                   brw_inst_src0_reg_file(devinfo, inst) :
1690                   brw_inst_src1_reg_file(devinfo, inst);
1691   if (file != BRW_IMMEDIATE_VALUE)
1692      return (struct string){};
1693
1694   enum brw_reg_type dst_type = inst_dst_type(devinfo, inst);
1695   unsigned dst_type_size = brw_reg_type_to_size(dst_type);
1696   unsigned dst_subreg = brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1 ?
1697                         brw_inst_dst_da1_subreg_nr(devinfo, inst) : 0;
1698   unsigned dst_stride = STRIDE(brw_inst_dst_hstride(devinfo, inst));
1699   enum brw_reg_type type = num_sources == 1 ?
1700                            brw_inst_src0_type(devinfo, inst) :
1701                            brw_inst_src1_type(devinfo, inst);
1702
1703   /* The PRMs say:
1704    *
1705    *    When an immediate vector is used in an instruction, the destination
1706    *    must be 128-bit aligned with destination horizontal stride equivalent
1707    *    to a word for an immediate integer vector (v) and equivalent to a
1708    *    DWord for an immediate float vector (vf).
1709    *
1710    * The text has not been updated for the addition of the immediate unsigned
1711    * integer vector type (uv) on SNB, but presumably the same restriction
1712    * applies.
1713    */
1714   switch (type) {
1715   case BRW_REGISTER_TYPE_V:
1716   case BRW_REGISTER_TYPE_UV:
1717   case BRW_REGISTER_TYPE_VF:
1718      ERROR_IF(dst_subreg % (128 / 8) != 0,
1719               "Destination must be 128-bit aligned in order to use immediate "
1720               "vector types");
1721
1722      if (type == BRW_REGISTER_TYPE_VF) {
1723         ERROR_IF(dst_type_size * dst_stride != 4,
1724                  "Destination must have stride equivalent to dword in order "
1725                  "to use the VF type");
1726      } else {
1727         ERROR_IF(dst_type_size * dst_stride != 2,
1728                  "Destination must have stride equivalent to word in order "
1729                  "to use the V or UV type");
1730      }
1731      break;
1732   default:
1733      break;
1734   }
1735
1736   return error_msg;
1737}
1738
/**
 * Validate the platform-specific restrictions that apply to "double
 * precision" operations: instructions whose destination or execution type is
 * 64 bits wide, or integer DWord multiplies (which internally produce 64-bit
 * intermediate results).
 *
 * Most checks here are for CHV/BXT (and assumed for GLK), with additional
 * Gfx8+ Align16 and Gfx12.5+ regioning/ARF rules at the end.
 */
static struct string
special_requirements_for_handling_double_precision_data_types(
                                       const struct intel_device_info *devinfo,
                                       const brw_inst *inst)
{
   unsigned num_sources = num_sources_from_inst(devinfo, inst);
   struct string error_msg = { .str = NULL, .len = 0 };

   if (num_sources == 3 || num_sources == 0)
      return (struct string){};

   /* Split sends don't have types so there's no doubles there. */
   if (inst_is_split_send(devinfo, inst))
      return (struct string){};

   enum brw_reg_type exec_type = execution_type(devinfo, inst);
   unsigned exec_type_size = brw_reg_type_to_size(exec_type);

   enum brw_reg_file dst_file = brw_inst_dst_reg_file(devinfo, inst);
   enum brw_reg_type dst_type = inst_dst_type(devinfo, inst);
   unsigned dst_type_size = brw_reg_type_to_size(dst_type);
   unsigned dst_hstride = STRIDE(brw_inst_dst_hstride(devinfo, inst));
   unsigned dst_reg = brw_inst_dst_da_reg_nr(devinfo, inst);
   unsigned dst_subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
   unsigned dst_address_mode = brw_inst_dst_address_mode(devinfo, inst);

   /* MUL of two DWord-typed integer sources behaves like a 64-bit operation
    * on Gfx8+ and is therefore subject to the same restrictions.
    */
   bool is_integer_dword_multiply =
      devinfo->ver >= 8 &&
      brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MUL &&
      (brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_D ||
       brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_UD) &&
      (brw_inst_src1_type(devinfo, inst) == BRW_REGISTER_TYPE_D ||
       brw_inst_src1_type(devinfo, inst) == BRW_REGISTER_TYPE_UD);

   const bool is_double_precision =
      dst_type_size == 8 || exec_type_size == 8 || is_integer_dword_multiply;

   /* Check each source operand.  DO_SRC(n) fills the locals below from the
    * src<n> instruction fields; immediate sources are skipped via the
    * `continue` inside the macro.
    */
   for (unsigned i = 0; i < num_sources; i++) {
      unsigned vstride, width, hstride, type_size, reg, subreg, address_mode;
      bool is_scalar_region;
      enum brw_reg_file file;
      enum brw_reg_type type;

#define DO_SRC(n)                                                              \
      if (brw_inst_src ## n ## _reg_file(devinfo, inst) ==                     \
          BRW_IMMEDIATE_VALUE)                                                 \
         continue;                                                             \
                                                                               \
      is_scalar_region = src ## n ## _has_scalar_region(devinfo, inst);        \
      vstride = STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst));          \
      width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst));               \
      hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst));          \
      file = brw_inst_src ## n ## _reg_file(devinfo, inst);                    \
      type = brw_inst_src ## n ## _type(devinfo, inst);                        \
      type_size = brw_reg_type_to_size(type);                                  \
      reg = brw_inst_src ## n ## _da_reg_nr(devinfo, inst);                    \
      subreg = brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst);             \
      address_mode = brw_inst_src ## n ## _address_mode(devinfo, inst)

      if (i == 0) {
         DO_SRC(0);
      } else {
         DO_SRC(1);
      }
#undef DO_SRC

      /* Strides in bytes, for comparing source and destination layouts. */
      const unsigned src_stride = hstride * type_size;
      const unsigned dst_stride = dst_hstride * dst_type_size;

      /* The PRMs say that for CHV, BXT:
       *
       *    When source or destination datatype is 64b or operation is integer
       *    DWord multiply, regioning in Align1 must follow these rules:
       *
       *    1. Source and Destination horizontal stride must be aligned to the
       *       same qword.
       *    2. Regioning must ensure Src.Vstride = Src.Width * Src.Hstride.
       *    3. Source and Destination offset must be the same, except the case
       *       of scalar source.
       *
       * We assume that the restriction applies to GLK as well.
       */
      if (is_double_precision &&
          brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1 &&
          (devinfo->is_cherryview || intel_device_info_is_9lp(devinfo))) {
         ERROR_IF(!is_scalar_region &&
                  (src_stride % 8 != 0 ||
                   dst_stride % 8 != 0 ||
                   src_stride != dst_stride),
                  "Source and destination horizontal stride must equal and a "
                  "multiple of a qword when the execution type is 64-bit");

         ERROR_IF(vstride != width * hstride,
                  "Vstride must be Width * Hstride when the execution type is "
                  "64-bit");

         ERROR_IF(!is_scalar_region && dst_subreg != subreg,
                  "Source and destination offset must be the same when the "
                  "execution type is 64-bit");
      }

      /* The PRMs say that for CHV, BXT:
       *
       *    When source or destination datatype is 64b or operation is integer
       *    DWord multiply, indirect addressing must not be used.
       *
       * We assume that the restriction applies to GLK as well.
       */
      if (is_double_precision &&
          (devinfo->is_cherryview || intel_device_info_is_9lp(devinfo))) {
         ERROR_IF(BRW_ADDRESS_REGISTER_INDIRECT_REGISTER == address_mode ||
                  BRW_ADDRESS_REGISTER_INDIRECT_REGISTER == dst_address_mode,
                  "Indirect addressing is not allowed when the execution type "
                  "is 64-bit");
      }

      /* The PRMs say that for CHV, BXT:
       *
       *    ARF registers must never be used with 64b datatype or when
       *    operation is integer DWord multiply.
       *
       * We assume that the restriction applies to GLK as well.
       *
       * We assume that the restriction does not apply to the null register.
       */
      if (is_double_precision &&
          (devinfo->is_cherryview || intel_device_info_is_9lp(devinfo))) {
         /* MAC and AccWrEn imply implicit accumulator (an ARF) usage, so they
          * are rejected along with explicit non-null ARF operands.
          */
         ERROR_IF(brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MAC ||
                  brw_inst_acc_wr_control(devinfo, inst) ||
                  (BRW_ARCHITECTURE_REGISTER_FILE == file &&
                   reg != BRW_ARF_NULL) ||
                  (BRW_ARCHITECTURE_REGISTER_FILE == dst_file &&
                   dst_reg != BRW_ARF_NULL),
                  "Architecture registers cannot be used when the execution "
                  "type is 64-bit");
      }

      /* From the hardware spec section "Register Region Restrictions":
       *
       * "In case where source or destination datatype is 64b or operation is
       *  integer DWord multiply [or in case where a floating point data type
       *  is used as destination]:
       *
       *   1. Register Regioning patterns where register data bit locations
       *      are changed between source and destination are not supported on
       *      Src0 and Src1 except for broadcast of a scalar.
       *
       *   2. Explicit ARF registers except null and accumulator must not be
       *      used."
       */
      if (devinfo->verx10 >= 125 &&
          (brw_reg_type_is_floating_point(dst_type) ||
           is_double_precision)) {
         ERROR_IF(!is_scalar_region &&
                  (vstride != width * hstride ||
                   src_stride != dst_stride ||
                   subreg != dst_subreg),
                  "Register Regioning patterns where register data bit "
                  "locations are changed between source and destination are not "
                  "supported except for broadcast of a scalar.");

         /* For sources the accumulator check spans the whole acc ARF range;
          * for the destination only acc0 itself is accepted.
          */
         ERROR_IF((file == BRW_ARCHITECTURE_REGISTER_FILE &&
                   reg != BRW_ARF_NULL && !(reg >= BRW_ARF_ACCUMULATOR && reg < BRW_ARF_FLAG)) ||
                  (dst_file == BRW_ARCHITECTURE_REGISTER_FILE &&
                   dst_reg != BRW_ARF_NULL && dst_reg != BRW_ARF_ACCUMULATOR),
                  "Explicit ARF registers except null and accumulator must not "
                  "be used.");
      }

      /* From the hardware spec section "Register Region Restrictions":
       *
       * "Vx1 and VxH indirect addressing for Float, Half-Float, Double-Float and
       *  Quad-Word data must not be used."
       */
      if (devinfo->verx10 >= 125 &&
          (brw_reg_type_is_floating_point(type) || type_sz(type) == 8)) {
         ERROR_IF(address_mode == BRW_ADDRESS_REGISTER_INDIRECT_REGISTER &&
                  vstride == BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL,
                  "Vx1 and VxH indirect addressing for Float, Half-Float, "
                  "Double-Float and Quad-Word data must not be used");
      }
   }

   /* The PRMs say that for BDW, SKL:
    *
    *    If Align16 is required for an operation with QW destination and non-QW
    *    source datatypes, the execution size cannot exceed 2.
    *
    * We assume that the restriction applies to all Gfx8+ parts.
    */
   if (is_double_precision && devinfo->ver >= 8) {
      enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
      /* For single-source instructions, treat src1 as having src0's type so
       * the size comparison below degenerates to checking src0 only.
       */
      enum brw_reg_type src1_type =
         num_sources > 1 ? brw_inst_src1_type(devinfo, inst) : src0_type;
      unsigned src0_type_size = brw_reg_type_to_size(src0_type);
      unsigned src1_type_size = brw_reg_type_to_size(src1_type);

      ERROR_IF(brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16 &&
               dst_type_size == 8 &&
               (src0_type_size != 8 || src1_type_size != 8) &&
               brw_inst_exec_size(devinfo, inst) > BRW_EXECUTE_2,
               "In Align16 exec size cannot exceed 2 with a QWord destination "
               "and a non-QWord source");
   }

   /* The PRMs say that for CHV, BXT:
    *
    *    When source or destination datatype is 64b or operation is integer
    *    DWord multiply, DepCtrl must not be used.
    *
    * We assume that the restriction applies to GLK as well.
    */
   if (is_double_precision &&
       (devinfo->is_cherryview || intel_device_info_is_9lp(devinfo))) {
      ERROR_IF(brw_inst_no_dd_check(devinfo, inst) ||
               brw_inst_no_dd_clear(devinfo, inst),
               "DepCtrl is not allowed when the execution type is 64-bit");
   }

   return error_msg;
}
1960
1961static struct string
1962instruction_restrictions(const struct intel_device_info *devinfo,
1963                         const brw_inst *inst)
1964{
1965   struct string error_msg = { .str = NULL, .len = 0 };
1966
1967   /* From Wa_1604601757:
1968    *
1969    * "When multiplying a DW and any lower precision integer, source modifier
1970    *  is not supported."
1971    */
1972   if (devinfo->ver >= 12 &&
1973       brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MUL) {
1974      enum brw_reg_type exec_type = execution_type(devinfo, inst);
1975      const bool src0_valid = type_sz(brw_inst_src0_type(devinfo, inst)) == 4 ||
1976         brw_inst_src0_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE ||
1977         !(brw_inst_src0_negate(devinfo, inst) ||
1978           brw_inst_src0_abs(devinfo, inst));
1979      const bool src1_valid = type_sz(brw_inst_src1_type(devinfo, inst)) == 4 ||
1980         brw_inst_src1_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE ||
1981         !(brw_inst_src1_negate(devinfo, inst) ||
1982           brw_inst_src1_abs(devinfo, inst));
1983
1984      ERROR_IF(!brw_reg_type_is_floating_point(exec_type) &&
1985               type_sz(exec_type) == 4 && !(src0_valid && src1_valid),
1986               "When multiplying a DW and any lower precision integer, source "
1987               "modifier is not supported.");
1988   }
1989
1990   if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_CMP ||
1991       brw_inst_opcode(devinfo, inst) == BRW_OPCODE_CMPN) {
1992      if (devinfo->ver <= 7) {
1993         /* Page 166 of the Ivy Bridge PRM Volume 4 part 3 (Execution Unit
1994          * ISA) says:
1995          *
1996          *    Accumulator cannot be destination, implicit or explicit. The
1997          *    destination must be a general register or the null register.
1998          *
1999          * Page 77 of the Haswell PRM Volume 2b contains the same text.  The
2000          * 965G PRMs contain similar text.
2001          *
2002          * Page 864 (page 880 of the PDF) of the Broadwell PRM Volume 7 says:
2003          *
2004          *    For the cmp and cmpn instructions, remove the accumulator
2005          *    restrictions.
2006          */
2007         ERROR_IF(brw_inst_dst_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
2008                  brw_inst_dst_da_reg_nr(devinfo, inst) != BRW_ARF_NULL,
2009                  "Accumulator cannot be destination, implicit or explicit.");
2010      }
2011
2012      /* Page 166 of the Ivy Bridge PRM Volume 4 part 3 (Execution Unit ISA)
2013       * says:
2014       *
2015       *    If the destination is the null register, the {Switch} instruction
2016       *    option must be used.
2017       *
2018       * Page 77 of the Haswell PRM Volume 2b contains the same text.
2019       */
2020      if (devinfo->ver == 7) {
2021         ERROR_IF(dst_is_null(devinfo, inst) &&
2022                  brw_inst_thread_control(devinfo, inst) != BRW_THREAD_SWITCH,
2023                  "If the destination is the null register, the {Switch} "
2024                  "instruction option must be used.");
2025      }
2026   }
2027
2028   if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MATH) {
2029      unsigned math_function = brw_inst_math_function(devinfo, inst);
2030      switch (math_function) {
2031      case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
2032      case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT:
2033      case BRW_MATH_FUNCTION_INT_DIV_REMAINDER: {
2034         /* Page 442 of the Broadwell PRM Volume 2a "Extended Math Function" says:
2035          *    INT DIV function does not support source modifiers.
2036          * Bspec 6647 extends it back to Ivy Bridge.
2037          */
2038         bool src0_valid = !brw_inst_src0_negate(devinfo, inst) &&
2039                           !brw_inst_src0_abs(devinfo, inst);
2040         bool src1_valid = !brw_inst_src1_negate(devinfo, inst) &&
2041                           !brw_inst_src1_abs(devinfo, inst);
2042         ERROR_IF(!src0_valid || !src1_valid,
2043                  "INT DIV function does not support source modifiers.");
2044         break;
2045      }
2046      default:
2047         break;
2048      }
2049   }
2050
2051   if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_DP4A) {
2052      /* Page 396 (page 412 of the PDF) of the DG1 PRM volume 2a says:
2053       *
2054       *    Only one of src0 or src1 operand may be an the (sic) accumulator
2055       *    register (acc#).
2056       */
2057      ERROR_IF(src0_is_acc(devinfo, inst) && src1_is_acc(devinfo, inst),
2058               "Only one of src0 or src1 operand may be an accumulator "
2059               "register (acc#).");
2060
2061   }
2062
2063   return error_msg;
2064}
2065
2066static struct string
2067send_descriptor_restrictions(const struct intel_device_info *devinfo,
2068                             const brw_inst *inst)
2069{
2070   struct string error_msg = { .str = NULL, .len = 0 };
2071
2072   if (inst_is_split_send(devinfo, inst)) {
2073      /* We can only validate immediate descriptors */
2074      if (brw_inst_send_sel_reg32_desc(devinfo, inst))
2075         return error_msg;
2076   } else if (inst_is_send(devinfo, inst)) {
2077      /* We can only validate immediate descriptors */
2078      if (brw_inst_src1_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE)
2079         return error_msg;
2080   } else {
2081      return error_msg;
2082   }
2083
2084   const uint32_t desc = brw_inst_send_desc(devinfo, inst);
2085
2086   switch (brw_inst_sfid(devinfo, inst)) {
2087   case GFX12_SFID_TGM:
2088   case GFX12_SFID_SLM:
2089   case GFX12_SFID_UGM:
2090      ERROR_IF(!devinfo->has_lsc, "Platform does not support LSC");
2091
2092      ERROR_IF(lsc_opcode_has_transpose(lsc_msg_desc_opcode(devinfo, desc)) &&
2093               lsc_msg_desc_transpose(devinfo, desc) &&
2094               brw_inst_exec_size(devinfo, inst) != BRW_EXECUTE_1,
2095               "Transposed vectors are restricted to Exec_Mask = 1.");
2096      break;
2097
2098   default:
2099      break;
2100   }
2101
2102   return error_msg;
2103}
2104
/**
 * Validate a single (uncompacted) instruction against all restriction
 * passes in this file.
 *
 * If validation fails and \p disasm is non-NULL, the accumulated error text
 * is attached to the disassembly at \p offset.
 *
 * Returns true if the instruction passed every check.
 */
bool
brw_validate_instruction(const struct intel_device_info *devinfo,
                         const brw_inst *inst, int offset,
                         struct disasm_info *disasm)
{
   struct string error_msg = { .str = NULL, .len = 0 };

   if (is_unsupported_inst(devinfo, inst)) {
      ERROR("Instruction not supported on this Gen");
   } else {
      /* Field-validity checks run first: the remaining passes assume the
       * raw encodings they decode are sane.
       */
      CHECK(invalid_values);

      if (error_msg.str == NULL) {
         CHECK(sources_not_null);
         CHECK(send_restrictions);
         CHECK(alignment_supported);
         CHECK(general_restrictions_based_on_operand_types);
         CHECK(general_restrictions_on_region_parameters);
         CHECK(special_restrictions_for_mixed_float_mode);
         CHECK(region_alignment_rules);
         CHECK(vector_immediate_restrictions);
         CHECK(special_requirements_for_handling_double_precision_data_types);
         CHECK(instruction_restrictions);
         CHECK(send_descriptor_restrictions);
      }
   }

   if (error_msg.str && disasm) {
      disasm_insert_error(disasm, offset, error_msg.str);
   }
   free(error_msg.str);

   /* Reading .len after freeing .str is fine; only the string was heap
    * allocated.  A zero length means no check reported an error.
    */
   return error_msg.len == 0;
}
2139
2140bool
2141brw_validate_instructions(const struct intel_device_info *devinfo,
2142                          const void *assembly, int start_offset, int end_offset,
2143                          struct disasm_info *disasm)
2144{
2145   bool valid = true;
2146
2147   for (int src_offset = start_offset; src_offset < end_offset;) {
2148      const brw_inst *inst = assembly + src_offset;
2149      bool is_compact = brw_inst_cmpt_control(devinfo, inst);
2150      unsigned inst_size = is_compact ? sizeof(brw_compact_inst)
2151                                      : sizeof(brw_inst);
2152      brw_inst uncompacted;
2153
2154      if (is_compact) {
2155         brw_compact_inst *compacted = (void *)inst;
2156         brw_uncompact_instruction(devinfo, &uncompacted, compacted);
2157         inst = &uncompacted;
2158      }
2159
2160      bool v = brw_validate_instruction(devinfo, inst, src_offset, disasm);
2161      valid = valid && v;
2162
2163      src_offset += inst_size;
2164   }
2165
2166   return valid;
2167}
2168