101e04c3fSmrg/*
29f464c52Smaya * Copyright © 2015-2019 Intel Corporation
301e04c3fSmrg *
401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a
501e04c3fSmrg * copy of this software and associated documentation files (the "Software"),
601e04c3fSmrg * to deal in the Software without restriction, including without limitation
701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the
901e04c3fSmrg * Software is furnished to do so, subject to the following conditions:
1001e04c3fSmrg *
1101e04c3fSmrg * The above copyright notice and this permission notice (including the next
1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the
1301e04c3fSmrg * Software.
1401e04c3fSmrg *
1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
2101e04c3fSmrg * IN THE SOFTWARE.
2201e04c3fSmrg */
2301e04c3fSmrg
2401e04c3fSmrg/** @file brw_eu_validate.c
2501e04c3fSmrg *
2601e04c3fSmrg * This file implements a pass that validates shader assembly.
279f464c52Smaya *
289f464c52Smaya * The restrictions implemented herein are intended to verify that instructions
299f464c52Smaya * in shader assembly do not violate restrictions documented in the graphics
309f464c52Smaya * programming reference manuals.
319f464c52Smaya *
329f464c52Smaya * The restrictions are difficult for humans to quickly verify due to their
339f464c52Smaya * complexity and abundance.
349f464c52Smaya *
359f464c52Smaya * It is critical that this code is thoroughly unit tested because false
369f464c52Smaya * results will lead developers astray, which is worse than having no validator
379f464c52Smaya * at all. Functional changes to this file without corresponding unit tests (in
389f464c52Smaya * test_eu_validate.cpp) will be rejected.
3901e04c3fSmrg */
4001e04c3fSmrg
417ec681f3Smrg#include <stdlib.h>
4201e04c3fSmrg#include "brw_eu.h"
4301e04c3fSmrg
/* Error messages are built by repeated concatenation, so the length is kept
 * next to the buffer instead of being recomputed with strlen() every time.
 *
 * For strings grown with cat(), str is NULL until the first append and
 * afterwards points to a realloc()ed buffer of len bytes plus a terminating
 * NUL.
 */
struct string {
   char *str;
   size_t len;
};

/**
 * Appends \p src to \p dest, growing dest's buffer with realloc().
 *
 * If the buffer cannot be grown, \p dest is left exactly as it was (the
 * append is dropped) rather than losing the text accumulated so far.
 */
static void
cat(struct string *dest, const struct string src)
{
   const size_t new_len = dest->len + src.len;
   char *grown = realloc(dest->str, new_len + 1);

   /* Never assign the realloc() result straight to dest->str: on failure
    * that would leak the old buffer and the writes below would dereference
    * NULL.
    */
   if (grown == NULL)
      return;

   /* memcpy() from a NULL source is undefined even when copying 0 bytes. */
   if (src.len > 0)
      memcpy(grown + dest->len, src.str, src.len);
   grown[new_len] = '\0';

   dest->str = grown;
   dest->len = new_len;
}
/* Appends the NUL-terminated C string src to the struct string lvalue dest. */
#define CAT(dest, src) cat(&dest, (struct string){src, strlen(src)})
5901e04c3fSmrg
6001e04c3fSmrgstatic bool
6101e04c3fSmrgcontains(const struct string haystack, const struct string needle)
6201e04c3fSmrg{
6301e04c3fSmrg   return haystack.str && memmem(haystack.str, haystack.len,
6401e04c3fSmrg                                 needle.str, needle.len) != NULL;
6501e04c3fSmrg}
6601e04c3fSmrg#define CONTAINS(haystack, needle) \
6701e04c3fSmrg   contains(haystack, (struct string){needle, strlen(needle)})
6801e04c3fSmrg
/* Wraps a string literal in the validator's one-line error format. */
#define error(str)   "\tERROR: " str "\n"
/* Whitespace as wide as the "\tERROR: " prefix (presumably for aligning
 * continuation lines of a message).
 */
#define ERROR_INDENT "\t       "

/* ERROR and ERROR_IF expect a `struct string error_msg` in the enclosing
 * scope and require msg to be a string literal.  The formatted message is
 * appended only when cond holds and the identical text has not been recorded
 * in error_msg already.
 */
#define ERROR(msg) ERROR_IF(true, msg)
#define ERROR_IF(cond, msg)                             \
   do {                                                 \
      if ((cond) && !CONTAINS(error_msg, error(msg))) { \
         CAT(error_msg, error(msg));                    \
      }                                                 \
   } while (0)

/* Calls func(devinfo, inst, args...) and appends any message it returns to
 * error_msg, freeing the returned buffer afterwards.  Expects devinfo, inst
 * and error_msg to exist in the enclosing scope.
 *
 * The temporary deliberately avoids a leading double underscore, which is
 * reserved for the implementation.
 */
#define CHECK(func, args...)                                   \
   do {                                                        \
      struct string check_msg_ = func(devinfo, inst, ##args);  \
      if (check_msg_.str) {                                    \
         cat(&error_msg, check_msg_);                          \
         free(check_msg_.str);                                 \
      }                                                        \
   } while (0)

/* STRIDE maps 0 to 0 and n > 0 to 2^(n-1); WIDTH maps n to 2^n.  Every use
 * of the argument is parenthesized so that expressions with low-precedence
 * operators (e.g. ?:) expand correctly.
 */
#define STRIDE(stride) ((stride) != 0 ? 1 << ((stride) - 1) : 0)
#define WIDTH(width)   (1 << (width))
9101e04c3fSmrg
9201e04c3fSmrgstatic bool
937ec681f3Smrginst_is_send(const struct intel_device_info *devinfo, const brw_inst *inst)
9401e04c3fSmrg{
9501e04c3fSmrg   switch (brw_inst_opcode(devinfo, inst)) {
9601e04c3fSmrg   case BRW_OPCODE_SEND:
9701e04c3fSmrg   case BRW_OPCODE_SENDC:
9801e04c3fSmrg   case BRW_OPCODE_SENDS:
9901e04c3fSmrg   case BRW_OPCODE_SENDSC:
10001e04c3fSmrg      return true;
10101e04c3fSmrg   default:
10201e04c3fSmrg      return false;
10301e04c3fSmrg   }
10401e04c3fSmrg}
10501e04c3fSmrg
1069f464c52Smayastatic bool
1077ec681f3Smrginst_is_split_send(const struct intel_device_info *devinfo,
1087ec681f3Smrg                   const brw_inst *inst)
1099f464c52Smaya{
1107ec681f3Smrg   if (devinfo->ver >= 12) {
1117ec681f3Smrg      return inst_is_send(devinfo, inst);
1127ec681f3Smrg   } else {
1137ec681f3Smrg      switch (brw_inst_opcode(devinfo, inst)) {
1147ec681f3Smrg      case BRW_OPCODE_SENDS:
1157ec681f3Smrg      case BRW_OPCODE_SENDSC:
1167ec681f3Smrg         return true;
1177ec681f3Smrg      default:
1187ec681f3Smrg         return false;
1197ec681f3Smrg      }
1209f464c52Smaya   }
1219f464c52Smaya}
1229f464c52Smaya
12301e04c3fSmrgstatic unsigned
12401e04c3fSmrgsigned_type(unsigned type)
12501e04c3fSmrg{
12601e04c3fSmrg   switch (type) {
12701e04c3fSmrg   case BRW_REGISTER_TYPE_UD: return BRW_REGISTER_TYPE_D;
12801e04c3fSmrg   case BRW_REGISTER_TYPE_UW: return BRW_REGISTER_TYPE_W;
12901e04c3fSmrg   case BRW_REGISTER_TYPE_UB: return BRW_REGISTER_TYPE_B;
13001e04c3fSmrg   case BRW_REGISTER_TYPE_UQ: return BRW_REGISTER_TYPE_Q;
13101e04c3fSmrg   default:                   return type;
13201e04c3fSmrg   }
13301e04c3fSmrg}
13401e04c3fSmrg
1357ec681f3Smrgstatic enum brw_reg_type
1367ec681f3Smrginst_dst_type(const struct intel_device_info *devinfo, const brw_inst *inst)
1377ec681f3Smrg{
1387ec681f3Smrg   return (devinfo->ver < 12 || !inst_is_send(devinfo, inst)) ?
1397ec681f3Smrg      brw_inst_dst_type(devinfo, inst) : BRW_REGISTER_TYPE_D;
1407ec681f3Smrg}
1417ec681f3Smrg
14201e04c3fSmrgstatic bool
1437ec681f3Smrginst_is_raw_move(const struct intel_device_info *devinfo, const brw_inst *inst)
14401e04c3fSmrg{
1457ec681f3Smrg   unsigned dst_type = signed_type(inst_dst_type(devinfo, inst));
14601e04c3fSmrg   unsigned src_type = signed_type(brw_inst_src0_type(devinfo, inst));
14701e04c3fSmrg
14801e04c3fSmrg   if (brw_inst_src0_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE) {
14901e04c3fSmrg      /* FIXME: not strictly true */
15001e04c3fSmrg      if (brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_VF ||
15101e04c3fSmrg          brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_UV ||
15201e04c3fSmrg          brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_V) {
15301e04c3fSmrg         return false;
15401e04c3fSmrg      }
15501e04c3fSmrg   } else if (brw_inst_src0_negate(devinfo, inst) ||
15601e04c3fSmrg              brw_inst_src0_abs(devinfo, inst)) {
15701e04c3fSmrg      return false;
15801e04c3fSmrg   }
15901e04c3fSmrg
16001e04c3fSmrg   return brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MOV &&
16101e04c3fSmrg          brw_inst_saturate(devinfo, inst) == 0 &&
16201e04c3fSmrg          dst_type == src_type;
16301e04c3fSmrg}
16401e04c3fSmrg
16501e04c3fSmrgstatic bool
1667ec681f3Smrgdst_is_null(const struct intel_device_info *devinfo, const brw_inst *inst)
16701e04c3fSmrg{
16801e04c3fSmrg   return brw_inst_dst_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
16901e04c3fSmrg          brw_inst_dst_da_reg_nr(devinfo, inst) == BRW_ARF_NULL;
17001e04c3fSmrg}
17101e04c3fSmrg
17201e04c3fSmrgstatic bool
1737ec681f3Smrgsrc0_is_null(const struct intel_device_info *devinfo, const brw_inst *inst)
17401e04c3fSmrg{
1757ec681f3Smrg   return brw_inst_src0_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT &&
1767ec681f3Smrg          brw_inst_src0_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
17701e04c3fSmrg          brw_inst_src0_da_reg_nr(devinfo, inst) == BRW_ARF_NULL;
17801e04c3fSmrg}
17901e04c3fSmrg
18001e04c3fSmrgstatic bool
1817ec681f3Smrgsrc1_is_null(const struct intel_device_info *devinfo, const brw_inst *inst)
18201e04c3fSmrg{
18301e04c3fSmrg   return brw_inst_src1_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
18401e04c3fSmrg          brw_inst_src1_da_reg_nr(devinfo, inst) == BRW_ARF_NULL;
18501e04c3fSmrg}
18601e04c3fSmrg
1879f464c52Smayastatic bool
1887ec681f3Smrgsrc0_is_acc(const struct intel_device_info *devinfo, const brw_inst *inst)
1899f464c52Smaya{
1909f464c52Smaya   return brw_inst_src0_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
1919f464c52Smaya          (brw_inst_src0_da_reg_nr(devinfo, inst) & 0xF0) == BRW_ARF_ACCUMULATOR;
1929f464c52Smaya}
1939f464c52Smaya
1949f464c52Smayastatic bool
1957ec681f3Smrgsrc1_is_acc(const struct intel_device_info *devinfo, const brw_inst *inst)
1969f464c52Smaya{
1979f464c52Smaya   return brw_inst_src1_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
1989f464c52Smaya          (brw_inst_src1_da_reg_nr(devinfo, inst) & 0xF0) == BRW_ARF_ACCUMULATOR;
1999f464c52Smaya}
2009f464c52Smaya
20101e04c3fSmrgstatic bool
2027ec681f3Smrgsrc0_has_scalar_region(const struct intel_device_info *devinfo,
2037ec681f3Smrg                       const brw_inst *inst)
20401e04c3fSmrg{
20501e04c3fSmrg   return brw_inst_src0_vstride(devinfo, inst) == BRW_VERTICAL_STRIDE_0 &&
20601e04c3fSmrg          brw_inst_src0_width(devinfo, inst) == BRW_WIDTH_1 &&
20701e04c3fSmrg          brw_inst_src0_hstride(devinfo, inst) == BRW_HORIZONTAL_STRIDE_0;
20801e04c3fSmrg}
20901e04c3fSmrg
21001e04c3fSmrgstatic bool
2117ec681f3Smrgsrc1_has_scalar_region(const struct intel_device_info *devinfo,
2127ec681f3Smrg                       const brw_inst *inst)
21301e04c3fSmrg{
21401e04c3fSmrg   return brw_inst_src1_vstride(devinfo, inst) == BRW_VERTICAL_STRIDE_0 &&
21501e04c3fSmrg          brw_inst_src1_width(devinfo, inst) == BRW_WIDTH_1 &&
21601e04c3fSmrg          brw_inst_src1_hstride(devinfo, inst) == BRW_HORIZONTAL_STRIDE_0;
21701e04c3fSmrg}
21801e04c3fSmrg
21901e04c3fSmrgstatic unsigned
2207ec681f3Smrgnum_sources_from_inst(const struct intel_device_info *devinfo,
22101e04c3fSmrg                      const brw_inst *inst)
22201e04c3fSmrg{
22301e04c3fSmrg   const struct opcode_desc *desc =
22401e04c3fSmrg      brw_opcode_desc(devinfo, brw_inst_opcode(devinfo, inst));
22501e04c3fSmrg   unsigned math_function;
22601e04c3fSmrg
22701e04c3fSmrg   if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MATH) {
22801e04c3fSmrg      math_function = brw_inst_math_function(devinfo, inst);
2297ec681f3Smrg   } else if (devinfo->ver < 6 &&
23001e04c3fSmrg              brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND) {
23101e04c3fSmrg      if (brw_inst_sfid(devinfo, inst) == BRW_SFID_MATH) {
23201e04c3fSmrg         /* src1 must be a descriptor (including the information to determine
23301e04c3fSmrg          * that the SEND is doing an extended math operation), but src0 can
23401e04c3fSmrg          * actually be null since it serves as the source of the implicit GRF
23501e04c3fSmrg          * to MRF move.
23601e04c3fSmrg          *
23701e04c3fSmrg          * If we stop using that functionality, we'll have to revisit this.
23801e04c3fSmrg          */
23901e04c3fSmrg         return 2;
24001e04c3fSmrg      } else {
24101e04c3fSmrg         /* Send instructions are allowed to have null sources since they use
24201e04c3fSmrg          * the base_mrf field to specify which message register source.
24301e04c3fSmrg          */
24401e04c3fSmrg         return 0;
24501e04c3fSmrg      }
24601e04c3fSmrg   } else {
24701e04c3fSmrg      assert(desc->nsrc < 4);
24801e04c3fSmrg      return desc->nsrc;
24901e04c3fSmrg   }
25001e04c3fSmrg
25101e04c3fSmrg   switch (math_function) {
25201e04c3fSmrg   case BRW_MATH_FUNCTION_INV:
25301e04c3fSmrg   case BRW_MATH_FUNCTION_LOG:
25401e04c3fSmrg   case BRW_MATH_FUNCTION_EXP:
25501e04c3fSmrg   case BRW_MATH_FUNCTION_SQRT:
25601e04c3fSmrg   case BRW_MATH_FUNCTION_RSQ:
25701e04c3fSmrg   case BRW_MATH_FUNCTION_SIN:
25801e04c3fSmrg   case BRW_MATH_FUNCTION_COS:
25901e04c3fSmrg   case BRW_MATH_FUNCTION_SINCOS:
2607ec681f3Smrg   case GFX8_MATH_FUNCTION_INVM:
2617ec681f3Smrg   case GFX8_MATH_FUNCTION_RSQRTM:
26201e04c3fSmrg      return 1;
26301e04c3fSmrg   case BRW_MATH_FUNCTION_FDIV:
26401e04c3fSmrg   case BRW_MATH_FUNCTION_POW:
26501e04c3fSmrg   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
26601e04c3fSmrg   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT:
26701e04c3fSmrg   case BRW_MATH_FUNCTION_INT_DIV_REMAINDER:
26801e04c3fSmrg      return 2;
26901e04c3fSmrg   default:
27001e04c3fSmrg      unreachable("not reached");
27101e04c3fSmrg   }
27201e04c3fSmrg}
27301e04c3fSmrg
27401e04c3fSmrgstatic struct string
2757ec681f3Smrginvalid_values(const struct intel_device_info *devinfo, const brw_inst *inst)
2767ec681f3Smrg{
2777ec681f3Smrg   unsigned num_sources = num_sources_from_inst(devinfo, inst);
2787ec681f3Smrg   struct string error_msg = { .str = NULL, .len = 0 };
2797ec681f3Smrg
2807ec681f3Smrg   switch ((enum brw_execution_size) brw_inst_exec_size(devinfo, inst)) {
2817ec681f3Smrg   case BRW_EXECUTE_1:
2827ec681f3Smrg   case BRW_EXECUTE_2:
2837ec681f3Smrg   case BRW_EXECUTE_4:
2847ec681f3Smrg   case BRW_EXECUTE_8:
2857ec681f3Smrg   case BRW_EXECUTE_16:
2867ec681f3Smrg   case BRW_EXECUTE_32:
2877ec681f3Smrg      break;
2887ec681f3Smrg   default:
2897ec681f3Smrg      ERROR("invalid execution size");
2907ec681f3Smrg      break;
2917ec681f3Smrg   }
2927ec681f3Smrg
2937ec681f3Smrg   if (inst_is_send(devinfo, inst))
2947ec681f3Smrg      return error_msg;
2957ec681f3Smrg
2967ec681f3Smrg   if (num_sources == 3) {
2977ec681f3Smrg      /* Nothing to test:
2987ec681f3Smrg       *    No 3-src instructions on Gfx4-5
2997ec681f3Smrg       *    No reg file bits on Gfx6-10 (align16)
3007ec681f3Smrg       *    No invalid encodings on Gfx10-12 (align1)
3017ec681f3Smrg       */
3027ec681f3Smrg   } else {
3037ec681f3Smrg      if (devinfo->ver > 6) {
3047ec681f3Smrg         ERROR_IF(brw_inst_dst_reg_file(devinfo, inst) == MRF ||
3057ec681f3Smrg                  (num_sources > 0 &&
3067ec681f3Smrg                   brw_inst_src0_reg_file(devinfo, inst) == MRF) ||
3077ec681f3Smrg                  (num_sources > 1 &&
3087ec681f3Smrg                   brw_inst_src1_reg_file(devinfo, inst) == MRF),
3097ec681f3Smrg                  "invalid register file encoding");
3107ec681f3Smrg      }
3117ec681f3Smrg   }
3127ec681f3Smrg
3137ec681f3Smrg   if (error_msg.str)
3147ec681f3Smrg      return error_msg;
3157ec681f3Smrg
3167ec681f3Smrg   if (num_sources == 3) {
3177ec681f3Smrg      if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
3187ec681f3Smrg         if (devinfo->ver >= 10) {
3197ec681f3Smrg            ERROR_IF(brw_inst_3src_a1_dst_type (devinfo, inst) == INVALID_REG_TYPE ||
3207ec681f3Smrg                     brw_inst_3src_a1_src0_type(devinfo, inst) == INVALID_REG_TYPE ||
3217ec681f3Smrg                     brw_inst_3src_a1_src1_type(devinfo, inst) == INVALID_REG_TYPE ||
3227ec681f3Smrg                     brw_inst_3src_a1_src2_type(devinfo, inst) == INVALID_REG_TYPE,
3237ec681f3Smrg                     "invalid register type encoding");
3247ec681f3Smrg         } else {
3257ec681f3Smrg            ERROR("Align1 mode not allowed on Gen < 10");
3267ec681f3Smrg         }
3277ec681f3Smrg      } else {
3287ec681f3Smrg         ERROR_IF(brw_inst_3src_a16_dst_type(devinfo, inst) == INVALID_REG_TYPE ||
3297ec681f3Smrg                  brw_inst_3src_a16_src_type(devinfo, inst) == INVALID_REG_TYPE,
3307ec681f3Smrg                  "invalid register type encoding");
3317ec681f3Smrg      }
3327ec681f3Smrg   } else {
3337ec681f3Smrg      ERROR_IF(brw_inst_dst_type (devinfo, inst) == INVALID_REG_TYPE ||
3347ec681f3Smrg               (num_sources > 0 &&
3357ec681f3Smrg                brw_inst_src0_type(devinfo, inst) == INVALID_REG_TYPE) ||
3367ec681f3Smrg               (num_sources > 1 &&
3377ec681f3Smrg                brw_inst_src1_type(devinfo, inst) == INVALID_REG_TYPE),
3387ec681f3Smrg               "invalid register type encoding");
3397ec681f3Smrg   }
3407ec681f3Smrg
3417ec681f3Smrg   return error_msg;
3427ec681f3Smrg}
3437ec681f3Smrg
3447ec681f3Smrgstatic struct string
3457ec681f3Smrgsources_not_null(const struct intel_device_info *devinfo,
34601e04c3fSmrg                 const brw_inst *inst)
34701e04c3fSmrg{
34801e04c3fSmrg   unsigned num_sources = num_sources_from_inst(devinfo, inst);
34901e04c3fSmrg   struct string error_msg = { .str = NULL, .len = 0 };
35001e04c3fSmrg
35101e04c3fSmrg   /* Nothing to test. 3-src instructions can only have GRF sources, and
35201e04c3fSmrg    * there's no bit to control the file.
35301e04c3fSmrg    */
35401e04c3fSmrg   if (num_sources == 3)
35501e04c3fSmrg      return (struct string){};
35601e04c3fSmrg
3579f464c52Smaya   /* Nothing to test.  Split sends can only encode a file in sources that are
3589f464c52Smaya    * allowed to be NULL.
3599f464c52Smaya    */
3609f464c52Smaya   if (inst_is_split_send(devinfo, inst))
3619f464c52Smaya      return (struct string){};
3629f464c52Smaya
3637ec681f3Smrg   if (num_sources >= 1 && brw_inst_opcode(devinfo, inst) != BRW_OPCODE_SYNC)
36401e04c3fSmrg      ERROR_IF(src0_is_null(devinfo, inst), "src0 is null");
36501e04c3fSmrg
36601e04c3fSmrg   if (num_sources == 2)
36701e04c3fSmrg      ERROR_IF(src1_is_null(devinfo, inst), "src1 is null");
36801e04c3fSmrg
36901e04c3fSmrg   return error_msg;
37001e04c3fSmrg}
37101e04c3fSmrg
3729f464c52Smayastatic struct string
3737ec681f3Smrgalignment_supported(const struct intel_device_info *devinfo,
3749f464c52Smaya                    const brw_inst *inst)
3759f464c52Smaya{
3769f464c52Smaya   struct string error_msg = { .str = NULL, .len = 0 };
3779f464c52Smaya
3787ec681f3Smrg   ERROR_IF(devinfo->ver >= 11 && brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16,
3799f464c52Smaya            "Align16 not supported");
3809f464c52Smaya
3819f464c52Smaya   return error_msg;
3829f464c52Smaya}
3839f464c52Smaya
3849f464c52Smayastatic bool
3857ec681f3Smrginst_uses_src_acc(const struct intel_device_info *devinfo, const brw_inst *inst)
3869f464c52Smaya{
3879f464c52Smaya   /* Check instructions that use implicit accumulator sources */
3889f464c52Smaya   switch (brw_inst_opcode(devinfo, inst)) {
3899f464c52Smaya   case BRW_OPCODE_MAC:
3909f464c52Smaya   case BRW_OPCODE_MACH:
3919f464c52Smaya   case BRW_OPCODE_SADA2:
3929f464c52Smaya      return true;
3937ec681f3Smrg   default:
3947ec681f3Smrg      break;
3959f464c52Smaya   }
3969f464c52Smaya
3979f464c52Smaya   /* FIXME: support 3-src instructions */
3989f464c52Smaya   unsigned num_sources = num_sources_from_inst(devinfo, inst);
3999f464c52Smaya   assert(num_sources < 3);
4009f464c52Smaya
4019f464c52Smaya   return src0_is_acc(devinfo, inst) || (num_sources > 1 && src1_is_acc(devinfo, inst));
4029f464c52Smaya}
4039f464c52Smaya
40401e04c3fSmrgstatic struct string
4057ec681f3Smrgsend_restrictions(const struct intel_device_info *devinfo,
40601e04c3fSmrg                  const brw_inst *inst)
40701e04c3fSmrg{
40801e04c3fSmrg   struct string error_msg = { .str = NULL, .len = 0 };
40901e04c3fSmrg
4109f464c52Smaya   if (inst_is_split_send(devinfo, inst)) {
4119f464c52Smaya      ERROR_IF(brw_inst_send_src1_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
4129f464c52Smaya               brw_inst_send_src1_reg_nr(devinfo, inst) != BRW_ARF_NULL,
4139f464c52Smaya               "src1 of split send must be a GRF or NULL");
4149f464c52Smaya
4159f464c52Smaya      ERROR_IF(brw_inst_eot(devinfo, inst) &&
4169f464c52Smaya               brw_inst_src0_da_reg_nr(devinfo, inst) < 112,
4179f464c52Smaya               "send with EOT must use g112-g127");
4189f464c52Smaya      ERROR_IF(brw_inst_eot(devinfo, inst) &&
4199f464c52Smaya               brw_inst_send_src1_reg_file(devinfo, inst) == BRW_GENERAL_REGISTER_FILE &&
4209f464c52Smaya               brw_inst_send_src1_reg_nr(devinfo, inst) < 112,
4219f464c52Smaya               "send with EOT must use g112-g127");
4229f464c52Smaya
4239f464c52Smaya      if (brw_inst_send_src1_reg_file(devinfo, inst) == BRW_GENERAL_REGISTER_FILE) {
4249f464c52Smaya         /* Assume minimums if we don't know */
4259f464c52Smaya         unsigned mlen = 1;
4269f464c52Smaya         if (!brw_inst_send_sel_reg32_desc(devinfo, inst)) {
4279f464c52Smaya            const uint32_t desc = brw_inst_send_desc(devinfo, inst);
4289f464c52Smaya            mlen = brw_message_desc_mlen(devinfo, desc);
4299f464c52Smaya         }
4309f464c52Smaya
4319f464c52Smaya         unsigned ex_mlen = 1;
4329f464c52Smaya         if (!brw_inst_send_sel_reg32_ex_desc(devinfo, inst)) {
4337ec681f3Smrg            const uint32_t ex_desc = brw_inst_sends_ex_desc(devinfo, inst);
4349f464c52Smaya            ex_mlen = brw_message_ex_desc_ex_mlen(devinfo, ex_desc);
4359f464c52Smaya         }
4369f464c52Smaya         const unsigned src0_reg_nr = brw_inst_src0_da_reg_nr(devinfo, inst);
4379f464c52Smaya         const unsigned src1_reg_nr = brw_inst_send_src1_reg_nr(devinfo, inst);
4389f464c52Smaya         ERROR_IF((src0_reg_nr <= src1_reg_nr &&
4399f464c52Smaya                   src1_reg_nr < src0_reg_nr + mlen) ||
4409f464c52Smaya                  (src1_reg_nr <= src0_reg_nr &&
4419f464c52Smaya                   src0_reg_nr < src1_reg_nr + ex_mlen),
4429f464c52Smaya                   "split send payloads must not overlap");
4439f464c52Smaya      }
4449f464c52Smaya   } else if (inst_is_send(devinfo, inst)) {
44501e04c3fSmrg      ERROR_IF(brw_inst_src0_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT,
44601e04c3fSmrg               "send must use direct addressing");
44701e04c3fSmrg
4487ec681f3Smrg      if (devinfo->ver >= 7) {
4497ec681f3Smrg         ERROR_IF(brw_inst_send_src0_reg_file(devinfo, inst) != BRW_GENERAL_REGISTER_FILE,
4507ec681f3Smrg                  "send from non-GRF");
45101e04c3fSmrg         ERROR_IF(brw_inst_eot(devinfo, inst) &&
45201e04c3fSmrg                  brw_inst_src0_da_reg_nr(devinfo, inst) < 112,
45301e04c3fSmrg                  "send with EOT must use g112-g127");
45401e04c3fSmrg      }
45501e04c3fSmrg
4567ec681f3Smrg      if (devinfo->ver >= 8) {
45701e04c3fSmrg         ERROR_IF(!dst_is_null(devinfo, inst) &&
45801e04c3fSmrg                  (brw_inst_dst_da_reg_nr(devinfo, inst) +
45901e04c3fSmrg                   brw_inst_rlen(devinfo, inst) > 127) &&
46001e04c3fSmrg                  (brw_inst_src0_da_reg_nr(devinfo, inst) +
46101e04c3fSmrg                   brw_inst_mlen(devinfo, inst) >
46201e04c3fSmrg                   brw_inst_dst_da_reg_nr(devinfo, inst)),
46301e04c3fSmrg                  "r127 must not be used for return address when there is "
46401e04c3fSmrg                  "a src and dest overlap");
46501e04c3fSmrg      }
46601e04c3fSmrg   }
46701e04c3fSmrg
46801e04c3fSmrg   return error_msg;
46901e04c3fSmrg}
47001e04c3fSmrg
47101e04c3fSmrgstatic bool
4727ec681f3Smrgis_unsupported_inst(const struct intel_device_info *devinfo,
47301e04c3fSmrg                    const brw_inst *inst)
47401e04c3fSmrg{
4757ec681f3Smrg   return brw_inst_opcode(devinfo, inst) == BRW_OPCODE_ILLEGAL;
47601e04c3fSmrg}
47701e04c3fSmrg
4789f464c52Smaya/**
4799f464c52Smaya * Returns whether a combination of two types would qualify as mixed float
4809f464c52Smaya * operation mode
4819f464c52Smaya */
4829f464c52Smayastatic inline bool
4839f464c52Smayatypes_are_mixed_float(enum brw_reg_type t0, enum brw_reg_type t1)
4849f464c52Smaya{
4859f464c52Smaya   return (t0 == BRW_REGISTER_TYPE_F && t1 == BRW_REGISTER_TYPE_HF) ||
4869f464c52Smaya          (t1 == BRW_REGISTER_TYPE_F && t0 == BRW_REGISTER_TYPE_HF);
4879f464c52Smaya}
4889f464c52Smaya
48901e04c3fSmrgstatic enum brw_reg_type
49001e04c3fSmrgexecution_type_for_type(enum brw_reg_type type)
49101e04c3fSmrg{
49201e04c3fSmrg   switch (type) {
49301e04c3fSmrg   case BRW_REGISTER_TYPE_NF:
49401e04c3fSmrg   case BRW_REGISTER_TYPE_DF:
49501e04c3fSmrg   case BRW_REGISTER_TYPE_F:
49601e04c3fSmrg   case BRW_REGISTER_TYPE_HF:
49701e04c3fSmrg      return type;
49801e04c3fSmrg
49901e04c3fSmrg   case BRW_REGISTER_TYPE_VF:
50001e04c3fSmrg      return BRW_REGISTER_TYPE_F;
50101e04c3fSmrg
50201e04c3fSmrg   case BRW_REGISTER_TYPE_Q:
50301e04c3fSmrg   case BRW_REGISTER_TYPE_UQ:
50401e04c3fSmrg      return BRW_REGISTER_TYPE_Q;
50501e04c3fSmrg
50601e04c3fSmrg   case BRW_REGISTER_TYPE_D:
50701e04c3fSmrg   case BRW_REGISTER_TYPE_UD:
50801e04c3fSmrg      return BRW_REGISTER_TYPE_D;
50901e04c3fSmrg
51001e04c3fSmrg   case BRW_REGISTER_TYPE_W:
51101e04c3fSmrg   case BRW_REGISTER_TYPE_UW:
51201e04c3fSmrg   case BRW_REGISTER_TYPE_B:
51301e04c3fSmrg   case BRW_REGISTER_TYPE_UB:
51401e04c3fSmrg   case BRW_REGISTER_TYPE_V:
51501e04c3fSmrg   case BRW_REGISTER_TYPE_UV:
51601e04c3fSmrg      return BRW_REGISTER_TYPE_W;
51701e04c3fSmrg   }
51801e04c3fSmrg   unreachable("not reached");
51901e04c3fSmrg}
52001e04c3fSmrg
52101e04c3fSmrg/**
52201e04c3fSmrg * Returns the execution type of an instruction \p inst
52301e04c3fSmrg */
52401e04c3fSmrgstatic enum brw_reg_type
5257ec681f3Smrgexecution_type(const struct intel_device_info *devinfo, const brw_inst *inst)
52601e04c3fSmrg{
52701e04c3fSmrg   unsigned num_sources = num_sources_from_inst(devinfo, inst);
52801e04c3fSmrg   enum brw_reg_type src0_exec_type, src1_exec_type;
52901e04c3fSmrg
53001e04c3fSmrg   /* Execution data type is independent of destination data type, except in
5319f464c52Smaya    * mixed F/HF instructions.
53201e04c3fSmrg    */
5337ec681f3Smrg   enum brw_reg_type dst_exec_type = inst_dst_type(devinfo, inst);
53401e04c3fSmrg
53501e04c3fSmrg   src0_exec_type = execution_type_for_type(brw_inst_src0_type(devinfo, inst));
53601e04c3fSmrg   if (num_sources == 1) {
5379f464c52Smaya      if (src0_exec_type == BRW_REGISTER_TYPE_HF)
53801e04c3fSmrg         return dst_exec_type;
53901e04c3fSmrg      return src0_exec_type;
54001e04c3fSmrg   }
54101e04c3fSmrg
54201e04c3fSmrg   src1_exec_type = execution_type_for_type(brw_inst_src1_type(devinfo, inst));
5439f464c52Smaya   if (types_are_mixed_float(src0_exec_type, src1_exec_type) ||
5449f464c52Smaya       types_are_mixed_float(src0_exec_type, dst_exec_type) ||
5459f464c52Smaya       types_are_mixed_float(src1_exec_type, dst_exec_type)) {
5469f464c52Smaya      return BRW_REGISTER_TYPE_F;
5479f464c52Smaya   }
5489f464c52Smaya
54901e04c3fSmrg   if (src0_exec_type == src1_exec_type)
55001e04c3fSmrg      return src0_exec_type;
55101e04c3fSmrg
5527ec681f3Smrg   if (src0_exec_type == BRW_REGISTER_TYPE_NF ||
5537ec681f3Smrg       src1_exec_type == BRW_REGISTER_TYPE_NF)
5547ec681f3Smrg      return BRW_REGISTER_TYPE_NF;
5557ec681f3Smrg
55601e04c3fSmrg   /* Mixed operand types where one is float is float on Gen < 6
55701e04c3fSmrg    * (and not allowed on later platforms)
55801e04c3fSmrg    */
5597ec681f3Smrg   if (devinfo->ver < 6 &&
56001e04c3fSmrg       (src0_exec_type == BRW_REGISTER_TYPE_F ||
56101e04c3fSmrg        src1_exec_type == BRW_REGISTER_TYPE_F))
56201e04c3fSmrg      return BRW_REGISTER_TYPE_F;
56301e04c3fSmrg
56401e04c3fSmrg   if (src0_exec_type == BRW_REGISTER_TYPE_Q ||
56501e04c3fSmrg       src1_exec_type == BRW_REGISTER_TYPE_Q)
56601e04c3fSmrg      return BRW_REGISTER_TYPE_Q;
56701e04c3fSmrg
56801e04c3fSmrg   if (src0_exec_type == BRW_REGISTER_TYPE_D ||
56901e04c3fSmrg       src1_exec_type == BRW_REGISTER_TYPE_D)
57001e04c3fSmrg      return BRW_REGISTER_TYPE_D;
57101e04c3fSmrg
57201e04c3fSmrg   if (src0_exec_type == BRW_REGISTER_TYPE_W ||
57301e04c3fSmrg       src1_exec_type == BRW_REGISTER_TYPE_W)
57401e04c3fSmrg      return BRW_REGISTER_TYPE_W;
57501e04c3fSmrg
57601e04c3fSmrg   if (src0_exec_type == BRW_REGISTER_TYPE_DF ||
57701e04c3fSmrg       src1_exec_type == BRW_REGISTER_TYPE_DF)
57801e04c3fSmrg      return BRW_REGISTER_TYPE_DF;
57901e04c3fSmrg
5809f464c52Smaya   unreachable("not reached");
58101e04c3fSmrg}
58201e04c3fSmrg
/**
 * Returns whether a region is packed
 *
 * A region is packed if its elements are adjacent in memory, with no
 * intervening space, no overlap, and no replicated values.
 *
 * Concretely: the vertical stride must equal the width, and the horizontal
 * stride must be 1, or 0 when the vertical stride (and thus the width) is 1.
 */
static bool
is_packed(unsigned vstride, unsigned width, unsigned hstride)
{
   if (vstride != width)
      return false;

   const unsigned packed_hstride = (vstride == 1) ? 0 : 1;

   return hstride == packed_hstride;
}
60201e04c3fSmrg
6039f464c52Smaya/**
6049f464c52Smaya * Returns whether an instruction is an explicit or implicit conversion
6059f464c52Smaya * to/from half-float.
6069f464c52Smaya */
6079f464c52Smayastatic bool
6087ec681f3Smrgis_half_float_conversion(const struct intel_device_info *devinfo,
6099f464c52Smaya                         const brw_inst *inst)
6109f464c52Smaya{
6119f464c52Smaya   enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);
6129f464c52Smaya
6139f464c52Smaya   unsigned num_sources = num_sources_from_inst(devinfo, inst);
6149f464c52Smaya   enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
6159f464c52Smaya
6169f464c52Smaya   if (dst_type != src0_type &&
6179f464c52Smaya       (dst_type == BRW_REGISTER_TYPE_HF || src0_type == BRW_REGISTER_TYPE_HF)) {
6189f464c52Smaya      return true;
6199f464c52Smaya   } else if (num_sources > 1) {
6209f464c52Smaya      enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst);
6219f464c52Smaya      return dst_type != src1_type &&
6229f464c52Smaya            (dst_type == BRW_REGISTER_TYPE_HF ||
6239f464c52Smaya             src1_type == BRW_REGISTER_TYPE_HF);
6249f464c52Smaya   }
6259f464c52Smaya
6269f464c52Smaya   return false;
6279f464c52Smaya}
6289f464c52Smaya
6299f464c52Smaya/*
6309f464c52Smaya * Returns whether an instruction is using mixed float operation mode
6319f464c52Smaya */
6329f464c52Smayastatic bool
6337ec681f3Smrgis_mixed_float(const struct intel_device_info *devinfo, const brw_inst *inst)
6349f464c52Smaya{
6357ec681f3Smrg   if (devinfo->ver < 8)
6369f464c52Smaya      return false;
6379f464c52Smaya
6389f464c52Smaya   if (inst_is_send(devinfo, inst))
6399f464c52Smaya      return false;
6409f464c52Smaya
6419f464c52Smaya   unsigned opcode = brw_inst_opcode(devinfo, inst);
6429f464c52Smaya   const struct opcode_desc *desc = brw_opcode_desc(devinfo, opcode);
6439f464c52Smaya   if (desc->ndst == 0)
6449f464c52Smaya      return false;
6459f464c52Smaya
6469f464c52Smaya   /* FIXME: support 3-src instructions */
6479f464c52Smaya   unsigned num_sources = num_sources_from_inst(devinfo, inst);
6489f464c52Smaya   assert(num_sources < 3);
6499f464c52Smaya
6509f464c52Smaya   enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);
6519f464c52Smaya   enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
6529f464c52Smaya
6539f464c52Smaya   if (num_sources == 1)
6549f464c52Smaya      return types_are_mixed_float(src0_type, dst_type);
6559f464c52Smaya
6569f464c52Smaya   enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst);
6579f464c52Smaya
6589f464c52Smaya   return types_are_mixed_float(src0_type, src1_type) ||
6599f464c52Smaya          types_are_mixed_float(src0_type, dst_type) ||
6609f464c52Smaya          types_are_mixed_float(src1_type, dst_type);
6619f464c52Smaya}
6629f464c52Smaya
6639f464c52Smaya/**
6649f464c52Smaya * Returns whether an instruction is an explicit or implicit conversion
6659f464c52Smaya * to/from byte.
6669f464c52Smaya */
6679f464c52Smayastatic bool
6687ec681f3Smrgis_byte_conversion(const struct intel_device_info *devinfo,
6699f464c52Smaya                   const brw_inst *inst)
6709f464c52Smaya{
6719f464c52Smaya   enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);
6729f464c52Smaya
6739f464c52Smaya   unsigned num_sources = num_sources_from_inst(devinfo, inst);
6749f464c52Smaya   enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
6759f464c52Smaya
6769f464c52Smaya   if (dst_type != src0_type &&
6779f464c52Smaya       (type_sz(dst_type) == 1 || type_sz(src0_type) == 1)) {
6789f464c52Smaya      return true;
6799f464c52Smaya   } else if (num_sources > 1) {
6809f464c52Smaya      enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst);
6819f464c52Smaya      return dst_type != src1_type &&
6829f464c52Smaya            (type_sz(dst_type) == 1 || type_sz(src1_type) == 1);
6839f464c52Smaya   }
6849f464c52Smaya
6859f464c52Smaya   return false;
6869f464c52Smaya}
6879f464c52Smaya
68801e04c3fSmrg/**
68901e04c3fSmrg * Checks restrictions listed in "General Restrictions Based on Operand Types"
69001e04c3fSmrg * in the "Register Region Restrictions" section.
69101e04c3fSmrg */
69201e04c3fSmrgstatic struct string
6937ec681f3Smrggeneral_restrictions_based_on_operand_types(const struct intel_device_info *devinfo,
69401e04c3fSmrg                                            const brw_inst *inst)
69501e04c3fSmrg{
69601e04c3fSmrg   const struct opcode_desc *desc =
69701e04c3fSmrg      brw_opcode_desc(devinfo, brw_inst_opcode(devinfo, inst));
69801e04c3fSmrg   unsigned num_sources = num_sources_from_inst(devinfo, inst);
69901e04c3fSmrg   unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst);
70001e04c3fSmrg   struct string error_msg = { .str = NULL, .len = 0 };
70101e04c3fSmrg
7027ec681f3Smrg   if (inst_is_send(devinfo, inst))
7037ec681f3Smrg      return error_msg;
7047ec681f3Smrg
7057ec681f3Smrg   if (devinfo->ver >= 11) {
7069f464c52Smaya      if (num_sources == 3) {
7079f464c52Smaya         ERROR_IF(brw_reg_type_to_size(brw_inst_3src_a1_src1_type(devinfo, inst)) == 1 ||
7089f464c52Smaya                  brw_reg_type_to_size(brw_inst_3src_a1_src2_type(devinfo, inst)) == 1,
7099f464c52Smaya                  "Byte data type is not supported for src1/2 register regioning. This includes "
7109f464c52Smaya                  "byte broadcast as well.");
7119f464c52Smaya      }
7129f464c52Smaya      if (num_sources == 2) {
7139f464c52Smaya         ERROR_IF(brw_reg_type_to_size(brw_inst_src1_type(devinfo, inst)) == 1,
7149f464c52Smaya                  "Byte data type is not supported for src1 register regioning. This includes "
7159f464c52Smaya                  "byte broadcast as well.");
7169f464c52Smaya      }
7179f464c52Smaya   }
7189f464c52Smaya
71901e04c3fSmrg   if (num_sources == 3)
7209f464c52Smaya      return error_msg;
72101e04c3fSmrg
72201e04c3fSmrg   if (exec_size == 1)
7239f464c52Smaya      return error_msg;
72401e04c3fSmrg
72501e04c3fSmrg   if (desc->ndst == 0)
7269f464c52Smaya      return error_msg;
72701e04c3fSmrg
72801e04c3fSmrg   /* The PRMs say:
72901e04c3fSmrg    *
73001e04c3fSmrg    *    Where n is the largest element size in bytes for any source or
73101e04c3fSmrg    *    destination operand type, ExecSize * n must be <= 64.
73201e04c3fSmrg    *
73301e04c3fSmrg    * But we do not attempt to enforce it, because it is implied by other
73401e04c3fSmrg    * rules:
73501e04c3fSmrg    *
73601e04c3fSmrg    *    - that the destination stride must match the execution data type
73701e04c3fSmrg    *    - sources may not span more than two adjacent GRF registers
73801e04c3fSmrg    *    - destination may not span more than two adjacent GRF registers
73901e04c3fSmrg    *
74001e04c3fSmrg    * In fact, checking it would weaken testing of the other rules.
74101e04c3fSmrg    */
74201e04c3fSmrg
74301e04c3fSmrg   unsigned dst_stride = STRIDE(brw_inst_dst_hstride(devinfo, inst));
7447ec681f3Smrg   enum brw_reg_type dst_type = inst_dst_type(devinfo, inst);
74501e04c3fSmrg   bool dst_type_is_byte =
7467ec681f3Smrg      inst_dst_type(devinfo, inst) == BRW_REGISTER_TYPE_B ||
7477ec681f3Smrg      inst_dst_type(devinfo, inst) == BRW_REGISTER_TYPE_UB;
74801e04c3fSmrg
74901e04c3fSmrg   if (dst_type_is_byte) {
75001e04c3fSmrg      if (is_packed(exec_size * dst_stride, exec_size, dst_stride)) {
7519f464c52Smaya         if (!inst_is_raw_move(devinfo, inst))
75201e04c3fSmrg            ERROR("Only raw MOV supports a packed-byte destination");
7539f464c52Smaya         return error_msg;
75401e04c3fSmrg      }
75501e04c3fSmrg   }
75601e04c3fSmrg
75701e04c3fSmrg   unsigned exec_type = execution_type(devinfo, inst);
75801e04c3fSmrg   unsigned exec_type_size = brw_reg_type_to_size(exec_type);
75901e04c3fSmrg   unsigned dst_type_size = brw_reg_type_to_size(dst_type);
76001e04c3fSmrg
76101e04c3fSmrg   /* On IVB/BYT, region parameters and execution size for DF are in terms of
76201e04c3fSmrg    * 32-bit elements, so they are doubled. For evaluating the validity of an
76301e04c3fSmrg    * instruction, we halve them.
76401e04c3fSmrg    */
7657ec681f3Smrg   if (devinfo->verx10 == 70 &&
76601e04c3fSmrg       exec_type_size == 8 && dst_type_size == 4)
76701e04c3fSmrg      dst_type_size = 8;
76801e04c3fSmrg
7699f464c52Smaya   if (is_byte_conversion(devinfo, inst)) {
7709f464c52Smaya      /* From the BDW+ PRM, Volume 2a, Command Reference, Instructions - MOV:
7719f464c52Smaya       *
7729f464c52Smaya       *    "There is no direct conversion from B/UB to DF or DF to B/UB.
7739f464c52Smaya       *     There is no direct conversion from B/UB to Q/UQ or Q/UQ to B/UB."
7749f464c52Smaya       *
7759f464c52Smaya       * Even if these restrictions are listed for the MOV instruction, we
7769f464c52Smaya       * validate this more generally, since there is the possibility
7779f464c52Smaya       * of implicit conversions from other instructions.
7789f464c52Smaya       */
7799f464c52Smaya      enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
7809f464c52Smaya      enum brw_reg_type src1_type = num_sources > 1 ?
7819f464c52Smaya                                    brw_inst_src1_type(devinfo, inst) : 0;
7829f464c52Smaya
7839f464c52Smaya      ERROR_IF(type_sz(dst_type) == 1 &&
7849f464c52Smaya               (type_sz(src0_type) == 8 ||
7859f464c52Smaya                (num_sources > 1 && type_sz(src1_type) == 8)),
7869f464c52Smaya               "There are no direct conversions between 64-bit types and B/UB");
7879f464c52Smaya
7889f464c52Smaya      ERROR_IF(type_sz(dst_type) == 8 &&
7899f464c52Smaya               (type_sz(src0_type) == 1 ||
7909f464c52Smaya                (num_sources > 1 && type_sz(src1_type) == 1)),
7919f464c52Smaya               "There are no direct conversions between 64-bit types and B/UB");
7929f464c52Smaya   }
7939f464c52Smaya
7949f464c52Smaya   if (is_half_float_conversion(devinfo, inst)) {
7959f464c52Smaya      /**
7969f464c52Smaya       * A helper to validate used in the validation of the following restriction
7979f464c52Smaya       * from the BDW+ PRM, Volume 2a, Command Reference, Instructions - MOV:
7989f464c52Smaya       *
7999f464c52Smaya       *    "There is no direct conversion from HF to DF or DF to HF.
8009f464c52Smaya       *     There is no direct conversion from HF to Q/UQ or Q/UQ to HF."
8019f464c52Smaya       *
8029f464c52Smaya       * Even if these restrictions are listed for the MOV instruction, we
8039f464c52Smaya       * validate this more generally, since there is the possibility
8049f464c52Smaya       * of implicit conversions from other instructions, such us implicit
8059f464c52Smaya       * conversion from integer to HF with the ADD instruction in SKL+.
8069f464c52Smaya       */
8079f464c52Smaya      enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
8089f464c52Smaya      enum brw_reg_type src1_type = num_sources > 1 ?
8099f464c52Smaya                                    brw_inst_src1_type(devinfo, inst) : 0;
8109f464c52Smaya      ERROR_IF(dst_type == BRW_REGISTER_TYPE_HF &&
8119f464c52Smaya               (type_sz(src0_type) == 8 ||
8129f464c52Smaya                (num_sources > 1 && type_sz(src1_type) == 8)),
8139f464c52Smaya               "There are no direct conversions between 64-bit types and HF");
8149f464c52Smaya
8159f464c52Smaya      ERROR_IF(type_sz(dst_type) == 8 &&
8169f464c52Smaya               (src0_type == BRW_REGISTER_TYPE_HF ||
8179f464c52Smaya                (num_sources > 1 && src1_type == BRW_REGISTER_TYPE_HF)),
8189f464c52Smaya               "There are no direct conversions between 64-bit types and HF");
8199f464c52Smaya
8209f464c52Smaya      /* From the BDW+ PRM:
8219f464c52Smaya       *
8229f464c52Smaya       *   "Conversion between Integer and HF (Half Float) must be
8239f464c52Smaya       *    DWord-aligned and strided by a DWord on the destination."
8249f464c52Smaya       *
8259f464c52Smaya       * Also, the above restrictions seems to be expanded on CHV and SKL+ by:
8269f464c52Smaya       *
8279f464c52Smaya       *   "There is a relaxed alignment rule for word destinations. When
8289f464c52Smaya       *    the destination type is word (UW, W, HF), destination data types
8299f464c52Smaya       *    can be aligned to either the lowest word or the second lowest
8309f464c52Smaya       *    word of the execution channel. This means the destination data
8319f464c52Smaya       *    words can be either all in the even word locations or all in the
8329f464c52Smaya       *    odd word locations."
8339f464c52Smaya       *
8349f464c52Smaya       * We do not implement the second rule as is though, since empirical
8359f464c52Smaya       * testing shows inconsistencies:
8369f464c52Smaya       *   - It suggests that packed 16-bit is not allowed, which is not true.
8379f464c52Smaya       *   - It suggests that conversions from Q/DF to W (which need to be
8389f464c52Smaya       *     64-bit aligned on the destination) are not possible, which is
8399f464c52Smaya       *     not true.
8409f464c52Smaya       *
8419f464c52Smaya       * So from this rule we only validate the implication that conversions
8429f464c52Smaya       * from F to HF need to be DWord strided (except in Align1 mixed
8439f464c52Smaya       * float mode where packed fp16 destination is allowed so long as the
8449f464c52Smaya       * destination is oword-aligned).
8459f464c52Smaya       *
8469f464c52Smaya       * Finally, we only validate this for Align1 because Align16 always
8479f464c52Smaya       * requires packed destinations, so these restrictions can't possibly
8489f464c52Smaya       * apply to Align16 mode.
8499f464c52Smaya       */
8509f464c52Smaya      if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
8519f464c52Smaya         if ((dst_type == BRW_REGISTER_TYPE_HF &&
8529f464c52Smaya              (brw_reg_type_is_integer(src0_type) ||
8539f464c52Smaya               (num_sources > 1 && brw_reg_type_is_integer(src1_type)))) ||
8549f464c52Smaya             (brw_reg_type_is_integer(dst_type) &&
8559f464c52Smaya              (src0_type == BRW_REGISTER_TYPE_HF ||
8569f464c52Smaya               (num_sources > 1 && src1_type == BRW_REGISTER_TYPE_HF)))) {
8579f464c52Smaya            ERROR_IF(dst_stride * dst_type_size != 4,
8589f464c52Smaya                     "Conversions between integer and half-float must be "
8599f464c52Smaya                     "strided by a DWord on the destination");
8609f464c52Smaya
8619f464c52Smaya            unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
8629f464c52Smaya            ERROR_IF(subreg % 4 != 0,
8639f464c52Smaya                     "Conversions between integer and half-float must be "
8649f464c52Smaya                     "aligned to a DWord on the destination");
8657ec681f3Smrg         } else if ((devinfo->is_cherryview || devinfo->ver >= 9) &&
8669f464c52Smaya                    dst_type == BRW_REGISTER_TYPE_HF) {
8679f464c52Smaya            unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
8689f464c52Smaya            ERROR_IF(dst_stride != 2 &&
8699f464c52Smaya                     !(is_mixed_float(devinfo, inst) &&
8709f464c52Smaya                       dst_stride == 1 && subreg % 16 == 0),
8719f464c52Smaya                     "Conversions to HF must have either all words in even "
8729f464c52Smaya                     "word locations or all words in odd word locations or "
8739f464c52Smaya                     "be mixed-float with Oword-aligned packed destination");
8749f464c52Smaya         }
8759f464c52Smaya      }
8769f464c52Smaya   }
8779f464c52Smaya
8789f464c52Smaya   /* There are special regioning rules for mixed-float mode in CHV and SKL that
8799f464c52Smaya    * override the general rule for the ratio of sizes of the destination type
8809f464c52Smaya    * and the execution type. We will add validation for those in a later patch.
8819f464c52Smaya    */
8829f464c52Smaya   bool validate_dst_size_and_exec_size_ratio =
8839f464c52Smaya      !is_mixed_float(devinfo, inst) ||
8847ec681f3Smrg      !(devinfo->is_cherryview || devinfo->ver >= 9);
8859f464c52Smaya
8869f464c52Smaya   if (validate_dst_size_and_exec_size_ratio &&
8879f464c52Smaya       exec_type_size > dst_type_size) {
88801e04c3fSmrg      if (!(dst_type_is_byte && inst_is_raw_move(devinfo, inst))) {
88901e04c3fSmrg         ERROR_IF(dst_stride * dst_type_size != exec_type_size,
89001e04c3fSmrg                  "Destination stride must be equal to the ratio of the sizes "
89101e04c3fSmrg                  "of the execution data type to the destination type");
89201e04c3fSmrg      }
89301e04c3fSmrg
89401e04c3fSmrg      unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
89501e04c3fSmrg
89601e04c3fSmrg      if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1 &&
89701e04c3fSmrg          brw_inst_dst_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT) {
89801e04c3fSmrg         /* The i965 PRM says:
89901e04c3fSmrg          *
90001e04c3fSmrg          *    Implementation Restriction: The relaxed alignment rule for byte
90101e04c3fSmrg          *    destination (#10.5) is not supported.
90201e04c3fSmrg          */
9037ec681f3Smrg         if ((devinfo->ver > 4 || devinfo->is_g4x) && dst_type_is_byte) {
90401e04c3fSmrg            ERROR_IF(subreg % exec_type_size != 0 &&
90501e04c3fSmrg                     subreg % exec_type_size != 1,
90601e04c3fSmrg                     "Destination subreg must be aligned to the size of the "
90701e04c3fSmrg                     "execution data type (or to the next lowest byte for byte "
90801e04c3fSmrg                     "destinations)");
90901e04c3fSmrg         } else {
91001e04c3fSmrg            ERROR_IF(subreg % exec_type_size != 0,
91101e04c3fSmrg                     "Destination subreg must be aligned to the size of the "
91201e04c3fSmrg                     "execution data type");
91301e04c3fSmrg         }
91401e04c3fSmrg      }
91501e04c3fSmrg   }
91601e04c3fSmrg
91701e04c3fSmrg   return error_msg;
91801e04c3fSmrg}
91901e04c3fSmrg
92001e04c3fSmrg/**
92101e04c3fSmrg * Checks restrictions listed in "General Restrictions on Regioning Parameters"
92201e04c3fSmrg * in the "Register Region Restrictions" section.
92301e04c3fSmrg */
92401e04c3fSmrgstatic struct string
9257ec681f3Smrggeneral_restrictions_on_region_parameters(const struct intel_device_info *devinfo,
92601e04c3fSmrg                                          const brw_inst *inst)
92701e04c3fSmrg{
92801e04c3fSmrg   const struct opcode_desc *desc =
92901e04c3fSmrg      brw_opcode_desc(devinfo, brw_inst_opcode(devinfo, inst));
93001e04c3fSmrg   unsigned num_sources = num_sources_from_inst(devinfo, inst);
93101e04c3fSmrg   unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst);
93201e04c3fSmrg   struct string error_msg = { .str = NULL, .len = 0 };
93301e04c3fSmrg
93401e04c3fSmrg   if (num_sources == 3)
93501e04c3fSmrg      return (struct string){};
93601e04c3fSmrg
9379f464c52Smaya   /* Split sends don't have the bits in the instruction to encode regions so
9389f464c52Smaya    * there's nothing to check.
9399f464c52Smaya    */
9409f464c52Smaya   if (inst_is_split_send(devinfo, inst))
9419f464c52Smaya      return (struct string){};
9429f464c52Smaya
94301e04c3fSmrg   if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16) {
94401e04c3fSmrg      if (desc->ndst != 0 && !dst_is_null(devinfo, inst))
94501e04c3fSmrg         ERROR_IF(brw_inst_dst_hstride(devinfo, inst) != BRW_HORIZONTAL_STRIDE_1,
94601e04c3fSmrg                  "Destination Horizontal Stride must be 1");
94701e04c3fSmrg
94801e04c3fSmrg      if (num_sources >= 1) {
9497ec681f3Smrg         if (devinfo->verx10 >= 75) {
95001e04c3fSmrg            ERROR_IF(brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE &&
95101e04c3fSmrg                     brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 &&
95201e04c3fSmrg                     brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_2 &&
95301e04c3fSmrg                     brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
95401e04c3fSmrg                     "In Align16 mode, only VertStride of 0, 2, or 4 is allowed");
95501e04c3fSmrg         } else {
95601e04c3fSmrg            ERROR_IF(brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE &&
95701e04c3fSmrg                     brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 &&
95801e04c3fSmrg                     brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
95901e04c3fSmrg                     "In Align16 mode, only VertStride of 0 or 4 is allowed");
96001e04c3fSmrg         }
96101e04c3fSmrg      }
96201e04c3fSmrg
96301e04c3fSmrg      if (num_sources == 2) {
9647ec681f3Smrg         if (devinfo->verx10 >= 75) {
96501e04c3fSmrg            ERROR_IF(brw_inst_src1_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE &&
96601e04c3fSmrg                     brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 &&
96701e04c3fSmrg                     brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_2 &&
96801e04c3fSmrg                     brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
96901e04c3fSmrg                     "In Align16 mode, only VertStride of 0, 2, or 4 is allowed");
97001e04c3fSmrg         } else {
97101e04c3fSmrg            ERROR_IF(brw_inst_src1_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE &&
97201e04c3fSmrg                     brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 &&
97301e04c3fSmrg                     brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
97401e04c3fSmrg                     "In Align16 mode, only VertStride of 0 or 4 is allowed");
97501e04c3fSmrg         }
97601e04c3fSmrg      }
97701e04c3fSmrg
97801e04c3fSmrg      return error_msg;
97901e04c3fSmrg   }
98001e04c3fSmrg
98101e04c3fSmrg   for (unsigned i = 0; i < num_sources; i++) {
98201e04c3fSmrg      unsigned vstride, width, hstride, element_size, subreg;
98301e04c3fSmrg      enum brw_reg_type type;
98401e04c3fSmrg
98501e04c3fSmrg#define DO_SRC(n)                                                              \
98601e04c3fSmrg      if (brw_inst_src ## n ## _reg_file(devinfo, inst) ==                     \
98701e04c3fSmrg          BRW_IMMEDIATE_VALUE)                                                 \
98801e04c3fSmrg         continue;                                                             \
98901e04c3fSmrg                                                                               \
99001e04c3fSmrg      vstride = STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst));          \
99101e04c3fSmrg      width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst));               \
99201e04c3fSmrg      hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst));          \
99301e04c3fSmrg      type = brw_inst_src ## n ## _type(devinfo, inst);                        \
99401e04c3fSmrg      element_size = brw_reg_type_to_size(type);                               \
99501e04c3fSmrg      subreg = brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst)
99601e04c3fSmrg
99701e04c3fSmrg      if (i == 0) {
99801e04c3fSmrg         DO_SRC(0);
99901e04c3fSmrg      } else {
100001e04c3fSmrg         DO_SRC(1);
100101e04c3fSmrg      }
100201e04c3fSmrg#undef DO_SRC
100301e04c3fSmrg
100401e04c3fSmrg      /* On IVB/BYT, region parameters and execution size for DF are in terms of
100501e04c3fSmrg       * 32-bit elements, so they are doubled. For evaluating the validity of an
100601e04c3fSmrg       * instruction, we halve them.
100701e04c3fSmrg       */
10087ec681f3Smrg      if (devinfo->verx10 == 70 &&
100901e04c3fSmrg          element_size == 8)
101001e04c3fSmrg         element_size = 4;
101101e04c3fSmrg
101201e04c3fSmrg      /* ExecSize must be greater than or equal to Width. */
101301e04c3fSmrg      ERROR_IF(exec_size < width, "ExecSize must be greater than or equal "
101401e04c3fSmrg                                  "to Width");
101501e04c3fSmrg
101601e04c3fSmrg      /* If ExecSize = Width and HorzStride ≠ 0,
101701e04c3fSmrg       * VertStride must be set to Width * HorzStride.
101801e04c3fSmrg       */
101901e04c3fSmrg      if (exec_size == width && hstride != 0) {
102001e04c3fSmrg         ERROR_IF(vstride != width * hstride,
102101e04c3fSmrg                  "If ExecSize = Width and HorzStride ≠ 0, "
102201e04c3fSmrg                  "VertStride must be set to Width * HorzStride");
102301e04c3fSmrg      }
102401e04c3fSmrg
102501e04c3fSmrg      /* If Width = 1, HorzStride must be 0 regardless of the values of
102601e04c3fSmrg       * ExecSize and VertStride.
102701e04c3fSmrg       */
102801e04c3fSmrg      if (width == 1) {
102901e04c3fSmrg         ERROR_IF(hstride != 0,
103001e04c3fSmrg                  "If Width = 1, HorzStride must be 0 regardless "
103101e04c3fSmrg                  "of the values of ExecSize and VertStride");
103201e04c3fSmrg      }
103301e04c3fSmrg
103401e04c3fSmrg      /* If ExecSize = Width = 1, both VertStride and HorzStride must be 0. */
103501e04c3fSmrg      if (exec_size == 1 && width == 1) {
103601e04c3fSmrg         ERROR_IF(vstride != 0 || hstride != 0,
103701e04c3fSmrg                  "If ExecSize = Width = 1, both VertStride "
103801e04c3fSmrg                  "and HorzStride must be 0");
103901e04c3fSmrg      }
104001e04c3fSmrg
104101e04c3fSmrg      /* If VertStride = HorzStride = 0, Width must be 1 regardless of the
104201e04c3fSmrg       * value of ExecSize.
104301e04c3fSmrg       */
104401e04c3fSmrg      if (vstride == 0 && hstride == 0) {
104501e04c3fSmrg         ERROR_IF(width != 1,
104601e04c3fSmrg                  "If VertStride = HorzStride = 0, Width must be "
104701e04c3fSmrg                  "1 regardless of the value of ExecSize");
104801e04c3fSmrg      }
104901e04c3fSmrg
105001e04c3fSmrg      /* VertStride must be used to cross GRF register boundaries. This rule
105101e04c3fSmrg       * implies that elements within a 'Width' cannot cross GRF boundaries.
105201e04c3fSmrg       */
105301e04c3fSmrg      const uint64_t mask = (1ULL << element_size) - 1;
105401e04c3fSmrg      unsigned rowbase = subreg;
105501e04c3fSmrg
105601e04c3fSmrg      for (int y = 0; y < exec_size / width; y++) {
105701e04c3fSmrg         uint64_t access_mask = 0;
105801e04c3fSmrg         unsigned offset = rowbase;
105901e04c3fSmrg
106001e04c3fSmrg         for (int x = 0; x < width; x++) {
10617ec681f3Smrg            access_mask |= mask << (offset % 64);
106201e04c3fSmrg            offset += hstride * element_size;
106301e04c3fSmrg         }
106401e04c3fSmrg
106501e04c3fSmrg         rowbase += vstride * element_size;
106601e04c3fSmrg
106701e04c3fSmrg         if ((uint32_t)access_mask != 0 && (access_mask >> 32) != 0) {
106801e04c3fSmrg            ERROR("VertStride must be used to cross GRF register boundaries");
106901e04c3fSmrg            break;
107001e04c3fSmrg         }
107101e04c3fSmrg      }
107201e04c3fSmrg   }
107301e04c3fSmrg
107401e04c3fSmrg   /* Dst.HorzStride must not be 0. */
107501e04c3fSmrg   if (desc->ndst != 0 && !dst_is_null(devinfo, inst)) {
107601e04c3fSmrg      ERROR_IF(brw_inst_dst_hstride(devinfo, inst) == BRW_HORIZONTAL_STRIDE_0,
107701e04c3fSmrg               "Destination Horizontal Stride must not be 0");
107801e04c3fSmrg   }
107901e04c3fSmrg
108001e04c3fSmrg   return error_msg;
108101e04c3fSmrg}
108201e04c3fSmrg
10839f464c52Smayastatic struct string
10847ec681f3Smrgspecial_restrictions_for_mixed_float_mode(const struct intel_device_info *devinfo,
10859f464c52Smaya                                          const brw_inst *inst)
10869f464c52Smaya{
10879f464c52Smaya   struct string error_msg = { .str = NULL, .len = 0 };
10889f464c52Smaya
10899f464c52Smaya   const unsigned opcode = brw_inst_opcode(devinfo, inst);
10909f464c52Smaya   const unsigned num_sources = num_sources_from_inst(devinfo, inst);
10919f464c52Smaya   if (num_sources >= 3)
10929f464c52Smaya      return error_msg;
10939f464c52Smaya
10949f464c52Smaya   if (!is_mixed_float(devinfo, inst))
10959f464c52Smaya      return error_msg;
10969f464c52Smaya
10979f464c52Smaya   unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst);
10989f464c52Smaya   bool is_align16 = brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16;
10999f464c52Smaya
11009f464c52Smaya   enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
11019f464c52Smaya   enum brw_reg_type src1_type = num_sources > 1 ?
11029f464c52Smaya                                 brw_inst_src1_type(devinfo, inst) : 0;
11039f464c52Smaya   enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);
11049f464c52Smaya
11059f464c52Smaya   unsigned dst_stride = STRIDE(brw_inst_dst_hstride(devinfo, inst));
11069f464c52Smaya   bool dst_is_packed = is_packed(exec_size * dst_stride, exec_size, dst_stride);
11079f464c52Smaya
11089f464c52Smaya   /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
11099f464c52Smaya    * Float Operations:
11109f464c52Smaya    *
11119f464c52Smaya    *    "Indirect addressing on source is not supported when source and
11129f464c52Smaya    *     destination data types are mixed float."
11139f464c52Smaya    */
11149f464c52Smaya   ERROR_IF(brw_inst_src0_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT ||
11159f464c52Smaya            (num_sources > 1 &&
11169f464c52Smaya             brw_inst_src1_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT),
11179f464c52Smaya            "Indirect addressing on source is not supported when source and "
11189f464c52Smaya            "destination data types are mixed float");
11199f464c52Smaya
11209f464c52Smaya   /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
11219f464c52Smaya    * Float Operations:
11229f464c52Smaya    *
11239f464c52Smaya    *    "No SIMD16 in mixed mode when destination is f32. Instruction
11249f464c52Smaya    *     execution size must be no more than 8."
11259f464c52Smaya    */
11269f464c52Smaya   ERROR_IF(exec_size > 8 && dst_type == BRW_REGISTER_TYPE_F,
11279f464c52Smaya            "Mixed float mode with 32-bit float destination is limited "
11289f464c52Smaya            "to SIMD8");
11299f464c52Smaya
11309f464c52Smaya   if (is_align16) {
11319f464c52Smaya      /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
11329f464c52Smaya       * Float Operations:
11339f464c52Smaya       *
11349f464c52Smaya       *   "In Align16 mode, when half float and float data types are mixed
11359f464c52Smaya       *    between source operands OR between source and destination operands,
11369f464c52Smaya       *    the register content are assumed to be packed."
11379f464c52Smaya       *
11389f464c52Smaya       * Since Align16 doesn't have a concept of horizontal stride (or width),
11399f464c52Smaya       * it means that vertical stride must always be 4, since 0 and 2 would
11409f464c52Smaya       * lead to replicated data, and any other value is disallowed in Align16.
11419f464c52Smaya       */
11429f464c52Smaya      ERROR_IF(brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
11439f464c52Smaya               "Align16 mixed float mode assumes packed data (vstride must be 4");
11449f464c52Smaya
11459f464c52Smaya      ERROR_IF(num_sources >= 2 &&
11469f464c52Smaya               brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
11479f464c52Smaya               "Align16 mixed float mode assumes packed data (vstride must be 4");
11489f464c52Smaya
11499f464c52Smaya      /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
11509f464c52Smaya       * Float Operations:
11519f464c52Smaya       *
11529f464c52Smaya       *   "For Align16 mixed mode, both input and output packed f16 data
11539f464c52Smaya       *    must be oword aligned, no oword crossing in packed f16."
11549f464c52Smaya       *
11559f464c52Smaya       * The previous rule requires that Align16 operands are always packed,
11569f464c52Smaya       * and since there is only one bit for Align16 subnr, which represents
11579f464c52Smaya       * offsets 0B and 16B, this rule is always enforced and we don't need to
11589f464c52Smaya       * validate it.
11599f464c52Smaya       */
11609f464c52Smaya
11619f464c52Smaya      /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
11629f464c52Smaya       * Float Operations:
11639f464c52Smaya       *
11649f464c52Smaya       *    "No SIMD16 in mixed mode when destination is packed f16 for both
11659f464c52Smaya       *     Align1 and Align16."
11669f464c52Smaya       *
11679f464c52Smaya       * And:
11689f464c52Smaya       *
11699f464c52Smaya       *   "In Align16 mode, when half float and float data types are mixed
11709f464c52Smaya       *    between source operands OR between source and destination operands,
11719f464c52Smaya       *    the register content are assumed to be packed."
11729f464c52Smaya       *
11739f464c52Smaya       * Which implies that SIMD16 is not available in Align16. This is further
11749f464c52Smaya       * confirmed by:
11759f464c52Smaya       *
11769f464c52Smaya       *    "For Align16 mixed mode, both input and output packed f16 data
11779f464c52Smaya       *     must be oword aligned, no oword crossing in packed f16"
11789f464c52Smaya       *
11799f464c52Smaya       * Since oword-aligned packed f16 data would cross oword boundaries when
11809f464c52Smaya       * the execution size is larger than 8.
11819f464c52Smaya       */
11829f464c52Smaya      ERROR_IF(exec_size > 8, "Align16 mixed float mode is limited to SIMD8");
11839f464c52Smaya
11849f464c52Smaya      /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
11859f464c52Smaya       * Float Operations:
11869f464c52Smaya       *
11879f464c52Smaya       *    "No accumulator read access for Align16 mixed float."
11889f464c52Smaya       */
11899f464c52Smaya      ERROR_IF(inst_uses_src_acc(devinfo, inst),
11909f464c52Smaya               "No accumulator read access for Align16 mixed float");
11919f464c52Smaya   } else {
11929f464c52Smaya      assert(!is_align16);
11939f464c52Smaya
11949f464c52Smaya      /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
11959f464c52Smaya       * Float Operations:
11969f464c52Smaya       *
11979f464c52Smaya       *    "No SIMD16 in mixed mode when destination is packed f16 for both
11989f464c52Smaya       *     Align1 and Align16."
11999f464c52Smaya       */
12009f464c52Smaya      ERROR_IF(exec_size > 8 && dst_is_packed &&
12019f464c52Smaya               dst_type == BRW_REGISTER_TYPE_HF,
12029f464c52Smaya               "Align1 mixed float mode is limited to SIMD8 when destination "
12039f464c52Smaya               "is packed half-float");
12049f464c52Smaya
12059f464c52Smaya      /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
12069f464c52Smaya       * Float Operations:
12079f464c52Smaya       *
12089f464c52Smaya       *    "Math operations for mixed mode:
12099f464c52Smaya       *     - In Align1, f16 inputs need to be strided"
12109f464c52Smaya       */
12119f464c52Smaya      if (opcode == BRW_OPCODE_MATH) {
12129f464c52Smaya         if (src0_type == BRW_REGISTER_TYPE_HF) {
12139f464c52Smaya            ERROR_IF(STRIDE(brw_inst_src0_hstride(devinfo, inst)) <= 1,
12149f464c52Smaya                     "Align1 mixed mode math needs strided half-float inputs");
12159f464c52Smaya         }
12169f464c52Smaya
12179f464c52Smaya         if (num_sources >= 2 && src1_type == BRW_REGISTER_TYPE_HF) {
12189f464c52Smaya            ERROR_IF(STRIDE(brw_inst_src1_hstride(devinfo, inst)) <= 1,
12199f464c52Smaya                     "Align1 mixed mode math needs strided half-float inputs");
12209f464c52Smaya         }
12219f464c52Smaya      }
12229f464c52Smaya
12239f464c52Smaya      if (dst_type == BRW_REGISTER_TYPE_HF && dst_stride == 1) {
12249f464c52Smaya         /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
12259f464c52Smaya          * Float Operations:
12269f464c52Smaya          *
12279f464c52Smaya          *    "In Align1, destination stride can be smaller than execution
12289f464c52Smaya          *     type. When destination is stride of 1, 16 bit packed data is
12299f464c52Smaya          *     updated on the destination. However, output packed f16 data
12309f464c52Smaya          *     must be oword aligned, no oword crossing in packed f16."
12319f464c52Smaya          *
12329f464c52Smaya          * The requirement of not crossing oword boundaries for 16-bit oword
12339f464c52Smaya          * aligned data means that execution size is limited to 8.
12349f464c52Smaya          */
12359f464c52Smaya         unsigned subreg;
12369f464c52Smaya         if (brw_inst_dst_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT)
12379f464c52Smaya            subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
12389f464c52Smaya         else
12399f464c52Smaya            subreg = brw_inst_dst_ia_subreg_nr(devinfo, inst);
12409f464c52Smaya         ERROR_IF(subreg % 16 != 0,
12419f464c52Smaya                  "Align1 mixed mode packed half-float output must be "
12429f464c52Smaya                  "oword aligned");
12439f464c52Smaya         ERROR_IF(exec_size > 8,
12449f464c52Smaya                  "Align1 mixed mode packed half-float output must not "
12459f464c52Smaya                  "cross oword boundaries (max exec size is 8)");
12469f464c52Smaya
12479f464c52Smaya         /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
12489f464c52Smaya          * Float Operations:
12499f464c52Smaya          *
12509f464c52Smaya          *    "When source is float or half float from accumulator register and
12519f464c52Smaya          *     destination is half float with a stride of 1, the source must
12529f464c52Smaya          *     register aligned. i.e., source must have offset zero."
12539f464c52Smaya          *
12549f464c52Smaya          * Align16 mixed float mode doesn't allow accumulator access on sources,
12559f464c52Smaya          * so we only need to check this for Align1.
12569f464c52Smaya          */
12579f464c52Smaya         if (src0_is_acc(devinfo, inst) &&
12589f464c52Smaya             (src0_type == BRW_REGISTER_TYPE_F ||
12599f464c52Smaya              src0_type == BRW_REGISTER_TYPE_HF)) {
12609f464c52Smaya            ERROR_IF(brw_inst_src0_da1_subreg_nr(devinfo, inst) != 0,
12619f464c52Smaya                     "Mixed float mode requires register-aligned accumulator "
12629f464c52Smaya                     "source reads when destination is packed half-float");
12639f464c52Smaya
12649f464c52Smaya         }
12659f464c52Smaya
12669f464c52Smaya         if (num_sources > 1 &&
12679f464c52Smaya             src1_is_acc(devinfo, inst) &&
12689f464c52Smaya             (src1_type == BRW_REGISTER_TYPE_F ||
12699f464c52Smaya              src1_type == BRW_REGISTER_TYPE_HF)) {
12709f464c52Smaya            ERROR_IF(brw_inst_src1_da1_subreg_nr(devinfo, inst) != 0,
12719f464c52Smaya                     "Mixed float mode requires register-aligned accumulator "
12729f464c52Smaya                     "source reads when destination is packed half-float");
12739f464c52Smaya         }
12749f464c52Smaya      }
12759f464c52Smaya
12769f464c52Smaya      /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
12779f464c52Smaya       * Float Operations:
12789f464c52Smaya       *
12799f464c52Smaya       *    "No swizzle is allowed when an accumulator is used as an implicit
12809f464c52Smaya       *     source or an explicit source in an instruction. i.e. when
12819f464c52Smaya       *     destination is half float with an implicit accumulator source,
12829f464c52Smaya       *     destination stride needs to be 2."
12839f464c52Smaya       *
12849f464c52Smaya       * FIXME: it is not quite clear what the first sentence actually means
12859f464c52Smaya       *        or its link to the implication described after it, so we only
12869f464c52Smaya       *        validate the explicit implication, which is clearly described.
12879f464c52Smaya       */
12889f464c52Smaya      if (dst_type == BRW_REGISTER_TYPE_HF &&
12899f464c52Smaya          inst_uses_src_acc(devinfo, inst)) {
12909f464c52Smaya         ERROR_IF(dst_stride != 2,
12919f464c52Smaya                  "Mixed float mode with implicit/explicit accumulator "
12929f464c52Smaya                  "source and half-float destination requires a stride "
12939f464c52Smaya                  "of 2 on the destination");
12949f464c52Smaya      }
12959f464c52Smaya   }
12969f464c52Smaya
12979f464c52Smaya   return error_msg;
12989f464c52Smaya}
12999f464c52Smaya
/**
 * Builds the per-channel byte \p access_mask of an Align1 register region.
 *
 * \p access_mask has 32 entries, one per execution channel. Bit N of an entry
 * is set when that channel touches byte N, counted from the start of the
 * register that \p subreg is an offset into; the shift amount is taken modulo
 * 64, so a single entry covers two adjacent 32-byte registers.
 *
 * The region is walked as (\p exec_size / \p width) rows of \p width elements.
 * Consecutive rows are \p vstride elements apart, consecutive elements within
 * a row are \p hstride elements apart, and every element is \p element_size
 * bytes wide. Only the entries of the channels that are walked are written;
 * all other entries are left untouched.
 *
 * Example: the source gX.1<4,2,2>F in an exec_size = 4 instruction yields
 *
 *    access_mask[0] = 0x00000000000000F0     (bytes  7-4)
 *    access_mask[1] = 0x000000000000F000     (bytes 15-12)
 *    access_mask[2] = 0x0000000000F00000     (bytes 23-20)
 *    access_mask[3] = 0x00000000F0000000     (bytes 31-28)
 *
 * and leaves access_mask[4-31] as they were.
 */
static void
align1_access_mask(uint64_t access_mask[static 32],
                   unsigned exec_size, unsigned element_size, unsigned subreg,
                   unsigned vstride, unsigned width, unsigned hstride)
{
   /* One set bit per byte of a single element. */
   const uint64_t element_bits = (1ULL << element_size) - 1;
   const unsigned num_rows = exec_size / width;
   const unsigned row_pitch = vstride * element_size;
   const unsigned column_pitch = hstride * element_size;
   unsigned channel = 0;

   for (unsigned row = 0; row < num_rows; row++) {
      for (unsigned column = 0; column < width; column++) {
         const unsigned byte_offset =
            subreg + row * row_pitch + column * column_pitch;

         access_mask[channel] = element_bits << (byte_offset % 64);
         channel++;
      }
   }

   /* Either nothing was walked (exec_size < width) or every channel was. */
   assert(channel == 0 || channel == exec_size);
}
134001e04c3fSmrg
/**
 * Returns the number of registers accessed according to the \p access_mask
 *
 * The result is 0 when no channel accesses any byte, 1 when every accessed
 * byte lies within the first 32 bytes (bits 0-31 of the masks), and 2 as soon
 * as any channel accesses a byte at offset 32 or above.
 */
static int
registers_read(const uint64_t access_mask[static 32])
{
   uint64_t touched_bytes = 0;

   /* Union of the bytes accessed by all 32 channels. */
   for (unsigned channel = 0; channel < 32; channel++) {
      touched_bytes |= access_mask[channel];
   }

   if (touched_bytes >> 32) {
      return 2;
   }

   return touched_bytes != 0 ? 1 : 0;
}
135901e04c3fSmrg
/**
 * Checks restrictions listed in "Region Alignment Rules" in the "Register
 * Region Restrictions" section.
 *
 * Three-source, Align16 and send instructions are not checked here; an empty
 * string is returned for them right away.
 *
 * For everything else the function proceeds in this order:
 *
 *    1. For each directly-addressed, non-immediate source, build its
 *       per-channel byte access mask (see align1_access_mask()) and check
 *       that the region does not reach byte 64 or beyond.
 *    2. Stop if the instruction has no destination or a null destination.
 *    3. Check that the destination does not reach byte 64 or beyond, and
 *       stop if anything has been reported so far.
 *    4. Build the destination access mask and evaluate the
 *       generation-specific rules quoted in the comments below.
 *
 * \param devinfo  device description; its ver / verx10 fields select which
 *                 rules are applied
 * \param inst     the instruction being validated
 *
 * \return the accumulated error text in error_msg, which stays { NULL, 0 }
 *         if nothing is reported. ERROR() and ERROR_IF() are defined outside
 *         this part of the file; they presumably append their message to the
 *         local error_msg when the condition holds.
 */
static struct string
region_alignment_rules(const struct intel_device_info *devinfo,
                       const brw_inst *inst)
{
   const struct opcode_desc *desc =
      brw_opcode_desc(devinfo, brw_inst_opcode(devinfo, inst));
   unsigned num_sources = num_sources_from_inst(devinfo, inst);
   /* The instruction field holds log2 of the execution size. */
   unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst);
   uint64_t dst_access_mask[32], src0_access_mask[32], src1_access_mask[32];
   struct string error_msg = { .str = NULL, .len = 0 };

   if (num_sources == 3)
      return (struct string){};

   if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16)
      return (struct string){};

   if (inst_is_send(devinfo, inst))
      return (struct string){};

   /* align1_access_mask() only writes the channels it walks, and sources that
    * are skipped below never get a mask at all, so start from all-zero masks.
    */
   memset(dst_access_mask, 0, sizeof(dst_access_mask));
   memset(src0_access_mask, 0, sizeof(src0_access_mask));
   memset(src1_access_mask, 0, sizeof(src1_access_mask));

   for (unsigned i = 0; i < num_sources; i++) {
      unsigned vstride, width, hstride, element_size, subreg;
      enum brw_reg_type type;

      /* In Direct Addressing mode, a source cannot span more than 2 adjacent
       * GRF registers.
       */

      /* DO_SRC(n) loads the region parameters of source n into the locals
       * declared above and fills src<n>_access_mask. Its `continue`
       * statements apply to this enclosing loop, so indirectly-addressed and
       * immediate sources skip the span check below and keep an all-zero
       * mask.
       */
#define DO_SRC(n)                                                              \
      if (brw_inst_src ## n ## _address_mode(devinfo, inst) !=                 \
          BRW_ADDRESS_DIRECT)                                                  \
         continue;                                                             \
                                                                               \
      if (brw_inst_src ## n ## _reg_file(devinfo, inst) ==                     \
          BRW_IMMEDIATE_VALUE)                                                 \
         continue;                                                             \
                                                                               \
      vstride = STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst));          \
      width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst));               \
      hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst));          \
      type = brw_inst_src ## n ## _type(devinfo, inst);                        \
      element_size = brw_reg_type_to_size(type);                               \
      subreg = brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst);             \
      align1_access_mask(src ## n ## _access_mask,                             \
                         exec_size, element_size, subreg,                      \
                         vstride, width, hstride)

      if (i == 0) {
         DO_SRC(0);
      } else {
         DO_SRC(1);
      }
#undef DO_SRC

      /* Byte offset at which the last element of the region starts: the last
       * row's vertical offset plus the last column's horizontal offset, in
       * bytes, plus the starting subregister offset. Reaching byte 64 means
       * the region extends past the second register.
       */
      unsigned num_vstride = exec_size / width;
      unsigned num_hstride = width;
      unsigned vstride_elements = (num_vstride - 1) * vstride;
      unsigned hstride_elements = (num_hstride - 1) * hstride;
      unsigned offset = (vstride_elements + hstride_elements) * element_size +
                        subreg;
      ERROR_IF(offset >= 64,
               "A source cannot span more than 2 adjacent GRF registers");
   }

   /* The remaining rules all concern the destination. */
   if (desc->ndst == 0 || dst_is_null(devinfo, inst))
      return error_msg;

   /* Same span check as for the sources; the destination region only has a
    * horizontal stride, so the last element starts (exec_size - 1) strides
    * after the subregister offset.
    */
   unsigned stride = STRIDE(brw_inst_dst_hstride(devinfo, inst));
   enum brw_reg_type dst_type = inst_dst_type(devinfo, inst);
   unsigned element_size = brw_reg_type_to_size(dst_type);
   unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
   unsigned offset = ((exec_size - 1) * stride * element_size) + subreg;
   ERROR_IF(offset >= 64,
            "A destination cannot span more than 2 adjacent GRF registers");

   /* Do not evaluate the mask-based rules once a span error was reported. */
   if (error_msg.str)
      return error_msg;

   /* On IVB/BYT, region parameters and execution size for DF are in terms of
    * 32-bit elements, so they are doubled. For evaluating the validity of an
    * instruction, we halve them.
    */
   if (devinfo->verx10 == 70 &&
       element_size == 8)
      element_size = 4;

   /* Describe the destination as a single row of exec_size elements spaced
    * `stride` apart, or as a scalar region when exec_size is 1.
    */
   align1_access_mask(dst_access_mask, exec_size, element_size, subreg,
                      exec_size == 1 ? 0 : exec_size * stride,
                      exec_size == 1 ? 1 : exec_size,
                      exec_size == 1 ? 0 : stride);

   /* 0, 1 or 2 registers per operand; a source that was skipped above has an
    * all-zero mask and therefore counts as 0.
    */
   unsigned dst_regs = registers_read(dst_access_mask);
   unsigned src0_regs = registers_read(src0_access_mask);
   unsigned src1_regs = registers_read(src1_access_mask);

   /* The SNB, IVB, HSW, BDW, and CHV PRMs say:
    *
    *    When an instruction has a source region spanning two registers and a
    *    destination region contained in one register, the number of elements
    *    must be the same between two sources and one of the following must be
    *    true:
    *
    *       1. The destination region is entirely contained in the lower OWord
    *          of a register.
    *       2. The destination region is entirely contained in the upper OWord
    *          of a register.
    *       3. The destination elements are evenly split between the two OWords
    *          of a register.
    */
   if (devinfo->ver <= 8) {
      if (dst_regs == 1 && (src0_regs == 2 || src1_regs == 2)) {
         unsigned upper_oword_writes = 0, lower_oword_writes = 0;

         /* A channel whose mask has a bit above bit 15 set writes at byte 16
          * or higher and is counted as an upper-OWord write.
          */
         for (unsigned i = 0; i < exec_size; i++) {
            if (dst_access_mask[i] > 0x0000FFFF) {
               upper_oword_writes++;
            } else {
               assert(dst_access_mask[i] != 0);
               lower_oword_writes++;
            }
         }

         ERROR_IF(lower_oword_writes != 0 &&
                  upper_oword_writes != 0 &&
                  upper_oword_writes != lower_oword_writes,
                  "Writes must be to only one OWord or "
                  "evenly split between OWords");
      }
   }

   /* The IVB and HSW PRMs say:
    *
    *    When an instruction has a source region that spans two registers and
    *    the destination spans two registers, the destination elements must be
    *    evenly split between the two registers [...]
    *
    * The SNB PRM contains similar wording (but written in a much more
    * confusing manner).
    *
    * The BDW PRM says:
    *
    *    When destination spans two registers, the source may be one or two
    *    registers. The destination elements must be evenly split between the
    *    two registers.
    *
    * The SKL PRM says:
    *
    *    When destination of MATH instruction spans two registers, the
    *    destination elements must be evenly split between the two registers.
    *
    * It is not known whether this restriction applies to KBL or other Gens
    * after SKL.
    */
   if (devinfo->ver <= 8 ||
       brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MATH) {

      /* Nothing explicitly states that on Gen < 8 elements must be evenly
       * split between two destination registers in the two exceptional
       * source-region-spans-one-register cases, but since Broadwell requires
       * evenly split writes regardless of source region, we assume that it was
       * an oversight and require it.
       */
      if (dst_regs == 2) {
         unsigned upper_reg_writes = 0, lower_reg_writes = 0;

         /* A channel whose mask has a bit above bit 31 set writes at byte 32
          * or higher and is counted as a write to the upper register.
          */
         for (unsigned i = 0; i < exec_size; i++) {
            if (dst_access_mask[i] > 0xFFFFFFFF) {
               upper_reg_writes++;
            } else {
               assert(dst_access_mask[i] != 0);
               lower_reg_writes++;
            }
         }

         ERROR_IF(upper_reg_writes != lower_reg_writes,
                  "Writes must be evenly split between the two "
                  "destination registers");
      }
   }

   /* The IVB and HSW PRMs say:
    *
    *    When an instruction has a source region that spans two registers and
    *    the destination spans two registers, the destination elements must be
    *    evenly split between the two registers and each destination register
    *    must be entirely derived from one source register.
    *
    *    Note: In such cases, the regioning parameters must ensure that the
    *    offset from the two source registers is the same.
    *
    * The SNB PRM contains similar wording (but written in a much more
    * confusing manner).
    *
    * There are effectively three rules stated here:
    *
    *    For an instruction with a source and a destination spanning two
    *    registers,
    *
    *       (1) destination elements must be evenly split between the two
    *           registers
    *       (2) all destination elements in a register must be derived
    *           from one source register
    *       (3) the offset (i.e. the starting location in each of the two
    *           registers spanned by a region) must be the same in the two
    *           registers spanned by a region
    *
    * It is impossible to violate rule (1) without violating (2) or (3), so we
    * do not attempt to validate it.
    */
   if (devinfo->ver <= 7 && dst_regs == 2) {
      for (unsigned i = 0; i < num_sources; i++) {
      /* DO_SRC(n) checks rules (2) and (3) for source n.
       *
       * The `continue` applies to the enclosing per-source loop and skips
       * sources that do not span two registers. The inner loops declare their
       * own `i`, which shadows the source index; their `break` only leaves
       * that inner loop.
       *
       * Rule (2): every channel must be on the same side of the 32-byte
       * boundary in the destination and in the source.
       *
       * Rule (3): offset_0 is the source's subregister offset. offset_1 is
       * taken from the first channel whose mask has a bit above bit 31 set:
       * the index of that mask's lowest set bit, minus 32. It stays equal to
       * offset_0 if no such channel exists.
       */
#define DO_SRC(n)                                                             \
         if (src ## n ## _regs <= 1)                                          \
            continue;                                                         \
                                                                              \
         for (unsigned i = 0; i < exec_size; i++) {                           \
            if ((dst_access_mask[i] > 0xFFFFFFFF) !=                          \
                (src ## n ## _access_mask[i] > 0xFFFFFFFF)) {                 \
               ERROR("Each destination register must be entirely derived "    \
                     "from one source register");                             \
               break;                                                         \
            }                                                                 \
         }                                                                    \
                                                                              \
         unsigned offset_0 =                                                  \
            brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst);               \
         unsigned offset_1 = offset_0;                                        \
                                                                              \
         for (unsigned i = 0; i < exec_size; i++) {                           \
            if (src ## n ## _access_mask[i] > 0xFFFFFFFF) {                   \
               offset_1 = __builtin_ctzll(src ## n ## _access_mask[i]) - 32;  \
               break;                                                         \
            }                                                                 \
         }                                                                    \
                                                                              \
         ERROR_IF(num_sources == 2 && offset_0 != offset_1,                   \
                  "The offset from the two source registers "                 \
                  "must be the same")

         if (i == 0) {
            DO_SRC(0);
         } else {
            DO_SRC(1);
         }
#undef DO_SRC
      }
   }

   /* The IVB and HSW PRMs say:
    *
    *    When destination spans two registers, the source MUST span two
    *    registers. The exception to the above rule:
    *        1. When source is scalar, the source registers are not
    *           incremented.
    *        2. When source is packed integer Word and destination is packed
    *           integer DWord, the source register is not incremented but the
    *           source sub register is incremented.
    *
    * The SNB PRM does not contain this rule, but the internal documentation
    * indicates that it applies to SNB as well. We assume that the rule applies
    * to Gen <= 5 although their PRMs do not state it.
    *
    * While the documentation explicitly says in exception (2) that the
    * destination must be an integer DWord, the hardware allows at least a
    * float destination type as well. We emit such instructions from
    *
    *    fs_visitor::emit_interpolation_setup_gfx6
    *    fs_visitor::emit_fragcoord_interpolation
    *
    * and have for years with no ill effects.
    *
    * Additionally the simulator source code indicates that the real condition
    * is that the size of the destination type is 4 bytes.
    */
   if (devinfo->ver <= 7 && dst_regs == 2) {
      /* NOTE(review): this redeclares dst_type with the same expression as the
       * function-scope dst_type above, shadowing it.
       */
      enum brw_reg_type dst_type = inst_dst_type(devinfo, inst);
      bool dst_is_packed_dword =
         is_packed(exec_size * stride, exec_size, stride) &&
         brw_reg_type_to_size(dst_type) == 4;

      for (unsigned i = 0; i < num_sources; i++) {
      /* DO_SRC(n) reports source n when it stays within one register and
       * neither exception applies: it is not a scalar region, and it is not a
       * packed W/UW source feeding a packed 4-byte destination.
       */
#define DO_SRC(n)                                                                  \
         unsigned vstride, width, hstride;                                         \
         vstride = STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst));           \
         width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst));                \
         hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst));           \
         bool src ## n ## _is_packed_word =                                        \
            is_packed(vstride, width, hstride) &&                                  \
            (brw_inst_src ## n ## _type(devinfo, inst) == BRW_REGISTER_TYPE_W ||   \
             brw_inst_src ## n ## _type(devinfo, inst) == BRW_REGISTER_TYPE_UW);   \
                                                                                   \
         ERROR_IF(src ## n ## _regs == 1 &&                                        \
                  !src ## n ## _has_scalar_region(devinfo, inst) &&                \
                  !(dst_is_packed_dword && src ## n ## _is_packed_word),           \
                  "When the destination spans two registers, the source must "     \
                  "span two registers\n" ERROR_INDENT "(exceptions for scalar "    \
                  "source and packed-word to packed-dword expansion)")

         if (i == 0) {
            DO_SRC(0);
         } else {
            DO_SRC(1);
         }
#undef DO_SRC
      }
   }

   return error_msg;
}
167701e04c3fSmrg
167801e04c3fSmrgstatic struct string
16797ec681f3Smrgvector_immediate_restrictions(const struct intel_device_info *devinfo,
168001e04c3fSmrg                              const brw_inst *inst)
168101e04c3fSmrg{
168201e04c3fSmrg   unsigned num_sources = num_sources_from_inst(devinfo, inst);
168301e04c3fSmrg   struct string error_msg = { .str = NULL, .len = 0 };
168401e04c3fSmrg
168501e04c3fSmrg   if (num_sources == 3 || num_sources == 0)
168601e04c3fSmrg      return (struct string){};
168701e04c3fSmrg
168801e04c3fSmrg   unsigned file = num_sources == 1 ?
168901e04c3fSmrg                   brw_inst_src0_reg_file(devinfo, inst) :
169001e04c3fSmrg                   brw_inst_src1_reg_file(devinfo, inst);
169101e04c3fSmrg   if (file != BRW_IMMEDIATE_VALUE)
169201e04c3fSmrg      return (struct string){};
169301e04c3fSmrg
16947ec681f3Smrg   enum brw_reg_type dst_type = inst_dst_type(devinfo, inst);
169501e04c3fSmrg   unsigned dst_type_size = brw_reg_type_to_size(dst_type);
169601e04c3fSmrg   unsigned dst_subreg = brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1 ?
169701e04c3fSmrg                         brw_inst_dst_da1_subreg_nr(devinfo, inst) : 0;
169801e04c3fSmrg   unsigned dst_stride = STRIDE(brw_inst_dst_hstride(devinfo, inst));
169901e04c3fSmrg   enum brw_reg_type type = num_sources == 1 ?
170001e04c3fSmrg                            brw_inst_src0_type(devinfo, inst) :
170101e04c3fSmrg                            brw_inst_src1_type(devinfo, inst);
170201e04c3fSmrg
170301e04c3fSmrg   /* The PRMs say:
170401e04c3fSmrg    *
170501e04c3fSmrg    *    When an immediate vector is used in an instruction, the destination
170601e04c3fSmrg    *    must be 128-bit aligned with destination horizontal stride equivalent
170701e04c3fSmrg    *    to a word for an immediate integer vector (v) and equivalent to a
170801e04c3fSmrg    *    DWord for an immediate float vector (vf).
170901e04c3fSmrg    *
171001e04c3fSmrg    * The text has not been updated for the addition of the immediate unsigned
171101e04c3fSmrg    * integer vector type (uv) on SNB, but presumably the same restriction
171201e04c3fSmrg    * applies.
171301e04c3fSmrg    */
171401e04c3fSmrg   switch (type) {
171501e04c3fSmrg   case BRW_REGISTER_TYPE_V:
171601e04c3fSmrg   case BRW_REGISTER_TYPE_UV:
171701e04c3fSmrg   case BRW_REGISTER_TYPE_VF:
171801e04c3fSmrg      ERROR_IF(dst_subreg % (128 / 8) != 0,
171901e04c3fSmrg               "Destination must be 128-bit aligned in order to use immediate "
172001e04c3fSmrg               "vector types");
172101e04c3fSmrg
172201e04c3fSmrg      if (type == BRW_REGISTER_TYPE_VF) {
172301e04c3fSmrg         ERROR_IF(dst_type_size * dst_stride != 4,
172401e04c3fSmrg                  "Destination must have stride equivalent to dword in order "
172501e04c3fSmrg                  "to use the VF type");
172601e04c3fSmrg      } else {
172701e04c3fSmrg         ERROR_IF(dst_type_size * dst_stride != 2,
172801e04c3fSmrg                  "Destination must have stride equivalent to word in order "
172901e04c3fSmrg                  "to use the V or UV type");
173001e04c3fSmrg      }
173101e04c3fSmrg      break;
173201e04c3fSmrg   default:
173301e04c3fSmrg      break;
173401e04c3fSmrg   }
173501e04c3fSmrg
173601e04c3fSmrg   return error_msg;
173701e04c3fSmrg}
173801e04c3fSmrg
173901e04c3fSmrgstatic struct string
174001e04c3fSmrgspecial_requirements_for_handling_double_precision_data_types(
17417ec681f3Smrg                                       const struct intel_device_info *devinfo,
174201e04c3fSmrg                                       const brw_inst *inst)
174301e04c3fSmrg{
174401e04c3fSmrg   unsigned num_sources = num_sources_from_inst(devinfo, inst);
174501e04c3fSmrg   struct string error_msg = { .str = NULL, .len = 0 };
174601e04c3fSmrg
174701e04c3fSmrg   if (num_sources == 3 || num_sources == 0)
174801e04c3fSmrg      return (struct string){};
174901e04c3fSmrg
17509f464c52Smaya   /* Split sends don't have types so there's no doubles there. */
17519f464c52Smaya   if (inst_is_split_send(devinfo, inst))
17529f464c52Smaya      return (struct string){};
17539f464c52Smaya
175401e04c3fSmrg   enum brw_reg_type exec_type = execution_type(devinfo, inst);
175501e04c3fSmrg   unsigned exec_type_size = brw_reg_type_to_size(exec_type);
175601e04c3fSmrg
175701e04c3fSmrg   enum brw_reg_file dst_file = brw_inst_dst_reg_file(devinfo, inst);
17587ec681f3Smrg   enum brw_reg_type dst_type = inst_dst_type(devinfo, inst);
175901e04c3fSmrg   unsigned dst_type_size = brw_reg_type_to_size(dst_type);
176001e04c3fSmrg   unsigned dst_hstride = STRIDE(brw_inst_dst_hstride(devinfo, inst));
176101e04c3fSmrg   unsigned dst_reg = brw_inst_dst_da_reg_nr(devinfo, inst);
176201e04c3fSmrg   unsigned dst_subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
176301e04c3fSmrg   unsigned dst_address_mode = brw_inst_dst_address_mode(devinfo, inst);
176401e04c3fSmrg
176501e04c3fSmrg   bool is_integer_dword_multiply =
17667ec681f3Smrg      devinfo->ver >= 8 &&
176701e04c3fSmrg      brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MUL &&
176801e04c3fSmrg      (brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_D ||
176901e04c3fSmrg       brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_UD) &&
177001e04c3fSmrg      (brw_inst_src1_type(devinfo, inst) == BRW_REGISTER_TYPE_D ||
177101e04c3fSmrg       brw_inst_src1_type(devinfo, inst) == BRW_REGISTER_TYPE_UD);
177201e04c3fSmrg
17737ec681f3Smrg   const bool is_double_precision =
17747ec681f3Smrg      dst_type_size == 8 || exec_type_size == 8 || is_integer_dword_multiply;
177501e04c3fSmrg
177601e04c3fSmrg   for (unsigned i = 0; i < num_sources; i++) {
177701e04c3fSmrg      unsigned vstride, width, hstride, type_size, reg, subreg, address_mode;
177801e04c3fSmrg      bool is_scalar_region;
177901e04c3fSmrg      enum brw_reg_file file;
178001e04c3fSmrg      enum brw_reg_type type;
178101e04c3fSmrg
178201e04c3fSmrg#define DO_SRC(n)                                                              \
178301e04c3fSmrg      if (brw_inst_src ## n ## _reg_file(devinfo, inst) ==                     \
178401e04c3fSmrg          BRW_IMMEDIATE_VALUE)                                                 \
178501e04c3fSmrg         continue;                                                             \
178601e04c3fSmrg                                                                               \
178701e04c3fSmrg      is_scalar_region = src ## n ## _has_scalar_region(devinfo, inst);        \
178801e04c3fSmrg      vstride = STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst));          \
178901e04c3fSmrg      width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst));               \
179001e04c3fSmrg      hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst));          \
179101e04c3fSmrg      file = brw_inst_src ## n ## _reg_file(devinfo, inst);                    \
179201e04c3fSmrg      type = brw_inst_src ## n ## _type(devinfo, inst);                        \
179301e04c3fSmrg      type_size = brw_reg_type_to_size(type);                                  \
179401e04c3fSmrg      reg = brw_inst_src ## n ## _da_reg_nr(devinfo, inst);                    \
179501e04c3fSmrg      subreg = brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst);             \
179601e04c3fSmrg      address_mode = brw_inst_src ## n ## _address_mode(devinfo, inst)
179701e04c3fSmrg
179801e04c3fSmrg      if (i == 0) {
179901e04c3fSmrg         DO_SRC(0);
180001e04c3fSmrg      } else {
180101e04c3fSmrg         DO_SRC(1);
180201e04c3fSmrg      }
180301e04c3fSmrg#undef DO_SRC
180401e04c3fSmrg
18057ec681f3Smrg      const unsigned src_stride = hstride * type_size;
18067ec681f3Smrg      const unsigned dst_stride = dst_hstride * dst_type_size;
18077ec681f3Smrg
180801e04c3fSmrg      /* The PRMs say that for CHV, BXT:
180901e04c3fSmrg       *
181001e04c3fSmrg       *    When source or destination datatype is 64b or operation is integer
181101e04c3fSmrg       *    DWord multiply, regioning in Align1 must follow these rules:
181201e04c3fSmrg       *
181301e04c3fSmrg       *    1. Source and Destination horizontal stride must be aligned to the
181401e04c3fSmrg       *       same qword.
181501e04c3fSmrg       *    2. Regioning must ensure Src.Vstride = Src.Width * Src.Hstride.
181601e04c3fSmrg       *    3. Source and Destination offset must be the same, except the case
181701e04c3fSmrg       *       of scalar source.
181801e04c3fSmrg       *
181901e04c3fSmrg       * We assume that the restriction applies to GLK as well.
182001e04c3fSmrg       */
18217ec681f3Smrg      if (is_double_precision &&
18227ec681f3Smrg          brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1 &&
18237ec681f3Smrg          (devinfo->is_cherryview || intel_device_info_is_9lp(devinfo))) {
182401e04c3fSmrg         ERROR_IF(!is_scalar_region &&
182501e04c3fSmrg                  (src_stride % 8 != 0 ||
182601e04c3fSmrg                   dst_stride % 8 != 0 ||
182701e04c3fSmrg                   src_stride != dst_stride),
182801e04c3fSmrg                  "Source and destination horizontal stride must equal and a "
182901e04c3fSmrg                  "multiple of a qword when the execution type is 64-bit");
183001e04c3fSmrg
183101e04c3fSmrg         ERROR_IF(vstride != width * hstride,
183201e04c3fSmrg                  "Vstride must be Width * Hstride when the execution type is "
183301e04c3fSmrg                  "64-bit");
183401e04c3fSmrg
183501e04c3fSmrg         ERROR_IF(!is_scalar_region && dst_subreg != subreg,
183601e04c3fSmrg                  "Source and destination offset must be the same when the "
183701e04c3fSmrg                  "execution type is 64-bit");
183801e04c3fSmrg      }
183901e04c3fSmrg
184001e04c3fSmrg      /* The PRMs say that for CHV, BXT:
184101e04c3fSmrg       *
184201e04c3fSmrg       *    When source or destination datatype is 64b or operation is integer
184301e04c3fSmrg       *    DWord multiply, indirect addressing must not be used.
184401e04c3fSmrg       *
184501e04c3fSmrg       * We assume that the restriction applies to GLK as well.
184601e04c3fSmrg       */
18477ec681f3Smrg      if (is_double_precision &&
18487ec681f3Smrg          (devinfo->is_cherryview || intel_device_info_is_9lp(devinfo))) {
184901e04c3fSmrg         ERROR_IF(BRW_ADDRESS_REGISTER_INDIRECT_REGISTER == address_mode ||
185001e04c3fSmrg                  BRW_ADDRESS_REGISTER_INDIRECT_REGISTER == dst_address_mode,
185101e04c3fSmrg                  "Indirect addressing is not allowed when the execution type "
185201e04c3fSmrg                  "is 64-bit");
185301e04c3fSmrg      }
185401e04c3fSmrg
185501e04c3fSmrg      /* The PRMs say that for CHV, BXT:
185601e04c3fSmrg       *
185701e04c3fSmrg       *    ARF registers must never be used with 64b datatype or when
185801e04c3fSmrg       *    operation is integer DWord multiply.
185901e04c3fSmrg       *
186001e04c3fSmrg       * We assume that the restriction applies to GLK as well.
186101e04c3fSmrg       *
186201e04c3fSmrg       * We assume that the restriction does not apply to the null register.
186301e04c3fSmrg       */
18647ec681f3Smrg      if (is_double_precision &&
18657ec681f3Smrg          (devinfo->is_cherryview || intel_device_info_is_9lp(devinfo))) {
186601e04c3fSmrg         ERROR_IF(brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MAC ||
186701e04c3fSmrg                  brw_inst_acc_wr_control(devinfo, inst) ||
186801e04c3fSmrg                  (BRW_ARCHITECTURE_REGISTER_FILE == file &&
186901e04c3fSmrg                   reg != BRW_ARF_NULL) ||
187001e04c3fSmrg                  (BRW_ARCHITECTURE_REGISTER_FILE == dst_file &&
187101e04c3fSmrg                   dst_reg != BRW_ARF_NULL),
187201e04c3fSmrg                  "Architecture registers cannot be used when the execution "
187301e04c3fSmrg                  "type is 64-bit");
187401e04c3fSmrg      }
18757ec681f3Smrg
18767ec681f3Smrg      /* From the hardware spec section "Register Region Restrictions":
18777ec681f3Smrg       *
18787ec681f3Smrg       * "In case where source or destination datatype is 64b or operation is
18797ec681f3Smrg       *  integer DWord multiply [or in case where a floating point data type
18807ec681f3Smrg       *  is used as destination]:
18817ec681f3Smrg       *
18827ec681f3Smrg       *   1. Register Regioning patterns where register data bit locations
18837ec681f3Smrg       *      are changed between source and destination are not supported on
18847ec681f3Smrg       *      Src0 and Src1 except for broadcast of a scalar.
18857ec681f3Smrg       *
18867ec681f3Smrg       *   2. Explicit ARF registers except null and accumulator must not be
18877ec681f3Smrg       *      used."
18887ec681f3Smrg       */
18897ec681f3Smrg      if (devinfo->verx10 >= 125 &&
18907ec681f3Smrg          (brw_reg_type_is_floating_point(dst_type) ||
18917ec681f3Smrg           is_double_precision)) {
18927ec681f3Smrg         ERROR_IF(!is_scalar_region &&
18937ec681f3Smrg                  (vstride != width * hstride ||
18947ec681f3Smrg                   src_stride != dst_stride ||
18957ec681f3Smrg                   subreg != dst_subreg),
18967ec681f3Smrg                  "Register Regioning patterns where register data bit "
18977ec681f3Smrg                  "locations are changed between source and destination are not "
18987ec681f3Smrg                  "supported except for broadcast of a scalar.");
18997ec681f3Smrg
19007ec681f3Smrg         ERROR_IF((file == BRW_ARCHITECTURE_REGISTER_FILE &&
19017ec681f3Smrg                   reg != BRW_ARF_NULL && !(reg >= BRW_ARF_ACCUMULATOR && reg < BRW_ARF_FLAG)) ||
19027ec681f3Smrg                  (dst_file == BRW_ARCHITECTURE_REGISTER_FILE &&
19037ec681f3Smrg                   dst_reg != BRW_ARF_NULL && dst_reg != BRW_ARF_ACCUMULATOR),
19047ec681f3Smrg                  "Explicit ARF registers except null and accumulator must not "
19057ec681f3Smrg                  "be used.");
19067ec681f3Smrg      }
19077ec681f3Smrg
19087ec681f3Smrg      /* From the hardware spec section "Register Region Restrictions":
19097ec681f3Smrg       *
19107ec681f3Smrg       * "Vx1 and VxH indirect addressing for Float, Half-Float, Double-Float and
19117ec681f3Smrg       *  Quad-Word data must not be used."
19127ec681f3Smrg       */
19137ec681f3Smrg      if (devinfo->verx10 >= 125 &&
19147ec681f3Smrg          (brw_reg_type_is_floating_point(type) || type_sz(type) == 8)) {
19157ec681f3Smrg         ERROR_IF(address_mode == BRW_ADDRESS_REGISTER_INDIRECT_REGISTER &&
19167ec681f3Smrg                  vstride == BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL,
19177ec681f3Smrg                  "Vx1 and VxH indirect addressing for Float, Half-Float, "
19187ec681f3Smrg                  "Double-Float and Quad-Word data must not be used");
19197ec681f3Smrg      }
192001e04c3fSmrg   }
192101e04c3fSmrg
192201e04c3fSmrg   /* The PRMs say that for BDW, SKL:
192301e04c3fSmrg    *
192401e04c3fSmrg    *    If Align16 is required for an operation with QW destination and non-QW
192501e04c3fSmrg    *    source datatypes, the execution size cannot exceed 2.
192601e04c3fSmrg    *
19277ec681f3Smrg    * We assume that the restriction applies to all Gfx8+ parts.
192801e04c3fSmrg    */
19297ec681f3Smrg   if (is_double_precision && devinfo->ver >= 8) {
193001e04c3fSmrg      enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
193101e04c3fSmrg      enum brw_reg_type src1_type =
193201e04c3fSmrg         num_sources > 1 ? brw_inst_src1_type(devinfo, inst) : src0_type;
193301e04c3fSmrg      unsigned src0_type_size = brw_reg_type_to_size(src0_type);
193401e04c3fSmrg      unsigned src1_type_size = brw_reg_type_to_size(src1_type);
193501e04c3fSmrg
193601e04c3fSmrg      ERROR_IF(brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16 &&
193701e04c3fSmrg               dst_type_size == 8 &&
193801e04c3fSmrg               (src0_type_size != 8 || src1_type_size != 8) &&
193901e04c3fSmrg               brw_inst_exec_size(devinfo, inst) > BRW_EXECUTE_2,
194001e04c3fSmrg               "In Align16 exec size cannot exceed 2 with a QWord destination "
194101e04c3fSmrg               "and a non-QWord source");
194201e04c3fSmrg   }
194301e04c3fSmrg
194401e04c3fSmrg   /* The PRMs say that for CHV, BXT:
194501e04c3fSmrg    *
194601e04c3fSmrg    *    When source or destination datatype is 64b or operation is integer
194701e04c3fSmrg    *    DWord multiply, DepCtrl must not be used.
194801e04c3fSmrg    *
194901e04c3fSmrg    * We assume that the restriction applies to GLK as well.
195001e04c3fSmrg    */
19517ec681f3Smrg   if (is_double_precision &&
19527ec681f3Smrg       (devinfo->is_cherryview || intel_device_info_is_9lp(devinfo))) {
195301e04c3fSmrg      ERROR_IF(brw_inst_no_dd_check(devinfo, inst) ||
195401e04c3fSmrg               brw_inst_no_dd_clear(devinfo, inst),
195501e04c3fSmrg               "DepCtrl is not allowed when the execution type is 64-bit");
195601e04c3fSmrg   }
195701e04c3fSmrg
195801e04c3fSmrg   return error_msg;
195901e04c3fSmrg}
196001e04c3fSmrg
19617ec681f3Smrgstatic struct string
19627ec681f3Smrginstruction_restrictions(const struct intel_device_info *devinfo,
19637ec681f3Smrg                         const brw_inst *inst)
196401e04c3fSmrg{
19657ec681f3Smrg   struct string error_msg = { .str = NULL, .len = 0 };
196601e04c3fSmrg
19677ec681f3Smrg   /* From Wa_1604601757:
19687ec681f3Smrg    *
19697ec681f3Smrg    * "When multiplying a DW and any lower precision integer, source modifier
19707ec681f3Smrg    *  is not supported."
19717ec681f3Smrg    */
19727ec681f3Smrg   if (devinfo->ver >= 12 &&
19737ec681f3Smrg       brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MUL) {
19747ec681f3Smrg      enum brw_reg_type exec_type = execution_type(devinfo, inst);
19757ec681f3Smrg      const bool src0_valid = type_sz(brw_inst_src0_type(devinfo, inst)) == 4 ||
19767ec681f3Smrg         brw_inst_src0_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE ||
19777ec681f3Smrg         !(brw_inst_src0_negate(devinfo, inst) ||
19787ec681f3Smrg           brw_inst_src0_abs(devinfo, inst));
19797ec681f3Smrg      const bool src1_valid = type_sz(brw_inst_src1_type(devinfo, inst)) == 4 ||
19807ec681f3Smrg         brw_inst_src1_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE ||
19817ec681f3Smrg         !(brw_inst_src1_negate(devinfo, inst) ||
19827ec681f3Smrg           brw_inst_src1_abs(devinfo, inst));
19837ec681f3Smrg
19847ec681f3Smrg      ERROR_IF(!brw_reg_type_is_floating_point(exec_type) &&
19857ec681f3Smrg               type_sz(exec_type) == 4 && !(src0_valid && src1_valid),
19867ec681f3Smrg               "When multiplying a DW and any lower precision integer, source "
19877ec681f3Smrg               "modifier is not supported.");
19887ec681f3Smrg   }
198901e04c3fSmrg
19907ec681f3Smrg   if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_CMP ||
19917ec681f3Smrg       brw_inst_opcode(devinfo, inst) == BRW_OPCODE_CMPN) {
19927ec681f3Smrg      if (devinfo->ver <= 7) {
19937ec681f3Smrg         /* Page 166 of the Ivy Bridge PRM Volume 4 part 3 (Execution Unit
19947ec681f3Smrg          * ISA) says:
19957ec681f3Smrg          *
19967ec681f3Smrg          *    Accumulator cannot be destination, implicit or explicit. The
19977ec681f3Smrg          *    destination must be a general register or the null register.
19987ec681f3Smrg          *
19997ec681f3Smrg          * Page 77 of the Haswell PRM Volume 2b contains the same text.  The
20007ec681f3Smrg          * 965G PRMs contain similar text.
20017ec681f3Smrg          *
20027ec681f3Smrg          * Page 864 (page 880 of the PDF) of the Broadwell PRM Volume 7 says:
20037ec681f3Smrg          *
20047ec681f3Smrg          *    For the cmp and cmpn instructions, remove the accumulator
20057ec681f3Smrg          *    restrictions.
20067ec681f3Smrg          */
20077ec681f3Smrg         ERROR_IF(brw_inst_dst_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
20087ec681f3Smrg                  brw_inst_dst_da_reg_nr(devinfo, inst) != BRW_ARF_NULL,
20097ec681f3Smrg                  "Accumulator cannot be destination, implicit or explicit.");
201001e04c3fSmrg      }
201101e04c3fSmrg
20127ec681f3Smrg      /* Page 166 of the Ivy Bridge PRM Volume 4 part 3 (Execution Unit ISA)
20137ec681f3Smrg       * says:
20147ec681f3Smrg       *
20157ec681f3Smrg       *    If the destination is the null register, the {Switch} instruction
20167ec681f3Smrg       *    option must be used.
20177ec681f3Smrg       *
20187ec681f3Smrg       * Page 77 of the Haswell PRM Volume 2b contains the same text.
20197ec681f3Smrg       */
20207ec681f3Smrg      if (devinfo->ver == 7) {
20217ec681f3Smrg         ERROR_IF(dst_is_null(devinfo, inst) &&
20227ec681f3Smrg                  brw_inst_thread_control(devinfo, inst) != BRW_THREAD_SWITCH,
20237ec681f3Smrg                  "If the destination is the null register, the {Switch} "
20247ec681f3Smrg                  "instruction option must be used.");
20257ec681f3Smrg      }
20267ec681f3Smrg   }
20277ec681f3Smrg
20287ec681f3Smrg   if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MATH) {
20297ec681f3Smrg      unsigned math_function = brw_inst_math_function(devinfo, inst);
20307ec681f3Smrg      switch (math_function) {
20317ec681f3Smrg      case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
20327ec681f3Smrg      case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT:
20337ec681f3Smrg      case BRW_MATH_FUNCTION_INT_DIV_REMAINDER: {
20347ec681f3Smrg         /* Page 442 of the Broadwell PRM Volume 2a "Extended Math Function" says:
20357ec681f3Smrg          *    INT DIV function does not support source modifiers.
20367ec681f3Smrg          * Bspec 6647 extends it back to Ivy Bridge.
20377ec681f3Smrg          */
20387ec681f3Smrg         bool src0_valid = !brw_inst_src0_negate(devinfo, inst) &&
20397ec681f3Smrg                           !brw_inst_src0_abs(devinfo, inst);
20407ec681f3Smrg         bool src1_valid = !brw_inst_src1_negate(devinfo, inst) &&
20417ec681f3Smrg                           !brw_inst_src1_abs(devinfo, inst);
20427ec681f3Smrg         ERROR_IF(!src0_valid || !src1_valid,
20437ec681f3Smrg                  "INT DIV function does not support source modifiers.");
20447ec681f3Smrg         break;
20457ec681f3Smrg      }
20467ec681f3Smrg      default:
20477ec681f3Smrg         break;
20487ec681f3Smrg      }
20497ec681f3Smrg   }
20507ec681f3Smrg
20517ec681f3Smrg   if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_DP4A) {
20527ec681f3Smrg      /* Page 396 (page 412 of the PDF) of the DG1 PRM volume 2a says:
20537ec681f3Smrg       *
20547ec681f3Smrg       *    Only one of src0 or src1 operand may be an the (sic) accumulator
20557ec681f3Smrg       *    register (acc#).
20567ec681f3Smrg       */
20577ec681f3Smrg      ERROR_IF(src0_is_acc(devinfo, inst) && src1_is_acc(devinfo, inst),
20587ec681f3Smrg               "Only one of src0 or src1 operand may be an accumulator "
20597ec681f3Smrg               "register (acc#).");
20607ec681f3Smrg
20617ec681f3Smrg   }
20627ec681f3Smrg
20637ec681f3Smrg   return error_msg;
20647ec681f3Smrg}
20657ec681f3Smrg
20667ec681f3Smrgstatic struct string
20677ec681f3Smrgsend_descriptor_restrictions(const struct intel_device_info *devinfo,
20687ec681f3Smrg                             const brw_inst *inst)
20697ec681f3Smrg{
20707ec681f3Smrg   struct string error_msg = { .str = NULL, .len = 0 };
20717ec681f3Smrg
20727ec681f3Smrg   if (inst_is_split_send(devinfo, inst)) {
20737ec681f3Smrg      /* We can only validate immediate descriptors */
20747ec681f3Smrg      if (brw_inst_send_sel_reg32_desc(devinfo, inst))
20757ec681f3Smrg         return error_msg;
20767ec681f3Smrg   } else if (inst_is_send(devinfo, inst)) {
20777ec681f3Smrg      /* We can only validate immediate descriptors */
20787ec681f3Smrg      if (brw_inst_src1_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE)
20797ec681f3Smrg         return error_msg;
20807ec681f3Smrg   } else {
20817ec681f3Smrg      return error_msg;
20827ec681f3Smrg   }
20837ec681f3Smrg
20847ec681f3Smrg   const uint32_t desc = brw_inst_send_desc(devinfo, inst);
20857ec681f3Smrg
20867ec681f3Smrg   switch (brw_inst_sfid(devinfo, inst)) {
20877ec681f3Smrg   case GFX12_SFID_TGM:
20887ec681f3Smrg   case GFX12_SFID_SLM:
20897ec681f3Smrg   case GFX12_SFID_UGM:
20907ec681f3Smrg      ERROR_IF(!devinfo->has_lsc, "Platform does not support LSC");
20917ec681f3Smrg
20927ec681f3Smrg      ERROR_IF(lsc_opcode_has_transpose(lsc_msg_desc_opcode(devinfo, desc)) &&
20937ec681f3Smrg               lsc_msg_desc_transpose(devinfo, desc) &&
20947ec681f3Smrg               brw_inst_exec_size(devinfo, inst) != BRW_EXECUTE_1,
20957ec681f3Smrg               "Transposed vectors are restricted to Exec_Mask = 1.");
20967ec681f3Smrg      break;
20977ec681f3Smrg
20987ec681f3Smrg   default:
20997ec681f3Smrg      break;
21007ec681f3Smrg   }
21017ec681f3Smrg
21027ec681f3Smrg   return error_msg;
21037ec681f3Smrg}
21047ec681f3Smrg
21057ec681f3Smrgbool
21067ec681f3Smrgbrw_validate_instruction(const struct intel_device_info *devinfo,
21077ec681f3Smrg                         const brw_inst *inst, int offset,
21087ec681f3Smrg                         struct disasm_info *disasm)
21097ec681f3Smrg{
21107ec681f3Smrg   struct string error_msg = { .str = NULL, .len = 0 };
21117ec681f3Smrg
21127ec681f3Smrg   if (is_unsupported_inst(devinfo, inst)) {
21137ec681f3Smrg      ERROR("Instruction not supported on this Gen");
21147ec681f3Smrg   } else {
21157ec681f3Smrg      CHECK(invalid_values);
21167ec681f3Smrg
21177ec681f3Smrg      if (error_msg.str == NULL) {
211801e04c3fSmrg         CHECK(sources_not_null);
211901e04c3fSmrg         CHECK(send_restrictions);
21209f464c52Smaya         CHECK(alignment_supported);
212101e04c3fSmrg         CHECK(general_restrictions_based_on_operand_types);
212201e04c3fSmrg         CHECK(general_restrictions_on_region_parameters);
21239f464c52Smaya         CHECK(special_restrictions_for_mixed_float_mode);
212401e04c3fSmrg         CHECK(region_alignment_rules);
212501e04c3fSmrg         CHECK(vector_immediate_restrictions);
212601e04c3fSmrg         CHECK(special_requirements_for_handling_double_precision_data_types);
21277ec681f3Smrg         CHECK(instruction_restrictions);
21287ec681f3Smrg         CHECK(send_descriptor_restrictions);
212901e04c3fSmrg      }
21307ec681f3Smrg   }
213101e04c3fSmrg
21327ec681f3Smrg   if (error_msg.str && disasm) {
21337ec681f3Smrg      disasm_insert_error(disasm, offset, error_msg.str);
21347ec681f3Smrg   }
21357ec681f3Smrg   free(error_msg.str);
21367ec681f3Smrg
21377ec681f3Smrg   return error_msg.len == 0;
21387ec681f3Smrg}
21397ec681f3Smrg
21407ec681f3Smrgbool
21417ec681f3Smrgbrw_validate_instructions(const struct intel_device_info *devinfo,
21427ec681f3Smrg                          const void *assembly, int start_offset, int end_offset,
21437ec681f3Smrg                          struct disasm_info *disasm)
21447ec681f3Smrg{
21457ec681f3Smrg   bool valid = true;
21467ec681f3Smrg
21477ec681f3Smrg   for (int src_offset = start_offset; src_offset < end_offset;) {
21487ec681f3Smrg      const brw_inst *inst = assembly + src_offset;
21497ec681f3Smrg      bool is_compact = brw_inst_cmpt_control(devinfo, inst);
21507ec681f3Smrg      unsigned inst_size = is_compact ? sizeof(brw_compact_inst)
21517ec681f3Smrg                                      : sizeof(brw_inst);
21527ec681f3Smrg      brw_inst uncompacted;
215301e04c3fSmrg
215401e04c3fSmrg      if (is_compact) {
21557ec681f3Smrg         brw_compact_inst *compacted = (void *)inst;
21567ec681f3Smrg         brw_uncompact_instruction(devinfo, &uncompacted, compacted);
21577ec681f3Smrg         inst = &uncompacted;
215801e04c3fSmrg      }
21597ec681f3Smrg
21607ec681f3Smrg      bool v = brw_validate_instruction(devinfo, inst, src_offset, disasm);
21617ec681f3Smrg      valid = valid && v;
21627ec681f3Smrg
21637ec681f3Smrg      src_offset += inst_size;
216401e04c3fSmrg   }
216501e04c3fSmrg
216601e04c3fSmrg   return valid;
216701e04c3fSmrg}
2168