/*
 * Copyright © 2015-2019 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/** @file brw_eu_validate.c
 *
 * This file implements a pass that validates shader assembly.
 *
 * The restrictions implemented herein are intended to verify that instructions
 * in shader assembly do not violate restrictions documented in the graphics
 * programming reference manuals.
 *
 * The restrictions are difficult for humans to quickly verify due to their
 * complexity and abundance.
 *
 * It is critical that this code is thoroughly unit tested because false
 * results will lead developers astray, which is worse than having no validator
 * at all. Functional changes to this file without corresponding unit tests (in
 * test_eu_validate.cpp) will be rejected.
 */

#include "brw_eu.h"

/* We're going to do lots of string concatenation, so this should help. */
struct string {
   char *str;   /* heap-allocated, NUL-terminated buffer (NULL when empty) */
   size_t len;  /* length in bytes, excluding the terminating NUL */
};

/**
 * Appends \p src to \p dest, growing dest's buffer as needed.
 *
 * dest->str is always left NUL-terminated.  On allocation failure the
 * previous buffer is kept intact instead of being leaked (the classic
 * p = realloc(p, ...) anti-pattern would lose it).
 */
static void
cat(struct string *dest, const struct string src)
{
   char *str = realloc(dest->str, dest->len + src.len + 1);
   if (str == NULL)
      return; /* out of memory: keep the old, still-valid contents */

   /* Skip the copy for empty sources: memcpy from a NULL pointer is
    * undefined behavior even with a zero length.
    */
   if (src.len > 0)
      memcpy(str + dest->len, src.str, src.len);
   str[dest->len + src.len] = '\0';

   dest->str = str;
   dest->len += src.len;
}
#define CAT(dest, src) cat(&dest, (struct string){src, strlen(src)})
58b8e80941Smrg
59b8e80941Smrgstatic bool
60b8e80941Smrgcontains(const struct string haystack, const struct string needle)
61b8e80941Smrg{
62b8e80941Smrg   return haystack.str && memmem(haystack.str, haystack.len,
63b8e80941Smrg                                 needle.str, needle.len) != NULL;
64b8e80941Smrg}
65b8e80941Smrg#define CONTAINS(haystack, needle) \
66b8e80941Smrg   contains(haystack, (struct string){needle, strlen(needle)})
67b8e80941Smrg
/* Wrap a message in the "\tERROR: ...\n" framing used for all diagnostics;
 * ERROR_IF compares this exact framed string to de-duplicate messages.
 */
#define error(str)   "\tERROR: " str "\n"
/* Continuation indent matching the width of the "\tERROR: " prefix above. */
#define ERROR_INDENT "\t       "

/* Unconditionally record an error message (at most once per instruction). */
#define ERROR(msg) ERROR_IF(true, msg)
/* Append the framed error to the caller's local `error_msg` accumulator
 * when `cond` holds, skipping messages already present so a single
 * instruction cannot report the same diagnostic twice.
 */
#define ERROR_IF(cond, msg)                             \
   do {                                                 \
      if ((cond) && !CONTAINS(error_msg, error(msg))) { \
         CAT(error_msg, error(msg));                    \
      }                                                 \
   } while(0)

/* Invoke a checker `func(devinfo, inst, args...)` that returns a
 * struct string; append any message it produced to the caller's local
 * `error_msg` and free the checker's buffer.
 */
#define CHECK(func, args...)                             \
   do {                                                  \
      struct string __msg = func(devinfo, inst, ##args); \
      if (__msg.str) {                                   \
         cat(&error_msg, __msg);                         \
         free(__msg.str);                                \
      }                                                  \
   } while (0)
87b8e80941Smrg
/* Decode the hardware region encodings into element counts: a stride
 * encoding of 0 means stride 0, otherwise n encodes 2^(n-1); a width
 * encoding of n means 2^n.  Arguments are fully parenthesized (the
 * original left the first `stride` use bare) so expression arguments
 * expand safely.
 */
#define STRIDE(stride) ((stride) != 0 ? 1 << ((stride) - 1) : 0)
#define WIDTH(width)   (1 << (width))
90b8e80941Smrg
91b8e80941Smrgstatic bool
92b8e80941Smrginst_is_send(const struct gen_device_info *devinfo, const brw_inst *inst)
93b8e80941Smrg{
94b8e80941Smrg   switch (brw_inst_opcode(devinfo, inst)) {
95b8e80941Smrg   case BRW_OPCODE_SEND:
96b8e80941Smrg   case BRW_OPCODE_SENDC:
97b8e80941Smrg   case BRW_OPCODE_SENDS:
98b8e80941Smrg   case BRW_OPCODE_SENDSC:
99b8e80941Smrg      return true;
100b8e80941Smrg   default:
101b8e80941Smrg      return false;
102b8e80941Smrg   }
103b8e80941Smrg}
104b8e80941Smrg
105b8e80941Smrgstatic bool
106b8e80941Smrginst_is_split_send(const struct gen_device_info *devinfo, const brw_inst *inst)
107b8e80941Smrg{
108b8e80941Smrg   switch (brw_inst_opcode(devinfo, inst)) {
109b8e80941Smrg   case BRW_OPCODE_SENDS:
110b8e80941Smrg   case BRW_OPCODE_SENDSC:
111b8e80941Smrg      return true;
112b8e80941Smrg   default:
113b8e80941Smrg      return false;
114b8e80941Smrg   }
115b8e80941Smrg}
116b8e80941Smrg
117b8e80941Smrgstatic unsigned
118b8e80941Smrgsigned_type(unsigned type)
119b8e80941Smrg{
120b8e80941Smrg   switch (type) {
121b8e80941Smrg   case BRW_REGISTER_TYPE_UD: return BRW_REGISTER_TYPE_D;
122b8e80941Smrg   case BRW_REGISTER_TYPE_UW: return BRW_REGISTER_TYPE_W;
123b8e80941Smrg   case BRW_REGISTER_TYPE_UB: return BRW_REGISTER_TYPE_B;
124b8e80941Smrg   case BRW_REGISTER_TYPE_UQ: return BRW_REGISTER_TYPE_Q;
125b8e80941Smrg   default:                   return type;
126b8e80941Smrg   }
127b8e80941Smrg}
128b8e80941Smrg
129b8e80941Smrgstatic bool
130b8e80941Smrginst_is_raw_move(const struct gen_device_info *devinfo, const brw_inst *inst)
131b8e80941Smrg{
132b8e80941Smrg   unsigned dst_type = signed_type(brw_inst_dst_type(devinfo, inst));
133b8e80941Smrg   unsigned src_type = signed_type(brw_inst_src0_type(devinfo, inst));
134b8e80941Smrg
135b8e80941Smrg   if (brw_inst_src0_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE) {
136b8e80941Smrg      /* FIXME: not strictly true */
137b8e80941Smrg      if (brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_VF ||
138b8e80941Smrg          brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_UV ||
139b8e80941Smrg          brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_V) {
140b8e80941Smrg         return false;
141b8e80941Smrg      }
142b8e80941Smrg   } else if (brw_inst_src0_negate(devinfo, inst) ||
143b8e80941Smrg              brw_inst_src0_abs(devinfo, inst)) {
144b8e80941Smrg      return false;
145b8e80941Smrg   }
146b8e80941Smrg
147b8e80941Smrg   return brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MOV &&
148b8e80941Smrg          brw_inst_saturate(devinfo, inst) == 0 &&
149b8e80941Smrg          dst_type == src_type;
150b8e80941Smrg}
151b8e80941Smrg
152b8e80941Smrgstatic bool
153b8e80941Smrgdst_is_null(const struct gen_device_info *devinfo, const brw_inst *inst)
154b8e80941Smrg{
155b8e80941Smrg   return brw_inst_dst_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
156b8e80941Smrg          brw_inst_dst_da_reg_nr(devinfo, inst) == BRW_ARF_NULL;
157b8e80941Smrg}
158b8e80941Smrg
159b8e80941Smrgstatic bool
160b8e80941Smrgsrc0_is_null(const struct gen_device_info *devinfo, const brw_inst *inst)
161b8e80941Smrg{
162b8e80941Smrg   return brw_inst_src0_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
163b8e80941Smrg          brw_inst_src0_da_reg_nr(devinfo, inst) == BRW_ARF_NULL;
164b8e80941Smrg}
165b8e80941Smrg
166b8e80941Smrgstatic bool
167b8e80941Smrgsrc1_is_null(const struct gen_device_info *devinfo, const brw_inst *inst)
168b8e80941Smrg{
169b8e80941Smrg   return brw_inst_src1_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
170b8e80941Smrg          brw_inst_src1_da_reg_nr(devinfo, inst) == BRW_ARF_NULL;
171b8e80941Smrg}
172b8e80941Smrg
173b8e80941Smrgstatic bool
174b8e80941Smrgsrc0_is_acc(const struct gen_device_info *devinfo, const brw_inst *inst)
175b8e80941Smrg{
176b8e80941Smrg   return brw_inst_src0_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
177b8e80941Smrg          (brw_inst_src0_da_reg_nr(devinfo, inst) & 0xF0) == BRW_ARF_ACCUMULATOR;
178b8e80941Smrg}
179b8e80941Smrg
180b8e80941Smrgstatic bool
181b8e80941Smrgsrc1_is_acc(const struct gen_device_info *devinfo, const brw_inst *inst)
182b8e80941Smrg{
183b8e80941Smrg   return brw_inst_src1_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
184b8e80941Smrg          (brw_inst_src1_da_reg_nr(devinfo, inst) & 0xF0) == BRW_ARF_ACCUMULATOR;
185b8e80941Smrg}
186b8e80941Smrg
187b8e80941Smrgstatic bool
188b8e80941Smrgsrc0_is_grf(const struct gen_device_info *devinfo, const brw_inst *inst)
189b8e80941Smrg{
190b8e80941Smrg   return brw_inst_src0_reg_file(devinfo, inst) == BRW_GENERAL_REGISTER_FILE;
191b8e80941Smrg}
192b8e80941Smrg
193b8e80941Smrgstatic bool
194b8e80941Smrgsrc0_has_scalar_region(const struct gen_device_info *devinfo, const brw_inst *inst)
195b8e80941Smrg{
196b8e80941Smrg   return brw_inst_src0_vstride(devinfo, inst) == BRW_VERTICAL_STRIDE_0 &&
197b8e80941Smrg          brw_inst_src0_width(devinfo, inst) == BRW_WIDTH_1 &&
198b8e80941Smrg          brw_inst_src0_hstride(devinfo, inst) == BRW_HORIZONTAL_STRIDE_0;
199b8e80941Smrg}
200b8e80941Smrg
201b8e80941Smrgstatic bool
202b8e80941Smrgsrc1_has_scalar_region(const struct gen_device_info *devinfo, const brw_inst *inst)
203b8e80941Smrg{
204b8e80941Smrg   return brw_inst_src1_vstride(devinfo, inst) == BRW_VERTICAL_STRIDE_0 &&
205b8e80941Smrg          brw_inst_src1_width(devinfo, inst) == BRW_WIDTH_1 &&
206b8e80941Smrg          brw_inst_src1_hstride(devinfo, inst) == BRW_HORIZONTAL_STRIDE_0;
207b8e80941Smrg}
208b8e80941Smrg
/**
 * Returns the number of source operands the instruction actually reads.
 *
 * MATH instructions derive their arity from the math function rather than
 * the opcode descriptor, and on Gen < 6 extended math is issued via SEND,
 * which is special-cased below.  Everything else uses the descriptor's
 * nsrc field.
 */
static unsigned
num_sources_from_inst(const struct gen_device_info *devinfo,
                      const brw_inst *inst)
{
   const struct opcode_desc *desc =
      brw_opcode_desc(devinfo, brw_inst_opcode(devinfo, inst));
   unsigned math_function;

   if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MATH) {
      math_function = brw_inst_math_function(devinfo, inst);
   } else if (devinfo->gen < 6 &&
              brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND) {
      if (brw_inst_sfid(devinfo, inst) == BRW_SFID_MATH) {
         /* src1 must be a descriptor (including the information to determine
          * that the SEND is doing an extended math operation), but src0 can
          * actually be null since it serves as the source of the implicit GRF
          * to MRF move.
          *
          * If we stop using that functionality, we'll have to revisit this.
          */
         return 2;
      } else {
         /* Send instructions are allowed to have null sources since they use
          * the base_mrf field to specify which message register source.
          */
         return 0;
      }
   } else {
      assert(desc->nsrc < 4);
      return desc->nsrc;
   }

   /* Only reached for BRW_OPCODE_MATH: map the math function to its arity. */
   switch (math_function) {
   case BRW_MATH_FUNCTION_INV:
   case BRW_MATH_FUNCTION_LOG:
   case BRW_MATH_FUNCTION_EXP:
   case BRW_MATH_FUNCTION_SQRT:
   case BRW_MATH_FUNCTION_RSQ:
   case BRW_MATH_FUNCTION_SIN:
   case BRW_MATH_FUNCTION_COS:
   case BRW_MATH_FUNCTION_SINCOS:
   case GEN8_MATH_FUNCTION_INVM:
   case GEN8_MATH_FUNCTION_RSQRTM:
      return 1;
   case BRW_MATH_FUNCTION_FDIV:
   case BRW_MATH_FUNCTION_POW:
   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT:
   case BRW_MATH_FUNCTION_INT_DIV_REMAINDER:
      return 2;
   default:
      unreachable("not reached");
   }
}
263b8e80941Smrg
/**
 * Checks that sources which must not be null are in fact non-null.
 *
 * Returns an error string (empty .str on success); the caller owns and
 * frees the returned buffer.
 */
static struct string
sources_not_null(const struct gen_device_info *devinfo,
                 const brw_inst *inst)
{
   unsigned num_sources = num_sources_from_inst(devinfo, inst);
   struct string error_msg = { .str = NULL, .len = 0 };

   /* Nothing to test. 3-src instructions can only have GRF sources, and
    * there's no bit to control the file.
    */
   if (num_sources == 3)
      return (struct string){};

   /* Nothing to test.  Split sends can only encode a file in sources that are
    * allowed to be NULL.
    */
   if (inst_is_split_send(devinfo, inst))
      return (struct string){};

   if (num_sources >= 1)
      ERROR_IF(src0_is_null(devinfo, inst), "src0 is null");

   if (num_sources == 2)
      ERROR_IF(src1_is_null(devinfo, inst), "src1 is null");

   return error_msg;
}
291b8e80941Smrg
/**
 * Checks that the instruction's access mode is supported on this hardware:
 * Align16 mode was removed on Gen11+.
 *
 * Returns an error string (empty .str on success); caller frees.
 */
static struct string
alignment_supported(const struct gen_device_info *devinfo,
                    const brw_inst *inst)
{
   struct string error_msg = { .str = NULL, .len = 0 };

   ERROR_IF(devinfo->gen >= 11 && brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16,
            "Align16 not supported");

   return error_msg;
}
303b8e80941Smrg
304b8e80941Smrgstatic bool
305b8e80941Smrginst_uses_src_acc(const struct gen_device_info *devinfo, const brw_inst *inst)
306b8e80941Smrg{
307b8e80941Smrg   /* Check instructions that use implicit accumulator sources */
308b8e80941Smrg   switch (brw_inst_opcode(devinfo, inst)) {
309b8e80941Smrg   case BRW_OPCODE_MAC:
310b8e80941Smrg   case BRW_OPCODE_MACH:
311b8e80941Smrg   case BRW_OPCODE_SADA2:
312b8e80941Smrg      return true;
313b8e80941Smrg   }
314b8e80941Smrg
315b8e80941Smrg   /* FIXME: support 3-src instructions */
316b8e80941Smrg   unsigned num_sources = num_sources_from_inst(devinfo, inst);
317b8e80941Smrg   assert(num_sources < 3);
318b8e80941Smrg
319b8e80941Smrg   return src0_is_acc(devinfo, inst) || (num_sources > 1 && src1_is_acc(devinfo, inst));
320b8e80941Smrg}
321b8e80941Smrg
/**
 * Checks restrictions specific to send-family instructions: src1 file for
 * split sends, EOT payload placement, payload overlap, direct addressing,
 * and the r127 return-address rule on Gen8+.
 *
 * Returns an error string (empty .str on success); caller frees.
 */
static struct string
send_restrictions(const struct gen_device_info *devinfo,
                  const brw_inst *inst)
{
   struct string error_msg = { .str = NULL, .len = 0 };

   if (inst_is_split_send(devinfo, inst)) {
      ERROR_IF(brw_inst_send_src1_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
               brw_inst_send_src1_reg_nr(devinfo, inst) != BRW_ARF_NULL,
               "src1 of split send must be a GRF or NULL");

      /* With EOT set, both payload halves must come from the top of the
       * register file (g112-g127).
       */
      ERROR_IF(brw_inst_eot(devinfo, inst) &&
               brw_inst_src0_da_reg_nr(devinfo, inst) < 112,
               "send with EOT must use g112-g127");
      ERROR_IF(brw_inst_eot(devinfo, inst) &&
               brw_inst_send_src1_reg_file(devinfo, inst) == BRW_GENERAL_REGISTER_FILE &&
               brw_inst_send_src1_reg_nr(devinfo, inst) < 112,
               "send with EOT must use g112-g127");

      if (brw_inst_send_src1_reg_file(devinfo, inst) == BRW_GENERAL_REGISTER_FILE) {
         /* Assume minimums if we don't know */
         unsigned mlen = 1;
         if (!brw_inst_send_sel_reg32_desc(devinfo, inst)) {
            const uint32_t desc = brw_inst_send_desc(devinfo, inst);
            mlen = brw_message_desc_mlen(devinfo, desc);
         }

         unsigned ex_mlen = 1;
         if (!brw_inst_send_sel_reg32_ex_desc(devinfo, inst)) {
            const uint32_t ex_desc = brw_inst_send_ex_desc(devinfo, inst);
            ex_mlen = brw_message_ex_desc_ex_mlen(devinfo, ex_desc);
         }
         /* The two payload ranges [src0, src0+mlen) and [src1, src1+ex_mlen)
          * must be disjoint.
          */
         const unsigned src0_reg_nr = brw_inst_src0_da_reg_nr(devinfo, inst);
         const unsigned src1_reg_nr = brw_inst_send_src1_reg_nr(devinfo, inst);
         ERROR_IF((src0_reg_nr <= src1_reg_nr &&
                   src1_reg_nr < src0_reg_nr + mlen) ||
                  (src1_reg_nr <= src0_reg_nr &&
                   src0_reg_nr < src1_reg_nr + ex_mlen),
                   "split send payloads must not overlap");
      }
   } else if (inst_is_send(devinfo, inst)) {
      ERROR_IF(brw_inst_src0_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT,
               "send must use direct addressing");

      if (devinfo->gen >= 7) {
         ERROR_IF(!src0_is_grf(devinfo, inst), "send from non-GRF");
         ERROR_IF(brw_inst_eot(devinfo, inst) &&
                  brw_inst_src0_da_reg_nr(devinfo, inst) < 112,
                  "send with EOT must use g112-g127");
      }

      if (devinfo->gen >= 8) {
         ERROR_IF(!dst_is_null(devinfo, inst) &&
                  (brw_inst_dst_da_reg_nr(devinfo, inst) +
                   brw_inst_rlen(devinfo, inst) > 127) &&
                  (brw_inst_src0_da_reg_nr(devinfo, inst) +
                   brw_inst_mlen(devinfo, inst) >
                   brw_inst_dst_da_reg_nr(devinfo, inst)),
                  "r127 must not be used for return address when there is "
                  "a src and dest overlap");
      }
   }

   return error_msg;
}
387b8e80941Smrg
388b8e80941Smrgstatic bool
389b8e80941Smrgis_unsupported_inst(const struct gen_device_info *devinfo,
390b8e80941Smrg                    const brw_inst *inst)
391b8e80941Smrg{
392b8e80941Smrg   return brw_opcode_desc(devinfo, brw_inst_opcode(devinfo, inst)) == NULL;
393b8e80941Smrg}
394b8e80941Smrg
395b8e80941Smrg/**
396b8e80941Smrg * Returns whether a combination of two types would qualify as mixed float
397b8e80941Smrg * operation mode
398b8e80941Smrg */
399b8e80941Smrgstatic inline bool
400b8e80941Smrgtypes_are_mixed_float(enum brw_reg_type t0, enum brw_reg_type t1)
401b8e80941Smrg{
402b8e80941Smrg   return (t0 == BRW_REGISTER_TYPE_F && t1 == BRW_REGISTER_TYPE_HF) ||
403b8e80941Smrg          (t1 == BRW_REGISTER_TYPE_F && t0 == BRW_REGISTER_TYPE_HF);
404b8e80941Smrg}
405b8e80941Smrg
/* Folds a register type to the type the execution pipeline computes in:
 * floats keep their own precision, packed-immediate vector types widen to
 * their element's execution type, and integers fold to the signed type of
 * the widest-equivalent class.
 */
static enum brw_reg_type
execution_type_for_type(enum brw_reg_type type)
{
   switch (type) {
   /* Float types execute at their own precision. */
   case BRW_REGISTER_TYPE_NF:
   case BRW_REGISTER_TYPE_DF:
   case BRW_REGISTER_TYPE_F:
   case BRW_REGISTER_TYPE_HF:
      return type;

   /* Packed float vector immediates execute as F. */
   case BRW_REGISTER_TYPE_VF:
      return BRW_REGISTER_TYPE_F;

   /* 64-bit integers execute as Q. */
   case BRW_REGISTER_TYPE_Q:
   case BRW_REGISTER_TYPE_UQ:
      return BRW_REGISTER_TYPE_Q;

   /* 32-bit integers execute as D. */
   case BRW_REGISTER_TYPE_D:
   case BRW_REGISTER_TYPE_UD:
      return BRW_REGISTER_TYPE_D;

   /* Sub-word integers (and packed integer vector immediates) execute
    * as W.
    */
   case BRW_REGISTER_TYPE_W:
   case BRW_REGISTER_TYPE_UW:
   case BRW_REGISTER_TYPE_B:
   case BRW_REGISTER_TYPE_UB:
   case BRW_REGISTER_TYPE_V:
   case BRW_REGISTER_TYPE_UV:
      return BRW_REGISTER_TYPE_W;
   }
   unreachable("not reached");
}
437b8e80941Smrg
/**
 * Returns the execution type of an instruction \p inst
 *
 * The checks below are ordered by precedence: mixed F/HF forces F, equal
 * source types win, then the wider/float type of the pair is chosen.
 */
static enum brw_reg_type
execution_type(const struct gen_device_info *devinfo, const brw_inst *inst)
{
   unsigned num_sources = num_sources_from_inst(devinfo, inst);
   enum brw_reg_type src0_exec_type, src1_exec_type;

   /* Execution data type is independent of destination data type, except in
    * mixed F/HF instructions.
    */
   enum brw_reg_type dst_exec_type = brw_inst_dst_type(devinfo, inst);

   src0_exec_type = execution_type_for_type(brw_inst_src0_type(devinfo, inst));
   if (num_sources == 1) {
      /* A single HF source defers to the destination type (mixed-float
       * conversions execute at the destination's precision).
       */
      if (src0_exec_type == BRW_REGISTER_TYPE_HF)
         return dst_exec_type;
      return src0_exec_type;
   }

   src1_exec_type = execution_type_for_type(brw_inst_src1_type(devinfo, inst));
   if (types_are_mixed_float(src0_exec_type, src1_exec_type) ||
       types_are_mixed_float(src0_exec_type, dst_exec_type) ||
       types_are_mixed_float(src1_exec_type, dst_exec_type)) {
      return BRW_REGISTER_TYPE_F;
   }

   if (src0_exec_type == src1_exec_type)
      return src0_exec_type;

   /* Mixed operand types where one is float is float on Gen < 6
    * (and not allowed on later platforms)
    */
   if (devinfo->gen < 6 &&
       (src0_exec_type == BRW_REGISTER_TYPE_F ||
        src1_exec_type == BRW_REGISTER_TYPE_F))
      return BRW_REGISTER_TYPE_F;

   if (src0_exec_type == BRW_REGISTER_TYPE_Q ||
       src1_exec_type == BRW_REGISTER_TYPE_Q)
      return BRW_REGISTER_TYPE_Q;

   if (src0_exec_type == BRW_REGISTER_TYPE_D ||
       src1_exec_type == BRW_REGISTER_TYPE_D)
      return BRW_REGISTER_TYPE_D;

   if (src0_exec_type == BRW_REGISTER_TYPE_W ||
       src1_exec_type == BRW_REGISTER_TYPE_W)
      return BRW_REGISTER_TYPE_W;

   if (src0_exec_type == BRW_REGISTER_TYPE_DF ||
       src1_exec_type == BRW_REGISTER_TYPE_DF)
      return BRW_REGISTER_TYPE_DF;

   unreachable("not reached");
}
495b8e80941Smrg
/**
 * Returns whether a region is packed
 *
 * A region is packed if its elements are adjacent in memory, with no
 * intervening space, no overlap, and no replicated values.
 */
static bool
is_packed(unsigned vstride, unsigned width, unsigned hstride)
{
   if (vstride != width)
      return false;

   /* A one-wide row is packed when it never advances horizontally;
    * wider rows must advance exactly one element at a time.
    */
   return vstride == 1 ? hstride == 0 : hstride == 1;
}
515b8e80941Smrg
516b8e80941Smrg/**
517b8e80941Smrg * Returns whether an instruction is an explicit or implicit conversion
518b8e80941Smrg * to/from half-float.
519b8e80941Smrg */
520b8e80941Smrgstatic bool
521b8e80941Smrgis_half_float_conversion(const struct gen_device_info *devinfo,
522b8e80941Smrg                         const brw_inst *inst)
523b8e80941Smrg{
524b8e80941Smrg   enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);
525b8e80941Smrg
526b8e80941Smrg   unsigned num_sources = num_sources_from_inst(devinfo, inst);
527b8e80941Smrg   enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
528b8e80941Smrg
529b8e80941Smrg   if (dst_type != src0_type &&
530b8e80941Smrg       (dst_type == BRW_REGISTER_TYPE_HF || src0_type == BRW_REGISTER_TYPE_HF)) {
531b8e80941Smrg      return true;
532b8e80941Smrg   } else if (num_sources > 1) {
533b8e80941Smrg      enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst);
534b8e80941Smrg      return dst_type != src1_type &&
535b8e80941Smrg            (dst_type == BRW_REGISTER_TYPE_HF ||
536b8e80941Smrg             src1_type == BRW_REGISTER_TYPE_HF);
537b8e80941Smrg   }
538b8e80941Smrg
539b8e80941Smrg   return false;
540b8e80941Smrg}
541b8e80941Smrg
542b8e80941Smrg/*
543b8e80941Smrg * Returns whether an instruction is using mixed float operation mode
544b8e80941Smrg */
545b8e80941Smrgstatic bool
546b8e80941Smrgis_mixed_float(const struct gen_device_info *devinfo, const brw_inst *inst)
547b8e80941Smrg{
548b8e80941Smrg   if (devinfo->gen < 8)
549b8e80941Smrg      return false;
550b8e80941Smrg
551b8e80941Smrg   if (inst_is_send(devinfo, inst))
552b8e80941Smrg      return false;
553b8e80941Smrg
554b8e80941Smrg   unsigned opcode = brw_inst_opcode(devinfo, inst);
555b8e80941Smrg   const struct opcode_desc *desc = brw_opcode_desc(devinfo, opcode);
556b8e80941Smrg   if (desc->ndst == 0)
557b8e80941Smrg      return false;
558b8e80941Smrg
559b8e80941Smrg   /* FIXME: support 3-src instructions */
560b8e80941Smrg   unsigned num_sources = num_sources_from_inst(devinfo, inst);
561b8e80941Smrg   assert(num_sources < 3);
562b8e80941Smrg
563b8e80941Smrg   enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);
564b8e80941Smrg   enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
565b8e80941Smrg
566b8e80941Smrg   if (num_sources == 1)
567b8e80941Smrg      return types_are_mixed_float(src0_type, dst_type);
568b8e80941Smrg
569b8e80941Smrg   enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst);
570b8e80941Smrg
571b8e80941Smrg   return types_are_mixed_float(src0_type, src1_type) ||
572b8e80941Smrg          types_are_mixed_float(src0_type, dst_type) ||
573b8e80941Smrg          types_are_mixed_float(src1_type, dst_type);
574b8e80941Smrg}
575b8e80941Smrg
576b8e80941Smrg/**
577b8e80941Smrg * Returns whether an instruction is an explicit or implicit conversion
578b8e80941Smrg * to/from byte.
579b8e80941Smrg */
580b8e80941Smrgstatic bool
581b8e80941Smrgis_byte_conversion(const struct gen_device_info *devinfo,
582b8e80941Smrg                   const brw_inst *inst)
583b8e80941Smrg{
584b8e80941Smrg   enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);
585b8e80941Smrg
586b8e80941Smrg   unsigned num_sources = num_sources_from_inst(devinfo, inst);
587b8e80941Smrg   enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
588b8e80941Smrg
589b8e80941Smrg   if (dst_type != src0_type &&
590b8e80941Smrg       (type_sz(dst_type) == 1 || type_sz(src0_type) == 1)) {
591b8e80941Smrg      return true;
592b8e80941Smrg   } else if (num_sources > 1) {
593b8e80941Smrg      enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst);
594b8e80941Smrg      return dst_type != src1_type &&
595b8e80941Smrg            (type_sz(dst_type) == 1 || type_sz(src1_type) == 1);
596b8e80941Smrg   }
597b8e80941Smrg
598b8e80941Smrg   return false;
599b8e80941Smrg}
600b8e80941Smrg
601b8e80941Smrg/**
602b8e80941Smrg * Checks restrictions listed in "General Restrictions Based on Operand Types"
603b8e80941Smrg * in the "Register Region Restrictions" section.
604b8e80941Smrg */
605b8e80941Smrgstatic struct string
606b8e80941Smrggeneral_restrictions_based_on_operand_types(const struct gen_device_info *devinfo,
607b8e80941Smrg                                            const brw_inst *inst)
608b8e80941Smrg{
609b8e80941Smrg   const struct opcode_desc *desc =
610b8e80941Smrg      brw_opcode_desc(devinfo, brw_inst_opcode(devinfo, inst));
611b8e80941Smrg   unsigned num_sources = num_sources_from_inst(devinfo, inst);
612b8e80941Smrg   unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst);
613b8e80941Smrg   struct string error_msg = { .str = NULL, .len = 0 };
614b8e80941Smrg
615b8e80941Smrg   if (devinfo->gen >= 11) {
616b8e80941Smrg      if (num_sources == 3) {
617b8e80941Smrg         ERROR_IF(brw_reg_type_to_size(brw_inst_3src_a1_src1_type(devinfo, inst)) == 1 ||
618b8e80941Smrg                  brw_reg_type_to_size(brw_inst_3src_a1_src2_type(devinfo, inst)) == 1,
619b8e80941Smrg                  "Byte data type is not supported for src1/2 register regioning. This includes "
620b8e80941Smrg                  "byte broadcast as well.");
621b8e80941Smrg      }
622b8e80941Smrg      if (num_sources == 2) {
623b8e80941Smrg         ERROR_IF(brw_reg_type_to_size(brw_inst_src1_type(devinfo, inst)) == 1,
624b8e80941Smrg                  "Byte data type is not supported for src1 register regioning. This includes "
625b8e80941Smrg                  "byte broadcast as well.");
626b8e80941Smrg      }
627b8e80941Smrg   }
628b8e80941Smrg
629b8e80941Smrg   if (num_sources == 3)
630b8e80941Smrg      return error_msg;
631b8e80941Smrg
632b8e80941Smrg   if (inst_is_send(devinfo, inst))
633b8e80941Smrg      return error_msg;
634b8e80941Smrg
635b8e80941Smrg   if (exec_size == 1)
636b8e80941Smrg      return error_msg;
637b8e80941Smrg
638b8e80941Smrg   if (desc->ndst == 0)
639b8e80941Smrg      return error_msg;
640b8e80941Smrg
641b8e80941Smrg   /* The PRMs say:
642b8e80941Smrg    *
643b8e80941Smrg    *    Where n is the largest element size in bytes for any source or
644b8e80941Smrg    *    destination operand type, ExecSize * n must be <= 64.
645b8e80941Smrg    *
646b8e80941Smrg    * But we do not attempt to enforce it, because it is implied by other
647b8e80941Smrg    * rules:
648b8e80941Smrg    *
649b8e80941Smrg    *    - that the destination stride must match the execution data type
650b8e80941Smrg    *    - sources may not span more than two adjacent GRF registers
651b8e80941Smrg    *    - destination may not span more than two adjacent GRF registers
652b8e80941Smrg    *
653b8e80941Smrg    * In fact, checking it would weaken testing of the other rules.
654b8e80941Smrg    */
655b8e80941Smrg
656b8e80941Smrg   unsigned dst_stride = STRIDE(brw_inst_dst_hstride(devinfo, inst));
657b8e80941Smrg   enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);
658b8e80941Smrg   bool dst_type_is_byte =
659b8e80941Smrg      brw_inst_dst_type(devinfo, inst) == BRW_REGISTER_TYPE_B ||
660b8e80941Smrg      brw_inst_dst_type(devinfo, inst) == BRW_REGISTER_TYPE_UB;
661b8e80941Smrg
662b8e80941Smrg   if (dst_type_is_byte) {
663b8e80941Smrg      if (is_packed(exec_size * dst_stride, exec_size, dst_stride)) {
664b8e80941Smrg         if (!inst_is_raw_move(devinfo, inst))
665b8e80941Smrg            ERROR("Only raw MOV supports a packed-byte destination");
666b8e80941Smrg         return error_msg;
667b8e80941Smrg      }
668b8e80941Smrg   }
669b8e80941Smrg
670b8e80941Smrg   unsigned exec_type = execution_type(devinfo, inst);
671b8e80941Smrg   unsigned exec_type_size = brw_reg_type_to_size(exec_type);
672b8e80941Smrg   unsigned dst_type_size = brw_reg_type_to_size(dst_type);
673b8e80941Smrg
674b8e80941Smrg   /* On IVB/BYT, region parameters and execution size for DF are in terms of
675b8e80941Smrg    * 32-bit elements, so they are doubled. For evaluating the validity of an
676b8e80941Smrg    * instruction, we halve them.
677b8e80941Smrg    */
678b8e80941Smrg   if (devinfo->gen == 7 && !devinfo->is_haswell &&
679b8e80941Smrg       exec_type_size == 8 && dst_type_size == 4)
680b8e80941Smrg      dst_type_size = 8;
681b8e80941Smrg
682b8e80941Smrg   if (is_byte_conversion(devinfo, inst)) {
683b8e80941Smrg      /* From the BDW+ PRM, Volume 2a, Command Reference, Instructions - MOV:
684b8e80941Smrg       *
685b8e80941Smrg       *    "There is no direct conversion from B/UB to DF or DF to B/UB.
686b8e80941Smrg       *     There is no direct conversion from B/UB to Q/UQ or Q/UQ to B/UB."
687b8e80941Smrg       *
688b8e80941Smrg       * Even if these restrictions are listed for the MOV instruction, we
689b8e80941Smrg       * validate this more generally, since there is the possibility
690b8e80941Smrg       * of implicit conversions from other instructions.
691b8e80941Smrg       */
692b8e80941Smrg      enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
693b8e80941Smrg      enum brw_reg_type src1_type = num_sources > 1 ?
694b8e80941Smrg                                    brw_inst_src1_type(devinfo, inst) : 0;
695b8e80941Smrg
696b8e80941Smrg      ERROR_IF(type_sz(dst_type) == 1 &&
697b8e80941Smrg               (type_sz(src0_type) == 8 ||
698b8e80941Smrg                (num_sources > 1 && type_sz(src1_type) == 8)),
699b8e80941Smrg               "There are no direct conversions between 64-bit types and B/UB");
700b8e80941Smrg
701b8e80941Smrg      ERROR_IF(type_sz(dst_type) == 8 &&
702b8e80941Smrg               (type_sz(src0_type) == 1 ||
703b8e80941Smrg                (num_sources > 1 && type_sz(src1_type) == 1)),
704b8e80941Smrg               "There are no direct conversions between 64-bit types and B/UB");
705b8e80941Smrg   }
706b8e80941Smrg
707b8e80941Smrg   if (is_half_float_conversion(devinfo, inst)) {
708b8e80941Smrg      /**
709b8e80941Smrg       * A helper to validate used in the validation of the following restriction
710b8e80941Smrg       * from the BDW+ PRM, Volume 2a, Command Reference, Instructions - MOV:
711b8e80941Smrg       *
712b8e80941Smrg       *    "There is no direct conversion from HF to DF or DF to HF.
713b8e80941Smrg       *     There is no direct conversion from HF to Q/UQ or Q/UQ to HF."
714b8e80941Smrg       *
715b8e80941Smrg       * Even if these restrictions are listed for the MOV instruction, we
716b8e80941Smrg       * validate this more generally, since there is the possibility
717b8e80941Smrg       * of implicit conversions from other instructions, such us implicit
718b8e80941Smrg       * conversion from integer to HF with the ADD instruction in SKL+.
719b8e80941Smrg       */
720b8e80941Smrg      enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
721b8e80941Smrg      enum brw_reg_type src1_type = num_sources > 1 ?
722b8e80941Smrg                                    brw_inst_src1_type(devinfo, inst) : 0;
723b8e80941Smrg      ERROR_IF(dst_type == BRW_REGISTER_TYPE_HF &&
724b8e80941Smrg               (type_sz(src0_type) == 8 ||
725b8e80941Smrg                (num_sources > 1 && type_sz(src1_type) == 8)),
726b8e80941Smrg               "There are no direct conversions between 64-bit types and HF");
727b8e80941Smrg
728b8e80941Smrg      ERROR_IF(type_sz(dst_type) == 8 &&
729b8e80941Smrg               (src0_type == BRW_REGISTER_TYPE_HF ||
730b8e80941Smrg                (num_sources > 1 && src1_type == BRW_REGISTER_TYPE_HF)),
731b8e80941Smrg               "There are no direct conversions between 64-bit types and HF");
732b8e80941Smrg
733b8e80941Smrg      /* From the BDW+ PRM:
734b8e80941Smrg       *
735b8e80941Smrg       *   "Conversion between Integer and HF (Half Float) must be
736b8e80941Smrg       *    DWord-aligned and strided by a DWord on the destination."
737b8e80941Smrg       *
738b8e80941Smrg       * Also, the above restrictions seems to be expanded on CHV and SKL+ by:
739b8e80941Smrg       *
740b8e80941Smrg       *   "There is a relaxed alignment rule for word destinations. When
741b8e80941Smrg       *    the destination type is word (UW, W, HF), destination data types
742b8e80941Smrg       *    can be aligned to either the lowest word or the second lowest
743b8e80941Smrg       *    word of the execution channel. This means the destination data
744b8e80941Smrg       *    words can be either all in the even word locations or all in the
745b8e80941Smrg       *    odd word locations."
746b8e80941Smrg       *
747b8e80941Smrg       * We do not implement the second rule as is though, since empirical
748b8e80941Smrg       * testing shows inconsistencies:
749b8e80941Smrg       *   - It suggests that packed 16-bit is not allowed, which is not true.
750b8e80941Smrg       *   - It suggests that conversions from Q/DF to W (which need to be
751b8e80941Smrg       *     64-bit aligned on the destination) are not possible, which is
752b8e80941Smrg       *     not true.
753b8e80941Smrg       *
754b8e80941Smrg       * So from this rule we only validate the implication that conversions
755b8e80941Smrg       * from F to HF need to be DWord strided (except in Align1 mixed
756b8e80941Smrg       * float mode where packed fp16 destination is allowed so long as the
757b8e80941Smrg       * destination is oword-aligned).
758b8e80941Smrg       *
759b8e80941Smrg       * Finally, we only validate this for Align1 because Align16 always
760b8e80941Smrg       * requires packed destinations, so these restrictions can't possibly
761b8e80941Smrg       * apply to Align16 mode.
762b8e80941Smrg       */
763b8e80941Smrg      if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
764b8e80941Smrg         if ((dst_type == BRW_REGISTER_TYPE_HF &&
765b8e80941Smrg              (brw_reg_type_is_integer(src0_type) ||
766b8e80941Smrg               (num_sources > 1 && brw_reg_type_is_integer(src1_type)))) ||
767b8e80941Smrg             (brw_reg_type_is_integer(dst_type) &&
768b8e80941Smrg              (src0_type == BRW_REGISTER_TYPE_HF ||
769b8e80941Smrg               (num_sources > 1 && src1_type == BRW_REGISTER_TYPE_HF)))) {
770b8e80941Smrg            ERROR_IF(dst_stride * dst_type_size != 4,
771b8e80941Smrg                     "Conversions between integer and half-float must be "
772b8e80941Smrg                     "strided by a DWord on the destination");
773b8e80941Smrg
774b8e80941Smrg            unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
775b8e80941Smrg            ERROR_IF(subreg % 4 != 0,
776b8e80941Smrg                     "Conversions between integer and half-float must be "
777b8e80941Smrg                     "aligned to a DWord on the destination");
778b8e80941Smrg         } else if ((devinfo->is_cherryview || devinfo->gen >= 9) &&
779b8e80941Smrg                    dst_type == BRW_REGISTER_TYPE_HF) {
780b8e80941Smrg            unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
781b8e80941Smrg            ERROR_IF(dst_stride != 2 &&
782b8e80941Smrg                     !(is_mixed_float(devinfo, inst) &&
783b8e80941Smrg                       dst_stride == 1 && subreg % 16 == 0),
784b8e80941Smrg                     "Conversions to HF must have either all words in even "
785b8e80941Smrg                     "word locations or all words in odd word locations or "
786b8e80941Smrg                     "be mixed-float with Oword-aligned packed destination");
787b8e80941Smrg         }
788b8e80941Smrg      }
789b8e80941Smrg   }
790b8e80941Smrg
791b8e80941Smrg   /* There are special regioning rules for mixed-float mode in CHV and SKL that
792b8e80941Smrg    * override the general rule for the ratio of sizes of the destination type
793b8e80941Smrg    * and the execution type. We will add validation for those in a later patch.
794b8e80941Smrg    */
795b8e80941Smrg   bool validate_dst_size_and_exec_size_ratio =
796b8e80941Smrg      !is_mixed_float(devinfo, inst) ||
797b8e80941Smrg      !(devinfo->is_cherryview || devinfo->gen >= 9);
798b8e80941Smrg
799b8e80941Smrg   if (validate_dst_size_and_exec_size_ratio &&
800b8e80941Smrg       exec_type_size > dst_type_size) {
801b8e80941Smrg      if (!(dst_type_is_byte && inst_is_raw_move(devinfo, inst))) {
802b8e80941Smrg         ERROR_IF(dst_stride * dst_type_size != exec_type_size,
803b8e80941Smrg                  "Destination stride must be equal to the ratio of the sizes "
804b8e80941Smrg                  "of the execution data type to the destination type");
805b8e80941Smrg      }
806b8e80941Smrg
807b8e80941Smrg      unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
808b8e80941Smrg
809b8e80941Smrg      if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1 &&
810b8e80941Smrg          brw_inst_dst_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT) {
811b8e80941Smrg         /* The i965 PRM says:
812b8e80941Smrg          *
813b8e80941Smrg          *    Implementation Restriction: The relaxed alignment rule for byte
814b8e80941Smrg          *    destination (#10.5) is not supported.
815b8e80941Smrg          */
816b8e80941Smrg         if ((devinfo->gen > 4 || devinfo->is_g4x) && dst_type_is_byte) {
817b8e80941Smrg            ERROR_IF(subreg % exec_type_size != 0 &&
818b8e80941Smrg                     subreg % exec_type_size != 1,
819b8e80941Smrg                     "Destination subreg must be aligned to the size of the "
820b8e80941Smrg                     "execution data type (or to the next lowest byte for byte "
821b8e80941Smrg                     "destinations)");
822b8e80941Smrg         } else {
823b8e80941Smrg            ERROR_IF(subreg % exec_type_size != 0,
824b8e80941Smrg                     "Destination subreg must be aligned to the size of the "
825b8e80941Smrg                     "execution data type");
826b8e80941Smrg         }
827b8e80941Smrg      }
828b8e80941Smrg   }
829b8e80941Smrg
830b8e80941Smrg   return error_msg;
831b8e80941Smrg}
832b8e80941Smrg
833b8e80941Smrg/**
834b8e80941Smrg * Checks restrictions listed in "General Restrictions on Regioning Parameters"
835b8e80941Smrg * in the "Register Region Restrictions" section.
836b8e80941Smrg */
static struct string
general_restrictions_on_region_parameters(const struct gen_device_info *devinfo,
                                          const brw_inst *inst)
{
   const struct opcode_desc *desc =
      brw_opcode_desc(devinfo, brw_inst_opcode(devinfo, inst));
   unsigned num_sources = num_sources_from_inst(devinfo, inst);
   unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst);
   struct string error_msg = { .str = NULL, .len = 0 };

   /* Three-source instructions use a different (more constrained) regioning
    * scheme, validated elsewhere.
    */
   if (num_sources == 3)
      return (struct string){};

   /* Split sends don't have the bits in the instruction to encode regions so
    * there's nothing to check.
    */
   if (inst_is_split_send(devinfo, inst))
      return (struct string){};

   if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16) {
      /* Align16 has no per-operand width/hstride fields; only the vertical
       * stride is encoded, and only a few values of it are legal.
       */
      if (desc->ndst != 0 && !dst_is_null(devinfo, inst))
         ERROR_IF(brw_inst_dst_hstride(devinfo, inst) != BRW_HORIZONTAL_STRIDE_1,
                  "Destination Horizontal Stride must be 1");

      if (num_sources >= 1) {
         /* HSW/gen8+ additionally allow VertStride of 2; older gens only 0 or 4. */
         if (devinfo->is_haswell || devinfo->gen >= 8) {
            ERROR_IF(brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE &&
                     brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 &&
                     brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_2 &&
                     brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
                     "In Align16 mode, only VertStride of 0, 2, or 4 is allowed");
         } else {
            ERROR_IF(brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE &&
                     brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 &&
                     brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
                     "In Align16 mode, only VertStride of 0 or 4 is allowed");
         }
      }

      if (num_sources == 2) {
         if (devinfo->is_haswell || devinfo->gen >= 8) {
            ERROR_IF(brw_inst_src1_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE &&
                     brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 &&
                     brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_2 &&
                     brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
                     "In Align16 mode, only VertStride of 0, 2, or 4 is allowed");
         } else {
            ERROR_IF(brw_inst_src1_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE &&
                     brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 &&
                     brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
                     "In Align16 mode, only VertStride of 0 or 4 is allowed");
         }
      }

      /* The remaining rules below only apply to Align1 regions. */
      return error_msg;
   }

   /* Align1: check the full <vstride; width, hstride> region of each source. */
   for (unsigned i = 0; i < num_sources; i++) {
      unsigned vstride, width, hstride, element_size, subreg;
      enum brw_reg_type type;

/* Extracts the region parameters of src<n> into the locals above; skips
 * (via 'continue' in the enclosing loop) immediate sources, which have no
 * region to validate.
 */
#define DO_SRC(n)                                                              \
      if (brw_inst_src ## n ## _reg_file(devinfo, inst) ==                     \
          BRW_IMMEDIATE_VALUE)                                                 \
         continue;                                                             \
                                                                               \
      vstride = STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst));          \
      width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst));               \
      hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst));          \
      type = brw_inst_src ## n ## _type(devinfo, inst);                        \
      element_size = brw_reg_type_to_size(type);                               \
      subreg = brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst)

      if (i == 0) {
         DO_SRC(0);
      } else {
         DO_SRC(1);
      }
#undef DO_SRC

      /* On IVB/BYT, region parameters and execution size for DF are in terms of
       * 32-bit elements, so they are doubled. For evaluating the validity of an
       * instruction, we halve them.
       */
      if (devinfo->gen == 7 && !devinfo->is_haswell &&
          element_size == 8)
         element_size = 4;

      /* ExecSize must be greater than or equal to Width. */
      ERROR_IF(exec_size < width, "ExecSize must be greater than or equal "
                                  "to Width");

      /* If ExecSize = Width and HorzStride ≠ 0,
       * VertStride must be set to Width * HorzStride.
       */
      if (exec_size == width && hstride != 0) {
         ERROR_IF(vstride != width * hstride,
                  "If ExecSize = Width and HorzStride ≠ 0, "
                  "VertStride must be set to Width * HorzStride");
      }

      /* If Width = 1, HorzStride must be 0 regardless of the values of
       * ExecSize and VertStride.
       */
      if (width == 1) {
         ERROR_IF(hstride != 0,
                  "If Width = 1, HorzStride must be 0 regardless "
                  "of the values of ExecSize and VertStride");
      }

      /* If ExecSize = Width = 1, both VertStride and HorzStride must be 0. */
      if (exec_size == 1 && width == 1) {
         ERROR_IF(vstride != 0 || hstride != 0,
                  "If ExecSize = Width = 1, both VertStride "
                  "and HorzStride must be 0");
      }

      /* If VertStride = HorzStride = 0, Width must be 1 regardless of the
       * value of ExecSize.
       */
      if (vstride == 0 && hstride == 0) {
         ERROR_IF(width != 1,
                  "If VertStride = HorzStride = 0, Width must be "
                  "1 regardless of the value of ExecSize");
      }

      /* VertStride must be used to cross GRF register boundaries. This rule
       * implies that elements within a 'Width' cannot cross GRF boundaries.
       *
       * Build a byte-access mask for each row of the region: bit k set means
       * byte k (relative to the row's GRF pair) is read. The low 32 bits
       * cover the first GRF and the high 32 bits the second, so a row that
       * sets bits in both halves spans a GRF boundary.
       */
      const uint64_t mask = (1ULL << element_size) - 1;
      unsigned rowbase = subreg;

      for (int y = 0; y < exec_size / width; y++) {
         uint64_t access_mask = 0;
         unsigned offset = rowbase;

         for (int x = 0; x < width; x++) {
            access_mask |= mask << offset;
            offset += hstride * element_size;
         }

         rowbase += vstride * element_size;

         if ((uint32_t)access_mask != 0 && (access_mask >> 32) != 0) {
            ERROR("VertStride must be used to cross GRF register boundaries");
            break;
         }
      }
   }

   /* Dst.HorzStride must not be 0. */
   if (desc->ndst != 0 && !dst_is_null(devinfo, inst)) {
      ERROR_IF(brw_inst_dst_hstride(devinfo, inst) == BRW_HORIZONTAL_STRIDE_0,
               "Destination Horizontal Stride must not be 0");
   }

   return error_msg;
}
995b8e80941Smrg
996b8e80941Smrgstatic struct string
997b8e80941Smrgspecial_restrictions_for_mixed_float_mode(const struct gen_device_info *devinfo,
998b8e80941Smrg                                          const brw_inst *inst)
999b8e80941Smrg{
1000b8e80941Smrg   struct string error_msg = { .str = NULL, .len = 0 };
1001b8e80941Smrg
1002b8e80941Smrg   const unsigned opcode = brw_inst_opcode(devinfo, inst);
1003b8e80941Smrg   const unsigned num_sources = num_sources_from_inst(devinfo, inst);
1004b8e80941Smrg   if (num_sources >= 3)
1005b8e80941Smrg      return error_msg;
1006b8e80941Smrg
1007b8e80941Smrg   if (!is_mixed_float(devinfo, inst))
1008b8e80941Smrg      return error_msg;
1009b8e80941Smrg
1010b8e80941Smrg   unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst);
1011b8e80941Smrg   bool is_align16 = brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16;
1012b8e80941Smrg
1013b8e80941Smrg   enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
1014b8e80941Smrg   enum brw_reg_type src1_type = num_sources > 1 ?
1015b8e80941Smrg                                 brw_inst_src1_type(devinfo, inst) : 0;
1016b8e80941Smrg   enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);
1017b8e80941Smrg
1018b8e80941Smrg   unsigned dst_stride = STRIDE(brw_inst_dst_hstride(devinfo, inst));
1019b8e80941Smrg   bool dst_is_packed = is_packed(exec_size * dst_stride, exec_size, dst_stride);
1020b8e80941Smrg
1021b8e80941Smrg   /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1022b8e80941Smrg    * Float Operations:
1023b8e80941Smrg    *
1024b8e80941Smrg    *    "Indirect addressing on source is not supported when source and
1025b8e80941Smrg    *     destination data types are mixed float."
1026b8e80941Smrg    */
1027b8e80941Smrg   ERROR_IF(brw_inst_src0_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT ||
1028b8e80941Smrg            (num_sources > 1 &&
1029b8e80941Smrg             brw_inst_src1_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT),
1030b8e80941Smrg            "Indirect addressing on source is not supported when source and "
1031b8e80941Smrg            "destination data types are mixed float");
1032b8e80941Smrg
1033b8e80941Smrg   /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1034b8e80941Smrg    * Float Operations:
1035b8e80941Smrg    *
1036b8e80941Smrg    *    "No SIMD16 in mixed mode when destination is f32. Instruction
1037b8e80941Smrg    *     execution size must be no more than 8."
1038b8e80941Smrg    */
1039b8e80941Smrg   ERROR_IF(exec_size > 8 && dst_type == BRW_REGISTER_TYPE_F,
1040b8e80941Smrg            "Mixed float mode with 32-bit float destination is limited "
1041b8e80941Smrg            "to SIMD8");
1042b8e80941Smrg
1043b8e80941Smrg   if (is_align16) {
1044b8e80941Smrg      /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1045b8e80941Smrg       * Float Operations:
1046b8e80941Smrg       *
1047b8e80941Smrg       *   "In Align16 mode, when half float and float data types are mixed
1048b8e80941Smrg       *    between source operands OR between source and destination operands,
1049b8e80941Smrg       *    the register content are assumed to be packed."
1050b8e80941Smrg       *
1051b8e80941Smrg       * Since Align16 doesn't have a concept of horizontal stride (or width),
1052b8e80941Smrg       * it means that vertical stride must always be 4, since 0 and 2 would
1053b8e80941Smrg       * lead to replicated data, and any other value is disallowed in Align16.
1054b8e80941Smrg       */
1055b8e80941Smrg      ERROR_IF(brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
1056b8e80941Smrg               "Align16 mixed float mode assumes packed data (vstride must be 4");
1057b8e80941Smrg
1058b8e80941Smrg      ERROR_IF(num_sources >= 2 &&
1059b8e80941Smrg               brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
1060b8e80941Smrg               "Align16 mixed float mode assumes packed data (vstride must be 4");
1061b8e80941Smrg
1062b8e80941Smrg      /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1063b8e80941Smrg       * Float Operations:
1064b8e80941Smrg       *
1065b8e80941Smrg       *   "For Align16 mixed mode, both input and output packed f16 data
1066b8e80941Smrg       *    must be oword aligned, no oword crossing in packed f16."
1067b8e80941Smrg       *
1068b8e80941Smrg       * The previous rule requires that Align16 operands are always packed,
1069b8e80941Smrg       * and since there is only one bit for Align16 subnr, which represents
1070b8e80941Smrg       * offsets 0B and 16B, this rule is always enforced and we don't need to
1071b8e80941Smrg       * validate it.
1072b8e80941Smrg       */
1073b8e80941Smrg
1074b8e80941Smrg      /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1075b8e80941Smrg       * Float Operations:
1076b8e80941Smrg       *
1077b8e80941Smrg       *    "No SIMD16 in mixed mode when destination is packed f16 for both
1078b8e80941Smrg       *     Align1 and Align16."
1079b8e80941Smrg       *
1080b8e80941Smrg       * And:
1081b8e80941Smrg       *
1082b8e80941Smrg       *   "In Align16 mode, when half float and float data types are mixed
1083b8e80941Smrg       *    between source operands OR between source and destination operands,
1084b8e80941Smrg       *    the register content are assumed to be packed."
1085b8e80941Smrg       *
1086b8e80941Smrg       * Which implies that SIMD16 is not available in Align16. This is further
1087b8e80941Smrg       * confirmed by:
1088b8e80941Smrg       *
1089b8e80941Smrg       *    "For Align16 mixed mode, both input and output packed f16 data
1090b8e80941Smrg       *     must be oword aligned, no oword crossing in packed f16"
1091b8e80941Smrg       *
1092b8e80941Smrg       * Since oword-aligned packed f16 data would cross oword boundaries when
1093b8e80941Smrg       * the execution size is larger than 8.
1094b8e80941Smrg       */
1095b8e80941Smrg      ERROR_IF(exec_size > 8, "Align16 mixed float mode is limited to SIMD8");
1096b8e80941Smrg
1097b8e80941Smrg      /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1098b8e80941Smrg       * Float Operations:
1099b8e80941Smrg       *
1100b8e80941Smrg       *    "No accumulator read access for Align16 mixed float."
1101b8e80941Smrg       */
1102b8e80941Smrg      ERROR_IF(inst_uses_src_acc(devinfo, inst),
1103b8e80941Smrg               "No accumulator read access for Align16 mixed float");
1104b8e80941Smrg   } else {
1105b8e80941Smrg      assert(!is_align16);
1106b8e80941Smrg
1107b8e80941Smrg      /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1108b8e80941Smrg       * Float Operations:
1109b8e80941Smrg       *
1110b8e80941Smrg       *    "No SIMD16 in mixed mode when destination is packed f16 for both
1111b8e80941Smrg       *     Align1 and Align16."
1112b8e80941Smrg       */
1113b8e80941Smrg      ERROR_IF(exec_size > 8 && dst_is_packed &&
1114b8e80941Smrg               dst_type == BRW_REGISTER_TYPE_HF,
1115b8e80941Smrg               "Align1 mixed float mode is limited to SIMD8 when destination "
1116b8e80941Smrg               "is packed half-float");
1117b8e80941Smrg
1118b8e80941Smrg      /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1119b8e80941Smrg       * Float Operations:
1120b8e80941Smrg       *
1121b8e80941Smrg       *    "Math operations for mixed mode:
1122b8e80941Smrg       *     - In Align1, f16 inputs need to be strided"
1123b8e80941Smrg       */
1124b8e80941Smrg      if (opcode == BRW_OPCODE_MATH) {
1125b8e80941Smrg         if (src0_type == BRW_REGISTER_TYPE_HF) {
1126b8e80941Smrg            ERROR_IF(STRIDE(brw_inst_src0_hstride(devinfo, inst)) <= 1,
1127b8e80941Smrg                     "Align1 mixed mode math needs strided half-float inputs");
1128b8e80941Smrg         }
1129b8e80941Smrg
1130b8e80941Smrg         if (num_sources >= 2 && src1_type == BRW_REGISTER_TYPE_HF) {
1131b8e80941Smrg            ERROR_IF(STRIDE(brw_inst_src1_hstride(devinfo, inst)) <= 1,
1132b8e80941Smrg                     "Align1 mixed mode math needs strided half-float inputs");
1133b8e80941Smrg         }
1134b8e80941Smrg      }
1135b8e80941Smrg
1136b8e80941Smrg      if (dst_type == BRW_REGISTER_TYPE_HF && dst_stride == 1) {
1137b8e80941Smrg         /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1138b8e80941Smrg          * Float Operations:
1139b8e80941Smrg          *
1140b8e80941Smrg          *    "In Align1, destination stride can be smaller than execution
1141b8e80941Smrg          *     type. When destination is stride of 1, 16 bit packed data is
1142b8e80941Smrg          *     updated on the destination. However, output packed f16 data
1143b8e80941Smrg          *     must be oword aligned, no oword crossing in packed f16."
1144b8e80941Smrg          *
1145b8e80941Smrg          * The requirement of not crossing oword boundaries for 16-bit oword
1146b8e80941Smrg          * aligned data means that execution size is limited to 8.
1147b8e80941Smrg          */
1148b8e80941Smrg         unsigned subreg;
1149b8e80941Smrg         if (brw_inst_dst_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT)
1150b8e80941Smrg            subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
1151b8e80941Smrg         else
1152b8e80941Smrg            subreg = brw_inst_dst_ia_subreg_nr(devinfo, inst);
1153b8e80941Smrg         ERROR_IF(subreg % 16 != 0,
1154b8e80941Smrg                  "Align1 mixed mode packed half-float output must be "
1155b8e80941Smrg                  "oword aligned");
1156b8e80941Smrg         ERROR_IF(exec_size > 8,
1157b8e80941Smrg                  "Align1 mixed mode packed half-float output must not "
1158b8e80941Smrg                  "cross oword boundaries (max exec size is 8)");
1159b8e80941Smrg
1160b8e80941Smrg         /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1161b8e80941Smrg          * Float Operations:
1162b8e80941Smrg          *
1163b8e80941Smrg          *    "When source is float or half float from accumulator register and
1164b8e80941Smrg          *     destination is half float with a stride of 1, the source must
1165b8e80941Smrg          *     register aligned. i.e., source must have offset zero."
1166b8e80941Smrg          *
1167b8e80941Smrg          * Align16 mixed float mode doesn't allow accumulator access on sources,
1168b8e80941Smrg          * so we only need to check this for Align1.
1169b8e80941Smrg          */
1170b8e80941Smrg         if (src0_is_acc(devinfo, inst) &&
1171b8e80941Smrg             (src0_type == BRW_REGISTER_TYPE_F ||
1172b8e80941Smrg              src0_type == BRW_REGISTER_TYPE_HF)) {
1173b8e80941Smrg            ERROR_IF(brw_inst_src0_da1_subreg_nr(devinfo, inst) != 0,
1174b8e80941Smrg                     "Mixed float mode requires register-aligned accumulator "
1175b8e80941Smrg                     "source reads when destination is packed half-float");
1176b8e80941Smrg
1177b8e80941Smrg         }
1178b8e80941Smrg
1179b8e80941Smrg         if (num_sources > 1 &&
1180b8e80941Smrg             src1_is_acc(devinfo, inst) &&
1181b8e80941Smrg             (src1_type == BRW_REGISTER_TYPE_F ||
1182b8e80941Smrg              src1_type == BRW_REGISTER_TYPE_HF)) {
1183b8e80941Smrg            ERROR_IF(brw_inst_src1_da1_subreg_nr(devinfo, inst) != 0,
1184b8e80941Smrg                     "Mixed float mode requires register-aligned accumulator "
1185b8e80941Smrg                     "source reads when destination is packed half-float");
1186b8e80941Smrg         }
1187b8e80941Smrg      }
1188b8e80941Smrg
1189b8e80941Smrg      /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1190b8e80941Smrg       * Float Operations:
1191b8e80941Smrg       *
1192b8e80941Smrg       *    "No swizzle is allowed when an accumulator is used as an implicit
1193b8e80941Smrg       *     source or an explicit source in an instruction. i.e. when
1194b8e80941Smrg       *     destination is half float with an implicit accumulator source,
1195b8e80941Smrg       *     destination stride needs to be 2."
1196b8e80941Smrg       *
1197b8e80941Smrg       * FIXME: it is not quite clear what the first sentence actually means
1198b8e80941Smrg       *        or its link to the implication described after it, so we only
1199b8e80941Smrg       *        validate the explicit implication, which is clearly described.
1200b8e80941Smrg       */
1201b8e80941Smrg      if (dst_type == BRW_REGISTER_TYPE_HF &&
1202b8e80941Smrg          inst_uses_src_acc(devinfo, inst)) {
1203b8e80941Smrg         ERROR_IF(dst_stride != 2,
1204b8e80941Smrg                  "Mixed float mode with implicit/explicit accumulator "
1205b8e80941Smrg                  "source and half-float destination requires a stride "
1206b8e80941Smrg                  "of 2 on the destination");
1207b8e80941Smrg      }
1208b8e80941Smrg   }
1209b8e80941Smrg
1210b8e80941Smrg   return error_msg;
1211b8e80941Smrg}
1212b8e80941Smrg
/**
 * Creates an \p access_mask for an \p exec_size, \p element_size, and a region
 *
 * An \p access_mask is a 32-element array of uint64_t, where each uint64_t is
 * a bitmask of bytes accessed by the region.
 *
 * For instance the access mask of the source gX.1<4,2,2>F in an exec_size = 4
 * instruction would be
 *
 *    access_mask[0] = 0x00000000000000F0
 *    access_mask[1] = 0x000000000000F000
 *    access_mask[2] = 0x0000000000F00000
 *    access_mask[3] = 0x00000000F0000000
 *    access_mask[4-31] = 0
 *
 * because the first execution channel accesses bytes 7-4 and the second
 * execution channel accesses bytes 15-12, etc.
 *
 * Each mask covers the 64 bytes of two adjacent GRF registers; \p subreg,
 * \p vstride, \p width, and \p hstride are all expressed in bytes and
 * elements exactly as decoded from the instruction.
 */
static void
align1_access_mask(uint64_t access_mask[static 32],
                   unsigned exec_size, unsigned element_size, unsigned subreg,
                   unsigned vstride, unsigned width, unsigned hstride)
{
   const uint64_t mask = (1ULL << element_size) - 1;
   unsigned rowbase = subreg;
   unsigned element = 0;

   for (unsigned y = 0; y < exec_size / width; y++) {
      unsigned offset = rowbase;

      for (unsigned x = 0; x < width; x++) {
         /* A malformed (yet encodable) region may place an element beyond
          * the 64 bytes covered by two adjacent GRFs.  Shifting by >= 64 is
          * undefined behavior in C, and this pass must survive invalid
          * instructions, so record such out-of-range elements as accessing
          * no bytes; the caller flags the region as an error separately.
          */
         access_mask[element++] = offset < 64 ? mask << offset : 0;
         offset += hstride * element_size;
      }

      rowbase += vstride * element_size;
   }

   assert(element == 0 || element == exec_size);
}
1253b8e80941Smrg
/**
 * Returns the number of registers accessed according to the \p access_mask
 *
 * Each mask entry spans the 64 bytes of two adjacent GRFs, so any bit set
 * above the low 32 means the region spills into the second register.
 */
static int
registers_read(const uint64_t access_mask[static 32])
{
   bool any_bytes_read = false;

   for (unsigned chan = 0; chan < 32; chan++) {
      const uint64_t bytes = access_mask[chan];

      /* A byte in the upper half of the 64-byte window implies the second
       * register is touched, which is the maximum possible answer.
       */
      if (bytes >> 32)
         return 2;

      if (bytes)
         any_bytes_read = true;
   }

   return any_bytes_read ? 1 : 0;
}
1272b8e80941Smrg
/**
 * Checks restrictions listed in "Region Alignment Rules" in the "Register
 * Region Restrictions" section.
 *
 * Builds per-channel byte-access masks for the destination and each source
 * and validates the PRM rules about regions spanning one or two GRFs.
 * Returns an empty string on success, or an accumulated error message.
 */
static struct string
region_alignment_rules(const struct gen_device_info *devinfo,
                       const brw_inst *inst)
{
   const struct opcode_desc *desc =
      brw_opcode_desc(devinfo, brw_inst_opcode(devinfo, inst));
   unsigned num_sources = num_sources_from_inst(devinfo, inst);
   unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst);
   uint64_t dst_access_mask[32], src0_access_mask[32], src1_access_mask[32];
   struct string error_msg = { .str = NULL, .len = 0 };

   /* These rules only cover Align1 regioning of non-send instructions with
    * at most two sources; everything else is trivially accepted here.
    */
   if (num_sources == 3)
      return (struct string){};

   if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16)
      return (struct string){};

   if (inst_is_send(devinfo, inst))
      return (struct string){};

   memset(dst_access_mask, 0, sizeof(dst_access_mask));
   memset(src0_access_mask, 0, sizeof(src0_access_mask));
   memset(src1_access_mask, 0, sizeof(src1_access_mask));

   for (unsigned i = 0; i < num_sources; i++) {
      unsigned vstride, width, hstride, element_size, subreg;
      enum brw_reg_type type;

      /* In Direct Addressing mode, a source cannot span more than 2 adjacent
       * GRF registers.
       */

      /* Note: the `continue` statements inside this macro skip indirect and
       * immediate sources for the whole loop iteration, including the span
       * check below the macro invocation.
       */
#define DO_SRC(n)                                                              \
      if (brw_inst_src ## n ## _address_mode(devinfo, inst) !=                 \
          BRW_ADDRESS_DIRECT)                                                  \
         continue;                                                             \
                                                                               \
      if (brw_inst_src ## n ## _reg_file(devinfo, inst) ==                     \
          BRW_IMMEDIATE_VALUE)                                                 \
         continue;                                                             \
                                                                               \
      vstride = STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst));          \
      width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst));               \
      hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst));          \
      type = brw_inst_src ## n ## _type(devinfo, inst);                        \
      element_size = brw_reg_type_to_size(type);                               \
      subreg = brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst);             \
      align1_access_mask(src ## n ## _access_mask,                             \
                         exec_size, element_size, subreg,                      \
                         vstride, width, hstride)

      if (i == 0) {
         DO_SRC(0);
      } else {
         DO_SRC(1);
      }
#undef DO_SRC

      /* Compute the byte offset of the last element of the region; it must
       * fall inside the 64 bytes covered by two adjacent GRFs.
       */
      unsigned num_vstride = exec_size / width;
      unsigned num_hstride = width;
      unsigned vstride_elements = (num_vstride - 1) * vstride;
      unsigned hstride_elements = (num_hstride - 1) * hstride;
      unsigned offset = (vstride_elements + hstride_elements) * element_size +
                        subreg;
      ERROR_IF(offset >= 64,
               "A source cannot span more than 2 adjacent GRF registers");
   }

   /* The remaining rules constrain the destination; nothing more to check
    * for instructions without a (real) destination.
    */
   if (desc->ndst == 0 || dst_is_null(devinfo, inst))
      return error_msg;

   unsigned stride = STRIDE(brw_inst_dst_hstride(devinfo, inst));
   enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);
   unsigned element_size = brw_reg_type_to_size(dst_type);
   unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
   unsigned offset = ((exec_size - 1) * stride * element_size) + subreg;
   ERROR_IF(offset >= 64,
            "A destination cannot span more than 2 adjacent GRF registers");

   if (error_msg.str)
      return error_msg;

   /* On IVB/BYT, region parameters and execution size for DF are in terms of
    * 32-bit elements, so they are doubled. For evaluating the validity of an
    * instruction, we halve them.
    */
   if (devinfo->gen == 7 && !devinfo->is_haswell &&
       element_size == 8)
      element_size = 4;

   /* The destination is a single row <exec_size*stride; exec_size, stride>,
    * or a scalar region for SIMD1.
    */
   align1_access_mask(dst_access_mask, exec_size, element_size, subreg,
                      exec_size == 1 ? 0 : exec_size * stride,
                      exec_size == 1 ? 1 : exec_size,
                      exec_size == 1 ? 0 : stride);

   unsigned dst_regs = registers_read(dst_access_mask);
   unsigned src0_regs = registers_read(src0_access_mask);
   unsigned src1_regs = registers_read(src1_access_mask);

   /* The SNB, IVB, HSW, BDW, and CHV PRMs say:
    *
    *    When an instruction has a source region spanning two registers and a
    *    destination region contained in one register, the number of elements
    *    must be the same between two sources and one of the following must be
    *    true:
    *
    *       1. The destination region is entirely contained in the lower OWord
    *          of a register.
    *       2. The destination region is entirely contained in the upper OWord
    *          of a register.
    *       3. The destination elements are evenly split between the two OWords
    *          of a register.
    */
   if (devinfo->gen <= 8) {
      if (dst_regs == 1 && (src0_regs == 2 || src1_regs == 2)) {
         unsigned upper_oword_writes = 0, lower_oword_writes = 0;

         /* An OWord is 16 bytes; classify each channel's write by whether it
          * touches bytes above the low 16 of the destination register.
          */
         for (unsigned i = 0; i < exec_size; i++) {
            if (dst_access_mask[i] > 0x0000FFFF) {
               upper_oword_writes++;
            } else {
               assert(dst_access_mask[i] != 0);
               lower_oword_writes++;
            }
         }

         ERROR_IF(lower_oword_writes != 0 &&
                  upper_oword_writes != 0 &&
                  upper_oword_writes != lower_oword_writes,
                  "Writes must be to only one OWord or "
                  "evenly split between OWords");
      }
   }

   /* The IVB and HSW PRMs say:
    *
    *    When an instruction has a source region that spans two registers and
    *    the destination spans two registers, the destination elements must be
    *    evenly split between the two registers [...]
    *
    * The SNB PRM contains similar wording (but written in a much more
    * confusing manner).
    *
    * The BDW PRM says:
    *
    *    When destination spans two registers, the source may be one or two
    *    registers. The destination elements must be evenly split between the
    *    two registers.
    *
    * The SKL PRM says:
    *
    *    When destination of MATH instruction spans two registers, the
    *    destination elements must be evenly split between the two registers.
    *
    * It is not known whether this restriction applies to KBL other Gens after
    * SKL.
    */
   if (devinfo->gen <= 8 ||
       brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MATH) {

      /* Nothing explicitly states that on Gen < 8 elements must be evenly
       * split between two destination registers in the two exceptional
       * source-region-spans-one-register cases, but since Broadwell requires
       * evenly split writes regardless of source region, we assume that it was
       * an oversight and require it.
       */
      if (dst_regs == 2) {
         unsigned upper_reg_writes = 0, lower_reg_writes = 0;

         for (unsigned i = 0; i < exec_size; i++) {
            if (dst_access_mask[i] > 0xFFFFFFFF) {
               upper_reg_writes++;
            } else {
               assert(dst_access_mask[i] != 0);
               lower_reg_writes++;
            }
         }

         ERROR_IF(upper_reg_writes != lower_reg_writes,
                  "Writes must be evenly split between the two "
                  "destination registers");
      }
   }

   /* The IVB and HSW PRMs say:
    *
    *    When an instruction has a source region that spans two registers and
    *    the destination spans two registers, the destination elements must be
    *    evenly split between the two registers and each destination register
    *    must be entirely derived from one source register.
    *
    *    Note: In such cases, the regioning parameters must ensure that the
    *    offset from the two source registers is the same.
    *
    * The SNB PRM contains similar wording (but written in a much more
    * confusing manner).
    *
    * There are effectively three rules stated here:
    *
    *    For an instruction with a source and a destination spanning two
    *    registers,
    *
    *       (1) destination elements must be evenly split between the two
    *           registers
    *       (2) all destination elements in a register must be derived
    *           from one source register
    *       (3) the offset (i.e. the starting location in each of the two
    *           registers spanned by a region) must be the same in the two
    *           registers spanned by a region
    *
    * It is impossible to violate rule (1) without violating (2) or (3), so we
    * do not attempt to validate it.
    */
   if (devinfo->gen <= 7 && dst_regs == 2) {
      for (unsigned i = 0; i < num_sources; i++) {
         /* Rule (2): a channel writing the upper destination register must
          * read the upper source register, and vice versa.  Rule (3): find
          * the first channel that reads the upper source register and
          * compare its byte offset against the lower-register subreg.
          */
#define DO_SRC(n)                                                             \
         if (src ## n ## _regs <= 1)                                          \
            continue;                                                         \
                                                                              \
         for (unsigned i = 0; i < exec_size; i++) {                           \
            if ((dst_access_mask[i] > 0xFFFFFFFF) !=                          \
                (src ## n ## _access_mask[i] > 0xFFFFFFFF)) {                 \
               ERROR("Each destination register must be entirely derived "    \
                     "from one source register");                             \
               break;                                                         \
            }                                                                 \
         }                                                                    \
                                                                              \
         unsigned offset_0 =                                                  \
            brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst);               \
         unsigned offset_1 = offset_0;                                        \
                                                                              \
         for (unsigned i = 0; i < exec_size; i++) {                           \
            if (src ## n ## _access_mask[i] > 0xFFFFFFFF) {                   \
               offset_1 = __builtin_ctzll(src ## n ## _access_mask[i]) - 32;  \
               break;                                                         \
            }                                                                 \
         }                                                                    \
                                                                              \
         ERROR_IF(num_sources == 2 && offset_0 != offset_1,                   \
                  "The offset from the two source registers "                 \
                  "must be the same")

         if (i == 0) {
            DO_SRC(0);
         } else {
            DO_SRC(1);
         }
#undef DO_SRC
      }
   }

   /* The IVB and HSW PRMs say:
    *
    *    When destination spans two registers, the source MUST span two
    *    registers. The exception to the above rule:
    *        1. When source is scalar, the source registers are not
    *           incremented.
    *        2. When source is packed integer Word and destination is packed
    *           integer DWord, the source register is not incremented by the
    *           source sub register is incremented.
    *
    * The SNB PRM does not contain this rule, but the internal documentation
    * indicates that it applies to SNB as well. We assume that the rule applies
    * to Gen <= 5 although their PRMs do not state it.
    *
    * While the documentation explicitly says in exception (2) that the
    * destination must be an integer DWord, the hardware allows at least a
    * float destination type as well. We emit such instructions from
    *
    *    fs_visitor::emit_interpolation_setup_gen6
    *    fs_visitor::emit_fragcoord_interpolation
    *
    * and have for years with no ill effects.
    *
    * Additionally the simulator source code indicates that the real condition
    * is that the size of the destination type is 4 bytes.
    */
   if (devinfo->gen <= 7 && dst_regs == 2) {
      enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);
      bool dst_is_packed_dword =
         is_packed(exec_size * stride, exec_size, stride) &&
         brw_reg_type_to_size(dst_type) == 4;

      for (unsigned i = 0; i < num_sources; i++) {
#define DO_SRC(n)                                                                  \
         unsigned vstride, width, hstride;                                         \
         vstride = STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst));           \
         width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst));                \
         hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst));           \
         bool src ## n ## _is_packed_word =                                        \
            is_packed(vstride, width, hstride) &&                                  \
            (brw_inst_src ## n ## _type(devinfo, inst) == BRW_REGISTER_TYPE_W ||   \
             brw_inst_src ## n ## _type(devinfo, inst) == BRW_REGISTER_TYPE_UW);   \
                                                                                   \
         ERROR_IF(src ## n ## _regs == 1 &&                                        \
                  !src ## n ## _has_scalar_region(devinfo, inst) &&                \
                  !(dst_is_packed_dword && src ## n ## _is_packed_word),           \
                  "When the destination spans two registers, the source must "     \
                  "span two registers\n" ERROR_INDENT "(exceptions for scalar "    \
                  "source and packed-word to packed-dword expansion)")

         if (i == 0) {
            DO_SRC(0);
         } else {
            DO_SRC(1);
         }
#undef DO_SRC
      }
   }

   return error_msg;
}
1590b8e80941Smrg
1591b8e80941Smrgstatic struct string
1592b8e80941Smrgvector_immediate_restrictions(const struct gen_device_info *devinfo,
1593b8e80941Smrg                              const brw_inst *inst)
1594b8e80941Smrg{
1595b8e80941Smrg   unsigned num_sources = num_sources_from_inst(devinfo, inst);
1596b8e80941Smrg   struct string error_msg = { .str = NULL, .len = 0 };
1597b8e80941Smrg
1598b8e80941Smrg   if (num_sources == 3 || num_sources == 0)
1599b8e80941Smrg      return (struct string){};
1600b8e80941Smrg
1601b8e80941Smrg   unsigned file = num_sources == 1 ?
1602b8e80941Smrg                   brw_inst_src0_reg_file(devinfo, inst) :
1603b8e80941Smrg                   brw_inst_src1_reg_file(devinfo, inst);
1604b8e80941Smrg   if (file != BRW_IMMEDIATE_VALUE)
1605b8e80941Smrg      return (struct string){};
1606b8e80941Smrg
1607b8e80941Smrg   enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);
1608b8e80941Smrg   unsigned dst_type_size = brw_reg_type_to_size(dst_type);
1609b8e80941Smrg   unsigned dst_subreg = brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1 ?
1610b8e80941Smrg                         brw_inst_dst_da1_subreg_nr(devinfo, inst) : 0;
1611b8e80941Smrg   unsigned dst_stride = STRIDE(brw_inst_dst_hstride(devinfo, inst));
1612b8e80941Smrg   enum brw_reg_type type = num_sources == 1 ?
1613b8e80941Smrg                            brw_inst_src0_type(devinfo, inst) :
1614b8e80941Smrg                            brw_inst_src1_type(devinfo, inst);
1615b8e80941Smrg
1616b8e80941Smrg   /* The PRMs say:
1617b8e80941Smrg    *
1618b8e80941Smrg    *    When an immediate vector is used in an instruction, the destination
1619b8e80941Smrg    *    must be 128-bit aligned with destination horizontal stride equivalent
1620b8e80941Smrg    *    to a word for an immediate integer vector (v) and equivalent to a
1621b8e80941Smrg    *    DWord for an immediate float vector (vf).
1622b8e80941Smrg    *
1623b8e80941Smrg    * The text has not been updated for the addition of the immediate unsigned
1624b8e80941Smrg    * integer vector type (uv) on SNB, but presumably the same restriction
1625b8e80941Smrg    * applies.
1626b8e80941Smrg    */
1627b8e80941Smrg   switch (type) {
1628b8e80941Smrg   case BRW_REGISTER_TYPE_V:
1629b8e80941Smrg   case BRW_REGISTER_TYPE_UV:
1630b8e80941Smrg   case BRW_REGISTER_TYPE_VF:
1631b8e80941Smrg      ERROR_IF(dst_subreg % (128 / 8) != 0,
1632b8e80941Smrg               "Destination must be 128-bit aligned in order to use immediate "
1633b8e80941Smrg               "vector types");
1634b8e80941Smrg
1635b8e80941Smrg      if (type == BRW_REGISTER_TYPE_VF) {
1636b8e80941Smrg         ERROR_IF(dst_type_size * dst_stride != 4,
1637b8e80941Smrg                  "Destination must have stride equivalent to dword in order "
1638b8e80941Smrg                  "to use the VF type");
1639b8e80941Smrg      } else {
1640b8e80941Smrg         ERROR_IF(dst_type_size * dst_stride != 2,
1641b8e80941Smrg                  "Destination must have stride equivalent to word in order "
1642b8e80941Smrg                  "to use the V or UV type");
1643b8e80941Smrg      }
1644b8e80941Smrg      break;
1645b8e80941Smrg   default:
1646b8e80941Smrg      break;
1647b8e80941Smrg   }
1648b8e80941Smrg
1649b8e80941Smrg   return error_msg;
1650b8e80941Smrg}
1651b8e80941Smrg
1652b8e80941Smrgstatic struct string
1653b8e80941Smrgspecial_requirements_for_handling_double_precision_data_types(
1654b8e80941Smrg                                       const struct gen_device_info *devinfo,
1655b8e80941Smrg                                       const brw_inst *inst)
1656b8e80941Smrg{
1657b8e80941Smrg   unsigned num_sources = num_sources_from_inst(devinfo, inst);
1658b8e80941Smrg   struct string error_msg = { .str = NULL, .len = 0 };
1659b8e80941Smrg
1660b8e80941Smrg   if (num_sources == 3 || num_sources == 0)
1661b8e80941Smrg      return (struct string){};
1662b8e80941Smrg
1663b8e80941Smrg   /* Split sends don't have types so there's no doubles there. */
1664b8e80941Smrg   if (inst_is_split_send(devinfo, inst))
1665b8e80941Smrg      return (struct string){};
1666b8e80941Smrg
1667b8e80941Smrg   enum brw_reg_type exec_type = execution_type(devinfo, inst);
1668b8e80941Smrg   unsigned exec_type_size = brw_reg_type_to_size(exec_type);
1669b8e80941Smrg
1670b8e80941Smrg   enum brw_reg_file dst_file = brw_inst_dst_reg_file(devinfo, inst);
1671b8e80941Smrg   enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);
1672b8e80941Smrg   unsigned dst_type_size = brw_reg_type_to_size(dst_type);
1673b8e80941Smrg   unsigned dst_hstride = STRIDE(brw_inst_dst_hstride(devinfo, inst));
1674b8e80941Smrg   unsigned dst_reg = brw_inst_dst_da_reg_nr(devinfo, inst);
1675b8e80941Smrg   unsigned dst_subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
1676b8e80941Smrg   unsigned dst_address_mode = brw_inst_dst_address_mode(devinfo, inst);
1677b8e80941Smrg
1678b8e80941Smrg   bool is_integer_dword_multiply =
1679b8e80941Smrg      devinfo->gen >= 8 &&
1680b8e80941Smrg      brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MUL &&
1681b8e80941Smrg      (brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_D ||
1682b8e80941Smrg       brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_UD) &&
1683b8e80941Smrg      (brw_inst_src1_type(devinfo, inst) == BRW_REGISTER_TYPE_D ||
1684b8e80941Smrg       brw_inst_src1_type(devinfo, inst) == BRW_REGISTER_TYPE_UD);
1685b8e80941Smrg
1686b8e80941Smrg   if (dst_type_size != 8 && exec_type_size != 8 && !is_integer_dword_multiply)
1687b8e80941Smrg      return (struct string){};
1688b8e80941Smrg
1689b8e80941Smrg   for (unsigned i = 0; i < num_sources; i++) {
1690b8e80941Smrg      unsigned vstride, width, hstride, type_size, reg, subreg, address_mode;
1691b8e80941Smrg      bool is_scalar_region;
1692b8e80941Smrg      enum brw_reg_file file;
1693b8e80941Smrg      enum brw_reg_type type;
1694b8e80941Smrg
1695b8e80941Smrg#define DO_SRC(n)                                                              \
1696b8e80941Smrg      if (brw_inst_src ## n ## _reg_file(devinfo, inst) ==                     \
1697b8e80941Smrg          BRW_IMMEDIATE_VALUE)                                                 \
1698b8e80941Smrg         continue;                                                             \
1699b8e80941Smrg                                                                               \
1700b8e80941Smrg      is_scalar_region = src ## n ## _has_scalar_region(devinfo, inst);        \
1701b8e80941Smrg      vstride = STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst));          \
1702b8e80941Smrg      width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst));               \
1703b8e80941Smrg      hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst));          \
1704b8e80941Smrg      file = brw_inst_src ## n ## _reg_file(devinfo, inst);                    \
1705b8e80941Smrg      type = brw_inst_src ## n ## _type(devinfo, inst);                        \
1706b8e80941Smrg      type_size = brw_reg_type_to_size(type);                                  \
1707b8e80941Smrg      reg = brw_inst_src ## n ## _da_reg_nr(devinfo, inst);                    \
1708b8e80941Smrg      subreg = brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst);             \
1709b8e80941Smrg      address_mode = brw_inst_src ## n ## _address_mode(devinfo, inst)
1710b8e80941Smrg
1711b8e80941Smrg      if (i == 0) {
1712b8e80941Smrg         DO_SRC(0);
1713b8e80941Smrg      } else {
1714b8e80941Smrg         DO_SRC(1);
1715b8e80941Smrg      }
1716b8e80941Smrg#undef DO_SRC
1717b8e80941Smrg
1718b8e80941Smrg      /* The PRMs say that for CHV, BXT:
1719b8e80941Smrg       *
1720b8e80941Smrg       *    When source or destination datatype is 64b or operation is integer
1721b8e80941Smrg       *    DWord multiply, regioning in Align1 must follow these rules:
1722b8e80941Smrg       *
1723b8e80941Smrg       *    1. Source and Destination horizontal stride must be aligned to the
1724b8e80941Smrg       *       same qword.
1725b8e80941Smrg       *    2. Regioning must ensure Src.Vstride = Src.Width * Src.Hstride.
1726b8e80941Smrg       *    3. Source and Destination offset must be the same, except the case
1727b8e80941Smrg       *       of scalar source.
1728b8e80941Smrg       *
1729b8e80941Smrg       * We assume that the restriction applies to GLK as well.
1730b8e80941Smrg       */
1731b8e80941Smrg      if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1 &&
1732b8e80941Smrg          (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo))) {
1733b8e80941Smrg         unsigned src_stride = hstride * type_size;
1734b8e80941Smrg         unsigned dst_stride = dst_hstride * dst_type_size;
1735b8e80941Smrg
1736b8e80941Smrg         ERROR_IF(!is_scalar_region &&
1737b8e80941Smrg                  (src_stride % 8 != 0 ||
1738b8e80941Smrg                   dst_stride % 8 != 0 ||
1739b8e80941Smrg                   src_stride != dst_stride),
1740b8e80941Smrg                  "Source and destination horizontal stride must equal and a "
1741b8e80941Smrg                  "multiple of a qword when the execution type is 64-bit");
1742b8e80941Smrg
1743b8e80941Smrg         ERROR_IF(vstride != width * hstride,
1744b8e80941Smrg                  "Vstride must be Width * Hstride when the execution type is "
1745b8e80941Smrg                  "64-bit");
1746b8e80941Smrg
1747b8e80941Smrg         ERROR_IF(!is_scalar_region && dst_subreg != subreg,
1748b8e80941Smrg                  "Source and destination offset must be the same when the "
1749b8e80941Smrg                  "execution type is 64-bit");
1750b8e80941Smrg      }
1751b8e80941Smrg
1752b8e80941Smrg      /* The PRMs say that for CHV, BXT:
1753b8e80941Smrg       *
1754b8e80941Smrg       *    When source or destination datatype is 64b or operation is integer
1755b8e80941Smrg       *    DWord multiply, indirect addressing must not be used.
1756b8e80941Smrg       *
1757b8e80941Smrg       * We assume that the restriction applies to GLK as well.
1758b8e80941Smrg       */
1759b8e80941Smrg      if (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo)) {
1760b8e80941Smrg         ERROR_IF(BRW_ADDRESS_REGISTER_INDIRECT_REGISTER == address_mode ||
1761b8e80941Smrg                  BRW_ADDRESS_REGISTER_INDIRECT_REGISTER == dst_address_mode,
1762b8e80941Smrg                  "Indirect addressing is not allowed when the execution type "
1763b8e80941Smrg                  "is 64-bit");
1764b8e80941Smrg      }
1765b8e80941Smrg
1766b8e80941Smrg      /* The PRMs say that for CHV, BXT:
1767b8e80941Smrg       *
1768b8e80941Smrg       *    ARF registers must never be used with 64b datatype or when
1769b8e80941Smrg       *    operation is integer DWord multiply.
1770b8e80941Smrg       *
1771b8e80941Smrg       * We assume that the restriction applies to GLK as well.
1772b8e80941Smrg       *
1773b8e80941Smrg       * We assume that the restriction does not apply to the null register.
1774b8e80941Smrg       */
1775b8e80941Smrg      if (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo)) {
1776b8e80941Smrg         ERROR_IF(brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MAC ||
1777b8e80941Smrg                  brw_inst_acc_wr_control(devinfo, inst) ||
1778b8e80941Smrg                  (BRW_ARCHITECTURE_REGISTER_FILE == file &&
1779b8e80941Smrg                   reg != BRW_ARF_NULL) ||
1780b8e80941Smrg                  (BRW_ARCHITECTURE_REGISTER_FILE == dst_file &&
1781b8e80941Smrg                   dst_reg != BRW_ARF_NULL),
1782b8e80941Smrg                  "Architecture registers cannot be used when the execution "
1783b8e80941Smrg                  "type is 64-bit");
1784b8e80941Smrg      }
1785b8e80941Smrg   }
1786b8e80941Smrg
1787b8e80941Smrg   /* The PRMs say that for BDW, SKL:
1788b8e80941Smrg    *
1789b8e80941Smrg    *    If Align16 is required for an operation with QW destination and non-QW
1790b8e80941Smrg    *    source datatypes, the execution size cannot exceed 2.
1791b8e80941Smrg    *
1792b8e80941Smrg    * We assume that the restriction applies to all Gen8+ parts.
1793b8e80941Smrg    */
1794b8e80941Smrg   if (devinfo->gen >= 8) {
1795b8e80941Smrg      enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
1796b8e80941Smrg      enum brw_reg_type src1_type =
1797b8e80941Smrg         num_sources > 1 ? brw_inst_src1_type(devinfo, inst) : src0_type;
1798b8e80941Smrg      unsigned src0_type_size = brw_reg_type_to_size(src0_type);
1799b8e80941Smrg      unsigned src1_type_size = brw_reg_type_to_size(src1_type);
1800b8e80941Smrg
1801b8e80941Smrg      ERROR_IF(brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16 &&
1802b8e80941Smrg               dst_type_size == 8 &&
1803b8e80941Smrg               (src0_type_size != 8 || src1_type_size != 8) &&
1804b8e80941Smrg               brw_inst_exec_size(devinfo, inst) > BRW_EXECUTE_2,
1805b8e80941Smrg               "In Align16 exec size cannot exceed 2 with a QWord destination "
1806b8e80941Smrg               "and a non-QWord source");
1807b8e80941Smrg   }
1808b8e80941Smrg
1809b8e80941Smrg   /* The PRMs say that for CHV, BXT:
1810b8e80941Smrg    *
1811b8e80941Smrg    *    When source or destination datatype is 64b or operation is integer
1812b8e80941Smrg    *    DWord multiply, DepCtrl must not be used.
1813b8e80941Smrg    *
1814b8e80941Smrg    * We assume that the restriction applies to GLK as well.
1815b8e80941Smrg    */
1816b8e80941Smrg   if (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo)) {
1817b8e80941Smrg      ERROR_IF(brw_inst_no_dd_check(devinfo, inst) ||
1818b8e80941Smrg               brw_inst_no_dd_clear(devinfo, inst),
1819b8e80941Smrg               "DepCtrl is not allowed when the execution type is 64-bit");
1820b8e80941Smrg   }
1821b8e80941Smrg
1822b8e80941Smrg   return error_msg;
1823b8e80941Smrg}
1824b8e80941Smrg
1825b8e80941Smrgbool
1826b8e80941Smrgbrw_validate_instructions(const struct gen_device_info *devinfo,
1827b8e80941Smrg                          const void *assembly, int start_offset, int end_offset,
1828b8e80941Smrg                          struct disasm_info *disasm)
1829b8e80941Smrg{
1830b8e80941Smrg   bool valid = true;
1831b8e80941Smrg
1832b8e80941Smrg   for (int src_offset = start_offset; src_offset < end_offset;) {
1833b8e80941Smrg      struct string error_msg = { .str = NULL, .len = 0 };
1834b8e80941Smrg      const brw_inst *inst = assembly + src_offset;
1835b8e80941Smrg      bool is_compact = brw_inst_cmpt_control(devinfo, inst);
1836b8e80941Smrg      brw_inst uncompacted;
1837b8e80941Smrg
1838b8e80941Smrg      if (is_compact) {
1839b8e80941Smrg         brw_compact_inst *compacted = (void *)inst;
1840b8e80941Smrg         brw_uncompact_instruction(devinfo, &uncompacted, compacted);
1841b8e80941Smrg         inst = &uncompacted;
1842b8e80941Smrg      }
1843b8e80941Smrg
1844b8e80941Smrg      if (is_unsupported_inst(devinfo, inst)) {
1845b8e80941Smrg         ERROR("Instruction not supported on this Gen");
1846b8e80941Smrg      } else {
1847b8e80941Smrg         CHECK(sources_not_null);
1848b8e80941Smrg         CHECK(send_restrictions);
1849b8e80941Smrg         CHECK(alignment_supported);
1850b8e80941Smrg         CHECK(general_restrictions_based_on_operand_types);
1851b8e80941Smrg         CHECK(general_restrictions_on_region_parameters);
1852b8e80941Smrg         CHECK(special_restrictions_for_mixed_float_mode);
1853b8e80941Smrg         CHECK(region_alignment_rules);
1854b8e80941Smrg         CHECK(vector_immediate_restrictions);
1855b8e80941Smrg         CHECK(special_requirements_for_handling_double_precision_data_types);
1856b8e80941Smrg      }
1857b8e80941Smrg
1858b8e80941Smrg      if (error_msg.str && disasm) {
1859b8e80941Smrg         disasm_insert_error(disasm, src_offset, error_msg.str);
1860b8e80941Smrg      }
1861b8e80941Smrg      valid = valid && error_msg.len == 0;
1862b8e80941Smrg      free(error_msg.str);
1863b8e80941Smrg
1864b8e80941Smrg      if (is_compact) {
1865b8e80941Smrg         src_offset += sizeof(brw_compact_inst);
1866b8e80941Smrg      } else {
1867b8e80941Smrg         src_offset += sizeof(brw_inst);
1868b8e80941Smrg      }
1869b8e80941Smrg   }
1870b8e80941Smrg
1871b8e80941Smrg   return valid;
1872b8e80941Smrg}
1873