1b8e80941Smrg/* 2b8e80941Smrg * Copyright © 2015-2019 Intel Corporation 3b8e80941Smrg * 4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a 5b8e80941Smrg * copy of this software and associated documentation files (the "Software"), 6b8e80941Smrg * to deal in the Software without restriction, including without limitation 7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the 9b8e80941Smrg * Software is furnished to do so, subject to the following conditions: 10b8e80941Smrg * 11b8e80941Smrg * The above copyright notice and this permission notice (including the next 12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the 13b8e80941Smrg * Software. 14b8e80941Smrg * 15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21b8e80941Smrg * IN THE SOFTWARE. 22b8e80941Smrg */ 23b8e80941Smrg 24b8e80941Smrg/** @file brw_eu_validate.c 25b8e80941Smrg * 26b8e80941Smrg * This file implements a pass that validates shader assembly. 27b8e80941Smrg * 28b8e80941Smrg * The restrictions implemented herein are intended to verify that instructions 29b8e80941Smrg * in shader assembly do not violate restrictions documented in the graphics 30b8e80941Smrg * programming reference manuals. 
31b8e80941Smrg * 32b8e80941Smrg * The restrictions are difficult for humans to quickly verify due to their 33b8e80941Smrg * complexity and abundance. 34b8e80941Smrg * 35b8e80941Smrg * It is critical that this code is thoroughly unit tested because false 36b8e80941Smrg * results will lead developers astray, which is worse than having no validator 37b8e80941Smrg * at all. Functional changes to this file without corresponding unit tests (in 38b8e80941Smrg * test_eu_validate.cpp) will be rejected. 39b8e80941Smrg */ 40b8e80941Smrg 41b8e80941Smrg#include "brw_eu.h" 42b8e80941Smrg 43b8e80941Smrg/* We're going to do lots of string concatenation, so this should help. */ 44b8e80941Smrgstruct string { 45b8e80941Smrg char *str; 46b8e80941Smrg size_t len; 47b8e80941Smrg}; 48b8e80941Smrg 49b8e80941Smrgstatic void 50b8e80941Smrgcat(struct string *dest, const struct string src) 51b8e80941Smrg{ 52b8e80941Smrg dest->str = realloc(dest->str, dest->len + src.len + 1); 53b8e80941Smrg memcpy(dest->str + dest->len, src.str, src.len); 54b8e80941Smrg dest->str[dest->len + src.len] = '\0'; 55b8e80941Smrg dest->len = dest->len + src.len; 56b8e80941Smrg} 57b8e80941Smrg#define CAT(dest, src) cat(&dest, (struct string){src, strlen(src)}) 58b8e80941Smrg 59b8e80941Smrgstatic bool 60b8e80941Smrgcontains(const struct string haystack, const struct string needle) 61b8e80941Smrg{ 62b8e80941Smrg return haystack.str && memmem(haystack.str, haystack.len, 63b8e80941Smrg needle.str, needle.len) != NULL; 64b8e80941Smrg} 65b8e80941Smrg#define CONTAINS(haystack, needle) \ 66b8e80941Smrg contains(haystack, (struct string){needle, strlen(needle)}) 67b8e80941Smrg 68b8e80941Smrg#define error(str) "\tERROR: " str "\n" 69b8e80941Smrg#define ERROR_INDENT "\t " 70b8e80941Smrg 71b8e80941Smrg#define ERROR(msg) ERROR_IF(true, msg) 72b8e80941Smrg#define ERROR_IF(cond, msg) \ 73b8e80941Smrg do { \ 74b8e80941Smrg if ((cond) && !CONTAINS(error_msg, error(msg))) { \ 75b8e80941Smrg CAT(error_msg, error(msg)); \ 76b8e80941Smrg 
} \ 77b8e80941Smrg } while(0) 78b8e80941Smrg 79b8e80941Smrg#define CHECK(func, args...) \ 80b8e80941Smrg do { \ 81b8e80941Smrg struct string __msg = func(devinfo, inst, ##args); \ 82b8e80941Smrg if (__msg.str) { \ 83b8e80941Smrg cat(&error_msg, __msg); \ 84b8e80941Smrg free(__msg.str); \ 85b8e80941Smrg } \ 86b8e80941Smrg } while (0) 87b8e80941Smrg 88b8e80941Smrg#define STRIDE(stride) (stride != 0 ? 1 << ((stride) - 1) : 0) 89b8e80941Smrg#define WIDTH(width) (1 << (width)) 90b8e80941Smrg 91b8e80941Smrgstatic bool 92b8e80941Smrginst_is_send(const struct gen_device_info *devinfo, const brw_inst *inst) 93b8e80941Smrg{ 94b8e80941Smrg switch (brw_inst_opcode(devinfo, inst)) { 95b8e80941Smrg case BRW_OPCODE_SEND: 96b8e80941Smrg case BRW_OPCODE_SENDC: 97b8e80941Smrg case BRW_OPCODE_SENDS: 98b8e80941Smrg case BRW_OPCODE_SENDSC: 99b8e80941Smrg return true; 100b8e80941Smrg default: 101b8e80941Smrg return false; 102b8e80941Smrg } 103b8e80941Smrg} 104b8e80941Smrg 105b8e80941Smrgstatic bool 106b8e80941Smrginst_is_split_send(const struct gen_device_info *devinfo, const brw_inst *inst) 107b8e80941Smrg{ 108b8e80941Smrg switch (brw_inst_opcode(devinfo, inst)) { 109b8e80941Smrg case BRW_OPCODE_SENDS: 110b8e80941Smrg case BRW_OPCODE_SENDSC: 111b8e80941Smrg return true; 112b8e80941Smrg default: 113b8e80941Smrg return false; 114b8e80941Smrg } 115b8e80941Smrg} 116b8e80941Smrg 117b8e80941Smrgstatic unsigned 118b8e80941Smrgsigned_type(unsigned type) 119b8e80941Smrg{ 120b8e80941Smrg switch (type) { 121b8e80941Smrg case BRW_REGISTER_TYPE_UD: return BRW_REGISTER_TYPE_D; 122b8e80941Smrg case BRW_REGISTER_TYPE_UW: return BRW_REGISTER_TYPE_W; 123b8e80941Smrg case BRW_REGISTER_TYPE_UB: return BRW_REGISTER_TYPE_B; 124b8e80941Smrg case BRW_REGISTER_TYPE_UQ: return BRW_REGISTER_TYPE_Q; 125b8e80941Smrg default: return type; 126b8e80941Smrg } 127b8e80941Smrg} 128b8e80941Smrg 129b8e80941Smrgstatic bool 130b8e80941Smrginst_is_raw_move(const struct gen_device_info *devinfo, const brw_inst *inst) 
131b8e80941Smrg{ 132b8e80941Smrg unsigned dst_type = signed_type(brw_inst_dst_type(devinfo, inst)); 133b8e80941Smrg unsigned src_type = signed_type(brw_inst_src0_type(devinfo, inst)); 134b8e80941Smrg 135b8e80941Smrg if (brw_inst_src0_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE) { 136b8e80941Smrg /* FIXME: not strictly true */ 137b8e80941Smrg if (brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_VF || 138b8e80941Smrg brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_UV || 139b8e80941Smrg brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_V) { 140b8e80941Smrg return false; 141b8e80941Smrg } 142b8e80941Smrg } else if (brw_inst_src0_negate(devinfo, inst) || 143b8e80941Smrg brw_inst_src0_abs(devinfo, inst)) { 144b8e80941Smrg return false; 145b8e80941Smrg } 146b8e80941Smrg 147b8e80941Smrg return brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MOV && 148b8e80941Smrg brw_inst_saturate(devinfo, inst) == 0 && 149b8e80941Smrg dst_type == src_type; 150b8e80941Smrg} 151b8e80941Smrg 152b8e80941Smrgstatic bool 153b8e80941Smrgdst_is_null(const struct gen_device_info *devinfo, const brw_inst *inst) 154b8e80941Smrg{ 155b8e80941Smrg return brw_inst_dst_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE && 156b8e80941Smrg brw_inst_dst_da_reg_nr(devinfo, inst) == BRW_ARF_NULL; 157b8e80941Smrg} 158b8e80941Smrg 159b8e80941Smrgstatic bool 160b8e80941Smrgsrc0_is_null(const struct gen_device_info *devinfo, const brw_inst *inst) 161b8e80941Smrg{ 162b8e80941Smrg return brw_inst_src0_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE && 163b8e80941Smrg brw_inst_src0_da_reg_nr(devinfo, inst) == BRW_ARF_NULL; 164b8e80941Smrg} 165b8e80941Smrg 166b8e80941Smrgstatic bool 167b8e80941Smrgsrc1_is_null(const struct gen_device_info *devinfo, const brw_inst *inst) 168b8e80941Smrg{ 169b8e80941Smrg return brw_inst_src1_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE && 170b8e80941Smrg brw_inst_src1_da_reg_nr(devinfo, inst) == BRW_ARF_NULL; 171b8e80941Smrg} 

/* Source 0 reads an accumulator register.  The low bits of the ARF
 * register number are the accumulator index, so they are masked off
 * before comparing against BRW_ARF_ACCUMULATOR.
 */
static bool
src0_is_acc(const struct gen_device_info *devinfo, const brw_inst *inst)
{
   return brw_inst_src0_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
          (brw_inst_src0_da_reg_nr(devinfo, inst) & 0xF0) == BRW_ARF_ACCUMULATOR;
}

static bool
src1_is_acc(const struct gen_device_info *devinfo, const brw_inst *inst)
{
   return brw_inst_src1_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
          (brw_inst_src1_da_reg_nr(devinfo, inst) & 0xF0) == BRW_ARF_ACCUMULATOR;
}

static bool
src0_is_grf(const struct gen_device_info *devinfo, const brw_inst *inst)
{
   return brw_inst_src0_reg_file(devinfo, inst) == BRW_GENERAL_REGISTER_FILE;
}

/* A <0;1,0> region: the same element is replicated to every channel. */
static bool
src0_has_scalar_region(const struct gen_device_info *devinfo, const brw_inst *inst)
{
   return brw_inst_src0_vstride(devinfo, inst) == BRW_VERTICAL_STRIDE_0 &&
          brw_inst_src0_width(devinfo, inst) == BRW_WIDTH_1 &&
          brw_inst_src0_hstride(devinfo, inst) == BRW_HORIZONTAL_STRIDE_0;
}

static bool
src1_has_scalar_region(const struct gen_device_info *devinfo, const brw_inst *inst)
{
   return brw_inst_src1_vstride(devinfo, inst) == BRW_VERTICAL_STRIDE_0 &&
          brw_inst_src1_width(devinfo, inst) == BRW_WIDTH_1 &&
          brw_inst_src1_hstride(devinfo, inst) == BRW_HORIZONTAL_STRIDE_0;
}

/* Number of sources the instruction actually reads.  Usually this is the
 * opcode table's nsrc, but MATH depends on the math function, and pre-Gen6
 * SEND-based math is special-cased.
 */
static unsigned
num_sources_from_inst(const struct gen_device_info *devinfo,
                      const brw_inst *inst)
{
   const struct opcode_desc *desc =
      brw_opcode_desc(devinfo, brw_inst_opcode(devinfo, inst));
   unsigned math_function;

   if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MATH) {
      math_function = brw_inst_math_function(devinfo, inst);
   } else if (devinfo->gen < 6 &&
              brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND) {
      if (brw_inst_sfid(devinfo, inst) == BRW_SFID_MATH) {
         /* src1 must be a descriptor (including the information to determine
          * that the SEND is doing an extended math operation), but src0 can
          * actually be null since it serves as the source of the implicit GRF
          * to MRF move.
          *
          * If we stop using that functionality, we'll have to revisit this.
          */
         return 2;
      } else {
         /* Send instructions are allowed to have null sources since they use
          * the base_mrf field to specify which message register source.
          */
         return 0;
      }
   } else {
      assert(desc->nsrc < 4);
      return desc->nsrc;
   }

   /* Only reached for BRW_OPCODE_MATH: source count per math function. */
   switch (math_function) {
   case BRW_MATH_FUNCTION_INV:
   case BRW_MATH_FUNCTION_LOG:
   case BRW_MATH_FUNCTION_EXP:
   case BRW_MATH_FUNCTION_SQRT:
   case BRW_MATH_FUNCTION_RSQ:
   case BRW_MATH_FUNCTION_SIN:
   case BRW_MATH_FUNCTION_COS:
   case BRW_MATH_FUNCTION_SINCOS:
   case GEN8_MATH_FUNCTION_INVM:
   case GEN8_MATH_FUNCTION_RSQRTM:
      return 1;
   case BRW_MATH_FUNCTION_FDIV:
   case BRW_MATH_FUNCTION_POW:
   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT:
   case BRW_MATH_FUNCTION_INT_DIV_REMAINDER:
      return 2;
   default:
      unreachable("not reached");
   }
}

/* Checker: sources must not be the null register, except where the
 * encoding cannot express a file or null is architecturally allowed.
 */
static struct string
sources_not_null(const struct gen_device_info *devinfo,
                 const brw_inst *inst)
{
   unsigned num_sources = num_sources_from_inst(devinfo, inst);
   struct string error_msg = { .str = NULL, .len = 0 };

   /* Nothing to test. 3-src instructions can only have GRF sources, and
    * there's no bit to control the file.
    */
   if (num_sources == 3)
      return (struct string){};

   /* Nothing to test. Split sends can only encode a file in sources that are
    * allowed to be NULL.
    */
   if (inst_is_split_send(devinfo, inst))
      return (struct string){};

   if (num_sources >= 1)
      ERROR_IF(src0_is_null(devinfo, inst), "src0 is null");

   if (num_sources == 2)
      ERROR_IF(src1_is_null(devinfo, inst), "src1 is null");

   return error_msg;
}

/* Checker: Align16 access mode was removed on Gen11+. */
static struct string
alignment_supported(const struct gen_device_info *devinfo,
                    const brw_inst *inst)
{
   struct string error_msg = { .str = NULL, .len = 0 };

   ERROR_IF(devinfo->gen >= 11 && brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16,
            "Align16 not supported");

   return error_msg;
}

/* Does the instruction read the accumulator, either implicitly (by
 * opcode) or explicitly through a source operand?
 */
static bool
inst_uses_src_acc(const struct gen_device_info *devinfo, const brw_inst *inst)
{
   /* Check instructions that use implicit accumulator sources */
   switch (brw_inst_opcode(devinfo, inst)) {
   case BRW_OPCODE_MAC:
   case BRW_OPCODE_MACH:
   case BRW_OPCODE_SADA2:
      return true;
   }

   /* FIXME: support 3-src instructions */
   unsigned num_sources = num_sources_from_inst(devinfo, inst);
   assert(num_sources < 3);

   return src0_is_acc(devinfo, inst) || (num_sources > 1 && src1_is_acc(devinfo, inst));
}

/* Checker: SEND-family restrictions — payload register files, EOT payload
 * placement (g112-g127), split-send payload overlap, direct addressing,
 * and the r127 return-address hazard on Gen8+.
 */
static struct string
send_restrictions(const struct gen_device_info *devinfo,
                  const brw_inst *inst)
{
   struct string error_msg = { .str = NULL, .len = 0 };

   if (inst_is_split_send(devinfo, inst)) {
      ERROR_IF(brw_inst_send_src1_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
               brw_inst_send_src1_reg_nr(devinfo, inst) != BRW_ARF_NULL,
               "src1 of split send must be a GRF or NULL");

      ERROR_IF(brw_inst_eot(devinfo, inst) &&
               brw_inst_src0_da_reg_nr(devinfo, inst) < 112,
               "send with EOT must use g112-g127");
      ERROR_IF(brw_inst_eot(devinfo, inst) &&
               brw_inst_send_src1_reg_file(devinfo, inst) == BRW_GENERAL_REGISTER_FILE &&
               brw_inst_send_src1_reg_nr(devinfo, inst) < 112,
               "send with EOT must use g112-g127");

      if (brw_inst_send_src1_reg_file(devinfo, inst) == BRW_GENERAL_REGISTER_FILE) {
         /* Assume minimums if we don't know */
         unsigned mlen = 1;
         if (!brw_inst_send_sel_reg32_desc(devinfo, inst)) {
            const uint32_t desc = brw_inst_send_desc(devinfo, inst);
            mlen = brw_message_desc_mlen(devinfo, desc);
         }

         unsigned ex_mlen = 1;
         if (!brw_inst_send_sel_reg32_ex_desc(devinfo, inst)) {
            const uint32_t ex_desc = brw_inst_send_ex_desc(devinfo, inst);
            ex_mlen = brw_message_ex_desc_ex_mlen(devinfo, ex_desc);
         }
         const unsigned src0_reg_nr = brw_inst_src0_da_reg_nr(devinfo, inst);
         const unsigned src1_reg_nr = brw_inst_send_src1_reg_nr(devinfo, inst);
         ERROR_IF((src0_reg_nr <= src1_reg_nr &&
                   src1_reg_nr < src0_reg_nr + mlen) ||
                  (src1_reg_nr <= src0_reg_nr &&
                   src0_reg_nr < src1_reg_nr + ex_mlen),
                  "split send payloads must not overlap");
      }
   } else if (inst_is_send(devinfo, inst)) {
      ERROR_IF(brw_inst_src0_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT,
               "send must use direct addressing");

      if (devinfo->gen >= 7) {
         ERROR_IF(!src0_is_grf(devinfo, inst), "send from non-GRF");
         ERROR_IF(brw_inst_eot(devinfo, inst) &&
                  brw_inst_src0_da_reg_nr(devinfo, inst) < 112,
                  "send with EOT must use g112-g127");
      }

      if (devinfo->gen >= 8) {
         ERROR_IF(!dst_is_null(devinfo, inst) &&
                  (brw_inst_dst_da_reg_nr(devinfo, inst) +
                   brw_inst_rlen(devinfo, inst) > 127) &&
                  (brw_inst_src0_da_reg_nr(devinfo, inst) +
                   brw_inst_mlen(devinfo, inst) >
                   brw_inst_dst_da_reg_nr(devinfo, inst)),
                  "r127 must not be used for return address when there is "
                  "a src and dest overlap");
      }
   }

   return error_msg;
}

/* An opcode with no entry in the opcode table is unknown/unsupported. */
static bool
is_unsupported_inst(const struct gen_device_info *devinfo,
                    const brw_inst *inst)
{
   return brw_opcode_desc(devinfo, brw_inst_opcode(devinfo, inst)) == NULL;
}

/**
 * Returns whether a combination of two types would qualify as mixed float
 * operation mode
 */
static inline bool
types_are_mixed_float(enum brw_reg_type t0, enum brw_reg_type t1)
{
   return (t0 == BRW_REGISTER_TYPE_F && t1 == BRW_REGISTER_TYPE_HF) ||
          (t1 == BRW_REGISTER_TYPE_F && t0 == BRW_REGISTER_TYPE_HF);
}

/* Fold a register type down to the type the EU actually computes in:
 * floats stay themselves, VF computes as F, and integers collapse to the
 * signed type of their size class (Q, D, or W).
 */
static enum brw_reg_type
execution_type_for_type(enum brw_reg_type type)
{
   switch (type) {
   case BRW_REGISTER_TYPE_NF:
   case BRW_REGISTER_TYPE_DF:
   case BRW_REGISTER_TYPE_F:
   case BRW_REGISTER_TYPE_HF:
      return type;

   case BRW_REGISTER_TYPE_VF:
      return BRW_REGISTER_TYPE_F;

   case BRW_REGISTER_TYPE_Q:
   case BRW_REGISTER_TYPE_UQ:
      return BRW_REGISTER_TYPE_Q;

   case BRW_REGISTER_TYPE_D:
   case BRW_REGISTER_TYPE_UD:
      return BRW_REGISTER_TYPE_D;

   case BRW_REGISTER_TYPE_W:
   case BRW_REGISTER_TYPE_UW:
   case BRW_REGISTER_TYPE_B:
   case BRW_REGISTER_TYPE_UB:
   case BRW_REGISTER_TYPE_V:
   case BRW_REGISTER_TYPE_UV:
      return BRW_REGISTER_TYPE_W;
   }
   unreachable("not reached");
}

/**
 * Returns the execution type of an instruction \p inst
 */
static enum brw_reg_type
execution_type(const struct gen_device_info *devinfo, const brw_inst *inst)
{
   unsigned num_sources = num_sources_from_inst(devinfo, inst);
   enum brw_reg_type src0_exec_type, src1_exec_type;

   /* Execution data type is independent of destination data type, except in
    * mixed F/HF instructions.
    */
   enum brw_reg_type dst_exec_type = brw_inst_dst_type(devinfo, inst);

   src0_exec_type = execution_type_for_type(brw_inst_src0_type(devinfo, inst));
   if (num_sources == 1) {
      if (src0_exec_type == BRW_REGISTER_TYPE_HF)
         return dst_exec_type;
      return src0_exec_type;
   }

   src1_exec_type = execution_type_for_type(brw_inst_src1_type(devinfo, inst));
   if (types_are_mixed_float(src0_exec_type, src1_exec_type) ||
       types_are_mixed_float(src0_exec_type, dst_exec_type) ||
       types_are_mixed_float(src1_exec_type, dst_exec_type)) {
      return BRW_REGISTER_TYPE_F;
   }

   if (src0_exec_type == src1_exec_type)
      return src0_exec_type;

   /* Mixed operand types where one is float is float on Gen < 6
    * (and not allowed on later platforms)
    */
   if (devinfo->gen < 6 &&
       (src0_exec_type == BRW_REGISTER_TYPE_F ||
        src1_exec_type == BRW_REGISTER_TYPE_F))
      return BRW_REGISTER_TYPE_F;

   if (src0_exec_type == BRW_REGISTER_TYPE_Q ||
       src1_exec_type == BRW_REGISTER_TYPE_Q)
      return BRW_REGISTER_TYPE_Q;

   if (src0_exec_type == BRW_REGISTER_TYPE_D ||
       src1_exec_type == BRW_REGISTER_TYPE_D)
      return BRW_REGISTER_TYPE_D;

   if (src0_exec_type == BRW_REGISTER_TYPE_W ||
       src1_exec_type == BRW_REGISTER_TYPE_W)
      return BRW_REGISTER_TYPE_W;

   if (src0_exec_type == BRW_REGISTER_TYPE_DF ||
       src1_exec_type == BRW_REGISTER_TYPE_DF)
      return BRW_REGISTER_TYPE_DF;

   unreachable("not reached");
}

/**
 * Returns whether a region is packed
 *
 * A region is packed if its elements are adjacent in memory, with no
 * intervening space, no overlap, and no replicated values.
 */
static bool
is_packed(unsigned vstride, unsigned width, unsigned hstride)
{
   if (vstride == width) {
      if (vstride == 1) {
         return hstride == 0;
      } else {
         return hstride == 1;
      }
   }

   return false;
}

/**
 * Returns whether an instruction is an explicit or implicit conversion
 * to/from half-float.
 */
static bool
is_half_float_conversion(const struct gen_device_info *devinfo,
                         const brw_inst *inst)
{
   enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);

   unsigned num_sources = num_sources_from_inst(devinfo, inst);
   enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);

   if (dst_type != src0_type &&
       (dst_type == BRW_REGISTER_TYPE_HF || src0_type == BRW_REGISTER_TYPE_HF)) {
      return true;
   } else if (num_sources > 1) {
      enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst);
      return dst_type != src1_type &&
            (dst_type == BRW_REGISTER_TYPE_HF ||
             src1_type == BRW_REGISTER_TYPE_HF);
   }

   return false;
}

/*
 * Returns whether an instruction is using mixed float operation mode
 */
static bool
is_mixed_float(const struct gen_device_info *devinfo, const brw_inst *inst)
{
   /* Mixed F/HF mode only exists on Gen8+, and sends don't participate. */
   if (devinfo->gen < 8)
      return false;

   if (inst_is_send(devinfo, inst))
      return false;

   unsigned opcode = brw_inst_opcode(devinfo, inst);
   const struct opcode_desc *desc = brw_opcode_desc(devinfo, opcode);
   if (desc->ndst == 0)
      return false;

   /* FIXME: support 3-src instructions */
   unsigned num_sources = num_sources_from_inst(devinfo, inst);
   assert(num_sources < 3);

   enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);
   enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);

   if (num_sources == 1)
      return types_are_mixed_float(src0_type, dst_type);

   enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst);

   return types_are_mixed_float(src0_type, src1_type) ||
          types_are_mixed_float(src0_type, dst_type) ||
          types_are_mixed_float(src1_type, dst_type);
}

/**
 * Returns whether an instruction is an explicit or implicit conversion
 * to/from byte.
 */
static bool
is_byte_conversion(const struct gen_device_info *devinfo,
                   const brw_inst *inst)
{
   enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);

   unsigned num_sources = num_sources_from_inst(devinfo, inst);
   enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);

   if (dst_type != src0_type &&
       (type_sz(dst_type) == 1 || type_sz(src0_type) == 1)) {
      return true;
   } else if (num_sources > 1) {
      enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst);
      return dst_type != src1_type &&
            (type_sz(dst_type) == 1 || type_sz(src1_type) == 1);
   }

   return false;
}

/**
 * Checks restrictions listed in "General Restrictions Based on Operand Types"
 * in the "Register Region Restrictions" section.
604b8e80941Smrg */ 605b8e80941Smrgstatic struct string 606b8e80941Smrggeneral_restrictions_based_on_operand_types(const struct gen_device_info *devinfo, 607b8e80941Smrg const brw_inst *inst) 608b8e80941Smrg{ 609b8e80941Smrg const struct opcode_desc *desc = 610b8e80941Smrg brw_opcode_desc(devinfo, brw_inst_opcode(devinfo, inst)); 611b8e80941Smrg unsigned num_sources = num_sources_from_inst(devinfo, inst); 612b8e80941Smrg unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst); 613b8e80941Smrg struct string error_msg = { .str = NULL, .len = 0 }; 614b8e80941Smrg 615b8e80941Smrg if (devinfo->gen >= 11) { 616b8e80941Smrg if (num_sources == 3) { 617b8e80941Smrg ERROR_IF(brw_reg_type_to_size(brw_inst_3src_a1_src1_type(devinfo, inst)) == 1 || 618b8e80941Smrg brw_reg_type_to_size(brw_inst_3src_a1_src2_type(devinfo, inst)) == 1, 619b8e80941Smrg "Byte data type is not supported for src1/2 register regioning. This includes " 620b8e80941Smrg "byte broadcast as well."); 621b8e80941Smrg } 622b8e80941Smrg if (num_sources == 2) { 623b8e80941Smrg ERROR_IF(brw_reg_type_to_size(brw_inst_src1_type(devinfo, inst)) == 1, 624b8e80941Smrg "Byte data type is not supported for src1 register regioning. This includes " 625b8e80941Smrg "byte broadcast as well."); 626b8e80941Smrg } 627b8e80941Smrg } 628b8e80941Smrg 629b8e80941Smrg if (num_sources == 3) 630b8e80941Smrg return error_msg; 631b8e80941Smrg 632b8e80941Smrg if (inst_is_send(devinfo, inst)) 633b8e80941Smrg return error_msg; 634b8e80941Smrg 635b8e80941Smrg if (exec_size == 1) 636b8e80941Smrg return error_msg; 637b8e80941Smrg 638b8e80941Smrg if (desc->ndst == 0) 639b8e80941Smrg return error_msg; 640b8e80941Smrg 641b8e80941Smrg /* The PRMs say: 642b8e80941Smrg * 643b8e80941Smrg * Where n is the largest element size in bytes for any source or 644b8e80941Smrg * destination operand type, ExecSize * n must be <= 64. 
645b8e80941Smrg * 646b8e80941Smrg * But we do not attempt to enforce it, because it is implied by other 647b8e80941Smrg * rules: 648b8e80941Smrg * 649b8e80941Smrg * - that the destination stride must match the execution data type 650b8e80941Smrg * - sources may not span more than two adjacent GRF registers 651b8e80941Smrg * - destination may not span more than two adjacent GRF registers 652b8e80941Smrg * 653b8e80941Smrg * In fact, checking it would weaken testing of the other rules. 654b8e80941Smrg */ 655b8e80941Smrg 656b8e80941Smrg unsigned dst_stride = STRIDE(brw_inst_dst_hstride(devinfo, inst)); 657b8e80941Smrg enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst); 658b8e80941Smrg bool dst_type_is_byte = 659b8e80941Smrg brw_inst_dst_type(devinfo, inst) == BRW_REGISTER_TYPE_B || 660b8e80941Smrg brw_inst_dst_type(devinfo, inst) == BRW_REGISTER_TYPE_UB; 661b8e80941Smrg 662b8e80941Smrg if (dst_type_is_byte) { 663b8e80941Smrg if (is_packed(exec_size * dst_stride, exec_size, dst_stride)) { 664b8e80941Smrg if (!inst_is_raw_move(devinfo, inst)) 665b8e80941Smrg ERROR("Only raw MOV supports a packed-byte destination"); 666b8e80941Smrg return error_msg; 667b8e80941Smrg } 668b8e80941Smrg } 669b8e80941Smrg 670b8e80941Smrg unsigned exec_type = execution_type(devinfo, inst); 671b8e80941Smrg unsigned exec_type_size = brw_reg_type_to_size(exec_type); 672b8e80941Smrg unsigned dst_type_size = brw_reg_type_to_size(dst_type); 673b8e80941Smrg 674b8e80941Smrg /* On IVB/BYT, region parameters and execution size for DF are in terms of 675b8e80941Smrg * 32-bit elements, so they are doubled. For evaluating the validity of an 676b8e80941Smrg * instruction, we halve them. 
677b8e80941Smrg */ 678b8e80941Smrg if (devinfo->gen == 7 && !devinfo->is_haswell && 679b8e80941Smrg exec_type_size == 8 && dst_type_size == 4) 680b8e80941Smrg dst_type_size = 8; 681b8e80941Smrg 682b8e80941Smrg if (is_byte_conversion(devinfo, inst)) { 683b8e80941Smrg /* From the BDW+ PRM, Volume 2a, Command Reference, Instructions - MOV: 684b8e80941Smrg * 685b8e80941Smrg * "There is no direct conversion from B/UB to DF or DF to B/UB. 686b8e80941Smrg * There is no direct conversion from B/UB to Q/UQ or Q/UQ to B/UB." 687b8e80941Smrg * 688b8e80941Smrg * Even if these restrictions are listed for the MOV instruction, we 689b8e80941Smrg * validate this more generally, since there is the possibility 690b8e80941Smrg * of implicit conversions from other instructions. 691b8e80941Smrg */ 692b8e80941Smrg enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst); 693b8e80941Smrg enum brw_reg_type src1_type = num_sources > 1 ? 694b8e80941Smrg brw_inst_src1_type(devinfo, inst) : 0; 695b8e80941Smrg 696b8e80941Smrg ERROR_IF(type_sz(dst_type) == 1 && 697b8e80941Smrg (type_sz(src0_type) == 8 || 698b8e80941Smrg (num_sources > 1 && type_sz(src1_type) == 8)), 699b8e80941Smrg "There are no direct conversions between 64-bit types and B/UB"); 700b8e80941Smrg 701b8e80941Smrg ERROR_IF(type_sz(dst_type) == 8 && 702b8e80941Smrg (type_sz(src0_type) == 1 || 703b8e80941Smrg (num_sources > 1 && type_sz(src1_type) == 1)), 704b8e80941Smrg "There are no direct conversions between 64-bit types and B/UB"); 705b8e80941Smrg } 706b8e80941Smrg 707b8e80941Smrg if (is_half_float_conversion(devinfo, inst)) { 708b8e80941Smrg /** 709b8e80941Smrg * A helper to validate used in the validation of the following restriction 710b8e80941Smrg * from the BDW+ PRM, Volume 2a, Command Reference, Instructions - MOV: 711b8e80941Smrg * 712b8e80941Smrg * "There is no direct conversion from HF to DF or DF to HF. 713b8e80941Smrg * There is no direct conversion from HF to Q/UQ or Q/UQ to HF." 
714b8e80941Smrg * 715b8e80941Smrg * Even if these restrictions are listed for the MOV instruction, we 716b8e80941Smrg * validate this more generally, since there is the possibility 717b8e80941Smrg * of implicit conversions from other instructions, such us implicit 718b8e80941Smrg * conversion from integer to HF with the ADD instruction in SKL+. 719b8e80941Smrg */ 720b8e80941Smrg enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst); 721b8e80941Smrg enum brw_reg_type src1_type = num_sources > 1 ? 722b8e80941Smrg brw_inst_src1_type(devinfo, inst) : 0; 723b8e80941Smrg ERROR_IF(dst_type == BRW_REGISTER_TYPE_HF && 724b8e80941Smrg (type_sz(src0_type) == 8 || 725b8e80941Smrg (num_sources > 1 && type_sz(src1_type) == 8)), 726b8e80941Smrg "There are no direct conversions between 64-bit types and HF"); 727b8e80941Smrg 728b8e80941Smrg ERROR_IF(type_sz(dst_type) == 8 && 729b8e80941Smrg (src0_type == BRW_REGISTER_TYPE_HF || 730b8e80941Smrg (num_sources > 1 && src1_type == BRW_REGISTER_TYPE_HF)), 731b8e80941Smrg "There are no direct conversions between 64-bit types and HF"); 732b8e80941Smrg 733b8e80941Smrg /* From the BDW+ PRM: 734b8e80941Smrg * 735b8e80941Smrg * "Conversion between Integer and HF (Half Float) must be 736b8e80941Smrg * DWord-aligned and strided by a DWord on the destination." 737b8e80941Smrg * 738b8e80941Smrg * Also, the above restrictions seems to be expanded on CHV and SKL+ by: 739b8e80941Smrg * 740b8e80941Smrg * "There is a relaxed alignment rule for word destinations. When 741b8e80941Smrg * the destination type is word (UW, W, HF), destination data types 742b8e80941Smrg * can be aligned to either the lowest word or the second lowest 743b8e80941Smrg * word of the execution channel. This means the destination data 744b8e80941Smrg * words can be either all in the even word locations or all in the 745b8e80941Smrg * odd word locations." 
746b8e80941Smrg * 747b8e80941Smrg * We do not implement the second rule as is though, since empirical 748b8e80941Smrg * testing shows inconsistencies: 749b8e80941Smrg * - It suggests that packed 16-bit is not allowed, which is not true. 750b8e80941Smrg * - It suggests that conversions from Q/DF to W (which need to be 751b8e80941Smrg * 64-bit aligned on the destination) are not possible, which is 752b8e80941Smrg * not true. 753b8e80941Smrg * 754b8e80941Smrg * So from this rule we only validate the implication that conversions 755b8e80941Smrg * from F to HF need to be DWord strided (except in Align1 mixed 756b8e80941Smrg * float mode where packed fp16 destination is allowed so long as the 757b8e80941Smrg * destination is oword-aligned). 758b8e80941Smrg * 759b8e80941Smrg * Finally, we only validate this for Align1 because Align16 always 760b8e80941Smrg * requires packed destinations, so these restrictions can't possibly 761b8e80941Smrg * apply to Align16 mode. 762b8e80941Smrg */ 763b8e80941Smrg if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { 764b8e80941Smrg if ((dst_type == BRW_REGISTER_TYPE_HF && 765b8e80941Smrg (brw_reg_type_is_integer(src0_type) || 766b8e80941Smrg (num_sources > 1 && brw_reg_type_is_integer(src1_type)))) || 767b8e80941Smrg (brw_reg_type_is_integer(dst_type) && 768b8e80941Smrg (src0_type == BRW_REGISTER_TYPE_HF || 769b8e80941Smrg (num_sources > 1 && src1_type == BRW_REGISTER_TYPE_HF)))) { 770b8e80941Smrg ERROR_IF(dst_stride * dst_type_size != 4, 771b8e80941Smrg "Conversions between integer and half-float must be " 772b8e80941Smrg "strided by a DWord on the destination"); 773b8e80941Smrg 774b8e80941Smrg unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst); 775b8e80941Smrg ERROR_IF(subreg % 4 != 0, 776b8e80941Smrg "Conversions between integer and half-float must be " 777b8e80941Smrg "aligned to a DWord on the destination"); 778b8e80941Smrg } else if ((devinfo->is_cherryview || devinfo->gen >= 9) && 779b8e80941Smrg dst_type == 
BRW_REGISTER_TYPE_HF) { 780b8e80941Smrg unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst); 781b8e80941Smrg ERROR_IF(dst_stride != 2 && 782b8e80941Smrg !(is_mixed_float(devinfo, inst) && 783b8e80941Smrg dst_stride == 1 && subreg % 16 == 0), 784b8e80941Smrg "Conversions to HF must have either all words in even " 785b8e80941Smrg "word locations or all words in odd word locations or " 786b8e80941Smrg "be mixed-float with Oword-aligned packed destination"); 787b8e80941Smrg } 788b8e80941Smrg } 789b8e80941Smrg } 790b8e80941Smrg 791b8e80941Smrg /* There are special regioning rules for mixed-float mode in CHV and SKL that 792b8e80941Smrg * override the general rule for the ratio of sizes of the destination type 793b8e80941Smrg * and the execution type. We will add validation for those in a later patch. 794b8e80941Smrg */ 795b8e80941Smrg bool validate_dst_size_and_exec_size_ratio = 796b8e80941Smrg !is_mixed_float(devinfo, inst) || 797b8e80941Smrg !(devinfo->is_cherryview || devinfo->gen >= 9); 798b8e80941Smrg 799b8e80941Smrg if (validate_dst_size_and_exec_size_ratio && 800b8e80941Smrg exec_type_size > dst_type_size) { 801b8e80941Smrg if (!(dst_type_is_byte && inst_is_raw_move(devinfo, inst))) { 802b8e80941Smrg ERROR_IF(dst_stride * dst_type_size != exec_type_size, 803b8e80941Smrg "Destination stride must be equal to the ratio of the sizes " 804b8e80941Smrg "of the execution data type to the destination type"); 805b8e80941Smrg } 806b8e80941Smrg 807b8e80941Smrg unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst); 808b8e80941Smrg 809b8e80941Smrg if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1 && 810b8e80941Smrg brw_inst_dst_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT) { 811b8e80941Smrg /* The i965 PRM says: 812b8e80941Smrg * 813b8e80941Smrg * Implementation Restriction: The relaxed alignment rule for byte 814b8e80941Smrg * destination (#10.5) is not supported. 
          */
         if ((devinfo->gen > 4 || devinfo->is_g4x) && dst_type_is_byte) {
            ERROR_IF(subreg % exec_type_size != 0 &&
                     subreg % exec_type_size != 1,
                     "Destination subreg must be aligned to the size of the "
                     "execution data type (or to the next lowest byte for byte "
                     "destinations)");
         } else {
            ERROR_IF(subreg % exec_type_size != 0,
                     "Destination subreg must be aligned to the size of the "
                     "execution data type");
         }
      }
   }

   return error_msg;
}

/**
 * Checks restrictions listed in "General Restrictions on Regioning Parameters"
 * in the "Register Region Restrictions" section.
 */
static struct string
general_restrictions_on_region_parameters(const struct gen_device_info *devinfo,
                                          const brw_inst *inst)
{
   const struct opcode_desc *desc =
      brw_opcode_desc(devinfo, brw_inst_opcode(devinfo, inst));
   unsigned num_sources = num_sources_from_inst(devinfo, inst);
   unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst);
   struct string error_msg = { .str = NULL, .len = 0 };

   /* 3-src instructions use a different (always packed) regioning scheme, so
    * none of the checks below apply.
    */
   if (num_sources == 3)
      return (struct string){};

   /* Split sends don't have the bits in the instruction to encode regions so
    * there's nothing to check.
    */
   if (inst_is_split_send(devinfo, inst))
      return (struct string){};

   if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16) {
      if (desc->ndst != 0 && !dst_is_null(devinfo, inst))
         ERROR_IF(brw_inst_dst_hstride(devinfo, inst) != BRW_HORIZONTAL_STRIDE_1,
                  "Destination Horizontal Stride must be 1");

      if (num_sources >= 1) {
         if (devinfo->is_haswell || devinfo->gen >= 8) {
            ERROR_IF(brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE &&
                     brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 &&
                     brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_2 &&
                     brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
                     "In Align16 mode, only VertStride of 0, 2, or 4 is allowed");
         } else {
            ERROR_IF(brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE &&
                     brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 &&
                     brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
                     "In Align16 mode, only VertStride of 0 or 4 is allowed");
         }
      }

      if (num_sources == 2) {
         if (devinfo->is_haswell || devinfo->gen >= 8) {
            ERROR_IF(brw_inst_src1_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE &&
                     brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 &&
                     brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_2 &&
                     brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
                     "In Align16 mode, only VertStride of 0, 2, or 4 is allowed");
         } else {
            ERROR_IF(brw_inst_src1_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE &&
                     brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 &&
                     brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
                     "In Align16 mode, only VertStride of 0 or 4 is allowed");
         }
      }

      return error_msg;
   }

   /* Align1 mode: validate the explicit <vstride;width,hstride> region of
    * each source.
    */
   for (unsigned i = 0; i < num_sources; i++) {
      unsigned vstride, width, hstride, element_size, subreg;
      enum brw_reg_type type;

/* Load the region parameters of source n; immediates have no region, so
 * skip them entirely (the 'continue' targets the enclosing source loop).
 */
#define DO_SRC(n)                                                      \
      if (brw_inst_src ## n ## _reg_file(devinfo, inst) ==             \
          BRW_IMMEDIATE_VALUE)                                         \
         continue;                                                     \
                                                                       \
      vstride = STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst));  \
      width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst));       \
      hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst));  \
      type = brw_inst_src ## n ## _type(devinfo, inst);                \
      element_size = brw_reg_type_to_size(type);                       \
      subreg = brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst)

      if (i == 0) {
         DO_SRC(0);
      } else {
         DO_SRC(1);
      }
#undef DO_SRC

      /* On IVB/BYT, region parameters and execution size for DF are in terms of
       * 32-bit elements, so they are doubled. For evaluating the validity of an
       * instruction, we halve them.
       */
      if (devinfo->gen == 7 && !devinfo->is_haswell &&
          element_size == 8)
         element_size = 4;

      /* ExecSize must be greater than or equal to Width. */
      ERROR_IF(exec_size < width, "ExecSize must be greater than or equal "
               "to Width");

      /* If ExecSize = Width and HorzStride ≠ 0,
       * VertStride must be set to Width * HorzStride.
       */
      if (exec_size == width && hstride != 0) {
         ERROR_IF(vstride != width * hstride,
                  "If ExecSize = Width and HorzStride ≠ 0, "
                  "VertStride must be set to Width * HorzStride");
      }

      /* If Width = 1, HorzStride must be 0 regardless of the values of
       * ExecSize and VertStride.
       */
      if (width == 1) {
         ERROR_IF(hstride != 0,
                  "If Width = 1, HorzStride must be 0 regardless "
                  "of the values of ExecSize and VertStride");
      }

      /* If ExecSize = Width = 1, both VertStride and HorzStride must be 0. */
      if (exec_size == 1 && width == 1) {
         ERROR_IF(vstride != 0 || hstride != 0,
                  "If ExecSize = Width = 1, both VertStride "
                  "and HorzStride must be 0");
      }

      /* If VertStride = HorzStride = 0, Width must be 1 regardless of the
       * value of ExecSize.
       */
      if (vstride == 0 && hstride == 0) {
         ERROR_IF(width != 1,
                  "If VertStride = HorzStride = 0, Width must be "
                  "1 regardless of the value of ExecSize");
      }

      /* VertStride must be used to cross GRF register boundaries. This rule
       * implies that elements within a 'Width' cannot cross GRF boundaries.
       */
      const uint64_t mask = (1ULL << element_size) - 1;
      unsigned rowbase = subreg;

      for (int y = 0; y < exec_size / width; y++) {
         uint64_t access_mask = 0;
         unsigned offset = rowbase;

         for (int x = 0; x < width; x++) {
            access_mask |= mask << offset;
            offset += hstride * element_size;
         }

         rowbase += vstride * element_size;

         /* A row touching bytes both below and above byte 32 spans a GRF
          * boundary within a single Width.
          */
         if ((uint32_t)access_mask != 0 && (access_mask >> 32) != 0) {
            ERROR("VertStride must be used to cross GRF register boundaries");
            break;
         }
      }
   }

   /* Dst.HorzStride must not be 0. */
   if (desc->ndst != 0 && !dst_is_null(devinfo, inst)) {
      ERROR_IF(brw_inst_dst_hstride(devinfo, inst) == BRW_HORIZONTAL_STRIDE_0,
               "Destination Horizontal Stride must not be 0");
   }

   return error_msg;
}

/**
 * Checks restrictions listed in "Special Restrictions for Handling Mixed Mode
 * Float Operations" (quoted throughout the body below from the SKL PRM).
 */
static struct string
special_restrictions_for_mixed_float_mode(const struct gen_device_info *devinfo,
                                          const brw_inst *inst)
{
   struct string error_msg = { .str = NULL, .len = 0 };

   const unsigned opcode = brw_inst_opcode(devinfo, inst);
   const unsigned num_sources = num_sources_from_inst(devinfo, inst);
   if (num_sources >= 3)
      return error_msg;

   if (!is_mixed_float(devinfo, inst))
      return error_msg;

   unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst);
   bool is_align16 = brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16;

1013b8e80941Smrg enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst); 1014b8e80941Smrg enum brw_reg_type src1_type = num_sources > 1 ? 1015b8e80941Smrg brw_inst_src1_type(devinfo, inst) : 0; 1016b8e80941Smrg enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst); 1017b8e80941Smrg 1018b8e80941Smrg unsigned dst_stride = STRIDE(brw_inst_dst_hstride(devinfo, inst)); 1019b8e80941Smrg bool dst_is_packed = is_packed(exec_size * dst_stride, exec_size, dst_stride); 1020b8e80941Smrg 1021b8e80941Smrg /* From the SKL PRM, Special Restrictions for Handling Mixed Mode 1022b8e80941Smrg * Float Operations: 1023b8e80941Smrg * 1024b8e80941Smrg * "Indirect addressing on source is not supported when source and 1025b8e80941Smrg * destination data types are mixed float." 1026b8e80941Smrg */ 1027b8e80941Smrg ERROR_IF(brw_inst_src0_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT || 1028b8e80941Smrg (num_sources > 1 && 1029b8e80941Smrg brw_inst_src1_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT), 1030b8e80941Smrg "Indirect addressing on source is not supported when source and " 1031b8e80941Smrg "destination data types are mixed float"); 1032b8e80941Smrg 1033b8e80941Smrg /* From the SKL PRM, Special Restrictions for Handling Mixed Mode 1034b8e80941Smrg * Float Operations: 1035b8e80941Smrg * 1036b8e80941Smrg * "No SIMD16 in mixed mode when destination is f32. Instruction 1037b8e80941Smrg * execution size must be no more than 8." 
1038b8e80941Smrg */ 1039b8e80941Smrg ERROR_IF(exec_size > 8 && dst_type == BRW_REGISTER_TYPE_F, 1040b8e80941Smrg "Mixed float mode with 32-bit float destination is limited " 1041b8e80941Smrg "to SIMD8"); 1042b8e80941Smrg 1043b8e80941Smrg if (is_align16) { 1044b8e80941Smrg /* From the SKL PRM, Special Restrictions for Handling Mixed Mode 1045b8e80941Smrg * Float Operations: 1046b8e80941Smrg * 1047b8e80941Smrg * "In Align16 mode, when half float and float data types are mixed 1048b8e80941Smrg * between source operands OR between source and destination operands, 1049b8e80941Smrg * the register content are assumed to be packed." 1050b8e80941Smrg * 1051b8e80941Smrg * Since Align16 doesn't have a concept of horizontal stride (or width), 1052b8e80941Smrg * it means that vertical stride must always be 4, since 0 and 2 would 1053b8e80941Smrg * lead to replicated data, and any other value is disallowed in Align16. 1054b8e80941Smrg */ 1055b8e80941Smrg ERROR_IF(brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4, 1056b8e80941Smrg "Align16 mixed float mode assumes packed data (vstride must be 4"); 1057b8e80941Smrg 1058b8e80941Smrg ERROR_IF(num_sources >= 2 && 1059b8e80941Smrg brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4, 1060b8e80941Smrg "Align16 mixed float mode assumes packed data (vstride must be 4"); 1061b8e80941Smrg 1062b8e80941Smrg /* From the SKL PRM, Special Restrictions for Handling Mixed Mode 1063b8e80941Smrg * Float Operations: 1064b8e80941Smrg * 1065b8e80941Smrg * "For Align16 mixed mode, both input and output packed f16 data 1066b8e80941Smrg * must be oword aligned, no oword crossing in packed f16." 1067b8e80941Smrg * 1068b8e80941Smrg * The previous rule requires that Align16 operands are always packed, 1069b8e80941Smrg * and since there is only one bit for Align16 subnr, which represents 1070b8e80941Smrg * offsets 0B and 16B, this rule is always enforced and we don't need to 1071b8e80941Smrg * validate it. 
1072b8e80941Smrg */ 1073b8e80941Smrg 1074b8e80941Smrg /* From the SKL PRM, Special Restrictions for Handling Mixed Mode 1075b8e80941Smrg * Float Operations: 1076b8e80941Smrg * 1077b8e80941Smrg * "No SIMD16 in mixed mode when destination is packed f16 for both 1078b8e80941Smrg * Align1 and Align16." 1079b8e80941Smrg * 1080b8e80941Smrg * And: 1081b8e80941Smrg * 1082b8e80941Smrg * "In Align16 mode, when half float and float data types are mixed 1083b8e80941Smrg * between source operands OR between source and destination operands, 1084b8e80941Smrg * the register content are assumed to be packed." 1085b8e80941Smrg * 1086b8e80941Smrg * Which implies that SIMD16 is not available in Align16. This is further 1087b8e80941Smrg * confirmed by: 1088b8e80941Smrg * 1089b8e80941Smrg * "For Align16 mixed mode, both input and output packed f16 data 1090b8e80941Smrg * must be oword aligned, no oword crossing in packed f16" 1091b8e80941Smrg * 1092b8e80941Smrg * Since oword-aligned packed f16 data would cross oword boundaries when 1093b8e80941Smrg * the execution size is larger than 8. 1094b8e80941Smrg */ 1095b8e80941Smrg ERROR_IF(exec_size > 8, "Align16 mixed float mode is limited to SIMD8"); 1096b8e80941Smrg 1097b8e80941Smrg /* From the SKL PRM, Special Restrictions for Handling Mixed Mode 1098b8e80941Smrg * Float Operations: 1099b8e80941Smrg * 1100b8e80941Smrg * "No accumulator read access for Align16 mixed float." 1101b8e80941Smrg */ 1102b8e80941Smrg ERROR_IF(inst_uses_src_acc(devinfo, inst), 1103b8e80941Smrg "No accumulator read access for Align16 mixed float"); 1104b8e80941Smrg } else { 1105b8e80941Smrg assert(!is_align16); 1106b8e80941Smrg 1107b8e80941Smrg /* From the SKL PRM, Special Restrictions for Handling Mixed Mode 1108b8e80941Smrg * Float Operations: 1109b8e80941Smrg * 1110b8e80941Smrg * "No SIMD16 in mixed mode when destination is packed f16 for both 1111b8e80941Smrg * Align1 and Align16." 
1112b8e80941Smrg */ 1113b8e80941Smrg ERROR_IF(exec_size > 8 && dst_is_packed && 1114b8e80941Smrg dst_type == BRW_REGISTER_TYPE_HF, 1115b8e80941Smrg "Align1 mixed float mode is limited to SIMD8 when destination " 1116b8e80941Smrg "is packed half-float"); 1117b8e80941Smrg 1118b8e80941Smrg /* From the SKL PRM, Special Restrictions for Handling Mixed Mode 1119b8e80941Smrg * Float Operations: 1120b8e80941Smrg * 1121b8e80941Smrg * "Math operations for mixed mode: 1122b8e80941Smrg * - In Align1, f16 inputs need to be strided" 1123b8e80941Smrg */ 1124b8e80941Smrg if (opcode == BRW_OPCODE_MATH) { 1125b8e80941Smrg if (src0_type == BRW_REGISTER_TYPE_HF) { 1126b8e80941Smrg ERROR_IF(STRIDE(brw_inst_src0_hstride(devinfo, inst)) <= 1, 1127b8e80941Smrg "Align1 mixed mode math needs strided half-float inputs"); 1128b8e80941Smrg } 1129b8e80941Smrg 1130b8e80941Smrg if (num_sources >= 2 && src1_type == BRW_REGISTER_TYPE_HF) { 1131b8e80941Smrg ERROR_IF(STRIDE(brw_inst_src1_hstride(devinfo, inst)) <= 1, 1132b8e80941Smrg "Align1 mixed mode math needs strided half-float inputs"); 1133b8e80941Smrg } 1134b8e80941Smrg } 1135b8e80941Smrg 1136b8e80941Smrg if (dst_type == BRW_REGISTER_TYPE_HF && dst_stride == 1) { 1137b8e80941Smrg /* From the SKL PRM, Special Restrictions for Handling Mixed Mode 1138b8e80941Smrg * Float Operations: 1139b8e80941Smrg * 1140b8e80941Smrg * "In Align1, destination stride can be smaller than execution 1141b8e80941Smrg * type. When destination is stride of 1, 16 bit packed data is 1142b8e80941Smrg * updated on the destination. However, output packed f16 data 1143b8e80941Smrg * must be oword aligned, no oword crossing in packed f16." 1144b8e80941Smrg * 1145b8e80941Smrg * The requirement of not crossing oword boundaries for 16-bit oword 1146b8e80941Smrg * aligned data means that execution size is limited to 8. 
1147b8e80941Smrg */ 1148b8e80941Smrg unsigned subreg; 1149b8e80941Smrg if (brw_inst_dst_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT) 1150b8e80941Smrg subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst); 1151b8e80941Smrg else 1152b8e80941Smrg subreg = brw_inst_dst_ia_subreg_nr(devinfo, inst); 1153b8e80941Smrg ERROR_IF(subreg % 16 != 0, 1154b8e80941Smrg "Align1 mixed mode packed half-float output must be " 1155b8e80941Smrg "oword aligned"); 1156b8e80941Smrg ERROR_IF(exec_size > 8, 1157b8e80941Smrg "Align1 mixed mode packed half-float output must not " 1158b8e80941Smrg "cross oword boundaries (max exec size is 8)"); 1159b8e80941Smrg 1160b8e80941Smrg /* From the SKL PRM, Special Restrictions for Handling Mixed Mode 1161b8e80941Smrg * Float Operations: 1162b8e80941Smrg * 1163b8e80941Smrg * "When source is float or half float from accumulator register and 1164b8e80941Smrg * destination is half float with a stride of 1, the source must 1165b8e80941Smrg * register aligned. i.e., source must have offset zero." 1166b8e80941Smrg * 1167b8e80941Smrg * Align16 mixed float mode doesn't allow accumulator access on sources, 1168b8e80941Smrg * so we only need to check this for Align1. 
1169b8e80941Smrg */ 1170b8e80941Smrg if (src0_is_acc(devinfo, inst) && 1171b8e80941Smrg (src0_type == BRW_REGISTER_TYPE_F || 1172b8e80941Smrg src0_type == BRW_REGISTER_TYPE_HF)) { 1173b8e80941Smrg ERROR_IF(brw_inst_src0_da1_subreg_nr(devinfo, inst) != 0, 1174b8e80941Smrg "Mixed float mode requires register-aligned accumulator " 1175b8e80941Smrg "source reads when destination is packed half-float"); 1176b8e80941Smrg 1177b8e80941Smrg } 1178b8e80941Smrg 1179b8e80941Smrg if (num_sources > 1 && 1180b8e80941Smrg src1_is_acc(devinfo, inst) && 1181b8e80941Smrg (src1_type == BRW_REGISTER_TYPE_F || 1182b8e80941Smrg src1_type == BRW_REGISTER_TYPE_HF)) { 1183b8e80941Smrg ERROR_IF(brw_inst_src1_da1_subreg_nr(devinfo, inst) != 0, 1184b8e80941Smrg "Mixed float mode requires register-aligned accumulator " 1185b8e80941Smrg "source reads when destination is packed half-float"); 1186b8e80941Smrg } 1187b8e80941Smrg } 1188b8e80941Smrg 1189b8e80941Smrg /* From the SKL PRM, Special Restrictions for Handling Mixed Mode 1190b8e80941Smrg * Float Operations: 1191b8e80941Smrg * 1192b8e80941Smrg * "No swizzle is allowed when an accumulator is used as an implicit 1193b8e80941Smrg * source or an explicit source in an instruction. i.e. when 1194b8e80941Smrg * destination is half float with an implicit accumulator source, 1195b8e80941Smrg * destination stride needs to be 2." 1196b8e80941Smrg * 1197b8e80941Smrg * FIXME: it is not quite clear what the first sentence actually means 1198b8e80941Smrg * or its link to the implication described after it, so we only 1199b8e80941Smrg * validate the explicit implication, which is clearly described. 
1200b8e80941Smrg */ 1201b8e80941Smrg if (dst_type == BRW_REGISTER_TYPE_HF && 1202b8e80941Smrg inst_uses_src_acc(devinfo, inst)) { 1203b8e80941Smrg ERROR_IF(dst_stride != 2, 1204b8e80941Smrg "Mixed float mode with implicit/explicit accumulator " 1205b8e80941Smrg "source and half-float destination requires a stride " 1206b8e80941Smrg "of 2 on the destination"); 1207b8e80941Smrg } 1208b8e80941Smrg } 1209b8e80941Smrg 1210b8e80941Smrg return error_msg; 1211b8e80941Smrg} 1212b8e80941Smrg 1213b8e80941Smrg/** 1214b8e80941Smrg * Creates an \p access_mask for an \p exec_size, \p element_size, and a region 1215b8e80941Smrg * 1216b8e80941Smrg * An \p access_mask is a 32-element array of uint64_t, where each uint64_t is 1217b8e80941Smrg * a bitmask of bytes accessed by the region. 1218b8e80941Smrg * 1219b8e80941Smrg * For instance the access mask of the source gX.1<4,2,2>F in an exec_size = 4 1220b8e80941Smrg * instruction would be 1221b8e80941Smrg * 1222b8e80941Smrg * access_mask[0] = 0x00000000000000F0 1223b8e80941Smrg * access_mask[1] = 0x000000000000F000 1224b8e80941Smrg * access_mask[2] = 0x0000000000F00000 1225b8e80941Smrg * access_mask[3] = 0x00000000F0000000 1226b8e80941Smrg * access_mask[4-31] = 0 1227b8e80941Smrg * 1228b8e80941Smrg * because the first execution channel accesses bytes 7-4 and the second 1229b8e80941Smrg * execution channel accesses bytes 15-12, etc. 
/* Build the per-channel byte-access bitmasks for an Align1 region: each entry
 * of access_mask receives a mask of the bytes one execution channel touches.
 */
static void
align1_access_mask(uint64_t access_mask[static 32],
                   unsigned exec_size, unsigned element_size, unsigned subreg,
                   unsigned vstride, unsigned width, unsigned hstride)
{
   const uint64_t element_mask = (1ULL << element_size) - 1;
   const unsigned rows = exec_size / width;
   unsigned channel = 0;

   for (unsigned row = 0; row < rows; row++) {
      /* Each row starts vstride elements after the previous one. */
      const unsigned row_offset = subreg + row * vstride * element_size;

      for (unsigned col = 0; col < width; col++)
         access_mask[channel++] =
            element_mask << (row_offset + col * hstride * element_size);
   }

   /* Either nothing was written (degenerate region) or exactly one mask per
    * execution channel.
    */
   assert(channel == 0 || channel == exec_size);
}

/* Count how many adjacent registers the given access mask touches: 2 if any
 * channel reaches past byte 31, 1 if any byte is accessed at all, else 0.
 */
static int
registers_read(const uint64_t access_mask[static 32])
{
   bool any_access = false;

   for (unsigned i = 0; i < 32; i++) {
      if (access_mask[i] >> 32)
         return 2;
      if (access_mask[i])
         any_access = true;
   }

   return any_access ? 1 : 0;
}

/**
 * Checks restrictions listed in "Region Alignment Rules" in the "Register
 * Region Restrictions" section.
 */
1276b8e80941Smrg */ 1277b8e80941Smrgstatic struct string 1278b8e80941Smrgregion_alignment_rules(const struct gen_device_info *devinfo, 1279b8e80941Smrg const brw_inst *inst) 1280b8e80941Smrg{ 1281b8e80941Smrg const struct opcode_desc *desc = 1282b8e80941Smrg brw_opcode_desc(devinfo, brw_inst_opcode(devinfo, inst)); 1283b8e80941Smrg unsigned num_sources = num_sources_from_inst(devinfo, inst); 1284b8e80941Smrg unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst); 1285b8e80941Smrg uint64_t dst_access_mask[32], src0_access_mask[32], src1_access_mask[32]; 1286b8e80941Smrg struct string error_msg = { .str = NULL, .len = 0 }; 1287b8e80941Smrg 1288b8e80941Smrg if (num_sources == 3) 1289b8e80941Smrg return (struct string){}; 1290b8e80941Smrg 1291b8e80941Smrg if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16) 1292b8e80941Smrg return (struct string){}; 1293b8e80941Smrg 1294b8e80941Smrg if (inst_is_send(devinfo, inst)) 1295b8e80941Smrg return (struct string){}; 1296b8e80941Smrg 1297b8e80941Smrg memset(dst_access_mask, 0, sizeof(dst_access_mask)); 1298b8e80941Smrg memset(src0_access_mask, 0, sizeof(src0_access_mask)); 1299b8e80941Smrg memset(src1_access_mask, 0, sizeof(src1_access_mask)); 1300b8e80941Smrg 1301b8e80941Smrg for (unsigned i = 0; i < num_sources; i++) { 1302b8e80941Smrg unsigned vstride, width, hstride, element_size, subreg; 1303b8e80941Smrg enum brw_reg_type type; 1304b8e80941Smrg 1305b8e80941Smrg /* In Direct Addressing mode, a source cannot span more than 2 adjacent 1306b8e80941Smrg * GRF registers. 
1307b8e80941Smrg */ 1308b8e80941Smrg 1309b8e80941Smrg#define DO_SRC(n) \ 1310b8e80941Smrg if (brw_inst_src ## n ## _address_mode(devinfo, inst) != \ 1311b8e80941Smrg BRW_ADDRESS_DIRECT) \ 1312b8e80941Smrg continue; \ 1313b8e80941Smrg \ 1314b8e80941Smrg if (brw_inst_src ## n ## _reg_file(devinfo, inst) == \ 1315b8e80941Smrg BRW_IMMEDIATE_VALUE) \ 1316b8e80941Smrg continue; \ 1317b8e80941Smrg \ 1318b8e80941Smrg vstride = STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst)); \ 1319b8e80941Smrg width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst)); \ 1320b8e80941Smrg hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst)); \ 1321b8e80941Smrg type = brw_inst_src ## n ## _type(devinfo, inst); \ 1322b8e80941Smrg element_size = brw_reg_type_to_size(type); \ 1323b8e80941Smrg subreg = brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst); \ 1324b8e80941Smrg align1_access_mask(src ## n ## _access_mask, \ 1325b8e80941Smrg exec_size, element_size, subreg, \ 1326b8e80941Smrg vstride, width, hstride) 1327b8e80941Smrg 1328b8e80941Smrg if (i == 0) { 1329b8e80941Smrg DO_SRC(0); 1330b8e80941Smrg } else { 1331b8e80941Smrg DO_SRC(1); 1332b8e80941Smrg } 1333b8e80941Smrg#undef DO_SRC 1334b8e80941Smrg 1335b8e80941Smrg unsigned num_vstride = exec_size / width; 1336b8e80941Smrg unsigned num_hstride = width; 1337b8e80941Smrg unsigned vstride_elements = (num_vstride - 1) * vstride; 1338b8e80941Smrg unsigned hstride_elements = (num_hstride - 1) * hstride; 1339b8e80941Smrg unsigned offset = (vstride_elements + hstride_elements) * element_size + 1340b8e80941Smrg subreg; 1341b8e80941Smrg ERROR_IF(offset >= 64, 1342b8e80941Smrg "A source cannot span more than 2 adjacent GRF registers"); 1343b8e80941Smrg } 1344b8e80941Smrg 1345b8e80941Smrg if (desc->ndst == 0 || dst_is_null(devinfo, inst)) 1346b8e80941Smrg return error_msg; 1347b8e80941Smrg 1348b8e80941Smrg unsigned stride = STRIDE(brw_inst_dst_hstride(devinfo, inst)); 1349b8e80941Smrg enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, 
inst); 1350b8e80941Smrg unsigned element_size = brw_reg_type_to_size(dst_type); 1351b8e80941Smrg unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst); 1352b8e80941Smrg unsigned offset = ((exec_size - 1) * stride * element_size) + subreg; 1353b8e80941Smrg ERROR_IF(offset >= 64, 1354b8e80941Smrg "A destination cannot span more than 2 adjacent GRF registers"); 1355b8e80941Smrg 1356b8e80941Smrg if (error_msg.str) 1357b8e80941Smrg return error_msg; 1358b8e80941Smrg 1359b8e80941Smrg /* On IVB/BYT, region parameters and execution size for DF are in terms of 1360b8e80941Smrg * 32-bit elements, so they are doubled. For evaluating the validity of an 1361b8e80941Smrg * instruction, we halve them. 1362b8e80941Smrg */ 1363b8e80941Smrg if (devinfo->gen == 7 && !devinfo->is_haswell && 1364b8e80941Smrg element_size == 8) 1365b8e80941Smrg element_size = 4; 1366b8e80941Smrg 1367b8e80941Smrg align1_access_mask(dst_access_mask, exec_size, element_size, subreg, 1368b8e80941Smrg exec_size == 1 ? 0 : exec_size * stride, 1369b8e80941Smrg exec_size == 1 ? 1 : exec_size, 1370b8e80941Smrg exec_size == 1 ? 0 : stride); 1371b8e80941Smrg 1372b8e80941Smrg unsigned dst_regs = registers_read(dst_access_mask); 1373b8e80941Smrg unsigned src0_regs = registers_read(src0_access_mask); 1374b8e80941Smrg unsigned src1_regs = registers_read(src1_access_mask); 1375b8e80941Smrg 1376b8e80941Smrg /* The SNB, IVB, HSW, BDW, and CHV PRMs say: 1377b8e80941Smrg * 1378b8e80941Smrg * When an instruction has a source region spanning two registers and a 1379b8e80941Smrg * destination region contained in one register, the number of elements 1380b8e80941Smrg * must be the same between two sources and one of the following must be 1381b8e80941Smrg * true: 1382b8e80941Smrg * 1383b8e80941Smrg * 1. The destination region is entirely contained in the lower OWord 1384b8e80941Smrg * of a register. 1385b8e80941Smrg * 2. The destination region is entirely contained in the upper OWord 1386b8e80941Smrg * of a register. 
1387b8e80941Smrg * 3. The destination elements are evenly split between the two OWords 1388b8e80941Smrg * of a register. 1389b8e80941Smrg */ 1390b8e80941Smrg if (devinfo->gen <= 8) { 1391b8e80941Smrg if (dst_regs == 1 && (src0_regs == 2 || src1_regs == 2)) { 1392b8e80941Smrg unsigned upper_oword_writes = 0, lower_oword_writes = 0; 1393b8e80941Smrg 1394b8e80941Smrg for (unsigned i = 0; i < exec_size; i++) { 1395b8e80941Smrg if (dst_access_mask[i] > 0x0000FFFF) { 1396b8e80941Smrg upper_oword_writes++; 1397b8e80941Smrg } else { 1398b8e80941Smrg assert(dst_access_mask[i] != 0); 1399b8e80941Smrg lower_oword_writes++; 1400b8e80941Smrg } 1401b8e80941Smrg } 1402b8e80941Smrg 1403b8e80941Smrg ERROR_IF(lower_oword_writes != 0 && 1404b8e80941Smrg upper_oword_writes != 0 && 1405b8e80941Smrg upper_oword_writes != lower_oword_writes, 1406b8e80941Smrg "Writes must be to only one OWord or " 1407b8e80941Smrg "evenly split between OWords"); 1408b8e80941Smrg } 1409b8e80941Smrg } 1410b8e80941Smrg 1411b8e80941Smrg /* The IVB and HSW PRMs say: 1412b8e80941Smrg * 1413b8e80941Smrg * When an instruction has a source region that spans two registers and 1414b8e80941Smrg * the destination spans two registers, the destination elements must be 1415b8e80941Smrg * evenly split between the two registers [...] 1416b8e80941Smrg * 1417b8e80941Smrg * The SNB PRM contains similar wording (but written in a much more 1418b8e80941Smrg * confusing manner). 1419b8e80941Smrg * 1420b8e80941Smrg * The BDW PRM says: 1421b8e80941Smrg * 1422b8e80941Smrg * When destination spans two registers, the source may be one or two 1423b8e80941Smrg * registers. The destination elements must be evenly split between the 1424b8e80941Smrg * two registers. 1425b8e80941Smrg * 1426b8e80941Smrg * The SKL PRM says: 1427b8e80941Smrg * 1428b8e80941Smrg * When destination of MATH instruction spans two registers, the 1429b8e80941Smrg * destination elements must be evenly split between the two registers. 
1430b8e80941Smrg * 1431b8e80941Smrg * It is not known whether this restriction applies to KBL other Gens after 1432b8e80941Smrg * SKL. 1433b8e80941Smrg */ 1434b8e80941Smrg if (devinfo->gen <= 8 || 1435b8e80941Smrg brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MATH) { 1436b8e80941Smrg 1437b8e80941Smrg /* Nothing explicitly states that on Gen < 8 elements must be evenly 1438b8e80941Smrg * split between two destination registers in the two exceptional 1439b8e80941Smrg * source-region-spans-one-register cases, but since Broadwell requires 1440b8e80941Smrg * evenly split writes regardless of source region, we assume that it was 1441b8e80941Smrg * an oversight and require it. 1442b8e80941Smrg */ 1443b8e80941Smrg if (dst_regs == 2) { 1444b8e80941Smrg unsigned upper_reg_writes = 0, lower_reg_writes = 0; 1445b8e80941Smrg 1446b8e80941Smrg for (unsigned i = 0; i < exec_size; i++) { 1447b8e80941Smrg if (dst_access_mask[i] > 0xFFFFFFFF) { 1448b8e80941Smrg upper_reg_writes++; 1449b8e80941Smrg } else { 1450b8e80941Smrg assert(dst_access_mask[i] != 0); 1451b8e80941Smrg lower_reg_writes++; 1452b8e80941Smrg } 1453b8e80941Smrg } 1454b8e80941Smrg 1455b8e80941Smrg ERROR_IF(upper_reg_writes != lower_reg_writes, 1456b8e80941Smrg "Writes must be evenly split between the two " 1457b8e80941Smrg "destination registers"); 1458b8e80941Smrg } 1459b8e80941Smrg } 1460b8e80941Smrg 1461b8e80941Smrg /* The IVB and HSW PRMs say: 1462b8e80941Smrg * 1463b8e80941Smrg * When an instruction has a source region that spans two registers and 1464b8e80941Smrg * the destination spans two registers, the destination elements must be 1465b8e80941Smrg * evenly split between the two registers and each destination register 1466b8e80941Smrg * must be entirely derived from one source register. 1467b8e80941Smrg * 1468b8e80941Smrg * Note: In such cases, the regioning parameters must ensure that the 1469b8e80941Smrg * offset from the two source registers is the same. 
1470b8e80941Smrg * 1471b8e80941Smrg * The SNB PRM contains similar wording (but written in a much more 1472b8e80941Smrg * confusing manner). 1473b8e80941Smrg * 1474b8e80941Smrg * There are effectively three rules stated here: 1475b8e80941Smrg * 1476b8e80941Smrg * For an instruction with a source and a destination spanning two 1477b8e80941Smrg * registers, 1478b8e80941Smrg * 1479b8e80941Smrg * (1) destination elements must be evenly split between the two 1480b8e80941Smrg * registers 1481b8e80941Smrg * (2) all destination elements in a register must be derived 1482b8e80941Smrg * from one source register 1483b8e80941Smrg * (3) the offset (i.e. the starting location in each of the two 1484b8e80941Smrg * registers spanned by a region) must be the same in the two 1485b8e80941Smrg * registers spanned by a region 1486b8e80941Smrg * 1487b8e80941Smrg * It is impossible to violate rule (1) without violating (2) or (3), so we 1488b8e80941Smrg * do not attempt to validate it. 1489b8e80941Smrg */ 1490b8e80941Smrg if (devinfo->gen <= 7 && dst_regs == 2) { 1491b8e80941Smrg for (unsigned i = 0; i < num_sources; i++) { 1492b8e80941Smrg#define DO_SRC(n) \ 1493b8e80941Smrg if (src ## n ## _regs <= 1) \ 1494b8e80941Smrg continue; \ 1495b8e80941Smrg \ 1496b8e80941Smrg for (unsigned i = 0; i < exec_size; i++) { \ 1497b8e80941Smrg if ((dst_access_mask[i] > 0xFFFFFFFF) != \ 1498b8e80941Smrg (src ## n ## _access_mask[i] > 0xFFFFFFFF)) { \ 1499b8e80941Smrg ERROR("Each destination register must be entirely derived " \ 1500b8e80941Smrg "from one source register"); \ 1501b8e80941Smrg break; \ 1502b8e80941Smrg } \ 1503b8e80941Smrg } \ 1504b8e80941Smrg \ 1505b8e80941Smrg unsigned offset_0 = \ 1506b8e80941Smrg brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst); \ 1507b8e80941Smrg unsigned offset_1 = offset_0; \ 1508b8e80941Smrg \ 1509b8e80941Smrg for (unsigned i = 0; i < exec_size; i++) { \ 1510b8e80941Smrg if (src ## n ## _access_mask[i] > 0xFFFFFFFF) { \ 1511b8e80941Smrg offset_1 = 
__builtin_ctzll(src ## n ## _access_mask[i]) - 32; \ 1512b8e80941Smrg break; \ 1513b8e80941Smrg } \ 1514b8e80941Smrg } \ 1515b8e80941Smrg \ 1516b8e80941Smrg ERROR_IF(num_sources == 2 && offset_0 != offset_1, \ 1517b8e80941Smrg "The offset from the two source registers " \ 1518b8e80941Smrg "must be the same") 1519b8e80941Smrg 1520b8e80941Smrg if (i == 0) { 1521b8e80941Smrg DO_SRC(0); 1522b8e80941Smrg } else { 1523b8e80941Smrg DO_SRC(1); 1524b8e80941Smrg } 1525b8e80941Smrg#undef DO_SRC 1526b8e80941Smrg } 1527b8e80941Smrg } 1528b8e80941Smrg 1529b8e80941Smrg /* The IVB and HSW PRMs say: 1530b8e80941Smrg * 1531b8e80941Smrg * When destination spans two registers, the source MUST span two 1532b8e80941Smrg * registers. The exception to the above rule: 1533b8e80941Smrg * 1. When source is scalar, the source registers are not 1534b8e80941Smrg * incremented. 1535b8e80941Smrg * 2. When source is packed integer Word and destination is packed 1536b8e80941Smrg * integer DWord, the source register is not incremented by the 1537b8e80941Smrg * source sub register is incremented. 1538b8e80941Smrg * 1539b8e80941Smrg * The SNB PRM does not contain this rule, but the internal documentation 1540b8e80941Smrg * indicates that it applies to SNB as well. We assume that the rule applies 1541b8e80941Smrg * to Gen <= 5 although their PRMs do not state it. 1542b8e80941Smrg * 1543b8e80941Smrg * While the documentation explicitly says in exception (2) that the 1544b8e80941Smrg * destination must be an integer DWord, the hardware allows at least a 1545b8e80941Smrg * float destination type as well. We emit such instructions from 1546b8e80941Smrg * 1547b8e80941Smrg * fs_visitor::emit_interpolation_setup_gen6 1548b8e80941Smrg * fs_visitor::emit_fragcoord_interpolation 1549b8e80941Smrg * 1550b8e80941Smrg * and have for years with no ill effects. 
1551b8e80941Smrg * 1552b8e80941Smrg * Additionally the simulator source code indicates that the real condition 1553b8e80941Smrg * is that the size of the destination type is 4 bytes. 1554b8e80941Smrg */ 1555b8e80941Smrg if (devinfo->gen <= 7 && dst_regs == 2) { 1556b8e80941Smrg enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst); 1557b8e80941Smrg bool dst_is_packed_dword = 1558b8e80941Smrg is_packed(exec_size * stride, exec_size, stride) && 1559b8e80941Smrg brw_reg_type_to_size(dst_type) == 4; 1560b8e80941Smrg 1561b8e80941Smrg for (unsigned i = 0; i < num_sources; i++) { 1562b8e80941Smrg#define DO_SRC(n) \ 1563b8e80941Smrg unsigned vstride, width, hstride; \ 1564b8e80941Smrg vstride = STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst)); \ 1565b8e80941Smrg width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst)); \ 1566b8e80941Smrg hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst)); \ 1567b8e80941Smrg bool src ## n ## _is_packed_word = \ 1568b8e80941Smrg is_packed(vstride, width, hstride) && \ 1569b8e80941Smrg (brw_inst_src ## n ## _type(devinfo, inst) == BRW_REGISTER_TYPE_W || \ 1570b8e80941Smrg brw_inst_src ## n ## _type(devinfo, inst) == BRW_REGISTER_TYPE_UW); \ 1571b8e80941Smrg \ 1572b8e80941Smrg ERROR_IF(src ## n ## _regs == 1 && \ 1573b8e80941Smrg !src ## n ## _has_scalar_region(devinfo, inst) && \ 1574b8e80941Smrg !(dst_is_packed_dword && src ## n ## _is_packed_word), \ 1575b8e80941Smrg "When the destination spans two registers, the source must " \ 1576b8e80941Smrg "span two registers\n" ERROR_INDENT "(exceptions for scalar " \ 1577b8e80941Smrg "source and packed-word to packed-dword expansion)") 1578b8e80941Smrg 1579b8e80941Smrg if (i == 0) { 1580b8e80941Smrg DO_SRC(0); 1581b8e80941Smrg } else { 1582b8e80941Smrg DO_SRC(1); 1583b8e80941Smrg } 1584b8e80941Smrg#undef DO_SRC 1585b8e80941Smrg } 1586b8e80941Smrg } 1587b8e80941Smrg 1588b8e80941Smrg return error_msg; 1589b8e80941Smrg} 1590b8e80941Smrg 1591b8e80941Smrgstatic struct string 
1592b8e80941Smrgvector_immediate_restrictions(const struct gen_device_info *devinfo, 1593b8e80941Smrg const brw_inst *inst) 1594b8e80941Smrg{ 1595b8e80941Smrg unsigned num_sources = num_sources_from_inst(devinfo, inst); 1596b8e80941Smrg struct string error_msg = { .str = NULL, .len = 0 }; 1597b8e80941Smrg 1598b8e80941Smrg if (num_sources == 3 || num_sources == 0) 1599b8e80941Smrg return (struct string){}; 1600b8e80941Smrg 1601b8e80941Smrg unsigned file = num_sources == 1 ? 1602b8e80941Smrg brw_inst_src0_reg_file(devinfo, inst) : 1603b8e80941Smrg brw_inst_src1_reg_file(devinfo, inst); 1604b8e80941Smrg if (file != BRW_IMMEDIATE_VALUE) 1605b8e80941Smrg return (struct string){}; 1606b8e80941Smrg 1607b8e80941Smrg enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst); 1608b8e80941Smrg unsigned dst_type_size = brw_reg_type_to_size(dst_type); 1609b8e80941Smrg unsigned dst_subreg = brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1 ? 1610b8e80941Smrg brw_inst_dst_da1_subreg_nr(devinfo, inst) : 0; 1611b8e80941Smrg unsigned dst_stride = STRIDE(brw_inst_dst_hstride(devinfo, inst)); 1612b8e80941Smrg enum brw_reg_type type = num_sources == 1 ? 1613b8e80941Smrg brw_inst_src0_type(devinfo, inst) : 1614b8e80941Smrg brw_inst_src1_type(devinfo, inst); 1615b8e80941Smrg 1616b8e80941Smrg /* The PRMs say: 1617b8e80941Smrg * 1618b8e80941Smrg * When an immediate vector is used in an instruction, the destination 1619b8e80941Smrg * must be 128-bit aligned with destination horizontal stride equivalent 1620b8e80941Smrg * to a word for an immediate integer vector (v) and equivalent to a 1621b8e80941Smrg * DWord for an immediate float vector (vf). 1622b8e80941Smrg * 1623b8e80941Smrg * The text has not been updated for the addition of the immediate unsigned 1624b8e80941Smrg * integer vector type (uv) on SNB, but presumably the same restriction 1625b8e80941Smrg * applies. 
1626b8e80941Smrg */ 1627b8e80941Smrg switch (type) { 1628b8e80941Smrg case BRW_REGISTER_TYPE_V: 1629b8e80941Smrg case BRW_REGISTER_TYPE_UV: 1630b8e80941Smrg case BRW_REGISTER_TYPE_VF: 1631b8e80941Smrg ERROR_IF(dst_subreg % (128 / 8) != 0, 1632b8e80941Smrg "Destination must be 128-bit aligned in order to use immediate " 1633b8e80941Smrg "vector types"); 1634b8e80941Smrg 1635b8e80941Smrg if (type == BRW_REGISTER_TYPE_VF) { 1636b8e80941Smrg ERROR_IF(dst_type_size * dst_stride != 4, 1637b8e80941Smrg "Destination must have stride equivalent to dword in order " 1638b8e80941Smrg "to use the VF type"); 1639b8e80941Smrg } else { 1640b8e80941Smrg ERROR_IF(dst_type_size * dst_stride != 2, 1641b8e80941Smrg "Destination must have stride equivalent to word in order " 1642b8e80941Smrg "to use the V or UV type"); 1643b8e80941Smrg } 1644b8e80941Smrg break; 1645b8e80941Smrg default: 1646b8e80941Smrg break; 1647b8e80941Smrg } 1648b8e80941Smrg 1649b8e80941Smrg return error_msg; 1650b8e80941Smrg} 1651b8e80941Smrg 1652b8e80941Smrgstatic struct string 1653b8e80941Smrgspecial_requirements_for_handling_double_precision_data_types( 1654b8e80941Smrg const struct gen_device_info *devinfo, 1655b8e80941Smrg const brw_inst *inst) 1656b8e80941Smrg{ 1657b8e80941Smrg unsigned num_sources = num_sources_from_inst(devinfo, inst); 1658b8e80941Smrg struct string error_msg = { .str = NULL, .len = 0 }; 1659b8e80941Smrg 1660b8e80941Smrg if (num_sources == 3 || num_sources == 0) 1661b8e80941Smrg return (struct string){}; 1662b8e80941Smrg 1663b8e80941Smrg /* Split sends don't have types so there's no doubles there. 
*/ 1664b8e80941Smrg if (inst_is_split_send(devinfo, inst)) 1665b8e80941Smrg return (struct string){}; 1666b8e80941Smrg 1667b8e80941Smrg enum brw_reg_type exec_type = execution_type(devinfo, inst); 1668b8e80941Smrg unsigned exec_type_size = brw_reg_type_to_size(exec_type); 1669b8e80941Smrg 1670b8e80941Smrg enum brw_reg_file dst_file = brw_inst_dst_reg_file(devinfo, inst); 1671b8e80941Smrg enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst); 1672b8e80941Smrg unsigned dst_type_size = brw_reg_type_to_size(dst_type); 1673b8e80941Smrg unsigned dst_hstride = STRIDE(brw_inst_dst_hstride(devinfo, inst)); 1674b8e80941Smrg unsigned dst_reg = brw_inst_dst_da_reg_nr(devinfo, inst); 1675b8e80941Smrg unsigned dst_subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst); 1676b8e80941Smrg unsigned dst_address_mode = brw_inst_dst_address_mode(devinfo, inst); 1677b8e80941Smrg 1678b8e80941Smrg bool is_integer_dword_multiply = 1679b8e80941Smrg devinfo->gen >= 8 && 1680b8e80941Smrg brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MUL && 1681b8e80941Smrg (brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_D || 1682b8e80941Smrg brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_UD) && 1683b8e80941Smrg (brw_inst_src1_type(devinfo, inst) == BRW_REGISTER_TYPE_D || 1684b8e80941Smrg brw_inst_src1_type(devinfo, inst) == BRW_REGISTER_TYPE_UD); 1685b8e80941Smrg 1686b8e80941Smrg if (dst_type_size != 8 && exec_type_size != 8 && !is_integer_dword_multiply) 1687b8e80941Smrg return (struct string){}; 1688b8e80941Smrg 1689b8e80941Smrg for (unsigned i = 0; i < num_sources; i++) { 1690b8e80941Smrg unsigned vstride, width, hstride, type_size, reg, subreg, address_mode; 1691b8e80941Smrg bool is_scalar_region; 1692b8e80941Smrg enum brw_reg_file file; 1693b8e80941Smrg enum brw_reg_type type; 1694b8e80941Smrg 1695b8e80941Smrg#define DO_SRC(n) \ 1696b8e80941Smrg if (brw_inst_src ## n ## _reg_file(devinfo, inst) == \ 1697b8e80941Smrg BRW_IMMEDIATE_VALUE) \ 1698b8e80941Smrg continue; \ 1699b8e80941Smrg \ 
1700b8e80941Smrg is_scalar_region = src ## n ## _has_scalar_region(devinfo, inst); \ 1701b8e80941Smrg vstride = STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst)); \ 1702b8e80941Smrg width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst)); \ 1703b8e80941Smrg hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst)); \ 1704b8e80941Smrg file = brw_inst_src ## n ## _reg_file(devinfo, inst); \ 1705b8e80941Smrg type = brw_inst_src ## n ## _type(devinfo, inst); \ 1706b8e80941Smrg type_size = brw_reg_type_to_size(type); \ 1707b8e80941Smrg reg = brw_inst_src ## n ## _da_reg_nr(devinfo, inst); \ 1708b8e80941Smrg subreg = brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst); \ 1709b8e80941Smrg address_mode = brw_inst_src ## n ## _address_mode(devinfo, inst) 1710b8e80941Smrg 1711b8e80941Smrg if (i == 0) { 1712b8e80941Smrg DO_SRC(0); 1713b8e80941Smrg } else { 1714b8e80941Smrg DO_SRC(1); 1715b8e80941Smrg } 1716b8e80941Smrg#undef DO_SRC 1717b8e80941Smrg 1718b8e80941Smrg /* The PRMs say that for CHV, BXT: 1719b8e80941Smrg * 1720b8e80941Smrg * When source or destination datatype is 64b or operation is integer 1721b8e80941Smrg * DWord multiply, regioning in Align1 must follow these rules: 1722b8e80941Smrg * 1723b8e80941Smrg * 1. Source and Destination horizontal stride must be aligned to the 1724b8e80941Smrg * same qword. 1725b8e80941Smrg * 2. Regioning must ensure Src.Vstride = Src.Width * Src.Hstride. 1726b8e80941Smrg * 3. Source and Destination offset must be the same, except the case 1727b8e80941Smrg * of scalar source. 1728b8e80941Smrg * 1729b8e80941Smrg * We assume that the restriction applies to GLK as well. 
1730b8e80941Smrg */ 1731b8e80941Smrg if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1 && 1732b8e80941Smrg (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo))) { 1733b8e80941Smrg unsigned src_stride = hstride * type_size; 1734b8e80941Smrg unsigned dst_stride = dst_hstride * dst_type_size; 1735b8e80941Smrg 1736b8e80941Smrg ERROR_IF(!is_scalar_region && 1737b8e80941Smrg (src_stride % 8 != 0 || 1738b8e80941Smrg dst_stride % 8 != 0 || 1739b8e80941Smrg src_stride != dst_stride), 1740b8e80941Smrg "Source and destination horizontal stride must equal and a " 1741b8e80941Smrg "multiple of a qword when the execution type is 64-bit"); 1742b8e80941Smrg 1743b8e80941Smrg ERROR_IF(vstride != width * hstride, 1744b8e80941Smrg "Vstride must be Width * Hstride when the execution type is " 1745b8e80941Smrg "64-bit"); 1746b8e80941Smrg 1747b8e80941Smrg ERROR_IF(!is_scalar_region && dst_subreg != subreg, 1748b8e80941Smrg "Source and destination offset must be the same when the " 1749b8e80941Smrg "execution type is 64-bit"); 1750b8e80941Smrg } 1751b8e80941Smrg 1752b8e80941Smrg /* The PRMs say that for CHV, BXT: 1753b8e80941Smrg * 1754b8e80941Smrg * When source or destination datatype is 64b or operation is integer 1755b8e80941Smrg * DWord multiply, indirect addressing must not be used. 1756b8e80941Smrg * 1757b8e80941Smrg * We assume that the restriction applies to GLK as well. 
1758b8e80941Smrg */ 1759b8e80941Smrg if (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo)) { 1760b8e80941Smrg ERROR_IF(BRW_ADDRESS_REGISTER_INDIRECT_REGISTER == address_mode || 1761b8e80941Smrg BRW_ADDRESS_REGISTER_INDIRECT_REGISTER == dst_address_mode, 1762b8e80941Smrg "Indirect addressing is not allowed when the execution type " 1763b8e80941Smrg "is 64-bit"); 1764b8e80941Smrg } 1765b8e80941Smrg 1766b8e80941Smrg /* The PRMs say that for CHV, BXT: 1767b8e80941Smrg * 1768b8e80941Smrg * ARF registers must never be used with 64b datatype or when 1769b8e80941Smrg * operation is integer DWord multiply. 1770b8e80941Smrg * 1771b8e80941Smrg * We assume that the restriction applies to GLK as well. 1772b8e80941Smrg * 1773b8e80941Smrg * We assume that the restriction does not apply to the null register. 1774b8e80941Smrg */ 1775b8e80941Smrg if (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo)) { 1776b8e80941Smrg ERROR_IF(brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MAC || 1777b8e80941Smrg brw_inst_acc_wr_control(devinfo, inst) || 1778b8e80941Smrg (BRW_ARCHITECTURE_REGISTER_FILE == file && 1779b8e80941Smrg reg != BRW_ARF_NULL) || 1780b8e80941Smrg (BRW_ARCHITECTURE_REGISTER_FILE == dst_file && 1781b8e80941Smrg dst_reg != BRW_ARF_NULL), 1782b8e80941Smrg "Architecture registers cannot be used when the execution " 1783b8e80941Smrg "type is 64-bit"); 1784b8e80941Smrg } 1785b8e80941Smrg } 1786b8e80941Smrg 1787b8e80941Smrg /* The PRMs say that for BDW, SKL: 1788b8e80941Smrg * 1789b8e80941Smrg * If Align16 is required for an operation with QW destination and non-QW 1790b8e80941Smrg * source datatypes, the execution size cannot exceed 2. 1791b8e80941Smrg * 1792b8e80941Smrg * We assume that the restriction applies to all Gen8+ parts. 1793b8e80941Smrg */ 1794b8e80941Smrg if (devinfo->gen >= 8) { 1795b8e80941Smrg enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst); 1796b8e80941Smrg enum brw_reg_type src1_type = 1797b8e80941Smrg num_sources > 1 ? 
brw_inst_src1_type(devinfo, inst) : src0_type; 1798b8e80941Smrg unsigned src0_type_size = brw_reg_type_to_size(src0_type); 1799b8e80941Smrg unsigned src1_type_size = brw_reg_type_to_size(src1_type); 1800b8e80941Smrg 1801b8e80941Smrg ERROR_IF(brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16 && 1802b8e80941Smrg dst_type_size == 8 && 1803b8e80941Smrg (src0_type_size != 8 || src1_type_size != 8) && 1804b8e80941Smrg brw_inst_exec_size(devinfo, inst) > BRW_EXECUTE_2, 1805b8e80941Smrg "In Align16 exec size cannot exceed 2 with a QWord destination " 1806b8e80941Smrg "and a non-QWord source"); 1807b8e80941Smrg } 1808b8e80941Smrg 1809b8e80941Smrg /* The PRMs say that for CHV, BXT: 1810b8e80941Smrg * 1811b8e80941Smrg * When source or destination datatype is 64b or operation is integer 1812b8e80941Smrg * DWord multiply, DepCtrl must not be used. 1813b8e80941Smrg * 1814b8e80941Smrg * We assume that the restriction applies to GLK as well. 1815b8e80941Smrg */ 1816b8e80941Smrg if (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo)) { 1817b8e80941Smrg ERROR_IF(brw_inst_no_dd_check(devinfo, inst) || 1818b8e80941Smrg brw_inst_no_dd_clear(devinfo, inst), 1819b8e80941Smrg "DepCtrl is not allowed when the execution type is 64-bit"); 1820b8e80941Smrg } 1821b8e80941Smrg 1822b8e80941Smrg return error_msg; 1823b8e80941Smrg} 1824b8e80941Smrg 1825b8e80941Smrgbool 1826b8e80941Smrgbrw_validate_instructions(const struct gen_device_info *devinfo, 1827b8e80941Smrg const void *assembly, int start_offset, int end_offset, 1828b8e80941Smrg struct disasm_info *disasm) 1829b8e80941Smrg{ 1830b8e80941Smrg bool valid = true; 1831b8e80941Smrg 1832b8e80941Smrg for (int src_offset = start_offset; src_offset < end_offset;) { 1833b8e80941Smrg struct string error_msg = { .str = NULL, .len = 0 }; 1834b8e80941Smrg const brw_inst *inst = assembly + src_offset; 1835b8e80941Smrg bool is_compact = brw_inst_cmpt_control(devinfo, inst); 1836b8e80941Smrg brw_inst uncompacted; 1837b8e80941Smrg 1838b8e80941Smrg 
if (is_compact) { 1839b8e80941Smrg brw_compact_inst *compacted = (void *)inst; 1840b8e80941Smrg brw_uncompact_instruction(devinfo, &uncompacted, compacted); 1841b8e80941Smrg inst = &uncompacted; 1842b8e80941Smrg } 1843b8e80941Smrg 1844b8e80941Smrg if (is_unsupported_inst(devinfo, inst)) { 1845b8e80941Smrg ERROR("Instruction not supported on this Gen"); 1846b8e80941Smrg } else { 1847b8e80941Smrg CHECK(sources_not_null); 1848b8e80941Smrg CHECK(send_restrictions); 1849b8e80941Smrg CHECK(alignment_supported); 1850b8e80941Smrg CHECK(general_restrictions_based_on_operand_types); 1851b8e80941Smrg CHECK(general_restrictions_on_region_parameters); 1852b8e80941Smrg CHECK(special_restrictions_for_mixed_float_mode); 1853b8e80941Smrg CHECK(region_alignment_rules); 1854b8e80941Smrg CHECK(vector_immediate_restrictions); 1855b8e80941Smrg CHECK(special_requirements_for_handling_double_precision_data_types); 1856b8e80941Smrg } 1857b8e80941Smrg 1858b8e80941Smrg if (error_msg.str && disasm) { 1859b8e80941Smrg disasm_insert_error(disasm, src_offset, error_msg.str); 1860b8e80941Smrg } 1861b8e80941Smrg valid = valid && error_msg.len == 0; 1862b8e80941Smrg free(error_msg.str); 1863b8e80941Smrg 1864b8e80941Smrg if (is_compact) { 1865b8e80941Smrg src_offset += sizeof(brw_compact_inst); 1866b8e80941Smrg } else { 1867b8e80941Smrg src_offset += sizeof(brw_inst); 1868b8e80941Smrg } 1869b8e80941Smrg } 1870b8e80941Smrg 1871b8e80941Smrg return valid; 1872b8e80941Smrg} 1873