1/* 2 * Copyright © 2015-2019 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24/** @file brw_eu_validate.c 25 * 26 * This file implements a pass that validates shader assembly. 27 * 28 * The restrictions implemented herein are intended to verify that instructions 29 * in shader assembly do not violate restrictions documented in the graphics 30 * programming reference manuals. 31 * 32 * The restrictions are difficult for humans to quickly verify due to their 33 * complexity and abundance. 34 * 35 * It is critical that this code is thoroughly unit tested because false 36 * results will lead developers astray, which is worse than having no validator 37 * at all. Functional changes to this file without corresponding unit tests (in 38 * test_eu_validate.cpp) will be rejected. 
39 */ 40 41#include "brw_eu.h" 42 43/* We're going to do lots of string concatenation, so this should help. */ 44struct string { 45 char *str; 46 size_t len; 47}; 48 49static void 50cat(struct string *dest, const struct string src) 51{ 52 dest->str = realloc(dest->str, dest->len + src.len + 1); 53 memcpy(dest->str + dest->len, src.str, src.len); 54 dest->str[dest->len + src.len] = '\0'; 55 dest->len = dest->len + src.len; 56} 57#define CAT(dest, src) cat(&dest, (struct string){src, strlen(src)}) 58 59static bool 60contains(const struct string haystack, const struct string needle) 61{ 62 return haystack.str && memmem(haystack.str, haystack.len, 63 needle.str, needle.len) != NULL; 64} 65#define CONTAINS(haystack, needle) \ 66 contains(haystack, (struct string){needle, strlen(needle)}) 67 68#define error(str) "\tERROR: " str "\n" 69#define ERROR_INDENT "\t " 70 71#define ERROR(msg) ERROR_IF(true, msg) 72#define ERROR_IF(cond, msg) \ 73 do { \ 74 if ((cond) && !CONTAINS(error_msg, error(msg))) { \ 75 CAT(error_msg, error(msg)); \ 76 } \ 77 } while(0) 78 79#define CHECK(func, args...) \ 80 do { \ 81 struct string __msg = func(devinfo, inst, ##args); \ 82 if (__msg.str) { \ 83 cat(&error_msg, __msg); \ 84 free(__msg.str); \ 85 } \ 86 } while (0) 87 88#define STRIDE(stride) (stride != 0 ? 
1 << ((stride) - 1) : 0) 89#define WIDTH(width) (1 << (width)) 90 91static bool 92inst_is_send(const struct gen_device_info *devinfo, const brw_inst *inst) 93{ 94 switch (brw_inst_opcode(devinfo, inst)) { 95 case BRW_OPCODE_SEND: 96 case BRW_OPCODE_SENDC: 97 case BRW_OPCODE_SENDS: 98 case BRW_OPCODE_SENDSC: 99 return true; 100 default: 101 return false; 102 } 103} 104 105static bool 106inst_is_split_send(const struct gen_device_info *devinfo, const brw_inst *inst) 107{ 108 switch (brw_inst_opcode(devinfo, inst)) { 109 case BRW_OPCODE_SENDS: 110 case BRW_OPCODE_SENDSC: 111 return true; 112 default: 113 return false; 114 } 115} 116 117static unsigned 118signed_type(unsigned type) 119{ 120 switch (type) { 121 case BRW_REGISTER_TYPE_UD: return BRW_REGISTER_TYPE_D; 122 case BRW_REGISTER_TYPE_UW: return BRW_REGISTER_TYPE_W; 123 case BRW_REGISTER_TYPE_UB: return BRW_REGISTER_TYPE_B; 124 case BRW_REGISTER_TYPE_UQ: return BRW_REGISTER_TYPE_Q; 125 default: return type; 126 } 127} 128 129static bool 130inst_is_raw_move(const struct gen_device_info *devinfo, const brw_inst *inst) 131{ 132 unsigned dst_type = signed_type(brw_inst_dst_type(devinfo, inst)); 133 unsigned src_type = signed_type(brw_inst_src0_type(devinfo, inst)); 134 135 if (brw_inst_src0_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE) { 136 /* FIXME: not strictly true */ 137 if (brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_VF || 138 brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_UV || 139 brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_V) { 140 return false; 141 } 142 } else if (brw_inst_src0_negate(devinfo, inst) || 143 brw_inst_src0_abs(devinfo, inst)) { 144 return false; 145 } 146 147 return brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MOV && 148 brw_inst_saturate(devinfo, inst) == 0 && 149 dst_type == src_type; 150} 151 152static bool 153dst_is_null(const struct gen_device_info *devinfo, const brw_inst *inst) 154{ 155 return brw_inst_dst_reg_file(devinfo, inst) == 
BRW_ARCHITECTURE_REGISTER_FILE && 156 brw_inst_dst_da_reg_nr(devinfo, inst) == BRW_ARF_NULL; 157} 158 159static bool 160src0_is_null(const struct gen_device_info *devinfo, const brw_inst *inst) 161{ 162 return brw_inst_src0_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE && 163 brw_inst_src0_da_reg_nr(devinfo, inst) == BRW_ARF_NULL; 164} 165 166static bool 167src1_is_null(const struct gen_device_info *devinfo, const brw_inst *inst) 168{ 169 return brw_inst_src1_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE && 170 brw_inst_src1_da_reg_nr(devinfo, inst) == BRW_ARF_NULL; 171} 172 173static bool 174src0_is_acc(const struct gen_device_info *devinfo, const brw_inst *inst) 175{ 176 return brw_inst_src0_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE && 177 (brw_inst_src0_da_reg_nr(devinfo, inst) & 0xF0) == BRW_ARF_ACCUMULATOR; 178} 179 180static bool 181src1_is_acc(const struct gen_device_info *devinfo, const brw_inst *inst) 182{ 183 return brw_inst_src1_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE && 184 (brw_inst_src1_da_reg_nr(devinfo, inst) & 0xF0) == BRW_ARF_ACCUMULATOR; 185} 186 187static bool 188src0_is_grf(const struct gen_device_info *devinfo, const brw_inst *inst) 189{ 190 return brw_inst_src0_reg_file(devinfo, inst) == BRW_GENERAL_REGISTER_FILE; 191} 192 193static bool 194src0_has_scalar_region(const struct gen_device_info *devinfo, const brw_inst *inst) 195{ 196 return brw_inst_src0_vstride(devinfo, inst) == BRW_VERTICAL_STRIDE_0 && 197 brw_inst_src0_width(devinfo, inst) == BRW_WIDTH_1 && 198 brw_inst_src0_hstride(devinfo, inst) == BRW_HORIZONTAL_STRIDE_0; 199} 200 201static bool 202src1_has_scalar_region(const struct gen_device_info *devinfo, const brw_inst *inst) 203{ 204 return brw_inst_src1_vstride(devinfo, inst) == BRW_VERTICAL_STRIDE_0 && 205 brw_inst_src1_width(devinfo, inst) == BRW_WIDTH_1 && 206 brw_inst_src1_hstride(devinfo, inst) == BRW_HORIZONTAL_STRIDE_0; 207} 208 209static unsigned 
210num_sources_from_inst(const struct gen_device_info *devinfo, 211 const brw_inst *inst) 212{ 213 const struct opcode_desc *desc = 214 brw_opcode_desc(devinfo, brw_inst_opcode(devinfo, inst)); 215 unsigned math_function; 216 217 if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MATH) { 218 math_function = brw_inst_math_function(devinfo, inst); 219 } else if (devinfo->gen < 6 && 220 brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND) { 221 if (brw_inst_sfid(devinfo, inst) == BRW_SFID_MATH) { 222 /* src1 must be a descriptor (including the information to determine 223 * that the SEND is doing an extended math operation), but src0 can 224 * actually be null since it serves as the source of the implicit GRF 225 * to MRF move. 226 * 227 * If we stop using that functionality, we'll have to revisit this. 228 */ 229 return 2; 230 } else { 231 /* Send instructions are allowed to have null sources since they use 232 * the base_mrf field to specify which message register source. 233 */ 234 return 0; 235 } 236 } else { 237 assert(desc->nsrc < 4); 238 return desc->nsrc; 239 } 240 241 switch (math_function) { 242 case BRW_MATH_FUNCTION_INV: 243 case BRW_MATH_FUNCTION_LOG: 244 case BRW_MATH_FUNCTION_EXP: 245 case BRW_MATH_FUNCTION_SQRT: 246 case BRW_MATH_FUNCTION_RSQ: 247 case BRW_MATH_FUNCTION_SIN: 248 case BRW_MATH_FUNCTION_COS: 249 case BRW_MATH_FUNCTION_SINCOS: 250 case GEN8_MATH_FUNCTION_INVM: 251 case GEN8_MATH_FUNCTION_RSQRTM: 252 return 1; 253 case BRW_MATH_FUNCTION_FDIV: 254 case BRW_MATH_FUNCTION_POW: 255 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER: 256 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT: 257 case BRW_MATH_FUNCTION_INT_DIV_REMAINDER: 258 return 2; 259 default: 260 unreachable("not reached"); 261 } 262} 263 264static struct string 265sources_not_null(const struct gen_device_info *devinfo, 266 const brw_inst *inst) 267{ 268 unsigned num_sources = num_sources_from_inst(devinfo, inst); 269 struct string error_msg = { .str = NULL, .len = 0 }; 270 271 /* Nothing 
to test. 3-src instructions can only have GRF sources, and 272 * there's no bit to control the file. 273 */ 274 if (num_sources == 3) 275 return (struct string){}; 276 277 /* Nothing to test. Split sends can only encode a file in sources that are 278 * allowed to be NULL. 279 */ 280 if (inst_is_split_send(devinfo, inst)) 281 return (struct string){}; 282 283 if (num_sources >= 1) 284 ERROR_IF(src0_is_null(devinfo, inst), "src0 is null"); 285 286 if (num_sources == 2) 287 ERROR_IF(src1_is_null(devinfo, inst), "src1 is null"); 288 289 return error_msg; 290} 291 292static struct string 293alignment_supported(const struct gen_device_info *devinfo, 294 const brw_inst *inst) 295{ 296 struct string error_msg = { .str = NULL, .len = 0 }; 297 298 ERROR_IF(devinfo->gen >= 11 && brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16, 299 "Align16 not supported"); 300 301 return error_msg; 302} 303 304static bool 305inst_uses_src_acc(const struct gen_device_info *devinfo, const brw_inst *inst) 306{ 307 /* Check instructions that use implicit accumulator sources */ 308 switch (brw_inst_opcode(devinfo, inst)) { 309 case BRW_OPCODE_MAC: 310 case BRW_OPCODE_MACH: 311 case BRW_OPCODE_SADA2: 312 return true; 313 } 314 315 /* FIXME: support 3-src instructions */ 316 unsigned num_sources = num_sources_from_inst(devinfo, inst); 317 assert(num_sources < 3); 318 319 return src0_is_acc(devinfo, inst) || (num_sources > 1 && src1_is_acc(devinfo, inst)); 320} 321 322static struct string 323send_restrictions(const struct gen_device_info *devinfo, 324 const brw_inst *inst) 325{ 326 struct string error_msg = { .str = NULL, .len = 0 }; 327 328 if (inst_is_split_send(devinfo, inst)) { 329 ERROR_IF(brw_inst_send_src1_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE && 330 brw_inst_send_src1_reg_nr(devinfo, inst) != BRW_ARF_NULL, 331 "src1 of split send must be a GRF or NULL"); 332 333 ERROR_IF(brw_inst_eot(devinfo, inst) && 334 brw_inst_src0_da_reg_nr(devinfo, inst) < 112, 335 "send with EOT 
must use g112-g127"); 336 ERROR_IF(brw_inst_eot(devinfo, inst) && 337 brw_inst_send_src1_reg_file(devinfo, inst) == BRW_GENERAL_REGISTER_FILE && 338 brw_inst_send_src1_reg_nr(devinfo, inst) < 112, 339 "send with EOT must use g112-g127"); 340 341 if (brw_inst_send_src1_reg_file(devinfo, inst) == BRW_GENERAL_REGISTER_FILE) { 342 /* Assume minimums if we don't know */ 343 unsigned mlen = 1; 344 if (!brw_inst_send_sel_reg32_desc(devinfo, inst)) { 345 const uint32_t desc = brw_inst_send_desc(devinfo, inst); 346 mlen = brw_message_desc_mlen(devinfo, desc); 347 } 348 349 unsigned ex_mlen = 1; 350 if (!brw_inst_send_sel_reg32_ex_desc(devinfo, inst)) { 351 const uint32_t ex_desc = brw_inst_send_ex_desc(devinfo, inst); 352 ex_mlen = brw_message_ex_desc_ex_mlen(devinfo, ex_desc); 353 } 354 const unsigned src0_reg_nr = brw_inst_src0_da_reg_nr(devinfo, inst); 355 const unsigned src1_reg_nr = brw_inst_send_src1_reg_nr(devinfo, inst); 356 ERROR_IF((src0_reg_nr <= src1_reg_nr && 357 src1_reg_nr < src0_reg_nr + mlen) || 358 (src1_reg_nr <= src0_reg_nr && 359 src0_reg_nr < src1_reg_nr + ex_mlen), 360 "split send payloads must not overlap"); 361 } 362 } else if (inst_is_send(devinfo, inst)) { 363 ERROR_IF(brw_inst_src0_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT, 364 "send must use direct addressing"); 365 366 if (devinfo->gen >= 7) { 367 ERROR_IF(!src0_is_grf(devinfo, inst), "send from non-GRF"); 368 ERROR_IF(brw_inst_eot(devinfo, inst) && 369 brw_inst_src0_da_reg_nr(devinfo, inst) < 112, 370 "send with EOT must use g112-g127"); 371 } 372 373 if (devinfo->gen >= 8) { 374 ERROR_IF(!dst_is_null(devinfo, inst) && 375 (brw_inst_dst_da_reg_nr(devinfo, inst) + 376 brw_inst_rlen(devinfo, inst) > 127) && 377 (brw_inst_src0_da_reg_nr(devinfo, inst) + 378 brw_inst_mlen(devinfo, inst) > 379 brw_inst_dst_da_reg_nr(devinfo, inst)), 380 "r127 must not be used for return address when there is " 381 "a src and dest overlap"); 382 } 383 } 384 385 return error_msg; 386} 387 388static bool 
389is_unsupported_inst(const struct gen_device_info *devinfo, 390 const brw_inst *inst) 391{ 392 return brw_opcode_desc(devinfo, brw_inst_opcode(devinfo, inst)) == NULL; 393} 394 395/** 396 * Returns whether a combination of two types would qualify as mixed float 397 * operation mode 398 */ 399static inline bool 400types_are_mixed_float(enum brw_reg_type t0, enum brw_reg_type t1) 401{ 402 return (t0 == BRW_REGISTER_TYPE_F && t1 == BRW_REGISTER_TYPE_HF) || 403 (t1 == BRW_REGISTER_TYPE_F && t0 == BRW_REGISTER_TYPE_HF); 404} 405 406static enum brw_reg_type 407execution_type_for_type(enum brw_reg_type type) 408{ 409 switch (type) { 410 case BRW_REGISTER_TYPE_NF: 411 case BRW_REGISTER_TYPE_DF: 412 case BRW_REGISTER_TYPE_F: 413 case BRW_REGISTER_TYPE_HF: 414 return type; 415 416 case BRW_REGISTER_TYPE_VF: 417 return BRW_REGISTER_TYPE_F; 418 419 case BRW_REGISTER_TYPE_Q: 420 case BRW_REGISTER_TYPE_UQ: 421 return BRW_REGISTER_TYPE_Q; 422 423 case BRW_REGISTER_TYPE_D: 424 case BRW_REGISTER_TYPE_UD: 425 return BRW_REGISTER_TYPE_D; 426 427 case BRW_REGISTER_TYPE_W: 428 case BRW_REGISTER_TYPE_UW: 429 case BRW_REGISTER_TYPE_B: 430 case BRW_REGISTER_TYPE_UB: 431 case BRW_REGISTER_TYPE_V: 432 case BRW_REGISTER_TYPE_UV: 433 return BRW_REGISTER_TYPE_W; 434 } 435 unreachable("not reached"); 436} 437 438/** 439 * Returns the execution type of an instruction \p inst 440 */ 441static enum brw_reg_type 442execution_type(const struct gen_device_info *devinfo, const brw_inst *inst) 443{ 444 unsigned num_sources = num_sources_from_inst(devinfo, inst); 445 enum brw_reg_type src0_exec_type, src1_exec_type; 446 447 /* Execution data type is independent of destination data type, except in 448 * mixed F/HF instructions. 
449 */ 450 enum brw_reg_type dst_exec_type = brw_inst_dst_type(devinfo, inst); 451 452 src0_exec_type = execution_type_for_type(brw_inst_src0_type(devinfo, inst)); 453 if (num_sources == 1) { 454 if (src0_exec_type == BRW_REGISTER_TYPE_HF) 455 return dst_exec_type; 456 return src0_exec_type; 457 } 458 459 src1_exec_type = execution_type_for_type(brw_inst_src1_type(devinfo, inst)); 460 if (types_are_mixed_float(src0_exec_type, src1_exec_type) || 461 types_are_mixed_float(src0_exec_type, dst_exec_type) || 462 types_are_mixed_float(src1_exec_type, dst_exec_type)) { 463 return BRW_REGISTER_TYPE_F; 464 } 465 466 if (src0_exec_type == src1_exec_type) 467 return src0_exec_type; 468 469 /* Mixed operand types where one is float is float on Gen < 6 470 * (and not allowed on later platforms) 471 */ 472 if (devinfo->gen < 6 && 473 (src0_exec_type == BRW_REGISTER_TYPE_F || 474 src1_exec_type == BRW_REGISTER_TYPE_F)) 475 return BRW_REGISTER_TYPE_F; 476 477 if (src0_exec_type == BRW_REGISTER_TYPE_Q || 478 src1_exec_type == BRW_REGISTER_TYPE_Q) 479 return BRW_REGISTER_TYPE_Q; 480 481 if (src0_exec_type == BRW_REGISTER_TYPE_D || 482 src1_exec_type == BRW_REGISTER_TYPE_D) 483 return BRW_REGISTER_TYPE_D; 484 485 if (src0_exec_type == BRW_REGISTER_TYPE_W || 486 src1_exec_type == BRW_REGISTER_TYPE_W) 487 return BRW_REGISTER_TYPE_W; 488 489 if (src0_exec_type == BRW_REGISTER_TYPE_DF || 490 src1_exec_type == BRW_REGISTER_TYPE_DF) 491 return BRW_REGISTER_TYPE_DF; 492 493 unreachable("not reached"); 494} 495 496/** 497 * Returns whether a region is packed 498 * 499 * A region is packed if its elements are adjacent in memory, with no 500 * intervening space, no overlap, and no replicated values. 
501 */ 502static bool 503is_packed(unsigned vstride, unsigned width, unsigned hstride) 504{ 505 if (vstride == width) { 506 if (vstride == 1) { 507 return hstride == 0; 508 } else { 509 return hstride == 1; 510 } 511 } 512 513 return false; 514} 515 516/** 517 * Returns whether an instruction is an explicit or implicit conversion 518 * to/from half-float. 519 */ 520static bool 521is_half_float_conversion(const struct gen_device_info *devinfo, 522 const brw_inst *inst) 523{ 524 enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst); 525 526 unsigned num_sources = num_sources_from_inst(devinfo, inst); 527 enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst); 528 529 if (dst_type != src0_type && 530 (dst_type == BRW_REGISTER_TYPE_HF || src0_type == BRW_REGISTER_TYPE_HF)) { 531 return true; 532 } else if (num_sources > 1) { 533 enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst); 534 return dst_type != src1_type && 535 (dst_type == BRW_REGISTER_TYPE_HF || 536 src1_type == BRW_REGISTER_TYPE_HF); 537 } 538 539 return false; 540} 541 542/* 543 * Returns whether an instruction is using mixed float operation mode 544 */ 545static bool 546is_mixed_float(const struct gen_device_info *devinfo, const brw_inst *inst) 547{ 548 if (devinfo->gen < 8) 549 return false; 550 551 if (inst_is_send(devinfo, inst)) 552 return false; 553 554 unsigned opcode = brw_inst_opcode(devinfo, inst); 555 const struct opcode_desc *desc = brw_opcode_desc(devinfo, opcode); 556 if (desc->ndst == 0) 557 return false; 558 559 /* FIXME: support 3-src instructions */ 560 unsigned num_sources = num_sources_from_inst(devinfo, inst); 561 assert(num_sources < 3); 562 563 enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst); 564 enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst); 565 566 if (num_sources == 1) 567 return types_are_mixed_float(src0_type, dst_type); 568 569 enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst); 570 571 return 
types_are_mixed_float(src0_type, src1_type) || 572 types_are_mixed_float(src0_type, dst_type) || 573 types_are_mixed_float(src1_type, dst_type); 574} 575 576/** 577 * Returns whether an instruction is an explicit or implicit conversion 578 * to/from byte. 579 */ 580static bool 581is_byte_conversion(const struct gen_device_info *devinfo, 582 const brw_inst *inst) 583{ 584 enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst); 585 586 unsigned num_sources = num_sources_from_inst(devinfo, inst); 587 enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst); 588 589 if (dst_type != src0_type && 590 (type_sz(dst_type) == 1 || type_sz(src0_type) == 1)) { 591 return true; 592 } else if (num_sources > 1) { 593 enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst); 594 return dst_type != src1_type && 595 (type_sz(dst_type) == 1 || type_sz(src1_type) == 1); 596 } 597 598 return false; 599} 600 601/** 602 * Checks restrictions listed in "General Restrictions Based on Operand Types" 603 * in the "Register Region Restrictions" section. 604 */ 605static struct string 606general_restrictions_based_on_operand_types(const struct gen_device_info *devinfo, 607 const brw_inst *inst) 608{ 609 const struct opcode_desc *desc = 610 brw_opcode_desc(devinfo, brw_inst_opcode(devinfo, inst)); 611 unsigned num_sources = num_sources_from_inst(devinfo, inst); 612 unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst); 613 struct string error_msg = { .str = NULL, .len = 0 }; 614 615 if (devinfo->gen >= 11) { 616 if (num_sources == 3) { 617 ERROR_IF(brw_reg_type_to_size(brw_inst_3src_a1_src1_type(devinfo, inst)) == 1 || 618 brw_reg_type_to_size(brw_inst_3src_a1_src2_type(devinfo, inst)) == 1, 619 "Byte data type is not supported for src1/2 register regioning. 
This includes " 620 "byte broadcast as well."); 621 } 622 if (num_sources == 2) { 623 ERROR_IF(brw_reg_type_to_size(brw_inst_src1_type(devinfo, inst)) == 1, 624 "Byte data type is not supported for src1 register regioning. This includes " 625 "byte broadcast as well."); 626 } 627 } 628 629 if (num_sources == 3) 630 return error_msg; 631 632 if (inst_is_send(devinfo, inst)) 633 return error_msg; 634 635 if (exec_size == 1) 636 return error_msg; 637 638 if (desc->ndst == 0) 639 return error_msg; 640 641 /* The PRMs say: 642 * 643 * Where n is the largest element size in bytes for any source or 644 * destination operand type, ExecSize * n must be <= 64. 645 * 646 * But we do not attempt to enforce it, because it is implied by other 647 * rules: 648 * 649 * - that the destination stride must match the execution data type 650 * - sources may not span more than two adjacent GRF registers 651 * - destination may not span more than two adjacent GRF registers 652 * 653 * In fact, checking it would weaken testing of the other rules. 654 */ 655 656 unsigned dst_stride = STRIDE(brw_inst_dst_hstride(devinfo, inst)); 657 enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst); 658 bool dst_type_is_byte = 659 brw_inst_dst_type(devinfo, inst) == BRW_REGISTER_TYPE_B || 660 brw_inst_dst_type(devinfo, inst) == BRW_REGISTER_TYPE_UB; 661 662 if (dst_type_is_byte) { 663 if (is_packed(exec_size * dst_stride, exec_size, dst_stride)) { 664 if (!inst_is_raw_move(devinfo, inst)) 665 ERROR("Only raw MOV supports a packed-byte destination"); 666 return error_msg; 667 } 668 } 669 670 unsigned exec_type = execution_type(devinfo, inst); 671 unsigned exec_type_size = brw_reg_type_to_size(exec_type); 672 unsigned dst_type_size = brw_reg_type_to_size(dst_type); 673 674 /* On IVB/BYT, region parameters and execution size for DF are in terms of 675 * 32-bit elements, so they are doubled. For evaluating the validity of an 676 * instruction, we halve them. 
677 */ 678 if (devinfo->gen == 7 && !devinfo->is_haswell && 679 exec_type_size == 8 && dst_type_size == 4) 680 dst_type_size = 8; 681 682 if (is_byte_conversion(devinfo, inst)) { 683 /* From the BDW+ PRM, Volume 2a, Command Reference, Instructions - MOV: 684 * 685 * "There is no direct conversion from B/UB to DF or DF to B/UB. 686 * There is no direct conversion from B/UB to Q/UQ or Q/UQ to B/UB." 687 * 688 * Even if these restrictions are listed for the MOV instruction, we 689 * validate this more generally, since there is the possibility 690 * of implicit conversions from other instructions. 691 */ 692 enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst); 693 enum brw_reg_type src1_type = num_sources > 1 ? 694 brw_inst_src1_type(devinfo, inst) : 0; 695 696 ERROR_IF(type_sz(dst_type) == 1 && 697 (type_sz(src0_type) == 8 || 698 (num_sources > 1 && type_sz(src1_type) == 8)), 699 "There are no direct conversions between 64-bit types and B/UB"); 700 701 ERROR_IF(type_sz(dst_type) == 8 && 702 (type_sz(src0_type) == 1 || 703 (num_sources > 1 && type_sz(src1_type) == 1)), 704 "There are no direct conversions between 64-bit types and B/UB"); 705 } 706 707 if (is_half_float_conversion(devinfo, inst)) { 708 /** 709 * A helper to validate used in the validation of the following restriction 710 * from the BDW+ PRM, Volume 2a, Command Reference, Instructions - MOV: 711 * 712 * "There is no direct conversion from HF to DF or DF to HF. 713 * There is no direct conversion from HF to Q/UQ or Q/UQ to HF." 714 * 715 * Even if these restrictions are listed for the MOV instruction, we 716 * validate this more generally, since there is the possibility 717 * of implicit conversions from other instructions, such us implicit 718 * conversion from integer to HF with the ADD instruction in SKL+. 719 */ 720 enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst); 721 enum brw_reg_type src1_type = num_sources > 1 ? 
722 brw_inst_src1_type(devinfo, inst) : 0; 723 ERROR_IF(dst_type == BRW_REGISTER_TYPE_HF && 724 (type_sz(src0_type) == 8 || 725 (num_sources > 1 && type_sz(src1_type) == 8)), 726 "There are no direct conversions between 64-bit types and HF"); 727 728 ERROR_IF(type_sz(dst_type) == 8 && 729 (src0_type == BRW_REGISTER_TYPE_HF || 730 (num_sources > 1 && src1_type == BRW_REGISTER_TYPE_HF)), 731 "There are no direct conversions between 64-bit types and HF"); 732 733 /* From the BDW+ PRM: 734 * 735 * "Conversion between Integer and HF (Half Float) must be 736 * DWord-aligned and strided by a DWord on the destination." 737 * 738 * Also, the above restrictions seems to be expanded on CHV and SKL+ by: 739 * 740 * "There is a relaxed alignment rule for word destinations. When 741 * the destination type is word (UW, W, HF), destination data types 742 * can be aligned to either the lowest word or the second lowest 743 * word of the execution channel. This means the destination data 744 * words can be either all in the even word locations or all in the 745 * odd word locations." 746 * 747 * We do not implement the second rule as is though, since empirical 748 * testing shows inconsistencies: 749 * - It suggests that packed 16-bit is not allowed, which is not true. 750 * - It suggests that conversions from Q/DF to W (which need to be 751 * 64-bit aligned on the destination) are not possible, which is 752 * not true. 753 * 754 * So from this rule we only validate the implication that conversions 755 * from F to HF need to be DWord strided (except in Align1 mixed 756 * float mode where packed fp16 destination is allowed so long as the 757 * destination is oword-aligned). 758 * 759 * Finally, we only validate this for Align1 because Align16 always 760 * requires packed destinations, so these restrictions can't possibly 761 * apply to Align16 mode. 
762 */ 763 if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { 764 if ((dst_type == BRW_REGISTER_TYPE_HF && 765 (brw_reg_type_is_integer(src0_type) || 766 (num_sources > 1 && brw_reg_type_is_integer(src1_type)))) || 767 (brw_reg_type_is_integer(dst_type) && 768 (src0_type == BRW_REGISTER_TYPE_HF || 769 (num_sources > 1 && src1_type == BRW_REGISTER_TYPE_HF)))) { 770 ERROR_IF(dst_stride * dst_type_size != 4, 771 "Conversions between integer and half-float must be " 772 "strided by a DWord on the destination"); 773 774 unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst); 775 ERROR_IF(subreg % 4 != 0, 776 "Conversions between integer and half-float must be " 777 "aligned to a DWord on the destination"); 778 } else if ((devinfo->is_cherryview || devinfo->gen >= 9) && 779 dst_type == BRW_REGISTER_TYPE_HF) { 780 unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst); 781 ERROR_IF(dst_stride != 2 && 782 !(is_mixed_float(devinfo, inst) && 783 dst_stride == 1 && subreg % 16 == 0), 784 "Conversions to HF must have either all words in even " 785 "word locations or all words in odd word locations or " 786 "be mixed-float with Oword-aligned packed destination"); 787 } 788 } 789 } 790 791 /* There are special regioning rules for mixed-float mode in CHV and SKL that 792 * override the general rule for the ratio of sizes of the destination type 793 * and the execution type. We will add validation for those in a later patch. 
794 */ 795 bool validate_dst_size_and_exec_size_ratio = 796 !is_mixed_float(devinfo, inst) || 797 !(devinfo->is_cherryview || devinfo->gen >= 9); 798 799 if (validate_dst_size_and_exec_size_ratio && 800 exec_type_size > dst_type_size) { 801 if (!(dst_type_is_byte && inst_is_raw_move(devinfo, inst))) { 802 ERROR_IF(dst_stride * dst_type_size != exec_type_size, 803 "Destination stride must be equal to the ratio of the sizes " 804 "of the execution data type to the destination type"); 805 } 806 807 unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst); 808 809 if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1 && 810 brw_inst_dst_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT) { 811 /* The i965 PRM says: 812 * 813 * Implementation Restriction: The relaxed alignment rule for byte 814 * destination (#10.5) is not supported. 815 */ 816 if ((devinfo->gen > 4 || devinfo->is_g4x) && dst_type_is_byte) { 817 ERROR_IF(subreg % exec_type_size != 0 && 818 subreg % exec_type_size != 1, 819 "Destination subreg must be aligned to the size of the " 820 "execution data type (or to the next lowest byte for byte " 821 "destinations)"); 822 } else { 823 ERROR_IF(subreg % exec_type_size != 0, 824 "Destination subreg must be aligned to the size of the " 825 "execution data type"); 826 } 827 } 828 } 829 830 return error_msg; 831} 832 833/** 834 * Checks restrictions listed in "General Restrictions on Regioning Parameters" 835 * in the "Register Region Restrictions" section. 
836 */ 837static struct string 838general_restrictions_on_region_parameters(const struct gen_device_info *devinfo, 839 const brw_inst *inst) 840{ 841 const struct opcode_desc *desc = 842 brw_opcode_desc(devinfo, brw_inst_opcode(devinfo, inst)); 843 unsigned num_sources = num_sources_from_inst(devinfo, inst); 844 unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst); 845 struct string error_msg = { .str = NULL, .len = 0 }; 846 847 if (num_sources == 3) 848 return (struct string){}; 849 850 /* Split sends don't have the bits in the instruction to encode regions so 851 * there's nothing to check. 852 */ 853 if (inst_is_split_send(devinfo, inst)) 854 return (struct string){}; 855 856 if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16) { 857 if (desc->ndst != 0 && !dst_is_null(devinfo, inst)) 858 ERROR_IF(brw_inst_dst_hstride(devinfo, inst) != BRW_HORIZONTAL_STRIDE_1, 859 "Destination Horizontal Stride must be 1"); 860 861 if (num_sources >= 1) { 862 if (devinfo->is_haswell || devinfo->gen >= 8) { 863 ERROR_IF(brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE && 864 brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 && 865 brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_2 && 866 brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4, 867 "In Align16 mode, only VertStride of 0, 2, or 4 is allowed"); 868 } else { 869 ERROR_IF(brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE && 870 brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 && 871 brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4, 872 "In Align16 mode, only VertStride of 0 or 4 is allowed"); 873 } 874 } 875 876 if (num_sources == 2) { 877 if (devinfo->is_haswell || devinfo->gen >= 8) { 878 ERROR_IF(brw_inst_src1_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE && 879 brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 && 880 brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_2 && 881 
brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4, 882 "In Align16 mode, only VertStride of 0, 2, or 4 is allowed"); 883 } else { 884 ERROR_IF(brw_inst_src1_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE && 885 brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 && 886 brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4, 887 "In Align16 mode, only VertStride of 0 or 4 is allowed"); 888 } 889 } 890 891 return error_msg; 892 } 893 894 for (unsigned i = 0; i < num_sources; i++) { 895 unsigned vstride, width, hstride, element_size, subreg; 896 enum brw_reg_type type; 897 898#define DO_SRC(n) \ 899 if (brw_inst_src ## n ## _reg_file(devinfo, inst) == \ 900 BRW_IMMEDIATE_VALUE) \ 901 continue; \ 902 \ 903 vstride = STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst)); \ 904 width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst)); \ 905 hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst)); \ 906 type = brw_inst_src ## n ## _type(devinfo, inst); \ 907 element_size = brw_reg_type_to_size(type); \ 908 subreg = brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst) 909 910 if (i == 0) { 911 DO_SRC(0); 912 } else { 913 DO_SRC(1); 914 } 915#undef DO_SRC 916 917 /* On IVB/BYT, region parameters and execution size for DF are in terms of 918 * 32-bit elements, so they are doubled. For evaluating the validity of an 919 * instruction, we halve them. 920 */ 921 if (devinfo->gen == 7 && !devinfo->is_haswell && 922 element_size == 8) 923 element_size = 4; 924 925 /* ExecSize must be greater than or equal to Width. */ 926 ERROR_IF(exec_size < width, "ExecSize must be greater than or equal " 927 "to Width"); 928 929 /* If ExecSize = Width and HorzStride ≠ 0, 930 * VertStride must be set to Width * HorzStride. 
931 */ 932 if (exec_size == width && hstride != 0) { 933 ERROR_IF(vstride != width * hstride, 934 "If ExecSize = Width and HorzStride ≠ 0, " 935 "VertStride must be set to Width * HorzStride"); 936 } 937 938 /* If Width = 1, HorzStride must be 0 regardless of the values of 939 * ExecSize and VertStride. 940 */ 941 if (width == 1) { 942 ERROR_IF(hstride != 0, 943 "If Width = 1, HorzStride must be 0 regardless " 944 "of the values of ExecSize and VertStride"); 945 } 946 947 /* If ExecSize = Width = 1, both VertStride and HorzStride must be 0. */ 948 if (exec_size == 1 && width == 1) { 949 ERROR_IF(vstride != 0 || hstride != 0, 950 "If ExecSize = Width = 1, both VertStride " 951 "and HorzStride must be 0"); 952 } 953 954 /* If VertStride = HorzStride = 0, Width must be 1 regardless of the 955 * value of ExecSize. 956 */ 957 if (vstride == 0 && hstride == 0) { 958 ERROR_IF(width != 1, 959 "If VertStride = HorzStride = 0, Width must be " 960 "1 regardless of the value of ExecSize"); 961 } 962 963 /* VertStride must be used to cross GRF register boundaries. This rule 964 * implies that elements within a 'Width' cannot cross GRF boundaries. 965 */ 966 const uint64_t mask = (1ULL << element_size) - 1; 967 unsigned rowbase = subreg; 968 969 for (int y = 0; y < exec_size / width; y++) { 970 uint64_t access_mask = 0; 971 unsigned offset = rowbase; 972 973 for (int x = 0; x < width; x++) { 974 access_mask |= mask << offset; 975 offset += hstride * element_size; 976 } 977 978 rowbase += vstride * element_size; 979 980 if ((uint32_t)access_mask != 0 && (access_mask >> 32) != 0) { 981 ERROR("VertStride must be used to cross GRF register boundaries"); 982 break; 983 } 984 } 985 } 986 987 /* Dst.HorzStride must not be 0. 
*/ 988 if (desc->ndst != 0 && !dst_is_null(devinfo, inst)) { 989 ERROR_IF(brw_inst_dst_hstride(devinfo, inst) == BRW_HORIZONTAL_STRIDE_0, 990 "Destination Horizontal Stride must not be 0"); 991 } 992 993 return error_msg; 994} 995 996static struct string 997special_restrictions_for_mixed_float_mode(const struct gen_device_info *devinfo, 998 const brw_inst *inst) 999{ 1000 struct string error_msg = { .str = NULL, .len = 0 }; 1001 1002 const unsigned opcode = brw_inst_opcode(devinfo, inst); 1003 const unsigned num_sources = num_sources_from_inst(devinfo, inst); 1004 if (num_sources >= 3) 1005 return error_msg; 1006 1007 if (!is_mixed_float(devinfo, inst)) 1008 return error_msg; 1009 1010 unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst); 1011 bool is_align16 = brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16; 1012 1013 enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst); 1014 enum brw_reg_type src1_type = num_sources > 1 ? 1015 brw_inst_src1_type(devinfo, inst) : 0; 1016 enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst); 1017 1018 unsigned dst_stride = STRIDE(brw_inst_dst_hstride(devinfo, inst)); 1019 bool dst_is_packed = is_packed(exec_size * dst_stride, exec_size, dst_stride); 1020 1021 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode 1022 * Float Operations: 1023 * 1024 * "Indirect addressing on source is not supported when source and 1025 * destination data types are mixed float." 1026 */ 1027 ERROR_IF(brw_inst_src0_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT || 1028 (num_sources > 1 && 1029 brw_inst_src1_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT), 1030 "Indirect addressing on source is not supported when source and " 1031 "destination data types are mixed float"); 1032 1033 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode 1034 * Float Operations: 1035 * 1036 * "No SIMD16 in mixed mode when destination is f32. Instruction 1037 * execution size must be no more than 8." 
1038 */ 1039 ERROR_IF(exec_size > 8 && dst_type == BRW_REGISTER_TYPE_F, 1040 "Mixed float mode with 32-bit float destination is limited " 1041 "to SIMD8"); 1042 1043 if (is_align16) { 1044 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode 1045 * Float Operations: 1046 * 1047 * "In Align16 mode, when half float and float data types are mixed 1048 * between source operands OR between source and destination operands, 1049 * the register content are assumed to be packed." 1050 * 1051 * Since Align16 doesn't have a concept of horizontal stride (or width), 1052 * it means that vertical stride must always be 4, since 0 and 2 would 1053 * lead to replicated data, and any other value is disallowed in Align16. 1054 */ 1055 ERROR_IF(brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4, 1056 "Align16 mixed float mode assumes packed data (vstride must be 4"); 1057 1058 ERROR_IF(num_sources >= 2 && 1059 brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4, 1060 "Align16 mixed float mode assumes packed data (vstride must be 4"); 1061 1062 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode 1063 * Float Operations: 1064 * 1065 * "For Align16 mixed mode, both input and output packed f16 data 1066 * must be oword aligned, no oword crossing in packed f16." 1067 * 1068 * The previous rule requires that Align16 operands are always packed, 1069 * and since there is only one bit for Align16 subnr, which represents 1070 * offsets 0B and 16B, this rule is always enforced and we don't need to 1071 * validate it. 1072 */ 1073 1074 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode 1075 * Float Operations: 1076 * 1077 * "No SIMD16 in mixed mode when destination is packed f16 for both 1078 * Align1 and Align16." 
1079 * 1080 * And: 1081 * 1082 * "In Align16 mode, when half float and float data types are mixed 1083 * between source operands OR between source and destination operands, 1084 * the register content are assumed to be packed." 1085 * 1086 * Which implies that SIMD16 is not available in Align16. This is further 1087 * confirmed by: 1088 * 1089 * "For Align16 mixed mode, both input and output packed f16 data 1090 * must be oword aligned, no oword crossing in packed f16" 1091 * 1092 * Since oword-aligned packed f16 data would cross oword boundaries when 1093 * the execution size is larger than 8. 1094 */ 1095 ERROR_IF(exec_size > 8, "Align16 mixed float mode is limited to SIMD8"); 1096 1097 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode 1098 * Float Operations: 1099 * 1100 * "No accumulator read access for Align16 mixed float." 1101 */ 1102 ERROR_IF(inst_uses_src_acc(devinfo, inst), 1103 "No accumulator read access for Align16 mixed float"); 1104 } else { 1105 assert(!is_align16); 1106 1107 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode 1108 * Float Operations: 1109 * 1110 * "No SIMD16 in mixed mode when destination is packed f16 for both 1111 * Align1 and Align16." 
1112 */ 1113 ERROR_IF(exec_size > 8 && dst_is_packed && 1114 dst_type == BRW_REGISTER_TYPE_HF, 1115 "Align1 mixed float mode is limited to SIMD8 when destination " 1116 "is packed half-float"); 1117 1118 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode 1119 * Float Operations: 1120 * 1121 * "Math operations for mixed mode: 1122 * - In Align1, f16 inputs need to be strided" 1123 */ 1124 if (opcode == BRW_OPCODE_MATH) { 1125 if (src0_type == BRW_REGISTER_TYPE_HF) { 1126 ERROR_IF(STRIDE(brw_inst_src0_hstride(devinfo, inst)) <= 1, 1127 "Align1 mixed mode math needs strided half-float inputs"); 1128 } 1129 1130 if (num_sources >= 2 && src1_type == BRW_REGISTER_TYPE_HF) { 1131 ERROR_IF(STRIDE(brw_inst_src1_hstride(devinfo, inst)) <= 1, 1132 "Align1 mixed mode math needs strided half-float inputs"); 1133 } 1134 } 1135 1136 if (dst_type == BRW_REGISTER_TYPE_HF && dst_stride == 1) { 1137 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode 1138 * Float Operations: 1139 * 1140 * "In Align1, destination stride can be smaller than execution 1141 * type. When destination is stride of 1, 16 bit packed data is 1142 * updated on the destination. However, output packed f16 data 1143 * must be oword aligned, no oword crossing in packed f16." 1144 * 1145 * The requirement of not crossing oword boundaries for 16-bit oword 1146 * aligned data means that execution size is limited to 8. 
1147 */ 1148 unsigned subreg; 1149 if (brw_inst_dst_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT) 1150 subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst); 1151 else 1152 subreg = brw_inst_dst_ia_subreg_nr(devinfo, inst); 1153 ERROR_IF(subreg % 16 != 0, 1154 "Align1 mixed mode packed half-float output must be " 1155 "oword aligned"); 1156 ERROR_IF(exec_size > 8, 1157 "Align1 mixed mode packed half-float output must not " 1158 "cross oword boundaries (max exec size is 8)"); 1159 1160 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode 1161 * Float Operations: 1162 * 1163 * "When source is float or half float from accumulator register and 1164 * destination is half float with a stride of 1, the source must 1165 * register aligned. i.e., source must have offset zero." 1166 * 1167 * Align16 mixed float mode doesn't allow accumulator access on sources, 1168 * so we only need to check this for Align1. 1169 */ 1170 if (src0_is_acc(devinfo, inst) && 1171 (src0_type == BRW_REGISTER_TYPE_F || 1172 src0_type == BRW_REGISTER_TYPE_HF)) { 1173 ERROR_IF(brw_inst_src0_da1_subreg_nr(devinfo, inst) != 0, 1174 "Mixed float mode requires register-aligned accumulator " 1175 "source reads when destination is packed half-float"); 1176 1177 } 1178 1179 if (num_sources > 1 && 1180 src1_is_acc(devinfo, inst) && 1181 (src1_type == BRW_REGISTER_TYPE_F || 1182 src1_type == BRW_REGISTER_TYPE_HF)) { 1183 ERROR_IF(brw_inst_src1_da1_subreg_nr(devinfo, inst) != 0, 1184 "Mixed float mode requires register-aligned accumulator " 1185 "source reads when destination is packed half-float"); 1186 } 1187 } 1188 1189 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode 1190 * Float Operations: 1191 * 1192 * "No swizzle is allowed when an accumulator is used as an implicit 1193 * source or an explicit source in an instruction. i.e. when 1194 * destination is half float with an implicit accumulator source, 1195 * destination stride needs to be 2." 
1196 * 1197 * FIXME: it is not quite clear what the first sentence actually means 1198 * or its link to the implication described after it, so we only 1199 * validate the explicit implication, which is clearly described. 1200 */ 1201 if (dst_type == BRW_REGISTER_TYPE_HF && 1202 inst_uses_src_acc(devinfo, inst)) { 1203 ERROR_IF(dst_stride != 2, 1204 "Mixed float mode with implicit/explicit accumulator " 1205 "source and half-float destination requires a stride " 1206 "of 2 on the destination"); 1207 } 1208 } 1209 1210 return error_msg; 1211} 1212 1213/** 1214 * Creates an \p access_mask for an \p exec_size, \p element_size, and a region 1215 * 1216 * An \p access_mask is a 32-element array of uint64_t, where each uint64_t is 1217 * a bitmask of bytes accessed by the region. 1218 * 1219 * For instance the access mask of the source gX.1<4,2,2>F in an exec_size = 4 1220 * instruction would be 1221 * 1222 * access_mask[0] = 0x00000000000000F0 1223 * access_mask[1] = 0x000000000000F000 1224 * access_mask[2] = 0x0000000000F00000 1225 * access_mask[3] = 0x00000000F0000000 1226 * access_mask[4-31] = 0 1227 * 1228 * because the first execution channel accesses bytes 7-4 and the second 1229 * execution channel accesses bytes 15-12, etc. 
/* align1_access_mask() writes one uint64_t byte mask per execution channel
 * into \p access_mask: bit N set means byte N of the two-GRF window is
 * touched.  \p subreg is the starting byte offset, \p element_size the
 * element size in bytes, and \p vstride / \p width / \p hstride the region in
 * elements.  Byte offsets are reduced modulo 64 before shifting, because
 * shifting a uint64_t by 64 or more is undefined and callers may pass
 * regions that run past the window.  Entries beyond the channels written are
 * left untouched. */
static void
align1_access_mask(uint64_t access_mask[static 32],
                   unsigned exec_size, unsigned element_size, unsigned subreg,
                   unsigned vstride, unsigned width, unsigned hstride)
{
   const uint64_t mask = (1ULL << element_size) - 1;
   unsigned rowbase = subreg;
   unsigned element = 0;

   for (unsigned y = 0; y < exec_size / width; y++) {
      unsigned offset = rowbase;

      for (unsigned x = 0; x < width; x++) {
         access_mask[element++] = mask << (offset % 64);
         offset += hstride * element_size;
      }

      rowbase += vstride * element_size;
   }

   /* Nothing is written when exec_size < width; otherwise every channel is. */
   assert(element == 0 || element == exec_size);
}

/**
 * Returns the number of registers accessed according to the \p access_mask
 *
 * The result is 2 as soon as any channel has a bit above bit 31, 1 if at
 * least one channel is non-zero, and 0 if every entry is zero.
 */
static int
registers_read(const uint64_t access_mask[static 32])
{
   int regs_read = 0;

   for (unsigned i = 0; i < 32; i++) {
      if (access_mask[i] > 0xFFFFFFFF) {
         return 2;
      } else if (access_mask[i]) {
         regs_read = 1;
      }
   }

   return regs_read;
}

/**
 * Checks restrictions listed in "Region Alignment Rules" in the "Register
 * Region Restrictions" section.
 */
1276 */ 1277static struct string 1278region_alignment_rules(const struct gen_device_info *devinfo, 1279 const brw_inst *inst) 1280{ 1281 const struct opcode_desc *desc = 1282 brw_opcode_desc(devinfo, brw_inst_opcode(devinfo, inst)); 1283 unsigned num_sources = num_sources_from_inst(devinfo, inst); 1284 unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst); 1285 uint64_t dst_access_mask[32], src0_access_mask[32], src1_access_mask[32]; 1286 struct string error_msg = { .str = NULL, .len = 0 }; 1287 1288 if (num_sources == 3) 1289 return (struct string){}; 1290 1291 if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16) 1292 return (struct string){}; 1293 1294 if (inst_is_send(devinfo, inst)) 1295 return (struct string){}; 1296 1297 memset(dst_access_mask, 0, sizeof(dst_access_mask)); 1298 memset(src0_access_mask, 0, sizeof(src0_access_mask)); 1299 memset(src1_access_mask, 0, sizeof(src1_access_mask)); 1300 1301 for (unsigned i = 0; i < num_sources; i++) { 1302 unsigned vstride, width, hstride, element_size, subreg; 1303 enum brw_reg_type type; 1304 1305 /* In Direct Addressing mode, a source cannot span more than 2 adjacent 1306 * GRF registers. 
1307 */ 1308 1309#define DO_SRC(n) \ 1310 if (brw_inst_src ## n ## _address_mode(devinfo, inst) != \ 1311 BRW_ADDRESS_DIRECT) \ 1312 continue; \ 1313 \ 1314 if (brw_inst_src ## n ## _reg_file(devinfo, inst) == \ 1315 BRW_IMMEDIATE_VALUE) \ 1316 continue; \ 1317 \ 1318 vstride = STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst)); \ 1319 width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst)); \ 1320 hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst)); \ 1321 type = brw_inst_src ## n ## _type(devinfo, inst); \ 1322 element_size = brw_reg_type_to_size(type); \ 1323 subreg = brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst); \ 1324 align1_access_mask(src ## n ## _access_mask, \ 1325 exec_size, element_size, subreg, \ 1326 vstride, width, hstride) 1327 1328 if (i == 0) { 1329 DO_SRC(0); 1330 } else { 1331 DO_SRC(1); 1332 } 1333#undef DO_SRC 1334 1335 unsigned num_vstride = exec_size / width; 1336 unsigned num_hstride = width; 1337 unsigned vstride_elements = (num_vstride - 1) * vstride; 1338 unsigned hstride_elements = (num_hstride - 1) * hstride; 1339 unsigned offset = (vstride_elements + hstride_elements) * element_size + 1340 subreg; 1341 ERROR_IF(offset >= 64, 1342 "A source cannot span more than 2 adjacent GRF registers"); 1343 } 1344 1345 if (desc->ndst == 0 || dst_is_null(devinfo, inst)) 1346 return error_msg; 1347 1348 unsigned stride = STRIDE(brw_inst_dst_hstride(devinfo, inst)); 1349 enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst); 1350 unsigned element_size = brw_reg_type_to_size(dst_type); 1351 unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst); 1352 unsigned offset = ((exec_size - 1) * stride * element_size) + subreg; 1353 ERROR_IF(offset >= 64, 1354 "A destination cannot span more than 2 adjacent GRF registers"); 1355 1356 if (error_msg.str) 1357 return error_msg; 1358 1359 /* On IVB/BYT, region parameters and execution size for DF are in terms of 1360 * 32-bit elements, so they are doubled. 
For evaluating the validity of an 1361 * instruction, we halve them. 1362 */ 1363 if (devinfo->gen == 7 && !devinfo->is_haswell && 1364 element_size == 8) 1365 element_size = 4; 1366 1367 align1_access_mask(dst_access_mask, exec_size, element_size, subreg, 1368 exec_size == 1 ? 0 : exec_size * stride, 1369 exec_size == 1 ? 1 : exec_size, 1370 exec_size == 1 ? 0 : stride); 1371 1372 unsigned dst_regs = registers_read(dst_access_mask); 1373 unsigned src0_regs = registers_read(src0_access_mask); 1374 unsigned src1_regs = registers_read(src1_access_mask); 1375 1376 /* The SNB, IVB, HSW, BDW, and CHV PRMs say: 1377 * 1378 * When an instruction has a source region spanning two registers and a 1379 * destination region contained in one register, the number of elements 1380 * must be the same between two sources and one of the following must be 1381 * true: 1382 * 1383 * 1. The destination region is entirely contained in the lower OWord 1384 * of a register. 1385 * 2. The destination region is entirely contained in the upper OWord 1386 * of a register. 1387 * 3. The destination elements are evenly split between the two OWords 1388 * of a register. 
1389 */ 1390 if (devinfo->gen <= 8) { 1391 if (dst_regs == 1 && (src0_regs == 2 || src1_regs == 2)) { 1392 unsigned upper_oword_writes = 0, lower_oword_writes = 0; 1393 1394 for (unsigned i = 0; i < exec_size; i++) { 1395 if (dst_access_mask[i] > 0x0000FFFF) { 1396 upper_oword_writes++; 1397 } else { 1398 assert(dst_access_mask[i] != 0); 1399 lower_oword_writes++; 1400 } 1401 } 1402 1403 ERROR_IF(lower_oword_writes != 0 && 1404 upper_oword_writes != 0 && 1405 upper_oword_writes != lower_oword_writes, 1406 "Writes must be to only one OWord or " 1407 "evenly split between OWords"); 1408 } 1409 } 1410 1411 /* The IVB and HSW PRMs say: 1412 * 1413 * When an instruction has a source region that spans two registers and 1414 * the destination spans two registers, the destination elements must be 1415 * evenly split between the two registers [...] 1416 * 1417 * The SNB PRM contains similar wording (but written in a much more 1418 * confusing manner). 1419 * 1420 * The BDW PRM says: 1421 * 1422 * When destination spans two registers, the source may be one or two 1423 * registers. The destination elements must be evenly split between the 1424 * two registers. 1425 * 1426 * The SKL PRM says: 1427 * 1428 * When destination of MATH instruction spans two registers, the 1429 * destination elements must be evenly split between the two registers. 1430 * 1431 * It is not known whether this restriction applies to KBL other Gens after 1432 * SKL. 1433 */ 1434 if (devinfo->gen <= 8 || 1435 brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MATH) { 1436 1437 /* Nothing explicitly states that on Gen < 8 elements must be evenly 1438 * split between two destination registers in the two exceptional 1439 * source-region-spans-one-register cases, but since Broadwell requires 1440 * evenly split writes regardless of source region, we assume that it was 1441 * an oversight and require it. 
1442 */ 1443 if (dst_regs == 2) { 1444 unsigned upper_reg_writes = 0, lower_reg_writes = 0; 1445 1446 for (unsigned i = 0; i < exec_size; i++) { 1447 if (dst_access_mask[i] > 0xFFFFFFFF) { 1448 upper_reg_writes++; 1449 } else { 1450 assert(dst_access_mask[i] != 0); 1451 lower_reg_writes++; 1452 } 1453 } 1454 1455 ERROR_IF(upper_reg_writes != lower_reg_writes, 1456 "Writes must be evenly split between the two " 1457 "destination registers"); 1458 } 1459 } 1460 1461 /* The IVB and HSW PRMs say: 1462 * 1463 * When an instruction has a source region that spans two registers and 1464 * the destination spans two registers, the destination elements must be 1465 * evenly split between the two registers and each destination register 1466 * must be entirely derived from one source register. 1467 * 1468 * Note: In such cases, the regioning parameters must ensure that the 1469 * offset from the two source registers is the same. 1470 * 1471 * The SNB PRM contains similar wording (but written in a much more 1472 * confusing manner). 1473 * 1474 * There are effectively three rules stated here: 1475 * 1476 * For an instruction with a source and a destination spanning two 1477 * registers, 1478 * 1479 * (1) destination elements must be evenly split between the two 1480 * registers 1481 * (2) all destination elements in a register must be derived 1482 * from one source register 1483 * (3) the offset (i.e. the starting location in each of the two 1484 * registers spanned by a region) must be the same in the two 1485 * registers spanned by a region 1486 * 1487 * It is impossible to violate rule (1) without violating (2) or (3), so we 1488 * do not attempt to validate it. 
1489 */ 1490 if (devinfo->gen <= 7 && dst_regs == 2) { 1491 for (unsigned i = 0; i < num_sources; i++) { 1492#define DO_SRC(n) \ 1493 if (src ## n ## _regs <= 1) \ 1494 continue; \ 1495 \ 1496 for (unsigned i = 0; i < exec_size; i++) { \ 1497 if ((dst_access_mask[i] > 0xFFFFFFFF) != \ 1498 (src ## n ## _access_mask[i] > 0xFFFFFFFF)) { \ 1499 ERROR("Each destination register must be entirely derived " \ 1500 "from one source register"); \ 1501 break; \ 1502 } \ 1503 } \ 1504 \ 1505 unsigned offset_0 = \ 1506 brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst); \ 1507 unsigned offset_1 = offset_0; \ 1508 \ 1509 for (unsigned i = 0; i < exec_size; i++) { \ 1510 if (src ## n ## _access_mask[i] > 0xFFFFFFFF) { \ 1511 offset_1 = __builtin_ctzll(src ## n ## _access_mask[i]) - 32; \ 1512 break; \ 1513 } \ 1514 } \ 1515 \ 1516 ERROR_IF(num_sources == 2 && offset_0 != offset_1, \ 1517 "The offset from the two source registers " \ 1518 "must be the same") 1519 1520 if (i == 0) { 1521 DO_SRC(0); 1522 } else { 1523 DO_SRC(1); 1524 } 1525#undef DO_SRC 1526 } 1527 } 1528 1529 /* The IVB and HSW PRMs say: 1530 * 1531 * When destination spans two registers, the source MUST span two 1532 * registers. The exception to the above rule: 1533 * 1. When source is scalar, the source registers are not 1534 * incremented. 1535 * 2. When source is packed integer Word and destination is packed 1536 * integer DWord, the source register is not incremented by the 1537 * source sub register is incremented. 1538 * 1539 * The SNB PRM does not contain this rule, but the internal documentation 1540 * indicates that it applies to SNB as well. We assume that the rule applies 1541 * to Gen <= 5 although their PRMs do not state it. 1542 * 1543 * While the documentation explicitly says in exception (2) that the 1544 * destination must be an integer DWord, the hardware allows at least a 1545 * float destination type as well. 
We emit such instructions from 1546 * 1547 * fs_visitor::emit_interpolation_setup_gen6 1548 * fs_visitor::emit_fragcoord_interpolation 1549 * 1550 * and have for years with no ill effects. 1551 * 1552 * Additionally the simulator source code indicates that the real condition 1553 * is that the size of the destination type is 4 bytes. 1554 */ 1555 if (devinfo->gen <= 7 && dst_regs == 2) { 1556 enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst); 1557 bool dst_is_packed_dword = 1558 is_packed(exec_size * stride, exec_size, stride) && 1559 brw_reg_type_to_size(dst_type) == 4; 1560 1561 for (unsigned i = 0; i < num_sources; i++) { 1562#define DO_SRC(n) \ 1563 unsigned vstride, width, hstride; \ 1564 vstride = STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst)); \ 1565 width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst)); \ 1566 hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst)); \ 1567 bool src ## n ## _is_packed_word = \ 1568 is_packed(vstride, width, hstride) && \ 1569 (brw_inst_src ## n ## _type(devinfo, inst) == BRW_REGISTER_TYPE_W || \ 1570 brw_inst_src ## n ## _type(devinfo, inst) == BRW_REGISTER_TYPE_UW); \ 1571 \ 1572 ERROR_IF(src ## n ## _regs == 1 && \ 1573 !src ## n ## _has_scalar_region(devinfo, inst) && \ 1574 !(dst_is_packed_dword && src ## n ## _is_packed_word), \ 1575 "When the destination spans two registers, the source must " \ 1576 "span two registers\n" ERROR_INDENT "(exceptions for scalar " \ 1577 "source and packed-word to packed-dword expansion)") 1578 1579 if (i == 0) { 1580 DO_SRC(0); 1581 } else { 1582 DO_SRC(1); 1583 } 1584#undef DO_SRC 1585 } 1586 } 1587 1588 return error_msg; 1589} 1590 1591static struct string 1592vector_immediate_restrictions(const struct gen_device_info *devinfo, 1593 const brw_inst *inst) 1594{ 1595 unsigned num_sources = num_sources_from_inst(devinfo, inst); 1596 struct string error_msg = { .str = NULL, .len = 0 }; 1597 1598 if (num_sources == 3 || num_sources == 0) 1599 return (struct 
string){}; 1600 1601 unsigned file = num_sources == 1 ? 1602 brw_inst_src0_reg_file(devinfo, inst) : 1603 brw_inst_src1_reg_file(devinfo, inst); 1604 if (file != BRW_IMMEDIATE_VALUE) 1605 return (struct string){}; 1606 1607 enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst); 1608 unsigned dst_type_size = brw_reg_type_to_size(dst_type); 1609 unsigned dst_subreg = brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1 ? 1610 brw_inst_dst_da1_subreg_nr(devinfo, inst) : 0; 1611 unsigned dst_stride = STRIDE(brw_inst_dst_hstride(devinfo, inst)); 1612 enum brw_reg_type type = num_sources == 1 ? 1613 brw_inst_src0_type(devinfo, inst) : 1614 brw_inst_src1_type(devinfo, inst); 1615 1616 /* The PRMs say: 1617 * 1618 * When an immediate vector is used in an instruction, the destination 1619 * must be 128-bit aligned with destination horizontal stride equivalent 1620 * to a word for an immediate integer vector (v) and equivalent to a 1621 * DWord for an immediate float vector (vf). 1622 * 1623 * The text has not been updated for the addition of the immediate unsigned 1624 * integer vector type (uv) on SNB, but presumably the same restriction 1625 * applies. 
1626 */ 1627 switch (type) { 1628 case BRW_REGISTER_TYPE_V: 1629 case BRW_REGISTER_TYPE_UV: 1630 case BRW_REGISTER_TYPE_VF: 1631 ERROR_IF(dst_subreg % (128 / 8) != 0, 1632 "Destination must be 128-bit aligned in order to use immediate " 1633 "vector types"); 1634 1635 if (type == BRW_REGISTER_TYPE_VF) { 1636 ERROR_IF(dst_type_size * dst_stride != 4, 1637 "Destination must have stride equivalent to dword in order " 1638 "to use the VF type"); 1639 } else { 1640 ERROR_IF(dst_type_size * dst_stride != 2, 1641 "Destination must have stride equivalent to word in order " 1642 "to use the V or UV type"); 1643 } 1644 break; 1645 default: 1646 break; 1647 } 1648 1649 return error_msg; 1650} 1651 1652static struct string 1653special_requirements_for_handling_double_precision_data_types( 1654 const struct gen_device_info *devinfo, 1655 const brw_inst *inst) 1656{ 1657 unsigned num_sources = num_sources_from_inst(devinfo, inst); 1658 struct string error_msg = { .str = NULL, .len = 0 }; 1659 1660 if (num_sources == 3 || num_sources == 0) 1661 return (struct string){}; 1662 1663 /* Split sends don't have types so there's no doubles there. 
*/ 1664 if (inst_is_split_send(devinfo, inst)) 1665 return (struct string){}; 1666 1667 enum brw_reg_type exec_type = execution_type(devinfo, inst); 1668 unsigned exec_type_size = brw_reg_type_to_size(exec_type); 1669 1670 enum brw_reg_file dst_file = brw_inst_dst_reg_file(devinfo, inst); 1671 enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst); 1672 unsigned dst_type_size = brw_reg_type_to_size(dst_type); 1673 unsigned dst_hstride = STRIDE(brw_inst_dst_hstride(devinfo, inst)); 1674 unsigned dst_reg = brw_inst_dst_da_reg_nr(devinfo, inst); 1675 unsigned dst_subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst); 1676 unsigned dst_address_mode = brw_inst_dst_address_mode(devinfo, inst); 1677 1678 bool is_integer_dword_multiply = 1679 devinfo->gen >= 8 && 1680 brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MUL && 1681 (brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_D || 1682 brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_UD) && 1683 (brw_inst_src1_type(devinfo, inst) == BRW_REGISTER_TYPE_D || 1684 brw_inst_src1_type(devinfo, inst) == BRW_REGISTER_TYPE_UD); 1685 1686 if (dst_type_size != 8 && exec_type_size != 8 && !is_integer_dword_multiply) 1687 return (struct string){}; 1688 1689 for (unsigned i = 0; i < num_sources; i++) { 1690 unsigned vstride, width, hstride, type_size, reg, subreg, address_mode; 1691 bool is_scalar_region; 1692 enum brw_reg_file file; 1693 enum brw_reg_type type; 1694 1695#define DO_SRC(n) \ 1696 if (brw_inst_src ## n ## _reg_file(devinfo, inst) == \ 1697 BRW_IMMEDIATE_VALUE) \ 1698 continue; \ 1699 \ 1700 is_scalar_region = src ## n ## _has_scalar_region(devinfo, inst); \ 1701 vstride = STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst)); \ 1702 width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst)); \ 1703 hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst)); \ 1704 file = brw_inst_src ## n ## _reg_file(devinfo, inst); \ 1705 type = brw_inst_src ## n ## _type(devinfo, inst); \ 1706 type_size = 
brw_reg_type_to_size(type); \ 1707 reg = brw_inst_src ## n ## _da_reg_nr(devinfo, inst); \ 1708 subreg = brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst); \ 1709 address_mode = brw_inst_src ## n ## _address_mode(devinfo, inst) 1710 1711 if (i == 0) { 1712 DO_SRC(0); 1713 } else { 1714 DO_SRC(1); 1715 } 1716#undef DO_SRC 1717 1718 /* The PRMs say that for CHV, BXT: 1719 * 1720 * When source or destination datatype is 64b or operation is integer 1721 * DWord multiply, regioning in Align1 must follow these rules: 1722 * 1723 * 1. Source and Destination horizontal stride must be aligned to the 1724 * same qword. 1725 * 2. Regioning must ensure Src.Vstride = Src.Width * Src.Hstride. 1726 * 3. Source and Destination offset must be the same, except the case 1727 * of scalar source. 1728 * 1729 * We assume that the restriction applies to GLK as well. 1730 */ 1731 if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1 && 1732 (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo))) { 1733 unsigned src_stride = hstride * type_size; 1734 unsigned dst_stride = dst_hstride * dst_type_size; 1735 1736 ERROR_IF(!is_scalar_region && 1737 (src_stride % 8 != 0 || 1738 dst_stride % 8 != 0 || 1739 src_stride != dst_stride), 1740 "Source and destination horizontal stride must equal and a " 1741 "multiple of a qword when the execution type is 64-bit"); 1742 1743 ERROR_IF(vstride != width * hstride, 1744 "Vstride must be Width * Hstride when the execution type is " 1745 "64-bit"); 1746 1747 ERROR_IF(!is_scalar_region && dst_subreg != subreg, 1748 "Source and destination offset must be the same when the " 1749 "execution type is 64-bit"); 1750 } 1751 1752 /* The PRMs say that for CHV, BXT: 1753 * 1754 * When source or destination datatype is 64b or operation is integer 1755 * DWord multiply, indirect addressing must not be used. 1756 * 1757 * We assume that the restriction applies to GLK as well. 
1758 */ 1759 if (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo)) { 1760 ERROR_IF(BRW_ADDRESS_REGISTER_INDIRECT_REGISTER == address_mode || 1761 BRW_ADDRESS_REGISTER_INDIRECT_REGISTER == dst_address_mode, 1762 "Indirect addressing is not allowed when the execution type " 1763 "is 64-bit"); 1764 } 1765 1766 /* The PRMs say that for CHV, BXT: 1767 * 1768 * ARF registers must never be used with 64b datatype or when 1769 * operation is integer DWord multiply. 1770 * 1771 * We assume that the restriction applies to GLK as well. 1772 * 1773 * We assume that the restriction does not apply to the null register. 1774 */ 1775 if (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo)) { 1776 ERROR_IF(brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MAC || 1777 brw_inst_acc_wr_control(devinfo, inst) || 1778 (BRW_ARCHITECTURE_REGISTER_FILE == file && 1779 reg != BRW_ARF_NULL) || 1780 (BRW_ARCHITECTURE_REGISTER_FILE == dst_file && 1781 dst_reg != BRW_ARF_NULL), 1782 "Architecture registers cannot be used when the execution " 1783 "type is 64-bit"); 1784 } 1785 } 1786 1787 /* The PRMs say that for BDW, SKL: 1788 * 1789 * If Align16 is required for an operation with QW destination and non-QW 1790 * source datatypes, the execution size cannot exceed 2. 1791 * 1792 * We assume that the restriction applies to all Gen8+ parts. 1793 */ 1794 if (devinfo->gen >= 8) { 1795 enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst); 1796 enum brw_reg_type src1_type = 1797 num_sources > 1 ? 
brw_inst_src1_type(devinfo, inst) : src0_type; 1798 unsigned src0_type_size = brw_reg_type_to_size(src0_type); 1799 unsigned src1_type_size = brw_reg_type_to_size(src1_type); 1800 1801 ERROR_IF(brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16 && 1802 dst_type_size == 8 && 1803 (src0_type_size != 8 || src1_type_size != 8) && 1804 brw_inst_exec_size(devinfo, inst) > BRW_EXECUTE_2, 1805 "In Align16 exec size cannot exceed 2 with a QWord destination " 1806 "and a non-QWord source"); 1807 } 1808 1809 /* The PRMs say that for CHV, BXT: 1810 * 1811 * When source or destination datatype is 64b or operation is integer 1812 * DWord multiply, DepCtrl must not be used. 1813 * 1814 * We assume that the restriction applies to GLK as well. 1815 */ 1816 if (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo)) { 1817 ERROR_IF(brw_inst_no_dd_check(devinfo, inst) || 1818 brw_inst_no_dd_clear(devinfo, inst), 1819 "DepCtrl is not allowed when the execution type is 64-bit"); 1820 } 1821 1822 return error_msg; 1823} 1824 1825bool 1826brw_validate_instructions(const struct gen_device_info *devinfo, 1827 const void *assembly, int start_offset, int end_offset, 1828 struct disasm_info *disasm) 1829{ 1830 bool valid = true; 1831 1832 for (int src_offset = start_offset; src_offset < end_offset;) { 1833 struct string error_msg = { .str = NULL, .len = 0 }; 1834 const brw_inst *inst = assembly + src_offset; 1835 bool is_compact = brw_inst_cmpt_control(devinfo, inst); 1836 brw_inst uncompacted; 1837 1838 if (is_compact) { 1839 brw_compact_inst *compacted = (void *)inst; 1840 brw_uncompact_instruction(devinfo, &uncompacted, compacted); 1841 inst = &uncompacted; 1842 } 1843 1844 if (is_unsupported_inst(devinfo, inst)) { 1845 ERROR("Instruction not supported on this Gen"); 1846 } else { 1847 CHECK(sources_not_null); 1848 CHECK(send_restrictions); 1849 CHECK(alignment_supported); 1850 CHECK(general_restrictions_based_on_operand_types); 1851 CHECK(general_restrictions_on_region_parameters); 
1852 CHECK(special_restrictions_for_mixed_float_mode); 1853 CHECK(region_alignment_rules); 1854 CHECK(vector_immediate_restrictions); 1855 CHECK(special_requirements_for_handling_double_precision_data_types); 1856 } 1857 1858 if (error_msg.str && disasm) { 1859 disasm_insert_error(disasm, src_offset, error_msg.str); 1860 } 1861 valid = valid && error_msg.len == 0; 1862 free(error_msg.str); 1863 1864 if (is_compact) { 1865 src_offset += sizeof(brw_compact_inst); 1866 } else { 1867 src_offset += sizeof(brw_inst); 1868 } 1869 } 1870 1871 return valid; 1872} 1873