1/* 2 * Copyright © 2015-2019 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24/** @file brw_eu_validate.c 25 * 26 * This file implements a pass that validates shader assembly. 27 * 28 * The restrictions implemented herein are intended to verify that instructions 29 * in shader assembly do not violate restrictions documented in the graphics 30 * programming reference manuals. 31 * 32 * The restrictions are difficult for humans to quickly verify due to their 33 * complexity and abundance. 34 * 35 * It is critical that this code is thoroughly unit tested because false 36 * results will lead developers astray, which is worse than having no validator 37 * at all. Functional changes to this file without corresponding unit tests (in 38 * test_eu_validate.cpp) will be rejected. 
39 */ 40 41#include <stdlib.h> 42#include "brw_eu.h" 43 44/* We're going to do lots of string concatenation, so this should help. */ 45struct string { 46 char *str; 47 size_t len; 48}; 49 50static void 51cat(struct string *dest, const struct string src) 52{ 53 dest->str = realloc(dest->str, dest->len + src.len + 1); 54 memcpy(dest->str + dest->len, src.str, src.len); 55 dest->str[dest->len + src.len] = '\0'; 56 dest->len = dest->len + src.len; 57} 58#define CAT(dest, src) cat(&dest, (struct string){src, strlen(src)}) 59 60static bool 61contains(const struct string haystack, const struct string needle) 62{ 63 return haystack.str && memmem(haystack.str, haystack.len, 64 needle.str, needle.len) != NULL; 65} 66#define CONTAINS(haystack, needle) \ 67 contains(haystack, (struct string){needle, strlen(needle)}) 68 69#define error(str) "\tERROR: " str "\n" 70#define ERROR_INDENT "\t " 71 72#define ERROR(msg) ERROR_IF(true, msg) 73#define ERROR_IF(cond, msg) \ 74 do { \ 75 if ((cond) && !CONTAINS(error_msg, error(msg))) { \ 76 CAT(error_msg, error(msg)); \ 77 } \ 78 } while(0) 79 80#define CHECK(func, args...) \ 81 do { \ 82 struct string __msg = func(devinfo, inst, ##args); \ 83 if (__msg.str) { \ 84 cat(&error_msg, __msg); \ 85 free(__msg.str); \ 86 } \ 87 } while (0) 88 89#define STRIDE(stride) (stride != 0 ? 
1 << ((stride) - 1) : 0) 90#define WIDTH(width) (1 << (width)) 91 92static bool 93inst_is_send(const struct intel_device_info *devinfo, const brw_inst *inst) 94{ 95 switch (brw_inst_opcode(devinfo, inst)) { 96 case BRW_OPCODE_SEND: 97 case BRW_OPCODE_SENDC: 98 case BRW_OPCODE_SENDS: 99 case BRW_OPCODE_SENDSC: 100 return true; 101 default: 102 return false; 103 } 104} 105 106static bool 107inst_is_split_send(const struct intel_device_info *devinfo, 108 const brw_inst *inst) 109{ 110 if (devinfo->ver >= 12) { 111 return inst_is_send(devinfo, inst); 112 } else { 113 switch (brw_inst_opcode(devinfo, inst)) { 114 case BRW_OPCODE_SENDS: 115 case BRW_OPCODE_SENDSC: 116 return true; 117 default: 118 return false; 119 } 120 } 121} 122 123static unsigned 124signed_type(unsigned type) 125{ 126 switch (type) { 127 case BRW_REGISTER_TYPE_UD: return BRW_REGISTER_TYPE_D; 128 case BRW_REGISTER_TYPE_UW: return BRW_REGISTER_TYPE_W; 129 case BRW_REGISTER_TYPE_UB: return BRW_REGISTER_TYPE_B; 130 case BRW_REGISTER_TYPE_UQ: return BRW_REGISTER_TYPE_Q; 131 default: return type; 132 } 133} 134 135static enum brw_reg_type 136inst_dst_type(const struct intel_device_info *devinfo, const brw_inst *inst) 137{ 138 return (devinfo->ver < 12 || !inst_is_send(devinfo, inst)) ? 
139 brw_inst_dst_type(devinfo, inst) : BRW_REGISTER_TYPE_D; 140} 141 142static bool 143inst_is_raw_move(const struct intel_device_info *devinfo, const brw_inst *inst) 144{ 145 unsigned dst_type = signed_type(inst_dst_type(devinfo, inst)); 146 unsigned src_type = signed_type(brw_inst_src0_type(devinfo, inst)); 147 148 if (brw_inst_src0_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE) { 149 /* FIXME: not strictly true */ 150 if (brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_VF || 151 brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_UV || 152 brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_V) { 153 return false; 154 } 155 } else if (brw_inst_src0_negate(devinfo, inst) || 156 brw_inst_src0_abs(devinfo, inst)) { 157 return false; 158 } 159 160 return brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MOV && 161 brw_inst_saturate(devinfo, inst) == 0 && 162 dst_type == src_type; 163} 164 165static bool 166dst_is_null(const struct intel_device_info *devinfo, const brw_inst *inst) 167{ 168 return brw_inst_dst_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE && 169 brw_inst_dst_da_reg_nr(devinfo, inst) == BRW_ARF_NULL; 170} 171 172static bool 173src0_is_null(const struct intel_device_info *devinfo, const brw_inst *inst) 174{ 175 return brw_inst_src0_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT && 176 brw_inst_src0_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE && 177 brw_inst_src0_da_reg_nr(devinfo, inst) == BRW_ARF_NULL; 178} 179 180static bool 181src1_is_null(const struct intel_device_info *devinfo, const brw_inst *inst) 182{ 183 return brw_inst_src1_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE && 184 brw_inst_src1_da_reg_nr(devinfo, inst) == BRW_ARF_NULL; 185} 186 187static bool 188src0_is_acc(const struct intel_device_info *devinfo, const brw_inst *inst) 189{ 190 return brw_inst_src0_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE && 191 (brw_inst_src0_da_reg_nr(devinfo, inst) & 0xF0) == BRW_ARF_ACCUMULATOR; 
192} 193 194static bool 195src1_is_acc(const struct intel_device_info *devinfo, const brw_inst *inst) 196{ 197 return brw_inst_src1_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE && 198 (brw_inst_src1_da_reg_nr(devinfo, inst) & 0xF0) == BRW_ARF_ACCUMULATOR; 199} 200 201static bool 202src0_has_scalar_region(const struct intel_device_info *devinfo, 203 const brw_inst *inst) 204{ 205 return brw_inst_src0_vstride(devinfo, inst) == BRW_VERTICAL_STRIDE_0 && 206 brw_inst_src0_width(devinfo, inst) == BRW_WIDTH_1 && 207 brw_inst_src0_hstride(devinfo, inst) == BRW_HORIZONTAL_STRIDE_0; 208} 209 210static bool 211src1_has_scalar_region(const struct intel_device_info *devinfo, 212 const brw_inst *inst) 213{ 214 return brw_inst_src1_vstride(devinfo, inst) == BRW_VERTICAL_STRIDE_0 && 215 brw_inst_src1_width(devinfo, inst) == BRW_WIDTH_1 && 216 brw_inst_src1_hstride(devinfo, inst) == BRW_HORIZONTAL_STRIDE_0; 217} 218 219static unsigned 220num_sources_from_inst(const struct intel_device_info *devinfo, 221 const brw_inst *inst) 222{ 223 const struct opcode_desc *desc = 224 brw_opcode_desc(devinfo, brw_inst_opcode(devinfo, inst)); 225 unsigned math_function; 226 227 if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MATH) { 228 math_function = brw_inst_math_function(devinfo, inst); 229 } else if (devinfo->ver < 6 && 230 brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND) { 231 if (brw_inst_sfid(devinfo, inst) == BRW_SFID_MATH) { 232 /* src1 must be a descriptor (including the information to determine 233 * that the SEND is doing an extended math operation), but src0 can 234 * actually be null since it serves as the source of the implicit GRF 235 * to MRF move. 236 * 237 * If we stop using that functionality, we'll have to revisit this. 238 */ 239 return 2; 240 } else { 241 /* Send instructions are allowed to have null sources since they use 242 * the base_mrf field to specify which message register source. 
243 */ 244 return 0; 245 } 246 } else { 247 assert(desc->nsrc < 4); 248 return desc->nsrc; 249 } 250 251 switch (math_function) { 252 case BRW_MATH_FUNCTION_INV: 253 case BRW_MATH_FUNCTION_LOG: 254 case BRW_MATH_FUNCTION_EXP: 255 case BRW_MATH_FUNCTION_SQRT: 256 case BRW_MATH_FUNCTION_RSQ: 257 case BRW_MATH_FUNCTION_SIN: 258 case BRW_MATH_FUNCTION_COS: 259 case BRW_MATH_FUNCTION_SINCOS: 260 case GFX8_MATH_FUNCTION_INVM: 261 case GFX8_MATH_FUNCTION_RSQRTM: 262 return 1; 263 case BRW_MATH_FUNCTION_FDIV: 264 case BRW_MATH_FUNCTION_POW: 265 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER: 266 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT: 267 case BRW_MATH_FUNCTION_INT_DIV_REMAINDER: 268 return 2; 269 default: 270 unreachable("not reached"); 271 } 272} 273 274static struct string 275invalid_values(const struct intel_device_info *devinfo, const brw_inst *inst) 276{ 277 unsigned num_sources = num_sources_from_inst(devinfo, inst); 278 struct string error_msg = { .str = NULL, .len = 0 }; 279 280 switch ((enum brw_execution_size) brw_inst_exec_size(devinfo, inst)) { 281 case BRW_EXECUTE_1: 282 case BRW_EXECUTE_2: 283 case BRW_EXECUTE_4: 284 case BRW_EXECUTE_8: 285 case BRW_EXECUTE_16: 286 case BRW_EXECUTE_32: 287 break; 288 default: 289 ERROR("invalid execution size"); 290 break; 291 } 292 293 if (inst_is_send(devinfo, inst)) 294 return error_msg; 295 296 if (num_sources == 3) { 297 /* Nothing to test: 298 * No 3-src instructions on Gfx4-5 299 * No reg file bits on Gfx6-10 (align16) 300 * No invalid encodings on Gfx10-12 (align1) 301 */ 302 } else { 303 if (devinfo->ver > 6) { 304 ERROR_IF(brw_inst_dst_reg_file(devinfo, inst) == MRF || 305 (num_sources > 0 && 306 brw_inst_src0_reg_file(devinfo, inst) == MRF) || 307 (num_sources > 1 && 308 brw_inst_src1_reg_file(devinfo, inst) == MRF), 309 "invalid register file encoding"); 310 } 311 } 312 313 if (error_msg.str) 314 return error_msg; 315 316 if (num_sources == 3) { 317 if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) 
{ 318 if (devinfo->ver >= 10) { 319 ERROR_IF(brw_inst_3src_a1_dst_type (devinfo, inst) == INVALID_REG_TYPE || 320 brw_inst_3src_a1_src0_type(devinfo, inst) == INVALID_REG_TYPE || 321 brw_inst_3src_a1_src1_type(devinfo, inst) == INVALID_REG_TYPE || 322 brw_inst_3src_a1_src2_type(devinfo, inst) == INVALID_REG_TYPE, 323 "invalid register type encoding"); 324 } else { 325 ERROR("Align1 mode not allowed on Gen < 10"); 326 } 327 } else { 328 ERROR_IF(brw_inst_3src_a16_dst_type(devinfo, inst) == INVALID_REG_TYPE || 329 brw_inst_3src_a16_src_type(devinfo, inst) == INVALID_REG_TYPE, 330 "invalid register type encoding"); 331 } 332 } else { 333 ERROR_IF(brw_inst_dst_type (devinfo, inst) == INVALID_REG_TYPE || 334 (num_sources > 0 && 335 brw_inst_src0_type(devinfo, inst) == INVALID_REG_TYPE) || 336 (num_sources > 1 && 337 brw_inst_src1_type(devinfo, inst) == INVALID_REG_TYPE), 338 "invalid register type encoding"); 339 } 340 341 return error_msg; 342} 343 344static struct string 345sources_not_null(const struct intel_device_info *devinfo, 346 const brw_inst *inst) 347{ 348 unsigned num_sources = num_sources_from_inst(devinfo, inst); 349 struct string error_msg = { .str = NULL, .len = 0 }; 350 351 /* Nothing to test. 3-src instructions can only have GRF sources, and 352 * there's no bit to control the file. 353 */ 354 if (num_sources == 3) 355 return (struct string){}; 356 357 /* Nothing to test. Split sends can only encode a file in sources that are 358 * allowed to be NULL. 
359 */ 360 if (inst_is_split_send(devinfo, inst)) 361 return (struct string){}; 362 363 if (num_sources >= 1 && brw_inst_opcode(devinfo, inst) != BRW_OPCODE_SYNC) 364 ERROR_IF(src0_is_null(devinfo, inst), "src0 is null"); 365 366 if (num_sources == 2) 367 ERROR_IF(src1_is_null(devinfo, inst), "src1 is null"); 368 369 return error_msg; 370} 371 372static struct string 373alignment_supported(const struct intel_device_info *devinfo, 374 const brw_inst *inst) 375{ 376 struct string error_msg = { .str = NULL, .len = 0 }; 377 378 ERROR_IF(devinfo->ver >= 11 && brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16, 379 "Align16 not supported"); 380 381 return error_msg; 382} 383 384static bool 385inst_uses_src_acc(const struct intel_device_info *devinfo, const brw_inst *inst) 386{ 387 /* Check instructions that use implicit accumulator sources */ 388 switch (brw_inst_opcode(devinfo, inst)) { 389 case BRW_OPCODE_MAC: 390 case BRW_OPCODE_MACH: 391 case BRW_OPCODE_SADA2: 392 return true; 393 default: 394 break; 395 } 396 397 /* FIXME: support 3-src instructions */ 398 unsigned num_sources = num_sources_from_inst(devinfo, inst); 399 assert(num_sources < 3); 400 401 return src0_is_acc(devinfo, inst) || (num_sources > 1 && src1_is_acc(devinfo, inst)); 402} 403 404static struct string 405send_restrictions(const struct intel_device_info *devinfo, 406 const brw_inst *inst) 407{ 408 struct string error_msg = { .str = NULL, .len = 0 }; 409 410 if (inst_is_split_send(devinfo, inst)) { 411 ERROR_IF(brw_inst_send_src1_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE && 412 brw_inst_send_src1_reg_nr(devinfo, inst) != BRW_ARF_NULL, 413 "src1 of split send must be a GRF or NULL"); 414 415 ERROR_IF(brw_inst_eot(devinfo, inst) && 416 brw_inst_src0_da_reg_nr(devinfo, inst) < 112, 417 "send with EOT must use g112-g127"); 418 ERROR_IF(brw_inst_eot(devinfo, inst) && 419 brw_inst_send_src1_reg_file(devinfo, inst) == BRW_GENERAL_REGISTER_FILE && 420 brw_inst_send_src1_reg_nr(devinfo, inst) 
< 112, 421 "send with EOT must use g112-g127"); 422 423 if (brw_inst_send_src1_reg_file(devinfo, inst) == BRW_GENERAL_REGISTER_FILE) { 424 /* Assume minimums if we don't know */ 425 unsigned mlen = 1; 426 if (!brw_inst_send_sel_reg32_desc(devinfo, inst)) { 427 const uint32_t desc = brw_inst_send_desc(devinfo, inst); 428 mlen = brw_message_desc_mlen(devinfo, desc); 429 } 430 431 unsigned ex_mlen = 1; 432 if (!brw_inst_send_sel_reg32_ex_desc(devinfo, inst)) { 433 const uint32_t ex_desc = brw_inst_sends_ex_desc(devinfo, inst); 434 ex_mlen = brw_message_ex_desc_ex_mlen(devinfo, ex_desc); 435 } 436 const unsigned src0_reg_nr = brw_inst_src0_da_reg_nr(devinfo, inst); 437 const unsigned src1_reg_nr = brw_inst_send_src1_reg_nr(devinfo, inst); 438 ERROR_IF((src0_reg_nr <= src1_reg_nr && 439 src1_reg_nr < src0_reg_nr + mlen) || 440 (src1_reg_nr <= src0_reg_nr && 441 src0_reg_nr < src1_reg_nr + ex_mlen), 442 "split send payloads must not overlap"); 443 } 444 } else if (inst_is_send(devinfo, inst)) { 445 ERROR_IF(brw_inst_src0_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT, 446 "send must use direct addressing"); 447 448 if (devinfo->ver >= 7) { 449 ERROR_IF(brw_inst_send_src0_reg_file(devinfo, inst) != BRW_GENERAL_REGISTER_FILE, 450 "send from non-GRF"); 451 ERROR_IF(brw_inst_eot(devinfo, inst) && 452 brw_inst_src0_da_reg_nr(devinfo, inst) < 112, 453 "send with EOT must use g112-g127"); 454 } 455 456 if (devinfo->ver >= 8) { 457 ERROR_IF(!dst_is_null(devinfo, inst) && 458 (brw_inst_dst_da_reg_nr(devinfo, inst) + 459 brw_inst_rlen(devinfo, inst) > 127) && 460 (brw_inst_src0_da_reg_nr(devinfo, inst) + 461 brw_inst_mlen(devinfo, inst) > 462 brw_inst_dst_da_reg_nr(devinfo, inst)), 463 "r127 must not be used for return address when there is " 464 "a src and dest overlap"); 465 } 466 } 467 468 return error_msg; 469} 470 471static bool 472is_unsupported_inst(const struct intel_device_info *devinfo, 473 const brw_inst *inst) 474{ 475 return brw_inst_opcode(devinfo, inst) == 
BRW_OPCODE_ILLEGAL; 476} 477 478/** 479 * Returns whether a combination of two types would qualify as mixed float 480 * operation mode 481 */ 482static inline bool 483types_are_mixed_float(enum brw_reg_type t0, enum brw_reg_type t1) 484{ 485 return (t0 == BRW_REGISTER_TYPE_F && t1 == BRW_REGISTER_TYPE_HF) || 486 (t1 == BRW_REGISTER_TYPE_F && t0 == BRW_REGISTER_TYPE_HF); 487} 488 489static enum brw_reg_type 490execution_type_for_type(enum brw_reg_type type) 491{ 492 switch (type) { 493 case BRW_REGISTER_TYPE_NF: 494 case BRW_REGISTER_TYPE_DF: 495 case BRW_REGISTER_TYPE_F: 496 case BRW_REGISTER_TYPE_HF: 497 return type; 498 499 case BRW_REGISTER_TYPE_VF: 500 return BRW_REGISTER_TYPE_F; 501 502 case BRW_REGISTER_TYPE_Q: 503 case BRW_REGISTER_TYPE_UQ: 504 return BRW_REGISTER_TYPE_Q; 505 506 case BRW_REGISTER_TYPE_D: 507 case BRW_REGISTER_TYPE_UD: 508 return BRW_REGISTER_TYPE_D; 509 510 case BRW_REGISTER_TYPE_W: 511 case BRW_REGISTER_TYPE_UW: 512 case BRW_REGISTER_TYPE_B: 513 case BRW_REGISTER_TYPE_UB: 514 case BRW_REGISTER_TYPE_V: 515 case BRW_REGISTER_TYPE_UV: 516 return BRW_REGISTER_TYPE_W; 517 } 518 unreachable("not reached"); 519} 520 521/** 522 * Returns the execution type of an instruction \p inst 523 */ 524static enum brw_reg_type 525execution_type(const struct intel_device_info *devinfo, const brw_inst *inst) 526{ 527 unsigned num_sources = num_sources_from_inst(devinfo, inst); 528 enum brw_reg_type src0_exec_type, src1_exec_type; 529 530 /* Execution data type is independent of destination data type, except in 531 * mixed F/HF instructions. 
532 */ 533 enum brw_reg_type dst_exec_type = inst_dst_type(devinfo, inst); 534 535 src0_exec_type = execution_type_for_type(brw_inst_src0_type(devinfo, inst)); 536 if (num_sources == 1) { 537 if (src0_exec_type == BRW_REGISTER_TYPE_HF) 538 return dst_exec_type; 539 return src0_exec_type; 540 } 541 542 src1_exec_type = execution_type_for_type(brw_inst_src1_type(devinfo, inst)); 543 if (types_are_mixed_float(src0_exec_type, src1_exec_type) || 544 types_are_mixed_float(src0_exec_type, dst_exec_type) || 545 types_are_mixed_float(src1_exec_type, dst_exec_type)) { 546 return BRW_REGISTER_TYPE_F; 547 } 548 549 if (src0_exec_type == src1_exec_type) 550 return src0_exec_type; 551 552 if (src0_exec_type == BRW_REGISTER_TYPE_NF || 553 src1_exec_type == BRW_REGISTER_TYPE_NF) 554 return BRW_REGISTER_TYPE_NF; 555 556 /* Mixed operand types where one is float is float on Gen < 6 557 * (and not allowed on later platforms) 558 */ 559 if (devinfo->ver < 6 && 560 (src0_exec_type == BRW_REGISTER_TYPE_F || 561 src1_exec_type == BRW_REGISTER_TYPE_F)) 562 return BRW_REGISTER_TYPE_F; 563 564 if (src0_exec_type == BRW_REGISTER_TYPE_Q || 565 src1_exec_type == BRW_REGISTER_TYPE_Q) 566 return BRW_REGISTER_TYPE_Q; 567 568 if (src0_exec_type == BRW_REGISTER_TYPE_D || 569 src1_exec_type == BRW_REGISTER_TYPE_D) 570 return BRW_REGISTER_TYPE_D; 571 572 if (src0_exec_type == BRW_REGISTER_TYPE_W || 573 src1_exec_type == BRW_REGISTER_TYPE_W) 574 return BRW_REGISTER_TYPE_W; 575 576 if (src0_exec_type == BRW_REGISTER_TYPE_DF || 577 src1_exec_type == BRW_REGISTER_TYPE_DF) 578 return BRW_REGISTER_TYPE_DF; 579 580 unreachable("not reached"); 581} 582 583/** 584 * Returns whether a region is packed 585 * 586 * A region is packed if its elements are adjacent in memory, with no 587 * intervening space, no overlap, and no replicated values. 
588 */ 589static bool 590is_packed(unsigned vstride, unsigned width, unsigned hstride) 591{ 592 if (vstride == width) { 593 if (vstride == 1) { 594 return hstride == 0; 595 } else { 596 return hstride == 1; 597 } 598 } 599 600 return false; 601} 602 603/** 604 * Returns whether an instruction is an explicit or implicit conversion 605 * to/from half-float. 606 */ 607static bool 608is_half_float_conversion(const struct intel_device_info *devinfo, 609 const brw_inst *inst) 610{ 611 enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst); 612 613 unsigned num_sources = num_sources_from_inst(devinfo, inst); 614 enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst); 615 616 if (dst_type != src0_type && 617 (dst_type == BRW_REGISTER_TYPE_HF || src0_type == BRW_REGISTER_TYPE_HF)) { 618 return true; 619 } else if (num_sources > 1) { 620 enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst); 621 return dst_type != src1_type && 622 (dst_type == BRW_REGISTER_TYPE_HF || 623 src1_type == BRW_REGISTER_TYPE_HF); 624 } 625 626 return false; 627} 628 629/* 630 * Returns whether an instruction is using mixed float operation mode 631 */ 632static bool 633is_mixed_float(const struct intel_device_info *devinfo, const brw_inst *inst) 634{ 635 if (devinfo->ver < 8) 636 return false; 637 638 if (inst_is_send(devinfo, inst)) 639 return false; 640 641 unsigned opcode = brw_inst_opcode(devinfo, inst); 642 const struct opcode_desc *desc = brw_opcode_desc(devinfo, opcode); 643 if (desc->ndst == 0) 644 return false; 645 646 /* FIXME: support 3-src instructions */ 647 unsigned num_sources = num_sources_from_inst(devinfo, inst); 648 assert(num_sources < 3); 649 650 enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst); 651 enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst); 652 653 if (num_sources == 1) 654 return types_are_mixed_float(src0_type, dst_type); 655 656 enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst); 657 658 return 
types_are_mixed_float(src0_type, src1_type) || 659 types_are_mixed_float(src0_type, dst_type) || 660 types_are_mixed_float(src1_type, dst_type); 661} 662 663/** 664 * Returns whether an instruction is an explicit or implicit conversion 665 * to/from byte. 666 */ 667static bool 668is_byte_conversion(const struct intel_device_info *devinfo, 669 const brw_inst *inst) 670{ 671 enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst); 672 673 unsigned num_sources = num_sources_from_inst(devinfo, inst); 674 enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst); 675 676 if (dst_type != src0_type && 677 (type_sz(dst_type) == 1 || type_sz(src0_type) == 1)) { 678 return true; 679 } else if (num_sources > 1) { 680 enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst); 681 return dst_type != src1_type && 682 (type_sz(dst_type) == 1 || type_sz(src1_type) == 1); 683 } 684 685 return false; 686} 687 688/** 689 * Checks restrictions listed in "General Restrictions Based on Operand Types" 690 * in the "Register Region Restrictions" section. 691 */ 692static struct string 693general_restrictions_based_on_operand_types(const struct intel_device_info *devinfo, 694 const brw_inst *inst) 695{ 696 const struct opcode_desc *desc = 697 brw_opcode_desc(devinfo, brw_inst_opcode(devinfo, inst)); 698 unsigned num_sources = num_sources_from_inst(devinfo, inst); 699 unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst); 700 struct string error_msg = { .str = NULL, .len = 0 }; 701 702 if (inst_is_send(devinfo, inst)) 703 return error_msg; 704 705 if (devinfo->ver >= 11) { 706 if (num_sources == 3) { 707 ERROR_IF(brw_reg_type_to_size(brw_inst_3src_a1_src1_type(devinfo, inst)) == 1 || 708 brw_reg_type_to_size(brw_inst_3src_a1_src2_type(devinfo, inst)) == 1, 709 "Byte data type is not supported for src1/2 register regioning. 
This includes " 710 "byte broadcast as well."); 711 } 712 if (num_sources == 2) { 713 ERROR_IF(brw_reg_type_to_size(brw_inst_src1_type(devinfo, inst)) == 1, 714 "Byte data type is not supported for src1 register regioning. This includes " 715 "byte broadcast as well."); 716 } 717 } 718 719 if (num_sources == 3) 720 return error_msg; 721 722 if (exec_size == 1) 723 return error_msg; 724 725 if (desc->ndst == 0) 726 return error_msg; 727 728 /* The PRMs say: 729 * 730 * Where n is the largest element size in bytes for any source or 731 * destination operand type, ExecSize * n must be <= 64. 732 * 733 * But we do not attempt to enforce it, because it is implied by other 734 * rules: 735 * 736 * - that the destination stride must match the execution data type 737 * - sources may not span more than two adjacent GRF registers 738 * - destination may not span more than two adjacent GRF registers 739 * 740 * In fact, checking it would weaken testing of the other rules. 741 */ 742 743 unsigned dst_stride = STRIDE(brw_inst_dst_hstride(devinfo, inst)); 744 enum brw_reg_type dst_type = inst_dst_type(devinfo, inst); 745 bool dst_type_is_byte = 746 inst_dst_type(devinfo, inst) == BRW_REGISTER_TYPE_B || 747 inst_dst_type(devinfo, inst) == BRW_REGISTER_TYPE_UB; 748 749 if (dst_type_is_byte) { 750 if (is_packed(exec_size * dst_stride, exec_size, dst_stride)) { 751 if (!inst_is_raw_move(devinfo, inst)) 752 ERROR("Only raw MOV supports a packed-byte destination"); 753 return error_msg; 754 } 755 } 756 757 unsigned exec_type = execution_type(devinfo, inst); 758 unsigned exec_type_size = brw_reg_type_to_size(exec_type); 759 unsigned dst_type_size = brw_reg_type_to_size(dst_type); 760 761 /* On IVB/BYT, region parameters and execution size for DF are in terms of 762 * 32-bit elements, so they are doubled. For evaluating the validity of an 763 * instruction, we halve them. 
764 */ 765 if (devinfo->verx10 == 70 && 766 exec_type_size == 8 && dst_type_size == 4) 767 dst_type_size = 8; 768 769 if (is_byte_conversion(devinfo, inst)) { 770 /* From the BDW+ PRM, Volume 2a, Command Reference, Instructions - MOV: 771 * 772 * "There is no direct conversion from B/UB to DF or DF to B/UB. 773 * There is no direct conversion from B/UB to Q/UQ or Q/UQ to B/UB." 774 * 775 * Even if these restrictions are listed for the MOV instruction, we 776 * validate this more generally, since there is the possibility 777 * of implicit conversions from other instructions. 778 */ 779 enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst); 780 enum brw_reg_type src1_type = num_sources > 1 ? 781 brw_inst_src1_type(devinfo, inst) : 0; 782 783 ERROR_IF(type_sz(dst_type) == 1 && 784 (type_sz(src0_type) == 8 || 785 (num_sources > 1 && type_sz(src1_type) == 8)), 786 "There are no direct conversions between 64-bit types and B/UB"); 787 788 ERROR_IF(type_sz(dst_type) == 8 && 789 (type_sz(src0_type) == 1 || 790 (num_sources > 1 && type_sz(src1_type) == 1)), 791 "There are no direct conversions between 64-bit types and B/UB"); 792 } 793 794 if (is_half_float_conversion(devinfo, inst)) { 795 /** 796 * A helper to validate used in the validation of the following restriction 797 * from the BDW+ PRM, Volume 2a, Command Reference, Instructions - MOV: 798 * 799 * "There is no direct conversion from HF to DF or DF to HF. 800 * There is no direct conversion from HF to Q/UQ or Q/UQ to HF." 801 * 802 * Even if these restrictions are listed for the MOV instruction, we 803 * validate this more generally, since there is the possibility 804 * of implicit conversions from other instructions, such us implicit 805 * conversion from integer to HF with the ADD instruction in SKL+. 806 */ 807 enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst); 808 enum brw_reg_type src1_type = num_sources > 1 ? 
809 brw_inst_src1_type(devinfo, inst) : 0; 810 ERROR_IF(dst_type == BRW_REGISTER_TYPE_HF && 811 (type_sz(src0_type) == 8 || 812 (num_sources > 1 && type_sz(src1_type) == 8)), 813 "There are no direct conversions between 64-bit types and HF"); 814 815 ERROR_IF(type_sz(dst_type) == 8 && 816 (src0_type == BRW_REGISTER_TYPE_HF || 817 (num_sources > 1 && src1_type == BRW_REGISTER_TYPE_HF)), 818 "There are no direct conversions between 64-bit types and HF"); 819 820 /* From the BDW+ PRM: 821 * 822 * "Conversion between Integer and HF (Half Float) must be 823 * DWord-aligned and strided by a DWord on the destination." 824 * 825 * Also, the above restrictions seems to be expanded on CHV and SKL+ by: 826 * 827 * "There is a relaxed alignment rule for word destinations. When 828 * the destination type is word (UW, W, HF), destination data types 829 * can be aligned to either the lowest word or the second lowest 830 * word of the execution channel. This means the destination data 831 * words can be either all in the even word locations or all in the 832 * odd word locations." 833 * 834 * We do not implement the second rule as is though, since empirical 835 * testing shows inconsistencies: 836 * - It suggests that packed 16-bit is not allowed, which is not true. 837 * - It suggests that conversions from Q/DF to W (which need to be 838 * 64-bit aligned on the destination) are not possible, which is 839 * not true. 840 * 841 * So from this rule we only validate the implication that conversions 842 * from F to HF need to be DWord strided (except in Align1 mixed 843 * float mode where packed fp16 destination is allowed so long as the 844 * destination is oword-aligned). 845 * 846 * Finally, we only validate this for Align1 because Align16 always 847 * requires packed destinations, so these restrictions can't possibly 848 * apply to Align16 mode. 
849 */ 850 if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { 851 if ((dst_type == BRW_REGISTER_TYPE_HF && 852 (brw_reg_type_is_integer(src0_type) || 853 (num_sources > 1 && brw_reg_type_is_integer(src1_type)))) || 854 (brw_reg_type_is_integer(dst_type) && 855 (src0_type == BRW_REGISTER_TYPE_HF || 856 (num_sources > 1 && src1_type == BRW_REGISTER_TYPE_HF)))) { 857 ERROR_IF(dst_stride * dst_type_size != 4, 858 "Conversions between integer and half-float must be " 859 "strided by a DWord on the destination"); 860 861 unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst); 862 ERROR_IF(subreg % 4 != 0, 863 "Conversions between integer and half-float must be " 864 "aligned to a DWord on the destination"); 865 } else if ((devinfo->is_cherryview || devinfo->ver >= 9) && 866 dst_type == BRW_REGISTER_TYPE_HF) { 867 unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst); 868 ERROR_IF(dst_stride != 2 && 869 !(is_mixed_float(devinfo, inst) && 870 dst_stride == 1 && subreg % 16 == 0), 871 "Conversions to HF must have either all words in even " 872 "word locations or all words in odd word locations or " 873 "be mixed-float with Oword-aligned packed destination"); 874 } 875 } 876 } 877 878 /* There are special regioning rules for mixed-float mode in CHV and SKL that 879 * override the general rule for the ratio of sizes of the destination type 880 * and the execution type. We will add validation for those in a later patch. 
881 */ 882 bool validate_dst_size_and_exec_size_ratio = 883 !is_mixed_float(devinfo, inst) || 884 !(devinfo->is_cherryview || devinfo->ver >= 9); 885 886 if (validate_dst_size_and_exec_size_ratio && 887 exec_type_size > dst_type_size) { 888 if (!(dst_type_is_byte && inst_is_raw_move(devinfo, inst))) { 889 ERROR_IF(dst_stride * dst_type_size != exec_type_size, 890 "Destination stride must be equal to the ratio of the sizes " 891 "of the execution data type to the destination type"); 892 } 893 894 unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst); 895 896 if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1 && 897 brw_inst_dst_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT) { 898 /* The i965 PRM says: 899 * 900 * Implementation Restriction: The relaxed alignment rule for byte 901 * destination (#10.5) is not supported. 902 */ 903 if ((devinfo->ver > 4 || devinfo->is_g4x) && dst_type_is_byte) { 904 ERROR_IF(subreg % exec_type_size != 0 && 905 subreg % exec_type_size != 1, 906 "Destination subreg must be aligned to the size of the " 907 "execution data type (or to the next lowest byte for byte " 908 "destinations)"); 909 } else { 910 ERROR_IF(subreg % exec_type_size != 0, 911 "Destination subreg must be aligned to the size of the " 912 "execution data type"); 913 } 914 } 915 } 916 917 return error_msg; 918} 919 920/** 921 * Checks restrictions listed in "General Restrictions on Regioning Parameters" 922 * in the "Register Region Restrictions" section. 
923 */ 924static struct string 925general_restrictions_on_region_parameters(const struct intel_device_info *devinfo, 926 const brw_inst *inst) 927{ 928 const struct opcode_desc *desc = 929 brw_opcode_desc(devinfo, brw_inst_opcode(devinfo, inst)); 930 unsigned num_sources = num_sources_from_inst(devinfo, inst); 931 unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst); 932 struct string error_msg = { .str = NULL, .len = 0 }; 933 934 if (num_sources == 3) 935 return (struct string){}; 936 937 /* Split sends don't have the bits in the instruction to encode regions so 938 * there's nothing to check. 939 */ 940 if (inst_is_split_send(devinfo, inst)) 941 return (struct string){}; 942 943 if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16) { 944 if (desc->ndst != 0 && !dst_is_null(devinfo, inst)) 945 ERROR_IF(brw_inst_dst_hstride(devinfo, inst) != BRW_HORIZONTAL_STRIDE_1, 946 "Destination Horizontal Stride must be 1"); 947 948 if (num_sources >= 1) { 949 if (devinfo->verx10 >= 75) { 950 ERROR_IF(brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE && 951 brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 && 952 brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_2 && 953 brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4, 954 "In Align16 mode, only VertStride of 0, 2, or 4 is allowed"); 955 } else { 956 ERROR_IF(brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE && 957 brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 && 958 brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4, 959 "In Align16 mode, only VertStride of 0 or 4 is allowed"); 960 } 961 } 962 963 if (num_sources == 2) { 964 if (devinfo->verx10 >= 75) { 965 ERROR_IF(brw_inst_src1_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE && 966 brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 && 967 brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_2 && 968 brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4, 
969 "In Align16 mode, only VertStride of 0, 2, or 4 is allowed"); 970 } else { 971 ERROR_IF(brw_inst_src1_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE && 972 brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 && 973 brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4, 974 "In Align16 mode, only VertStride of 0 or 4 is allowed"); 975 } 976 } 977 978 return error_msg; 979 } 980 981 for (unsigned i = 0; i < num_sources; i++) { 982 unsigned vstride, width, hstride, element_size, subreg; 983 enum brw_reg_type type; 984 985#define DO_SRC(n) \ 986 if (brw_inst_src ## n ## _reg_file(devinfo, inst) == \ 987 BRW_IMMEDIATE_VALUE) \ 988 continue; \ 989 \ 990 vstride = STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst)); \ 991 width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst)); \ 992 hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst)); \ 993 type = brw_inst_src ## n ## _type(devinfo, inst); \ 994 element_size = brw_reg_type_to_size(type); \ 995 subreg = brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst) 996 997 if (i == 0) { 998 DO_SRC(0); 999 } else { 1000 DO_SRC(1); 1001 } 1002#undef DO_SRC 1003 1004 /* On IVB/BYT, region parameters and execution size for DF are in terms of 1005 * 32-bit elements, so they are doubled. For evaluating the validity of an 1006 * instruction, we halve them. 1007 */ 1008 if (devinfo->verx10 == 70 && 1009 element_size == 8) 1010 element_size = 4; 1011 1012 /* ExecSize must be greater than or equal to Width. */ 1013 ERROR_IF(exec_size < width, "ExecSize must be greater than or equal " 1014 "to Width"); 1015 1016 /* If ExecSize = Width and HorzStride ≠ 0, 1017 * VertStride must be set to Width * HorzStride. 
1018 */ 1019 if (exec_size == width && hstride != 0) { 1020 ERROR_IF(vstride != width * hstride, 1021 "If ExecSize = Width and HorzStride ≠ 0, " 1022 "VertStride must be set to Width * HorzStride"); 1023 } 1024 1025 /* If Width = 1, HorzStride must be 0 regardless of the values of 1026 * ExecSize and VertStride. 1027 */ 1028 if (width == 1) { 1029 ERROR_IF(hstride != 0, 1030 "If Width = 1, HorzStride must be 0 regardless " 1031 "of the values of ExecSize and VertStride"); 1032 } 1033 1034 /* If ExecSize = Width = 1, both VertStride and HorzStride must be 0. */ 1035 if (exec_size == 1 && width == 1) { 1036 ERROR_IF(vstride != 0 || hstride != 0, 1037 "If ExecSize = Width = 1, both VertStride " 1038 "and HorzStride must be 0"); 1039 } 1040 1041 /* If VertStride = HorzStride = 0, Width must be 1 regardless of the 1042 * value of ExecSize. 1043 */ 1044 if (vstride == 0 && hstride == 0) { 1045 ERROR_IF(width != 1, 1046 "If VertStride = HorzStride = 0, Width must be " 1047 "1 regardless of the value of ExecSize"); 1048 } 1049 1050 /* VertStride must be used to cross GRF register boundaries. This rule 1051 * implies that elements within a 'Width' cannot cross GRF boundaries. 1052 */ 1053 const uint64_t mask = (1ULL << element_size) - 1; 1054 unsigned rowbase = subreg; 1055 1056 for (int y = 0; y < exec_size / width; y++) { 1057 uint64_t access_mask = 0; 1058 unsigned offset = rowbase; 1059 1060 for (int x = 0; x < width; x++) { 1061 access_mask |= mask << (offset % 64); 1062 offset += hstride * element_size; 1063 } 1064 1065 rowbase += vstride * element_size; 1066 1067 if ((uint32_t)access_mask != 0 && (access_mask >> 32) != 0) { 1068 ERROR("VertStride must be used to cross GRF register boundaries"); 1069 break; 1070 } 1071 } 1072 } 1073 1074 /* Dst.HorzStride must not be 0. 
*/ 1075 if (desc->ndst != 0 && !dst_is_null(devinfo, inst)) { 1076 ERROR_IF(brw_inst_dst_hstride(devinfo, inst) == BRW_HORIZONTAL_STRIDE_0, 1077 "Destination Horizontal Stride must not be 0"); 1078 } 1079 1080 return error_msg; 1081} 1082 1083static struct string 1084special_restrictions_for_mixed_float_mode(const struct intel_device_info *devinfo, 1085 const brw_inst *inst) 1086{ 1087 struct string error_msg = { .str = NULL, .len = 0 }; 1088 1089 const unsigned opcode = brw_inst_opcode(devinfo, inst); 1090 const unsigned num_sources = num_sources_from_inst(devinfo, inst); 1091 if (num_sources >= 3) 1092 return error_msg; 1093 1094 if (!is_mixed_float(devinfo, inst)) 1095 return error_msg; 1096 1097 unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst); 1098 bool is_align16 = brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16; 1099 1100 enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst); 1101 enum brw_reg_type src1_type = num_sources > 1 ? 1102 brw_inst_src1_type(devinfo, inst) : 0; 1103 enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst); 1104 1105 unsigned dst_stride = STRIDE(brw_inst_dst_hstride(devinfo, inst)); 1106 bool dst_is_packed = is_packed(exec_size * dst_stride, exec_size, dst_stride); 1107 1108 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode 1109 * Float Operations: 1110 * 1111 * "Indirect addressing on source is not supported when source and 1112 * destination data types are mixed float." 1113 */ 1114 ERROR_IF(brw_inst_src0_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT || 1115 (num_sources > 1 && 1116 brw_inst_src1_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT), 1117 "Indirect addressing on source is not supported when source and " 1118 "destination data types are mixed float"); 1119 1120 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode 1121 * Float Operations: 1122 * 1123 * "No SIMD16 in mixed mode when destination is f32. 
Instruction 1124 * execution size must be no more than 8." 1125 */ 1126 ERROR_IF(exec_size > 8 && dst_type == BRW_REGISTER_TYPE_F, 1127 "Mixed float mode with 32-bit float destination is limited " 1128 "to SIMD8"); 1129 1130 if (is_align16) { 1131 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode 1132 * Float Operations: 1133 * 1134 * "In Align16 mode, when half float and float data types are mixed 1135 * between source operands OR between source and destination operands, 1136 * the register content are assumed to be packed." 1137 * 1138 * Since Align16 doesn't have a concept of horizontal stride (or width), 1139 * it means that vertical stride must always be 4, since 0 and 2 would 1140 * lead to replicated data, and any other value is disallowed in Align16. 1141 */ 1142 ERROR_IF(brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4, 1143 "Align16 mixed float mode assumes packed data (vstride must be 4"); 1144 1145 ERROR_IF(num_sources >= 2 && 1146 brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4, 1147 "Align16 mixed float mode assumes packed data (vstride must be 4"); 1148 1149 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode 1150 * Float Operations: 1151 * 1152 * "For Align16 mixed mode, both input and output packed f16 data 1153 * must be oword aligned, no oword crossing in packed f16." 1154 * 1155 * The previous rule requires that Align16 operands are always packed, 1156 * and since there is only one bit for Align16 subnr, which represents 1157 * offsets 0B and 16B, this rule is always enforced and we don't need to 1158 * validate it. 1159 */ 1160 1161 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode 1162 * Float Operations: 1163 * 1164 * "No SIMD16 in mixed mode when destination is packed f16 for both 1165 * Align1 and Align16." 
1166 * 1167 * And: 1168 * 1169 * "In Align16 mode, when half float and float data types are mixed 1170 * between source operands OR between source and destination operands, 1171 * the register content are assumed to be packed." 1172 * 1173 * Which implies that SIMD16 is not available in Align16. This is further 1174 * confirmed by: 1175 * 1176 * "For Align16 mixed mode, both input and output packed f16 data 1177 * must be oword aligned, no oword crossing in packed f16" 1178 * 1179 * Since oword-aligned packed f16 data would cross oword boundaries when 1180 * the execution size is larger than 8. 1181 */ 1182 ERROR_IF(exec_size > 8, "Align16 mixed float mode is limited to SIMD8"); 1183 1184 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode 1185 * Float Operations: 1186 * 1187 * "No accumulator read access for Align16 mixed float." 1188 */ 1189 ERROR_IF(inst_uses_src_acc(devinfo, inst), 1190 "No accumulator read access for Align16 mixed float"); 1191 } else { 1192 assert(!is_align16); 1193 1194 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode 1195 * Float Operations: 1196 * 1197 * "No SIMD16 in mixed mode when destination is packed f16 for both 1198 * Align1 and Align16." 
1199 */ 1200 ERROR_IF(exec_size > 8 && dst_is_packed && 1201 dst_type == BRW_REGISTER_TYPE_HF, 1202 "Align1 mixed float mode is limited to SIMD8 when destination " 1203 "is packed half-float"); 1204 1205 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode 1206 * Float Operations: 1207 * 1208 * "Math operations for mixed mode: 1209 * - In Align1, f16 inputs need to be strided" 1210 */ 1211 if (opcode == BRW_OPCODE_MATH) { 1212 if (src0_type == BRW_REGISTER_TYPE_HF) { 1213 ERROR_IF(STRIDE(brw_inst_src0_hstride(devinfo, inst)) <= 1, 1214 "Align1 mixed mode math needs strided half-float inputs"); 1215 } 1216 1217 if (num_sources >= 2 && src1_type == BRW_REGISTER_TYPE_HF) { 1218 ERROR_IF(STRIDE(brw_inst_src1_hstride(devinfo, inst)) <= 1, 1219 "Align1 mixed mode math needs strided half-float inputs"); 1220 } 1221 } 1222 1223 if (dst_type == BRW_REGISTER_TYPE_HF && dst_stride == 1) { 1224 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode 1225 * Float Operations: 1226 * 1227 * "In Align1, destination stride can be smaller than execution 1228 * type. When destination is stride of 1, 16 bit packed data is 1229 * updated on the destination. However, output packed f16 data 1230 * must be oword aligned, no oword crossing in packed f16." 1231 * 1232 * The requirement of not crossing oword boundaries for 16-bit oword 1233 * aligned data means that execution size is limited to 8. 
1234 */ 1235 unsigned subreg; 1236 if (brw_inst_dst_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT) 1237 subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst); 1238 else 1239 subreg = brw_inst_dst_ia_subreg_nr(devinfo, inst); 1240 ERROR_IF(subreg % 16 != 0, 1241 "Align1 mixed mode packed half-float output must be " 1242 "oword aligned"); 1243 ERROR_IF(exec_size > 8, 1244 "Align1 mixed mode packed half-float output must not " 1245 "cross oword boundaries (max exec size is 8)"); 1246 1247 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode 1248 * Float Operations: 1249 * 1250 * "When source is float or half float from accumulator register and 1251 * destination is half float with a stride of 1, the source must 1252 * register aligned. i.e., source must have offset zero." 1253 * 1254 * Align16 mixed float mode doesn't allow accumulator access on sources, 1255 * so we only need to check this for Align1. 1256 */ 1257 if (src0_is_acc(devinfo, inst) && 1258 (src0_type == BRW_REGISTER_TYPE_F || 1259 src0_type == BRW_REGISTER_TYPE_HF)) { 1260 ERROR_IF(brw_inst_src0_da1_subreg_nr(devinfo, inst) != 0, 1261 "Mixed float mode requires register-aligned accumulator " 1262 "source reads when destination is packed half-float"); 1263 1264 } 1265 1266 if (num_sources > 1 && 1267 src1_is_acc(devinfo, inst) && 1268 (src1_type == BRW_REGISTER_TYPE_F || 1269 src1_type == BRW_REGISTER_TYPE_HF)) { 1270 ERROR_IF(brw_inst_src1_da1_subreg_nr(devinfo, inst) != 0, 1271 "Mixed float mode requires register-aligned accumulator " 1272 "source reads when destination is packed half-float"); 1273 } 1274 } 1275 1276 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode 1277 * Float Operations: 1278 * 1279 * "No swizzle is allowed when an accumulator is used as an implicit 1280 * source or an explicit source in an instruction. i.e. when 1281 * destination is half float with an implicit accumulator source, 1282 * destination stride needs to be 2." 
1283 * 1284 * FIXME: it is not quite clear what the first sentence actually means 1285 * or its link to the implication described after it, so we only 1286 * validate the explicit implication, which is clearly described. 1287 */ 1288 if (dst_type == BRW_REGISTER_TYPE_HF && 1289 inst_uses_src_acc(devinfo, inst)) { 1290 ERROR_IF(dst_stride != 2, 1291 "Mixed float mode with implicit/explicit accumulator " 1292 "source and half-float destination requires a stride " 1293 "of 2 on the destination"); 1294 } 1295 } 1296 1297 return error_msg; 1298} 1299 1300/** 1301 * Creates an \p access_mask for an \p exec_size, \p element_size, and a region 1302 * 1303 * An \p access_mask is a 32-element array of uint64_t, where each uint64_t is 1304 * a bitmask of bytes accessed by the region. 1305 * 1306 * For instance the access mask of the source gX.1<4,2,2>F in an exec_size = 4 1307 * instruction would be 1308 * 1309 * access_mask[0] = 0x00000000000000F0 1310 * access_mask[1] = 0x000000000000F000 1311 * access_mask[2] = 0x0000000000F00000 1312 * access_mask[3] = 0x00000000F0000000 1313 * access_mask[4-31] = 0 1314 * 1315 * because the first execution channel accesses bytes 7-4 and the second 1316 * execution channel accesses bytes 15-12, etc. 
/* Bit N of a mask stands for byte N counted from the start of the register
 * (byte offsets are taken modulo 64). Masks are stored in channel order and
 * only exec_size entries are written (none at all when width exceeds
 * exec_size); the remaining entries of the array are left untouched. */
static void
align1_access_mask(uint64_t access_mask[static 32],
                   unsigned exec_size, unsigned element_size, unsigned subreg,
                   unsigned vstride, unsigned width, unsigned hstride)
{
   /* One set bit per byte of a single element. */
   const uint64_t element_bytes = (1ULL << element_size) - 1;
   const unsigned rows = exec_size / width;
   const unsigned row_step = vstride * element_size;
   const unsigned col_step = hstride * element_size;
   unsigned channel = 0;

   for (unsigned row = 0; row < rows; row++) {
      const unsigned row_start = subreg + row * row_step;

      for (unsigned col = 0; col < width; col++) {
         const unsigned first_byte = row_start + col * col_step;

         access_mask[channel] = element_bytes << (first_byte % 64);
         channel++;
      }
   }

   assert(channel == 0 || channel == exec_size);
}

/**
 * Counts the registers covered by \p access_mask.
 *
 * Returns 2 as soon as any entry has a bit set above bit 31, otherwise 1 if
 * at least one entry is non-zero, otherwise 0.
 */
static int
registers_read(const uint64_t access_mask[static 32])
{
   int regs = 0;

   for (const uint64_t *m = access_mask; m != access_mask + 32; m++) {
      if ((*m >> 32) != 0)
         return 2;

      if (*m != 0)
         regs = 1;
   }

   return regs;
}

/**
 * Checks restrictions listed in "Region Alignment Rules" in the "Register
 * Region Restrictions" section.
 */
/* Only Align1, non-send instructions with fewer than three sources are
 * examined; everything else is accepted without any check. Returns the
 * accumulated error text (str is NULL when no rule was violated). */
static struct string
region_alignment_rules(const struct intel_device_info *devinfo,
                       const brw_inst *inst)
{
   const struct opcode_desc *desc =
      brw_opcode_desc(devinfo, brw_inst_opcode(devinfo, inst));
   unsigned num_sources = num_sources_from_inst(devinfo, inst);
   unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst);
   uint64_t dst_access_mask[32], src0_access_mask[32], src1_access_mask[32];
   struct string error_msg = { .str = NULL, .len = 0 };

   if (num_sources == 3)
      return (struct string){};

   if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16)
      return (struct string){};

   if (inst_is_send(devinfo, inst))
      return (struct string){};

   /* Start from all-zero masks: a source that is skipped below (indirect or
    * immediate) then counts as touching no register in registers_read().
    */
   memset(dst_access_mask, 0, sizeof(dst_access_mask));
   memset(src0_access_mask, 0, sizeof(src0_access_mask));
   memset(src1_access_mask, 0, sizeof(src1_access_mask));

   for (unsigned i = 0; i < num_sources; i++) {
      unsigned vstride, width, hstride, element_size, subreg;
      enum brw_reg_type type;

      /* In Direct Addressing mode, a source cannot span more than 2 adjacent
       * GRF registers.
       */

      /* DO_SRC(n) loads the region of source n into the locals above and
       * fills src<n>_access_mask. Its `continue` statements skip indirect and
       * immediate sources, so it must be expanded directly inside this loop.
       */
#define DO_SRC(n)                                                              \
      if (brw_inst_src ## n ## _address_mode(devinfo, inst) !=                 \
          BRW_ADDRESS_DIRECT)                                                  \
         continue;                                                             \
                                                                               \
      if (brw_inst_src ## n ## _reg_file(devinfo, inst) ==                     \
          BRW_IMMEDIATE_VALUE)                                                 \
         continue;                                                             \
                                                                               \
      vstride = STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst));          \
      width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst));               \
      hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst));          \
      type = brw_inst_src ## n ## _type(devinfo, inst);                        \
      element_size = brw_reg_type_to_size(type);                               \
      subreg = brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst);             \
      align1_access_mask(src ## n ## _access_mask,                             \
                         exec_size, element_size, subreg,                      \
                         vstride, width, hstride)

      if (i == 0) {
         DO_SRC(0);
      } else {
         DO_SRC(1);
      }
#undef DO_SRC

      /* `offset` is the byte offset at which the last element of the region
       * starts (last row, last column).
       *
       * NOTE(review): when width > exec_size, num_vstride is 0 and
       * `num_vstride - 1` wraps around; presumably such instructions are
       * rejected by another check - confirm.
       */
      unsigned num_vstride = exec_size / width;
      unsigned num_hstride = width;
      unsigned vstride_elements = (num_vstride - 1) * vstride;
      unsigned hstride_elements = (num_hstride - 1) * hstride;
      unsigned offset = (vstride_elements + hstride_elements) * element_size +
                        subreg;
      ERROR_IF(offset >= 64,
               "A source cannot span more than 2 adjacent GRF registers");
   }

   /* The remaining rules all concern the destination. */
   if (desc->ndst == 0 || dst_is_null(devinfo, inst))
      return error_msg;

   unsigned stride = STRIDE(brw_inst_dst_hstride(devinfo, inst));
   enum brw_reg_type dst_type = inst_dst_type(devinfo, inst);
   unsigned element_size = brw_reg_type_to_size(dst_type);
   unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
   /* Byte offset at which the last destination element starts. */
   unsigned offset = ((exec_size - 1) * stride * element_size) + subreg;
   ERROR_IF(offset >= 64,
            "A destination cannot span more than 2 adjacent GRF registers");

   /* Stop here if a source or the destination failed the span checks above;
    * the rules below are not evaluated in that case.
    */
   if (error_msg.str)
      return error_msg;

   /* On IVB/BYT, region parameters and execution size for DF are in terms of
    * 32-bit elements, so they are doubled. For evaluating the validity of an
    * instruction, we halve them.
    */
   if (devinfo->verx10 == 70 &&
       element_size == 8)
      element_size = 4;

   /* The destination is described as a single row of exec_size elements
    * (or as a scalar when exec_size is 1).
    */
   align1_access_mask(dst_access_mask, exec_size, element_size, subreg,
                      exec_size == 1 ? 0 : exec_size * stride,
                      exec_size == 1 ? 1 : exec_size,
                      exec_size == 1 ? 0 : stride);

   unsigned dst_regs = registers_read(dst_access_mask);
   unsigned src0_regs = registers_read(src0_access_mask);
   unsigned src1_regs = registers_read(src1_access_mask);

   /* The SNB, IVB, HSW, BDW, and CHV PRMs say:
    *
    *    When an instruction has a source region spanning two registers and a
    *    destination region contained in one register, the number of elements
    *    must be the same between two sources and one of the following must be
    *    true:
    *
    *       1. The destination region is entirely contained in the lower OWord
    *          of a register.
    *       2. The destination region is entirely contained in the upper OWord
    *          of a register.
    *       3. The destination elements are evenly split between the two OWords
    *          of a register.
    */
   if (devinfo->ver <= 8) {
      if (dst_regs == 1 && (src0_regs == 2 || src1_regs == 2)) {
         unsigned upper_oword_writes = 0, lower_oword_writes = 0;

         /* A mask above 0xFFFF has a bit set past byte 15, which is counted
          * as a write to the upper OWord.
          */
         for (unsigned i = 0; i < exec_size; i++) {
            if (dst_access_mask[i] > 0x0000FFFF) {
               upper_oword_writes++;
            } else {
               assert(dst_access_mask[i] != 0);
               lower_oword_writes++;
            }
         }

         ERROR_IF(lower_oword_writes != 0 &&
                  upper_oword_writes != 0 &&
                  upper_oword_writes != lower_oword_writes,
                  "Writes must be to only one OWord or "
                  "evenly split between OWords");
      }
   }

   /* The IVB and HSW PRMs say:
    *
    *    When an instruction has a source region that spans two registers and
    *    the destination spans two registers, the destination elements must be
    *    evenly split between the two registers [...]
    *
    * The SNB PRM contains similar wording (but written in a much more
    * confusing manner).
    *
    * The BDW PRM says:
    *
    *    When destination spans two registers, the source may be one or two
    *    registers. The destination elements must be evenly split between the
    *    two registers.
    *
    * The SKL PRM says:
    *
    *    When destination of MATH instruction spans two registers, the
    *    destination elements must be evenly split between the two registers.
    *
    * It is not known whether this restriction applies to KBL or other Gens
    * after SKL.
    */
   if (devinfo->ver <= 8 ||
       brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MATH) {

      /* Nothing explicitly states that on Gen < 8 elements must be evenly
       * split between two destination registers in the two exceptional
       * source-region-spans-one-register cases, but since Broadwell requires
       * evenly split writes regardless of source region, we assume that it was
       * an oversight and require it.
       */
      if (dst_regs == 2) {
         unsigned upper_reg_writes = 0, lower_reg_writes = 0;

         /* A mask above 0xFFFFFFFF has a bit set past byte 31, which is
          * counted as a write to the second register.
          */
         for (unsigned i = 0; i < exec_size; i++) {
            if (dst_access_mask[i] > 0xFFFFFFFF) {
               upper_reg_writes++;
            } else {
               assert(dst_access_mask[i] != 0);
               lower_reg_writes++;
            }
         }

         ERROR_IF(upper_reg_writes != lower_reg_writes,
                  "Writes must be evenly split between the two "
                  "destination registers");
      }
   }

   /* The IVB and HSW PRMs say:
    *
    *    When an instruction has a source region that spans two registers and
    *    the destination spans two registers, the destination elements must be
    *    evenly split between the two registers and each destination register
    *    must be entirely derived from one source register.
    *
    *    Note: In such cases, the regioning parameters must ensure that the
    *    offset from the two source registers is the same.
    *
    * The SNB PRM contains similar wording (but written in a much more
    * confusing manner).
    *
    * There are effectively three rules stated here:
    *
    *    For an instruction with a source and a destination spanning two
    *    registers,
    *
    *       (1) destination elements must be evenly split between the two
    *           registers
    *       (2) all destination elements in a register must be derived
    *           from one source register
    *       (3) the offset (i.e. the starting location in each of the two
    *           registers spanned by a region) must be the same in the two
    *           registers spanned by a region
    *
    * It is impossible to violate rule (1) without violating (2) or (3), so we
    * do not attempt to validate it.
    */
   if (devinfo->ver <= 7 && dst_regs == 2) {
      for (unsigned i = 0; i < num_sources; i++) {
         /* Inside DO_SRC(n) the per-channel loops declare their own `i`,
          * shadowing the source index of the enclosing loop. `continue`
          * therefore moves on to the next source, while each `break` only
          * leaves the per-channel loop it appears in.
          *
          * offset_1 is the lowest set bit of the first mask that reaches into
          * the second register, minus 32; it is compared against the source's
          * subregister number (offset_0).
          */
#define DO_SRC(n)                                                             \
         if (src ## n ## _regs <= 1)                                          \
            continue;                                                         \
                                                                              \
         for (unsigned i = 0; i < exec_size; i++) {                           \
            if ((dst_access_mask[i] > 0xFFFFFFFF) !=                          \
                (src ## n ## _access_mask[i] > 0xFFFFFFFF)) {                 \
               ERROR("Each destination register must be entirely derived "   \
                     "from one source register");                             \
               break;                                                         \
            }                                                                 \
         }                                                                    \
                                                                              \
         unsigned offset_0 =                                                  \
            brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst);               \
         unsigned offset_1 = offset_0;                                        \
                                                                              \
         for (unsigned i = 0; i < exec_size; i++) {                           \
            if (src ## n ## _access_mask[i] > 0xFFFFFFFF) {                   \
               offset_1 = __builtin_ctzll(src ## n ## _access_mask[i]) - 32;  \
               break;                                                         \
            }                                                                 \
         }                                                                    \
                                                                              \
         ERROR_IF(num_sources == 2 && offset_0 != offset_1,                   \
                  "The offset from the two source registers "                 \
                  "must be the same")

         if (i == 0) {
            DO_SRC(0);
         } else {
            DO_SRC(1);
         }
#undef DO_SRC
      }
   }

   /* The IVB and HSW PRMs say:
    *
    *    When destination spans two registers, the source MUST span two
    *    registers. The exception to the above rule:
    *        1. When source is scalar, the source registers are not
    *           incremented.
    *        2. When source is packed integer Word and destination is packed
    *           integer DWord, the source register is not incremented by the
    *           source sub register is incremented.
    *
    * The SNB PRM does not contain this rule, but the internal documentation
    * indicates that it applies to SNB as well. We assume that the rule applies
    * to Gen <= 5 although their PRMs do not state it.
    *
    * While the documentation explicitly says in exception (2) that the
    * destination must be an integer DWord, the hardware allows at least a
    * float destination type as well. We emit such instructions from
    *
    *    fs_visitor::emit_interpolation_setup_gfx6
    *    fs_visitor::emit_fragcoord_interpolation
    *
    * and have for years with no ill effects.
    *
    * Additionally the simulator source code indicates that the real condition
    * is that the size of the destination type is 4 bytes.
    */
   if (devinfo->ver <= 7 && dst_regs == 2) {
      /* This declaration shadows the function-level dst_type; both are
       * initialized from the same inst_dst_type() call.
       */
      enum brw_reg_type dst_type = inst_dst_type(devinfo, inst);
      bool dst_is_packed_dword =
         is_packed(exec_size * stride, exec_size, stride) &&
         brw_reg_type_to_size(dst_type) == 4;

      for (unsigned i = 0; i < num_sources; i++) {
         /* DO_SRC(n) declares its own region locals, so each expansion has to
          * stay inside its own braces as it does below.
          */
#define DO_SRC(n)                                                                  \
         unsigned vstride, width, hstride;                                         \
         vstride = STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst));           \
         width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst));                \
         hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst));           \
         bool src ## n ## _is_packed_word =                                        \
            is_packed(vstride, width, hstride) &&                                  \
            (brw_inst_src ## n ## _type(devinfo, inst) == BRW_REGISTER_TYPE_W ||   \
             brw_inst_src ## n ## _type(devinfo, inst) == BRW_REGISTER_TYPE_UW);   \
                                                                                   \
         ERROR_IF(src ## n ## _regs == 1 &&                                        \
                  !src ## n ## _has_scalar_region(devinfo, inst) &&                \
                  !(dst_is_packed_dword && src ## n ## _is_packed_word),           \
                  "When the destination spans two registers, the source must "     \
                  "span two registers\n" ERROR_INDENT "(exceptions for scalar "    \
                  "source and packed-word to packed-dword expansion)")

         if (i == 0) {
            DO_SRC(0);
         } else {
            DO_SRC(1);
         }
#undef DO_SRC
      }
   }

   return error_msg;
}

/**
 * Checks the destination requirements that apply when the last source of a
 * one- or two-source instruction is an immediate of a vector type (V, UV or
 * VF).
 *
 * Instructions with zero or three sources, and instructions whose last source
 * is not an immediate, are accepted without any check. Returns the
 * accumulated error text (str is NULL when no rule was violated).
 */
static struct string
vector_immediate_restrictions(const struct intel_device_info *devinfo,
                              const brw_inst *inst)
{
   unsigned num_sources = num_sources_from_inst(devinfo, inst);
   struct string error_msg = { .str = NULL, .len = 0 };

   if (num_sources == 3 || num_sources == 0)
      return (struct string){};

   /* Only the last source is inspected: src0 for one-source instructions,
    * src1 for two-source instructions.
    */
   unsigned file = num_sources == 1 ?
                   brw_inst_src0_reg_file(devinfo, inst) :
                   brw_inst_src1_reg_file(devinfo, inst);
   if (file != BRW_IMMEDIATE_VALUE)
      return (struct string){};

   enum brw_reg_type dst_type = inst_dst_type(devinfo, inst);
   unsigned dst_type_size = brw_reg_type_to_size(dst_type);
   /* The subregister number is only read in Align1 mode; 0 is used otherwise. */
   unsigned dst_subreg = brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1 ?
                         brw_inst_dst_da1_subreg_nr(devinfo, inst) : 0;
   unsigned dst_stride = STRIDE(brw_inst_dst_hstride(devinfo, inst));
   enum brw_reg_type type = num_sources == 1 ?
                            brw_inst_src0_type(devinfo, inst) :
                            brw_inst_src1_type(devinfo, inst);

   /* The PRMs say:
    *
    *    When an immediate vector is used in an instruction, the destination
    *    must be 128-bit aligned with destination horizontal stride equivalent
    *    to a word for an immediate integer vector (v) and equivalent to a
    *    DWord for an immediate float vector (vf).
    *
    * The text has not been updated for the addition of the immediate unsigned
    * integer vector type (uv) on SNB, but presumably the same restriction
    * applies.
    */
   switch (type) {
   case BRW_REGISTER_TYPE_V:
   case BRW_REGISTER_TYPE_UV:
   case BRW_REGISTER_TYPE_VF:
      ERROR_IF(dst_subreg % (128 / 8) != 0,
               "Destination must be 128-bit aligned in order to use immediate "
               "vector types");

      /* The stride is compared in bytes: type size times horizontal stride. */
      if (type == BRW_REGISTER_TYPE_VF) {
         ERROR_IF(dst_type_size * dst_stride != 4,
                  "Destination must have stride equivalent to dword in order "
                  "to use the VF type");
      } else {
         ERROR_IF(dst_type_size * dst_stride != 2,
                  "Destination must have stride equivalent to word in order "
                  "to use the V or UV type");
      }
      break;
   default:
      break;
   }

   return error_msg;
}

static struct string
special_requirements_for_handling_double_precision_data_types(
                                       const struct intel_device_info *devinfo,
                                       const brw_inst *inst)
{
   unsigned num_sources = num_sources_from_inst(devinfo, inst);
   struct string error_msg = { .str = NULL, .len = 0 };

   if (num_sources == 3 || num_sources == 0)
      return (struct string){};

   /* Split sends don't have types so there's no doubles there.
*/ 1751 if (inst_is_split_send(devinfo, inst)) 1752 return (struct string){}; 1753 1754 enum brw_reg_type exec_type = execution_type(devinfo, inst); 1755 unsigned exec_type_size = brw_reg_type_to_size(exec_type); 1756 1757 enum brw_reg_file dst_file = brw_inst_dst_reg_file(devinfo, inst); 1758 enum brw_reg_type dst_type = inst_dst_type(devinfo, inst); 1759 unsigned dst_type_size = brw_reg_type_to_size(dst_type); 1760 unsigned dst_hstride = STRIDE(brw_inst_dst_hstride(devinfo, inst)); 1761 unsigned dst_reg = brw_inst_dst_da_reg_nr(devinfo, inst); 1762 unsigned dst_subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst); 1763 unsigned dst_address_mode = brw_inst_dst_address_mode(devinfo, inst); 1764 1765 bool is_integer_dword_multiply = 1766 devinfo->ver >= 8 && 1767 brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MUL && 1768 (brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_D || 1769 brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_UD) && 1770 (brw_inst_src1_type(devinfo, inst) == BRW_REGISTER_TYPE_D || 1771 brw_inst_src1_type(devinfo, inst) == BRW_REGISTER_TYPE_UD); 1772 1773 const bool is_double_precision = 1774 dst_type_size == 8 || exec_type_size == 8 || is_integer_dword_multiply; 1775 1776 for (unsigned i = 0; i < num_sources; i++) { 1777 unsigned vstride, width, hstride, type_size, reg, subreg, address_mode; 1778 bool is_scalar_region; 1779 enum brw_reg_file file; 1780 enum brw_reg_type type; 1781 1782#define DO_SRC(n) \ 1783 if (brw_inst_src ## n ## _reg_file(devinfo, inst) == \ 1784 BRW_IMMEDIATE_VALUE) \ 1785 continue; \ 1786 \ 1787 is_scalar_region = src ## n ## _has_scalar_region(devinfo, inst); \ 1788 vstride = STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst)); \ 1789 width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst)); \ 1790 hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst)); \ 1791 file = brw_inst_src ## n ## _reg_file(devinfo, inst); \ 1792 type = brw_inst_src ## n ## _type(devinfo, inst); \ 1793 type_size = 
brw_reg_type_to_size(type); \ 1794 reg = brw_inst_src ## n ## _da_reg_nr(devinfo, inst); \ 1795 subreg = brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst); \ 1796 address_mode = brw_inst_src ## n ## _address_mode(devinfo, inst) 1797 1798 if (i == 0) { 1799 DO_SRC(0); 1800 } else { 1801 DO_SRC(1); 1802 } 1803#undef DO_SRC 1804 1805 const unsigned src_stride = hstride * type_size; 1806 const unsigned dst_stride = dst_hstride * dst_type_size; 1807 1808 /* The PRMs say that for CHV, BXT: 1809 * 1810 * When source or destination datatype is 64b or operation is integer 1811 * DWord multiply, regioning in Align1 must follow these rules: 1812 * 1813 * 1. Source and Destination horizontal stride must be aligned to the 1814 * same qword. 1815 * 2. Regioning must ensure Src.Vstride = Src.Width * Src.Hstride. 1816 * 3. Source and Destination offset must be the same, except the case 1817 * of scalar source. 1818 * 1819 * We assume that the restriction applies to GLK as well. 1820 */ 1821 if (is_double_precision && 1822 brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1 && 1823 (devinfo->is_cherryview || intel_device_info_is_9lp(devinfo))) { 1824 ERROR_IF(!is_scalar_region && 1825 (src_stride % 8 != 0 || 1826 dst_stride % 8 != 0 || 1827 src_stride != dst_stride), 1828 "Source and destination horizontal stride must equal and a " 1829 "multiple of a qword when the execution type is 64-bit"); 1830 1831 ERROR_IF(vstride != width * hstride, 1832 "Vstride must be Width * Hstride when the execution type is " 1833 "64-bit"); 1834 1835 ERROR_IF(!is_scalar_region && dst_subreg != subreg, 1836 "Source and destination offset must be the same when the " 1837 "execution type is 64-bit"); 1838 } 1839 1840 /* The PRMs say that for CHV, BXT: 1841 * 1842 * When source or destination datatype is 64b or operation is integer 1843 * DWord multiply, indirect addressing must not be used. 1844 * 1845 * We assume that the restriction applies to GLK as well. 
1846 */ 1847 if (is_double_precision && 1848 (devinfo->is_cherryview || intel_device_info_is_9lp(devinfo))) { 1849 ERROR_IF(BRW_ADDRESS_REGISTER_INDIRECT_REGISTER == address_mode || 1850 BRW_ADDRESS_REGISTER_INDIRECT_REGISTER == dst_address_mode, 1851 "Indirect addressing is not allowed when the execution type " 1852 "is 64-bit"); 1853 } 1854 1855 /* The PRMs say that for CHV, BXT: 1856 * 1857 * ARF registers must never be used with 64b datatype or when 1858 * operation is integer DWord multiply. 1859 * 1860 * We assume that the restriction applies to GLK as well. 1861 * 1862 * We assume that the restriction does not apply to the null register. 1863 */ 1864 if (is_double_precision && 1865 (devinfo->is_cherryview || intel_device_info_is_9lp(devinfo))) { 1866 ERROR_IF(brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MAC || 1867 brw_inst_acc_wr_control(devinfo, inst) || 1868 (BRW_ARCHITECTURE_REGISTER_FILE == file && 1869 reg != BRW_ARF_NULL) || 1870 (BRW_ARCHITECTURE_REGISTER_FILE == dst_file && 1871 dst_reg != BRW_ARF_NULL), 1872 "Architecture registers cannot be used when the execution " 1873 "type is 64-bit"); 1874 } 1875 1876 /* From the hardware spec section "Register Region Restrictions": 1877 * 1878 * "In case where source or destination datatype is 64b or operation is 1879 * integer DWord multiply [or in case where a floating point data type 1880 * is used as destination]: 1881 * 1882 * 1. Register Regioning patterns where register data bit locations 1883 * are changed between source and destination are not supported on 1884 * Src0 and Src1 except for broadcast of a scalar. 1885 * 1886 * 2. Explicit ARF registers except null and accumulator must not be 1887 * used." 
1888 */ 1889 if (devinfo->verx10 >= 125 && 1890 (brw_reg_type_is_floating_point(dst_type) || 1891 is_double_precision)) { 1892 ERROR_IF(!is_scalar_region && 1893 (vstride != width * hstride || 1894 src_stride != dst_stride || 1895 subreg != dst_subreg), 1896 "Register Regioning patterns where register data bit " 1897 "locations are changed between source and destination are not " 1898 "supported except for broadcast of a scalar."); 1899 1900 ERROR_IF((file == BRW_ARCHITECTURE_REGISTER_FILE && 1901 reg != BRW_ARF_NULL && !(reg >= BRW_ARF_ACCUMULATOR && reg < BRW_ARF_FLAG)) || 1902 (dst_file == BRW_ARCHITECTURE_REGISTER_FILE && 1903 dst_reg != BRW_ARF_NULL && dst_reg != BRW_ARF_ACCUMULATOR), 1904 "Explicit ARF registers except null and accumulator must not " 1905 "be used."); 1906 } 1907 1908 /* From the hardware spec section "Register Region Restrictions": 1909 * 1910 * "Vx1 and VxH indirect addressing for Float, Half-Float, Double-Float and 1911 * Quad-Word data must not be used." 1912 */ 1913 if (devinfo->verx10 >= 125 && 1914 (brw_reg_type_is_floating_point(type) || type_sz(type) == 8)) { 1915 ERROR_IF(address_mode == BRW_ADDRESS_REGISTER_INDIRECT_REGISTER && 1916 vstride == BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL, 1917 "Vx1 and VxH indirect addressing for Float, Half-Float, " 1918 "Double-Float and Quad-Word data must not be used"); 1919 } 1920 } 1921 1922 /* The PRMs say that for BDW, SKL: 1923 * 1924 * If Align16 is required for an operation with QW destination and non-QW 1925 * source datatypes, the execution size cannot exceed 2. 1926 * 1927 * We assume that the restriction applies to all Gfx8+ parts. 1928 */ 1929 if (is_double_precision && devinfo->ver >= 8) { 1930 enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst); 1931 enum brw_reg_type src1_type = 1932 num_sources > 1 ? 
brw_inst_src1_type(devinfo, inst) : src0_type; 1933 unsigned src0_type_size = brw_reg_type_to_size(src0_type); 1934 unsigned src1_type_size = brw_reg_type_to_size(src1_type); 1935 1936 ERROR_IF(brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16 && 1937 dst_type_size == 8 && 1938 (src0_type_size != 8 || src1_type_size != 8) && 1939 brw_inst_exec_size(devinfo, inst) > BRW_EXECUTE_2, 1940 "In Align16 exec size cannot exceed 2 with a QWord destination " 1941 "and a non-QWord source"); 1942 } 1943 1944 /* The PRMs say that for CHV, BXT: 1945 * 1946 * When source or destination datatype is 64b or operation is integer 1947 * DWord multiply, DepCtrl must not be used. 1948 * 1949 * We assume that the restriction applies to GLK as well. 1950 */ 1951 if (is_double_precision && 1952 (devinfo->is_cherryview || intel_device_info_is_9lp(devinfo))) { 1953 ERROR_IF(brw_inst_no_dd_check(devinfo, inst) || 1954 brw_inst_no_dd_clear(devinfo, inst), 1955 "DepCtrl is not allowed when the execution type is 64-bit"); 1956 } 1957 1958 return error_msg; 1959} 1960 1961static struct string 1962instruction_restrictions(const struct intel_device_info *devinfo, 1963 const brw_inst *inst) 1964{ 1965 struct string error_msg = { .str = NULL, .len = 0 }; 1966 1967 /* From Wa_1604601757: 1968 * 1969 * "When multiplying a DW and any lower precision integer, source modifier 1970 * is not supported." 
1971 */ 1972 if (devinfo->ver >= 12 && 1973 brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MUL) { 1974 enum brw_reg_type exec_type = execution_type(devinfo, inst); 1975 const bool src0_valid = type_sz(brw_inst_src0_type(devinfo, inst)) == 4 || 1976 brw_inst_src0_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE || 1977 !(brw_inst_src0_negate(devinfo, inst) || 1978 brw_inst_src0_abs(devinfo, inst)); 1979 const bool src1_valid = type_sz(brw_inst_src1_type(devinfo, inst)) == 4 || 1980 brw_inst_src1_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE || 1981 !(brw_inst_src1_negate(devinfo, inst) || 1982 brw_inst_src1_abs(devinfo, inst)); 1983 1984 ERROR_IF(!brw_reg_type_is_floating_point(exec_type) && 1985 type_sz(exec_type) == 4 && !(src0_valid && src1_valid), 1986 "When multiplying a DW and any lower precision integer, source " 1987 "modifier is not supported."); 1988 } 1989 1990 if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_CMP || 1991 brw_inst_opcode(devinfo, inst) == BRW_OPCODE_CMPN) { 1992 if (devinfo->ver <= 7) { 1993 /* Page 166 of the Ivy Bridge PRM Volume 4 part 3 (Execution Unit 1994 * ISA) says: 1995 * 1996 * Accumulator cannot be destination, implicit or explicit. The 1997 * destination must be a general register or the null register. 1998 * 1999 * Page 77 of the Haswell PRM Volume 2b contains the same text. The 2000 * 965G PRMs contain similar text. 2001 * 2002 * Page 864 (page 880 of the PDF) of the Broadwell PRM Volume 7 says: 2003 * 2004 * For the cmp and cmpn instructions, remove the accumulator 2005 * restrictions. 2006 */ 2007 ERROR_IF(brw_inst_dst_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE && 2008 brw_inst_dst_da_reg_nr(devinfo, inst) != BRW_ARF_NULL, 2009 "Accumulator cannot be destination, implicit or explicit."); 2010 } 2011 2012 /* Page 166 of the Ivy Bridge PRM Volume 4 part 3 (Execution Unit ISA) 2013 * says: 2014 * 2015 * If the destination is the null register, the {Switch} instruction 2016 * option must be used. 
2017 * 2018 * Page 77 of the Haswell PRM Volume 2b contains the same text. 2019 */ 2020 if (devinfo->ver == 7) { 2021 ERROR_IF(dst_is_null(devinfo, inst) && 2022 brw_inst_thread_control(devinfo, inst) != BRW_THREAD_SWITCH, 2023 "If the destination is the null register, the {Switch} " 2024 "instruction option must be used."); 2025 } 2026 } 2027 2028 if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MATH) { 2029 unsigned math_function = brw_inst_math_function(devinfo, inst); 2030 switch (math_function) { 2031 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER: 2032 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT: 2033 case BRW_MATH_FUNCTION_INT_DIV_REMAINDER: { 2034 /* Page 442 of the Broadwell PRM Volume 2a "Extended Math Function" says: 2035 * INT DIV function does not support source modifiers. 2036 * Bspec 6647 extends it back to Ivy Bridge. 2037 */ 2038 bool src0_valid = !brw_inst_src0_negate(devinfo, inst) && 2039 !brw_inst_src0_abs(devinfo, inst); 2040 bool src1_valid = !brw_inst_src1_negate(devinfo, inst) && 2041 !brw_inst_src1_abs(devinfo, inst); 2042 ERROR_IF(!src0_valid || !src1_valid, 2043 "INT DIV function does not support source modifiers."); 2044 break; 2045 } 2046 default: 2047 break; 2048 } 2049 } 2050 2051 if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_DP4A) { 2052 /* Page 396 (page 412 of the PDF) of the DG1 PRM volume 2a says: 2053 * 2054 * Only one of src0 or src1 operand may be an the (sic) accumulator 2055 * register (acc#). 
2056 */ 2057 ERROR_IF(src0_is_acc(devinfo, inst) && src1_is_acc(devinfo, inst), 2058 "Only one of src0 or src1 operand may be an accumulator " 2059 "register (acc#)."); 2060 2061 } 2062 2063 return error_msg; 2064} 2065 2066static struct string 2067send_descriptor_restrictions(const struct intel_device_info *devinfo, 2068 const brw_inst *inst) 2069{ 2070 struct string error_msg = { .str = NULL, .len = 0 }; 2071 2072 if (inst_is_split_send(devinfo, inst)) { 2073 /* We can only validate immediate descriptors */ 2074 if (brw_inst_send_sel_reg32_desc(devinfo, inst)) 2075 return error_msg; 2076 } else if (inst_is_send(devinfo, inst)) { 2077 /* We can only validate immediate descriptors */ 2078 if (brw_inst_src1_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE) 2079 return error_msg; 2080 } else { 2081 return error_msg; 2082 } 2083 2084 const uint32_t desc = brw_inst_send_desc(devinfo, inst); 2085 2086 switch (brw_inst_sfid(devinfo, inst)) { 2087 case GFX12_SFID_TGM: 2088 case GFX12_SFID_SLM: 2089 case GFX12_SFID_UGM: 2090 ERROR_IF(!devinfo->has_lsc, "Platform does not support LSC"); 2091 2092 ERROR_IF(lsc_opcode_has_transpose(lsc_msg_desc_opcode(devinfo, desc)) && 2093 lsc_msg_desc_transpose(devinfo, desc) && 2094 brw_inst_exec_size(devinfo, inst) != BRW_EXECUTE_1, 2095 "Transposed vectors are restricted to Exec_Mask = 1."); 2096 break; 2097 2098 default: 2099 break; 2100 } 2101 2102 return error_msg; 2103} 2104 2105bool 2106brw_validate_instruction(const struct intel_device_info *devinfo, 2107 const brw_inst *inst, int offset, 2108 struct disasm_info *disasm) 2109{ 2110 struct string error_msg = { .str = NULL, .len = 0 }; 2111 2112 if (is_unsupported_inst(devinfo, inst)) { 2113 ERROR("Instruction not supported on this Gen"); 2114 } else { 2115 CHECK(invalid_values); 2116 2117 if (error_msg.str == NULL) { 2118 CHECK(sources_not_null); 2119 CHECK(send_restrictions); 2120 CHECK(alignment_supported); 2121 CHECK(general_restrictions_based_on_operand_types); 2122 
CHECK(general_restrictions_on_region_parameters); 2123 CHECK(special_restrictions_for_mixed_float_mode); 2124 CHECK(region_alignment_rules); 2125 CHECK(vector_immediate_restrictions); 2126 CHECK(special_requirements_for_handling_double_precision_data_types); 2127 CHECK(instruction_restrictions); 2128 CHECK(send_descriptor_restrictions); 2129 } 2130 } 2131 2132 if (error_msg.str && disasm) { 2133 disasm_insert_error(disasm, offset, error_msg.str); 2134 } 2135 free(error_msg.str); 2136 2137 return error_msg.len == 0; 2138} 2139 2140bool 2141brw_validate_instructions(const struct intel_device_info *devinfo, 2142 const void *assembly, int start_offset, int end_offset, 2143 struct disasm_info *disasm) 2144{ 2145 bool valid = true; 2146 2147 for (int src_offset = start_offset; src_offset < end_offset;) { 2148 const brw_inst *inst = assembly + src_offset; 2149 bool is_compact = brw_inst_cmpt_control(devinfo, inst); 2150 unsigned inst_size = is_compact ? sizeof(brw_compact_inst) 2151 : sizeof(brw_inst); 2152 brw_inst uncompacted; 2153 2154 if (is_compact) { 2155 brw_compact_inst *compacted = (void *)inst; 2156 brw_uncompact_instruction(devinfo, &uncompacted, compacted); 2157 inst = &uncompacted; 2158 } 2159 2160 bool v = brw_validate_instruction(devinfo, inst, src_offset, disasm); 2161 valid = valid && v; 2162 2163 src_offset += inst_size; 2164 } 2165 2166 return valid; 2167} 2168