/*
 * Copyright © 2015-2019 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/** @file brw_eu_validate.c
 *
 * This file implements a pass that validates shader assembly.
 *
 * The restrictions implemented herein are intended to verify that instructions
 * in shader assembly do not violate restrictions documented in the graphics
 * programming reference manuals.
319f464c52Smaya * 329f464c52Smaya * The restrictions are difficult for humans to quickly verify due to their 339f464c52Smaya * complexity and abundance. 349f464c52Smaya * 359f464c52Smaya * It is critical that this code is thoroughly unit tested because false 369f464c52Smaya * results will lead developers astray, which is worse than having no validator 379f464c52Smaya * at all. Functional changes to this file without corresponding unit tests (in 389f464c52Smaya * test_eu_validate.cpp) will be rejected. 3901e04c3fSmrg */ 4001e04c3fSmrg 417ec681f3Smrg#include <stdlib.h> 4201e04c3fSmrg#include "brw_eu.h" 4301e04c3fSmrg 4401e04c3fSmrg/* We're going to do lots of string concatenation, so this should help. */ 4501e04c3fSmrgstruct string { 4601e04c3fSmrg char *str; 4701e04c3fSmrg size_t len; 4801e04c3fSmrg}; 4901e04c3fSmrg 5001e04c3fSmrgstatic void 5101e04c3fSmrgcat(struct string *dest, const struct string src) 5201e04c3fSmrg{ 5301e04c3fSmrg dest->str = realloc(dest->str, dest->len + src.len + 1); 5401e04c3fSmrg memcpy(dest->str + dest->len, src.str, src.len); 5501e04c3fSmrg dest->str[dest->len + src.len] = '\0'; 5601e04c3fSmrg dest->len = dest->len + src.len; 5701e04c3fSmrg} 5801e04c3fSmrg#define CAT(dest, src) cat(&dest, (struct string){src, strlen(src)}) 5901e04c3fSmrg 6001e04c3fSmrgstatic bool 6101e04c3fSmrgcontains(const struct string haystack, const struct string needle) 6201e04c3fSmrg{ 6301e04c3fSmrg return haystack.str && memmem(haystack.str, haystack.len, 6401e04c3fSmrg needle.str, needle.len) != NULL; 6501e04c3fSmrg} 6601e04c3fSmrg#define CONTAINS(haystack, needle) \ 6701e04c3fSmrg contains(haystack, (struct string){needle, strlen(needle)}) 6801e04c3fSmrg 6901e04c3fSmrg#define error(str) "\tERROR: " str "\n" 7001e04c3fSmrg#define ERROR_INDENT "\t " 7101e04c3fSmrg 7201e04c3fSmrg#define ERROR(msg) ERROR_IF(true, msg) 7301e04c3fSmrg#define ERROR_IF(cond, msg) \ 7401e04c3fSmrg do { \ 7501e04c3fSmrg if ((cond) && !CONTAINS(error_msg, error(msg))) { \ 7601e04c3fSmrg 
CAT(error_msg, error(msg)); \ 7701e04c3fSmrg } \ 7801e04c3fSmrg } while(0) 7901e04c3fSmrg 8001e04c3fSmrg#define CHECK(func, args...) \ 8101e04c3fSmrg do { \ 8201e04c3fSmrg struct string __msg = func(devinfo, inst, ##args); \ 8301e04c3fSmrg if (__msg.str) { \ 8401e04c3fSmrg cat(&error_msg, __msg); \ 8501e04c3fSmrg free(__msg.str); \ 8601e04c3fSmrg } \ 8701e04c3fSmrg } while (0) 8801e04c3fSmrg 8901e04c3fSmrg#define STRIDE(stride) (stride != 0 ? 1 << ((stride) - 1) : 0) 9001e04c3fSmrg#define WIDTH(width) (1 << (width)) 9101e04c3fSmrg 9201e04c3fSmrgstatic bool 937ec681f3Smrginst_is_send(const struct intel_device_info *devinfo, const brw_inst *inst) 9401e04c3fSmrg{ 9501e04c3fSmrg switch (brw_inst_opcode(devinfo, inst)) { 9601e04c3fSmrg case BRW_OPCODE_SEND: 9701e04c3fSmrg case BRW_OPCODE_SENDC: 9801e04c3fSmrg case BRW_OPCODE_SENDS: 9901e04c3fSmrg case BRW_OPCODE_SENDSC: 10001e04c3fSmrg return true; 10101e04c3fSmrg default: 10201e04c3fSmrg return false; 10301e04c3fSmrg } 10401e04c3fSmrg} 10501e04c3fSmrg 1069f464c52Smayastatic bool 1077ec681f3Smrginst_is_split_send(const struct intel_device_info *devinfo, 1087ec681f3Smrg const brw_inst *inst) 1099f464c52Smaya{ 1107ec681f3Smrg if (devinfo->ver >= 12) { 1117ec681f3Smrg return inst_is_send(devinfo, inst); 1127ec681f3Smrg } else { 1137ec681f3Smrg switch (brw_inst_opcode(devinfo, inst)) { 1147ec681f3Smrg case BRW_OPCODE_SENDS: 1157ec681f3Smrg case BRW_OPCODE_SENDSC: 1167ec681f3Smrg return true; 1177ec681f3Smrg default: 1187ec681f3Smrg return false; 1197ec681f3Smrg } 1209f464c52Smaya } 1219f464c52Smaya} 1229f464c52Smaya 12301e04c3fSmrgstatic unsigned 12401e04c3fSmrgsigned_type(unsigned type) 12501e04c3fSmrg{ 12601e04c3fSmrg switch (type) { 12701e04c3fSmrg case BRW_REGISTER_TYPE_UD: return BRW_REGISTER_TYPE_D; 12801e04c3fSmrg case BRW_REGISTER_TYPE_UW: return BRW_REGISTER_TYPE_W; 12901e04c3fSmrg case BRW_REGISTER_TYPE_UB: return BRW_REGISTER_TYPE_B; 13001e04c3fSmrg case BRW_REGISTER_TYPE_UQ: return BRW_REGISTER_TYPE_Q; 
13101e04c3fSmrg default: return type; 13201e04c3fSmrg } 13301e04c3fSmrg} 13401e04c3fSmrg 1357ec681f3Smrgstatic enum brw_reg_type 1367ec681f3Smrginst_dst_type(const struct intel_device_info *devinfo, const brw_inst *inst) 1377ec681f3Smrg{ 1387ec681f3Smrg return (devinfo->ver < 12 || !inst_is_send(devinfo, inst)) ? 1397ec681f3Smrg brw_inst_dst_type(devinfo, inst) : BRW_REGISTER_TYPE_D; 1407ec681f3Smrg} 1417ec681f3Smrg 14201e04c3fSmrgstatic bool 1437ec681f3Smrginst_is_raw_move(const struct intel_device_info *devinfo, const brw_inst *inst) 14401e04c3fSmrg{ 1457ec681f3Smrg unsigned dst_type = signed_type(inst_dst_type(devinfo, inst)); 14601e04c3fSmrg unsigned src_type = signed_type(brw_inst_src0_type(devinfo, inst)); 14701e04c3fSmrg 14801e04c3fSmrg if (brw_inst_src0_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE) { 14901e04c3fSmrg /* FIXME: not strictly true */ 15001e04c3fSmrg if (brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_VF || 15101e04c3fSmrg brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_UV || 15201e04c3fSmrg brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_V) { 15301e04c3fSmrg return false; 15401e04c3fSmrg } 15501e04c3fSmrg } else if (brw_inst_src0_negate(devinfo, inst) || 15601e04c3fSmrg brw_inst_src0_abs(devinfo, inst)) { 15701e04c3fSmrg return false; 15801e04c3fSmrg } 15901e04c3fSmrg 16001e04c3fSmrg return brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MOV && 16101e04c3fSmrg brw_inst_saturate(devinfo, inst) == 0 && 16201e04c3fSmrg dst_type == src_type; 16301e04c3fSmrg} 16401e04c3fSmrg 16501e04c3fSmrgstatic bool 1667ec681f3Smrgdst_is_null(const struct intel_device_info *devinfo, const brw_inst *inst) 16701e04c3fSmrg{ 16801e04c3fSmrg return brw_inst_dst_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE && 16901e04c3fSmrg brw_inst_dst_da_reg_nr(devinfo, inst) == BRW_ARF_NULL; 17001e04c3fSmrg} 17101e04c3fSmrg 17201e04c3fSmrgstatic bool 1737ec681f3Smrgsrc0_is_null(const struct intel_device_info *devinfo, const brw_inst *inst) 
17401e04c3fSmrg{ 1757ec681f3Smrg return brw_inst_src0_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT && 1767ec681f3Smrg brw_inst_src0_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE && 17701e04c3fSmrg brw_inst_src0_da_reg_nr(devinfo, inst) == BRW_ARF_NULL; 17801e04c3fSmrg} 17901e04c3fSmrg 18001e04c3fSmrgstatic bool 1817ec681f3Smrgsrc1_is_null(const struct intel_device_info *devinfo, const brw_inst *inst) 18201e04c3fSmrg{ 18301e04c3fSmrg return brw_inst_src1_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE && 18401e04c3fSmrg brw_inst_src1_da_reg_nr(devinfo, inst) == BRW_ARF_NULL; 18501e04c3fSmrg} 18601e04c3fSmrg 1879f464c52Smayastatic bool 1887ec681f3Smrgsrc0_is_acc(const struct intel_device_info *devinfo, const brw_inst *inst) 1899f464c52Smaya{ 1909f464c52Smaya return brw_inst_src0_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE && 1919f464c52Smaya (brw_inst_src0_da_reg_nr(devinfo, inst) & 0xF0) == BRW_ARF_ACCUMULATOR; 1929f464c52Smaya} 1939f464c52Smaya 1949f464c52Smayastatic bool 1957ec681f3Smrgsrc1_is_acc(const struct intel_device_info *devinfo, const brw_inst *inst) 1969f464c52Smaya{ 1979f464c52Smaya return brw_inst_src1_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE && 1989f464c52Smaya (brw_inst_src1_da_reg_nr(devinfo, inst) & 0xF0) == BRW_ARF_ACCUMULATOR; 1999f464c52Smaya} 2009f464c52Smaya 20101e04c3fSmrgstatic bool 2027ec681f3Smrgsrc0_has_scalar_region(const struct intel_device_info *devinfo, 2037ec681f3Smrg const brw_inst *inst) 20401e04c3fSmrg{ 20501e04c3fSmrg return brw_inst_src0_vstride(devinfo, inst) == BRW_VERTICAL_STRIDE_0 && 20601e04c3fSmrg brw_inst_src0_width(devinfo, inst) == BRW_WIDTH_1 && 20701e04c3fSmrg brw_inst_src0_hstride(devinfo, inst) == BRW_HORIZONTAL_STRIDE_0; 20801e04c3fSmrg} 20901e04c3fSmrg 21001e04c3fSmrgstatic bool 2117ec681f3Smrgsrc1_has_scalar_region(const struct intel_device_info *devinfo, 2127ec681f3Smrg const brw_inst *inst) 21301e04c3fSmrg{ 21401e04c3fSmrg return 
brw_inst_src1_vstride(devinfo, inst) == BRW_VERTICAL_STRIDE_0 && 21501e04c3fSmrg brw_inst_src1_width(devinfo, inst) == BRW_WIDTH_1 && 21601e04c3fSmrg brw_inst_src1_hstride(devinfo, inst) == BRW_HORIZONTAL_STRIDE_0; 21701e04c3fSmrg} 21801e04c3fSmrg 21901e04c3fSmrgstatic unsigned 2207ec681f3Smrgnum_sources_from_inst(const struct intel_device_info *devinfo, 22101e04c3fSmrg const brw_inst *inst) 22201e04c3fSmrg{ 22301e04c3fSmrg const struct opcode_desc *desc = 22401e04c3fSmrg brw_opcode_desc(devinfo, brw_inst_opcode(devinfo, inst)); 22501e04c3fSmrg unsigned math_function; 22601e04c3fSmrg 22701e04c3fSmrg if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MATH) { 22801e04c3fSmrg math_function = brw_inst_math_function(devinfo, inst); 2297ec681f3Smrg } else if (devinfo->ver < 6 && 23001e04c3fSmrg brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND) { 23101e04c3fSmrg if (brw_inst_sfid(devinfo, inst) == BRW_SFID_MATH) { 23201e04c3fSmrg /* src1 must be a descriptor (including the information to determine 23301e04c3fSmrg * that the SEND is doing an extended math operation), but src0 can 23401e04c3fSmrg * actually be null since it serves as the source of the implicit GRF 23501e04c3fSmrg * to MRF move. 23601e04c3fSmrg * 23701e04c3fSmrg * If we stop using that functionality, we'll have to revisit this. 23801e04c3fSmrg */ 23901e04c3fSmrg return 2; 24001e04c3fSmrg } else { 24101e04c3fSmrg /* Send instructions are allowed to have null sources since they use 24201e04c3fSmrg * the base_mrf field to specify which message register source. 
24301e04c3fSmrg */ 24401e04c3fSmrg return 0; 24501e04c3fSmrg } 24601e04c3fSmrg } else { 24701e04c3fSmrg assert(desc->nsrc < 4); 24801e04c3fSmrg return desc->nsrc; 24901e04c3fSmrg } 25001e04c3fSmrg 25101e04c3fSmrg switch (math_function) { 25201e04c3fSmrg case BRW_MATH_FUNCTION_INV: 25301e04c3fSmrg case BRW_MATH_FUNCTION_LOG: 25401e04c3fSmrg case BRW_MATH_FUNCTION_EXP: 25501e04c3fSmrg case BRW_MATH_FUNCTION_SQRT: 25601e04c3fSmrg case BRW_MATH_FUNCTION_RSQ: 25701e04c3fSmrg case BRW_MATH_FUNCTION_SIN: 25801e04c3fSmrg case BRW_MATH_FUNCTION_COS: 25901e04c3fSmrg case BRW_MATH_FUNCTION_SINCOS: 2607ec681f3Smrg case GFX8_MATH_FUNCTION_INVM: 2617ec681f3Smrg case GFX8_MATH_FUNCTION_RSQRTM: 26201e04c3fSmrg return 1; 26301e04c3fSmrg case BRW_MATH_FUNCTION_FDIV: 26401e04c3fSmrg case BRW_MATH_FUNCTION_POW: 26501e04c3fSmrg case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER: 26601e04c3fSmrg case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT: 26701e04c3fSmrg case BRW_MATH_FUNCTION_INT_DIV_REMAINDER: 26801e04c3fSmrg return 2; 26901e04c3fSmrg default: 27001e04c3fSmrg unreachable("not reached"); 27101e04c3fSmrg } 27201e04c3fSmrg} 27301e04c3fSmrg 27401e04c3fSmrgstatic struct string 2757ec681f3Smrginvalid_values(const struct intel_device_info *devinfo, const brw_inst *inst) 2767ec681f3Smrg{ 2777ec681f3Smrg unsigned num_sources = num_sources_from_inst(devinfo, inst); 2787ec681f3Smrg struct string error_msg = { .str = NULL, .len = 0 }; 2797ec681f3Smrg 2807ec681f3Smrg switch ((enum brw_execution_size) brw_inst_exec_size(devinfo, inst)) { 2817ec681f3Smrg case BRW_EXECUTE_1: 2827ec681f3Smrg case BRW_EXECUTE_2: 2837ec681f3Smrg case BRW_EXECUTE_4: 2847ec681f3Smrg case BRW_EXECUTE_8: 2857ec681f3Smrg case BRW_EXECUTE_16: 2867ec681f3Smrg case BRW_EXECUTE_32: 2877ec681f3Smrg break; 2887ec681f3Smrg default: 2897ec681f3Smrg ERROR("invalid execution size"); 2907ec681f3Smrg break; 2917ec681f3Smrg } 2927ec681f3Smrg 2937ec681f3Smrg if (inst_is_send(devinfo, inst)) 2947ec681f3Smrg return error_msg; 
2957ec681f3Smrg 2967ec681f3Smrg if (num_sources == 3) { 2977ec681f3Smrg /* Nothing to test: 2987ec681f3Smrg * No 3-src instructions on Gfx4-5 2997ec681f3Smrg * No reg file bits on Gfx6-10 (align16) 3007ec681f3Smrg * No invalid encodings on Gfx10-12 (align1) 3017ec681f3Smrg */ 3027ec681f3Smrg } else { 3037ec681f3Smrg if (devinfo->ver > 6) { 3047ec681f3Smrg ERROR_IF(brw_inst_dst_reg_file(devinfo, inst) == MRF || 3057ec681f3Smrg (num_sources > 0 && 3067ec681f3Smrg brw_inst_src0_reg_file(devinfo, inst) == MRF) || 3077ec681f3Smrg (num_sources > 1 && 3087ec681f3Smrg brw_inst_src1_reg_file(devinfo, inst) == MRF), 3097ec681f3Smrg "invalid register file encoding"); 3107ec681f3Smrg } 3117ec681f3Smrg } 3127ec681f3Smrg 3137ec681f3Smrg if (error_msg.str) 3147ec681f3Smrg return error_msg; 3157ec681f3Smrg 3167ec681f3Smrg if (num_sources == 3) { 3177ec681f3Smrg if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { 3187ec681f3Smrg if (devinfo->ver >= 10) { 3197ec681f3Smrg ERROR_IF(brw_inst_3src_a1_dst_type (devinfo, inst) == INVALID_REG_TYPE || 3207ec681f3Smrg brw_inst_3src_a1_src0_type(devinfo, inst) == INVALID_REG_TYPE || 3217ec681f3Smrg brw_inst_3src_a1_src1_type(devinfo, inst) == INVALID_REG_TYPE || 3227ec681f3Smrg brw_inst_3src_a1_src2_type(devinfo, inst) == INVALID_REG_TYPE, 3237ec681f3Smrg "invalid register type encoding"); 3247ec681f3Smrg } else { 3257ec681f3Smrg ERROR("Align1 mode not allowed on Gen < 10"); 3267ec681f3Smrg } 3277ec681f3Smrg } else { 3287ec681f3Smrg ERROR_IF(brw_inst_3src_a16_dst_type(devinfo, inst) == INVALID_REG_TYPE || 3297ec681f3Smrg brw_inst_3src_a16_src_type(devinfo, inst) == INVALID_REG_TYPE, 3307ec681f3Smrg "invalid register type encoding"); 3317ec681f3Smrg } 3327ec681f3Smrg } else { 3337ec681f3Smrg ERROR_IF(brw_inst_dst_type (devinfo, inst) == INVALID_REG_TYPE || 3347ec681f3Smrg (num_sources > 0 && 3357ec681f3Smrg brw_inst_src0_type(devinfo, inst) == INVALID_REG_TYPE) || 3367ec681f3Smrg (num_sources > 1 && 3377ec681f3Smrg 
brw_inst_src1_type(devinfo, inst) == INVALID_REG_TYPE), 3387ec681f3Smrg "invalid register type encoding"); 3397ec681f3Smrg } 3407ec681f3Smrg 3417ec681f3Smrg return error_msg; 3427ec681f3Smrg} 3437ec681f3Smrg 3447ec681f3Smrgstatic struct string 3457ec681f3Smrgsources_not_null(const struct intel_device_info *devinfo, 34601e04c3fSmrg const brw_inst *inst) 34701e04c3fSmrg{ 34801e04c3fSmrg unsigned num_sources = num_sources_from_inst(devinfo, inst); 34901e04c3fSmrg struct string error_msg = { .str = NULL, .len = 0 }; 35001e04c3fSmrg 35101e04c3fSmrg /* Nothing to test. 3-src instructions can only have GRF sources, and 35201e04c3fSmrg * there's no bit to control the file. 35301e04c3fSmrg */ 35401e04c3fSmrg if (num_sources == 3) 35501e04c3fSmrg return (struct string){}; 35601e04c3fSmrg 3579f464c52Smaya /* Nothing to test. Split sends can only encode a file in sources that are 3589f464c52Smaya * allowed to be NULL. 3599f464c52Smaya */ 3609f464c52Smaya if (inst_is_split_send(devinfo, inst)) 3619f464c52Smaya return (struct string){}; 3629f464c52Smaya 3637ec681f3Smrg if (num_sources >= 1 && brw_inst_opcode(devinfo, inst) != BRW_OPCODE_SYNC) 36401e04c3fSmrg ERROR_IF(src0_is_null(devinfo, inst), "src0 is null"); 36501e04c3fSmrg 36601e04c3fSmrg if (num_sources == 2) 36701e04c3fSmrg ERROR_IF(src1_is_null(devinfo, inst), "src1 is null"); 36801e04c3fSmrg 36901e04c3fSmrg return error_msg; 37001e04c3fSmrg} 37101e04c3fSmrg 3729f464c52Smayastatic struct string 3737ec681f3Smrgalignment_supported(const struct intel_device_info *devinfo, 3749f464c52Smaya const brw_inst *inst) 3759f464c52Smaya{ 3769f464c52Smaya struct string error_msg = { .str = NULL, .len = 0 }; 3779f464c52Smaya 3787ec681f3Smrg ERROR_IF(devinfo->ver >= 11 && brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16, 3799f464c52Smaya "Align16 not supported"); 3809f464c52Smaya 3819f464c52Smaya return error_msg; 3829f464c52Smaya} 3839f464c52Smaya 3849f464c52Smayastatic bool 3857ec681f3Smrginst_uses_src_acc(const struct 
intel_device_info *devinfo, const brw_inst *inst) 3869f464c52Smaya{ 3879f464c52Smaya /* Check instructions that use implicit accumulator sources */ 3889f464c52Smaya switch (brw_inst_opcode(devinfo, inst)) { 3899f464c52Smaya case BRW_OPCODE_MAC: 3909f464c52Smaya case BRW_OPCODE_MACH: 3919f464c52Smaya case BRW_OPCODE_SADA2: 3929f464c52Smaya return true; 3937ec681f3Smrg default: 3947ec681f3Smrg break; 3959f464c52Smaya } 3969f464c52Smaya 3979f464c52Smaya /* FIXME: support 3-src instructions */ 3989f464c52Smaya unsigned num_sources = num_sources_from_inst(devinfo, inst); 3999f464c52Smaya assert(num_sources < 3); 4009f464c52Smaya 4019f464c52Smaya return src0_is_acc(devinfo, inst) || (num_sources > 1 && src1_is_acc(devinfo, inst)); 4029f464c52Smaya} 4039f464c52Smaya 40401e04c3fSmrgstatic struct string 4057ec681f3Smrgsend_restrictions(const struct intel_device_info *devinfo, 40601e04c3fSmrg const brw_inst *inst) 40701e04c3fSmrg{ 40801e04c3fSmrg struct string error_msg = { .str = NULL, .len = 0 }; 40901e04c3fSmrg 4109f464c52Smaya if (inst_is_split_send(devinfo, inst)) { 4119f464c52Smaya ERROR_IF(brw_inst_send_src1_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE && 4129f464c52Smaya brw_inst_send_src1_reg_nr(devinfo, inst) != BRW_ARF_NULL, 4139f464c52Smaya "src1 of split send must be a GRF or NULL"); 4149f464c52Smaya 4159f464c52Smaya ERROR_IF(brw_inst_eot(devinfo, inst) && 4169f464c52Smaya brw_inst_src0_da_reg_nr(devinfo, inst) < 112, 4179f464c52Smaya "send with EOT must use g112-g127"); 4189f464c52Smaya ERROR_IF(brw_inst_eot(devinfo, inst) && 4199f464c52Smaya brw_inst_send_src1_reg_file(devinfo, inst) == BRW_GENERAL_REGISTER_FILE && 4209f464c52Smaya brw_inst_send_src1_reg_nr(devinfo, inst) < 112, 4219f464c52Smaya "send with EOT must use g112-g127"); 4229f464c52Smaya 4239f464c52Smaya if (brw_inst_send_src1_reg_file(devinfo, inst) == BRW_GENERAL_REGISTER_FILE) { 4249f464c52Smaya /* Assume minimums if we don't know */ 4259f464c52Smaya unsigned mlen = 1; 
4269f464c52Smaya if (!brw_inst_send_sel_reg32_desc(devinfo, inst)) { 4279f464c52Smaya const uint32_t desc = brw_inst_send_desc(devinfo, inst); 4289f464c52Smaya mlen = brw_message_desc_mlen(devinfo, desc); 4299f464c52Smaya } 4309f464c52Smaya 4319f464c52Smaya unsigned ex_mlen = 1; 4329f464c52Smaya if (!brw_inst_send_sel_reg32_ex_desc(devinfo, inst)) { 4337ec681f3Smrg const uint32_t ex_desc = brw_inst_sends_ex_desc(devinfo, inst); 4349f464c52Smaya ex_mlen = brw_message_ex_desc_ex_mlen(devinfo, ex_desc); 4359f464c52Smaya } 4369f464c52Smaya const unsigned src0_reg_nr = brw_inst_src0_da_reg_nr(devinfo, inst); 4379f464c52Smaya const unsigned src1_reg_nr = brw_inst_send_src1_reg_nr(devinfo, inst); 4389f464c52Smaya ERROR_IF((src0_reg_nr <= src1_reg_nr && 4399f464c52Smaya src1_reg_nr < src0_reg_nr + mlen) || 4409f464c52Smaya (src1_reg_nr <= src0_reg_nr && 4419f464c52Smaya src0_reg_nr < src1_reg_nr + ex_mlen), 4429f464c52Smaya "split send payloads must not overlap"); 4439f464c52Smaya } 4449f464c52Smaya } else if (inst_is_send(devinfo, inst)) { 44501e04c3fSmrg ERROR_IF(brw_inst_src0_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT, 44601e04c3fSmrg "send must use direct addressing"); 44701e04c3fSmrg 4487ec681f3Smrg if (devinfo->ver >= 7) { 4497ec681f3Smrg ERROR_IF(brw_inst_send_src0_reg_file(devinfo, inst) != BRW_GENERAL_REGISTER_FILE, 4507ec681f3Smrg "send from non-GRF"); 45101e04c3fSmrg ERROR_IF(brw_inst_eot(devinfo, inst) && 45201e04c3fSmrg brw_inst_src0_da_reg_nr(devinfo, inst) < 112, 45301e04c3fSmrg "send with EOT must use g112-g127"); 45401e04c3fSmrg } 45501e04c3fSmrg 4567ec681f3Smrg if (devinfo->ver >= 8) { 45701e04c3fSmrg ERROR_IF(!dst_is_null(devinfo, inst) && 45801e04c3fSmrg (brw_inst_dst_da_reg_nr(devinfo, inst) + 45901e04c3fSmrg brw_inst_rlen(devinfo, inst) > 127) && 46001e04c3fSmrg (brw_inst_src0_da_reg_nr(devinfo, inst) + 46101e04c3fSmrg brw_inst_mlen(devinfo, inst) > 46201e04c3fSmrg brw_inst_dst_da_reg_nr(devinfo, inst)), 46301e04c3fSmrg "r127 must not be used 
for return address when there is " 46401e04c3fSmrg "a src and dest overlap"); 46501e04c3fSmrg } 46601e04c3fSmrg } 46701e04c3fSmrg 46801e04c3fSmrg return error_msg; 46901e04c3fSmrg} 47001e04c3fSmrg 47101e04c3fSmrgstatic bool 4727ec681f3Smrgis_unsupported_inst(const struct intel_device_info *devinfo, 47301e04c3fSmrg const brw_inst *inst) 47401e04c3fSmrg{ 4757ec681f3Smrg return brw_inst_opcode(devinfo, inst) == BRW_OPCODE_ILLEGAL; 47601e04c3fSmrg} 47701e04c3fSmrg 4789f464c52Smaya/** 4799f464c52Smaya * Returns whether a combination of two types would qualify as mixed float 4809f464c52Smaya * operation mode 4819f464c52Smaya */ 4829f464c52Smayastatic inline bool 4839f464c52Smayatypes_are_mixed_float(enum brw_reg_type t0, enum brw_reg_type t1) 4849f464c52Smaya{ 4859f464c52Smaya return (t0 == BRW_REGISTER_TYPE_F && t1 == BRW_REGISTER_TYPE_HF) || 4869f464c52Smaya (t1 == BRW_REGISTER_TYPE_F && t0 == BRW_REGISTER_TYPE_HF); 4879f464c52Smaya} 4889f464c52Smaya 48901e04c3fSmrgstatic enum brw_reg_type 49001e04c3fSmrgexecution_type_for_type(enum brw_reg_type type) 49101e04c3fSmrg{ 49201e04c3fSmrg switch (type) { 49301e04c3fSmrg case BRW_REGISTER_TYPE_NF: 49401e04c3fSmrg case BRW_REGISTER_TYPE_DF: 49501e04c3fSmrg case BRW_REGISTER_TYPE_F: 49601e04c3fSmrg case BRW_REGISTER_TYPE_HF: 49701e04c3fSmrg return type; 49801e04c3fSmrg 49901e04c3fSmrg case BRW_REGISTER_TYPE_VF: 50001e04c3fSmrg return BRW_REGISTER_TYPE_F; 50101e04c3fSmrg 50201e04c3fSmrg case BRW_REGISTER_TYPE_Q: 50301e04c3fSmrg case BRW_REGISTER_TYPE_UQ: 50401e04c3fSmrg return BRW_REGISTER_TYPE_Q; 50501e04c3fSmrg 50601e04c3fSmrg case BRW_REGISTER_TYPE_D: 50701e04c3fSmrg case BRW_REGISTER_TYPE_UD: 50801e04c3fSmrg return BRW_REGISTER_TYPE_D; 50901e04c3fSmrg 51001e04c3fSmrg case BRW_REGISTER_TYPE_W: 51101e04c3fSmrg case BRW_REGISTER_TYPE_UW: 51201e04c3fSmrg case BRW_REGISTER_TYPE_B: 51301e04c3fSmrg case BRW_REGISTER_TYPE_UB: 51401e04c3fSmrg case BRW_REGISTER_TYPE_V: 51501e04c3fSmrg case BRW_REGISTER_TYPE_UV: 51601e04c3fSmrg return 
BRW_REGISTER_TYPE_W; 51701e04c3fSmrg } 51801e04c3fSmrg unreachable("not reached"); 51901e04c3fSmrg} 52001e04c3fSmrg 52101e04c3fSmrg/** 52201e04c3fSmrg * Returns the execution type of an instruction \p inst 52301e04c3fSmrg */ 52401e04c3fSmrgstatic enum brw_reg_type 5257ec681f3Smrgexecution_type(const struct intel_device_info *devinfo, const brw_inst *inst) 52601e04c3fSmrg{ 52701e04c3fSmrg unsigned num_sources = num_sources_from_inst(devinfo, inst); 52801e04c3fSmrg enum brw_reg_type src0_exec_type, src1_exec_type; 52901e04c3fSmrg 53001e04c3fSmrg /* Execution data type is independent of destination data type, except in 5319f464c52Smaya * mixed F/HF instructions. 53201e04c3fSmrg */ 5337ec681f3Smrg enum brw_reg_type dst_exec_type = inst_dst_type(devinfo, inst); 53401e04c3fSmrg 53501e04c3fSmrg src0_exec_type = execution_type_for_type(brw_inst_src0_type(devinfo, inst)); 53601e04c3fSmrg if (num_sources == 1) { 5379f464c52Smaya if (src0_exec_type == BRW_REGISTER_TYPE_HF) 53801e04c3fSmrg return dst_exec_type; 53901e04c3fSmrg return src0_exec_type; 54001e04c3fSmrg } 54101e04c3fSmrg 54201e04c3fSmrg src1_exec_type = execution_type_for_type(brw_inst_src1_type(devinfo, inst)); 5439f464c52Smaya if (types_are_mixed_float(src0_exec_type, src1_exec_type) || 5449f464c52Smaya types_are_mixed_float(src0_exec_type, dst_exec_type) || 5459f464c52Smaya types_are_mixed_float(src1_exec_type, dst_exec_type)) { 5469f464c52Smaya return BRW_REGISTER_TYPE_F; 5479f464c52Smaya } 5489f464c52Smaya 54901e04c3fSmrg if (src0_exec_type == src1_exec_type) 55001e04c3fSmrg return src0_exec_type; 55101e04c3fSmrg 5527ec681f3Smrg if (src0_exec_type == BRW_REGISTER_TYPE_NF || 5537ec681f3Smrg src1_exec_type == BRW_REGISTER_TYPE_NF) 5547ec681f3Smrg return BRW_REGISTER_TYPE_NF; 5557ec681f3Smrg 55601e04c3fSmrg /* Mixed operand types where one is float is float on Gen < 6 55701e04c3fSmrg * (and not allowed on later platforms) 55801e04c3fSmrg */ 5597ec681f3Smrg if (devinfo->ver < 6 && 56001e04c3fSmrg (src0_exec_type 
== BRW_REGISTER_TYPE_F || 56101e04c3fSmrg src1_exec_type == BRW_REGISTER_TYPE_F)) 56201e04c3fSmrg return BRW_REGISTER_TYPE_F; 56301e04c3fSmrg 56401e04c3fSmrg if (src0_exec_type == BRW_REGISTER_TYPE_Q || 56501e04c3fSmrg src1_exec_type == BRW_REGISTER_TYPE_Q) 56601e04c3fSmrg return BRW_REGISTER_TYPE_Q; 56701e04c3fSmrg 56801e04c3fSmrg if (src0_exec_type == BRW_REGISTER_TYPE_D || 56901e04c3fSmrg src1_exec_type == BRW_REGISTER_TYPE_D) 57001e04c3fSmrg return BRW_REGISTER_TYPE_D; 57101e04c3fSmrg 57201e04c3fSmrg if (src0_exec_type == BRW_REGISTER_TYPE_W || 57301e04c3fSmrg src1_exec_type == BRW_REGISTER_TYPE_W) 57401e04c3fSmrg return BRW_REGISTER_TYPE_W; 57501e04c3fSmrg 57601e04c3fSmrg if (src0_exec_type == BRW_REGISTER_TYPE_DF || 57701e04c3fSmrg src1_exec_type == BRW_REGISTER_TYPE_DF) 57801e04c3fSmrg return BRW_REGISTER_TYPE_DF; 57901e04c3fSmrg 5809f464c52Smaya unreachable("not reached"); 58101e04c3fSmrg} 58201e04c3fSmrg 58301e04c3fSmrg/** 58401e04c3fSmrg * Returns whether a region is packed 58501e04c3fSmrg * 58601e04c3fSmrg * A region is packed if its elements are adjacent in memory, with no 58701e04c3fSmrg * intervening space, no overlap, and no replicated values. 58801e04c3fSmrg */ 58901e04c3fSmrgstatic bool 59001e04c3fSmrgis_packed(unsigned vstride, unsigned width, unsigned hstride) 59101e04c3fSmrg{ 59201e04c3fSmrg if (vstride == width) { 59301e04c3fSmrg if (vstride == 1) { 59401e04c3fSmrg return hstride == 0; 59501e04c3fSmrg } else { 59601e04c3fSmrg return hstride == 1; 59701e04c3fSmrg } 59801e04c3fSmrg } 59901e04c3fSmrg 60001e04c3fSmrg return false; 60101e04c3fSmrg} 60201e04c3fSmrg 6039f464c52Smaya/** 6049f464c52Smaya * Returns whether an instruction is an explicit or implicit conversion 6059f464c52Smaya * to/from half-float. 
6069f464c52Smaya */ 6079f464c52Smayastatic bool 6087ec681f3Smrgis_half_float_conversion(const struct intel_device_info *devinfo, 6099f464c52Smaya const brw_inst *inst) 6109f464c52Smaya{ 6119f464c52Smaya enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst); 6129f464c52Smaya 6139f464c52Smaya unsigned num_sources = num_sources_from_inst(devinfo, inst); 6149f464c52Smaya enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst); 6159f464c52Smaya 6169f464c52Smaya if (dst_type != src0_type && 6179f464c52Smaya (dst_type == BRW_REGISTER_TYPE_HF || src0_type == BRW_REGISTER_TYPE_HF)) { 6189f464c52Smaya return true; 6199f464c52Smaya } else if (num_sources > 1) { 6209f464c52Smaya enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst); 6219f464c52Smaya return dst_type != src1_type && 6229f464c52Smaya (dst_type == BRW_REGISTER_TYPE_HF || 6239f464c52Smaya src1_type == BRW_REGISTER_TYPE_HF); 6249f464c52Smaya } 6259f464c52Smaya 6269f464c52Smaya return false; 6279f464c52Smaya} 6289f464c52Smaya 6299f464c52Smaya/* 6309f464c52Smaya * Returns whether an instruction is using mixed float operation mode 6319f464c52Smaya */ 6329f464c52Smayastatic bool 6337ec681f3Smrgis_mixed_float(const struct intel_device_info *devinfo, const brw_inst *inst) 6349f464c52Smaya{ 6357ec681f3Smrg if (devinfo->ver < 8) 6369f464c52Smaya return false; 6379f464c52Smaya 6389f464c52Smaya if (inst_is_send(devinfo, inst)) 6399f464c52Smaya return false; 6409f464c52Smaya 6419f464c52Smaya unsigned opcode = brw_inst_opcode(devinfo, inst); 6429f464c52Smaya const struct opcode_desc *desc = brw_opcode_desc(devinfo, opcode); 6439f464c52Smaya if (desc->ndst == 0) 6449f464c52Smaya return false; 6459f464c52Smaya 6469f464c52Smaya /* FIXME: support 3-src instructions */ 6479f464c52Smaya unsigned num_sources = num_sources_from_inst(devinfo, inst); 6489f464c52Smaya assert(num_sources < 3); 6499f464c52Smaya 6509f464c52Smaya enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst); 6519f464c52Smaya enum 
brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst); 6529f464c52Smaya 6539f464c52Smaya if (num_sources == 1) 6549f464c52Smaya return types_are_mixed_float(src0_type, dst_type); 6559f464c52Smaya 6569f464c52Smaya enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst); 6579f464c52Smaya 6589f464c52Smaya return types_are_mixed_float(src0_type, src1_type) || 6599f464c52Smaya types_are_mixed_float(src0_type, dst_type) || 6609f464c52Smaya types_are_mixed_float(src1_type, dst_type); 6619f464c52Smaya} 6629f464c52Smaya 6639f464c52Smaya/** 6649f464c52Smaya * Returns whether an instruction is an explicit or implicit conversion 6659f464c52Smaya * to/from byte. 6669f464c52Smaya */ 6679f464c52Smayastatic bool 6687ec681f3Smrgis_byte_conversion(const struct intel_device_info *devinfo, 6699f464c52Smaya const brw_inst *inst) 6709f464c52Smaya{ 6719f464c52Smaya enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst); 6729f464c52Smaya 6739f464c52Smaya unsigned num_sources = num_sources_from_inst(devinfo, inst); 6749f464c52Smaya enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst); 6759f464c52Smaya 6769f464c52Smaya if (dst_type != src0_type && 6779f464c52Smaya (type_sz(dst_type) == 1 || type_sz(src0_type) == 1)) { 6789f464c52Smaya return true; 6799f464c52Smaya } else if (num_sources > 1) { 6809f464c52Smaya enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst); 6819f464c52Smaya return dst_type != src1_type && 6829f464c52Smaya (type_sz(dst_type) == 1 || type_sz(src1_type) == 1); 6839f464c52Smaya } 6849f464c52Smaya 6859f464c52Smaya return false; 6869f464c52Smaya} 6879f464c52Smaya 68801e04c3fSmrg/** 68901e04c3fSmrg * Checks restrictions listed in "General Restrictions Based on Operand Types" 69001e04c3fSmrg * in the "Register Region Restrictions" section. 
69101e04c3fSmrg */ 69201e04c3fSmrgstatic struct string 6937ec681f3Smrggeneral_restrictions_based_on_operand_types(const struct intel_device_info *devinfo, 69401e04c3fSmrg const brw_inst *inst) 69501e04c3fSmrg{ 69601e04c3fSmrg const struct opcode_desc *desc = 69701e04c3fSmrg brw_opcode_desc(devinfo, brw_inst_opcode(devinfo, inst)); 69801e04c3fSmrg unsigned num_sources = num_sources_from_inst(devinfo, inst); 69901e04c3fSmrg unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst); 70001e04c3fSmrg struct string error_msg = { .str = NULL, .len = 0 }; 70101e04c3fSmrg 7027ec681f3Smrg if (inst_is_send(devinfo, inst)) 7037ec681f3Smrg return error_msg; 7047ec681f3Smrg 7057ec681f3Smrg if (devinfo->ver >= 11) { 7069f464c52Smaya if (num_sources == 3) { 7079f464c52Smaya ERROR_IF(brw_reg_type_to_size(brw_inst_3src_a1_src1_type(devinfo, inst)) == 1 || 7089f464c52Smaya brw_reg_type_to_size(brw_inst_3src_a1_src2_type(devinfo, inst)) == 1, 7099f464c52Smaya "Byte data type is not supported for src1/2 register regioning. This includes " 7109f464c52Smaya "byte broadcast as well."); 7119f464c52Smaya } 7129f464c52Smaya if (num_sources == 2) { 7139f464c52Smaya ERROR_IF(brw_reg_type_to_size(brw_inst_src1_type(devinfo, inst)) == 1, 7149f464c52Smaya "Byte data type is not supported for src1 register regioning. This includes " 7159f464c52Smaya "byte broadcast as well."); 7169f464c52Smaya } 7179f464c52Smaya } 7189f464c52Smaya 71901e04c3fSmrg if (num_sources == 3) 7209f464c52Smaya return error_msg; 72101e04c3fSmrg 72201e04c3fSmrg if (exec_size == 1) 7239f464c52Smaya return error_msg; 72401e04c3fSmrg 72501e04c3fSmrg if (desc->ndst == 0) 7269f464c52Smaya return error_msg; 72701e04c3fSmrg 72801e04c3fSmrg /* The PRMs say: 72901e04c3fSmrg * 73001e04c3fSmrg * Where n is the largest element size in bytes for any source or 73101e04c3fSmrg * destination operand type, ExecSize * n must be <= 64. 
73201e04c3fSmrg * 73301e04c3fSmrg * But we do not attempt to enforce it, because it is implied by other 73401e04c3fSmrg * rules: 73501e04c3fSmrg * 73601e04c3fSmrg * - that the destination stride must match the execution data type 73701e04c3fSmrg * - sources may not span more than two adjacent GRF registers 73801e04c3fSmrg * - destination may not span more than two adjacent GRF registers 73901e04c3fSmrg * 74001e04c3fSmrg * In fact, checking it would weaken testing of the other rules. 74101e04c3fSmrg */ 74201e04c3fSmrg 74301e04c3fSmrg unsigned dst_stride = STRIDE(brw_inst_dst_hstride(devinfo, inst)); 7447ec681f3Smrg enum brw_reg_type dst_type = inst_dst_type(devinfo, inst); 74501e04c3fSmrg bool dst_type_is_byte = 7467ec681f3Smrg inst_dst_type(devinfo, inst) == BRW_REGISTER_TYPE_B || 7477ec681f3Smrg inst_dst_type(devinfo, inst) == BRW_REGISTER_TYPE_UB; 74801e04c3fSmrg 74901e04c3fSmrg if (dst_type_is_byte) { 75001e04c3fSmrg if (is_packed(exec_size * dst_stride, exec_size, dst_stride)) { 7519f464c52Smaya if (!inst_is_raw_move(devinfo, inst)) 75201e04c3fSmrg ERROR("Only raw MOV supports a packed-byte destination"); 7539f464c52Smaya return error_msg; 75401e04c3fSmrg } 75501e04c3fSmrg } 75601e04c3fSmrg 75701e04c3fSmrg unsigned exec_type = execution_type(devinfo, inst); 75801e04c3fSmrg unsigned exec_type_size = brw_reg_type_to_size(exec_type); 75901e04c3fSmrg unsigned dst_type_size = brw_reg_type_to_size(dst_type); 76001e04c3fSmrg 76101e04c3fSmrg /* On IVB/BYT, region parameters and execution size for DF are in terms of 76201e04c3fSmrg * 32-bit elements, so they are doubled. For evaluating the validity of an 76301e04c3fSmrg * instruction, we halve them. 
76401e04c3fSmrg */ 7657ec681f3Smrg if (devinfo->verx10 == 70 && 76601e04c3fSmrg exec_type_size == 8 && dst_type_size == 4) 76701e04c3fSmrg dst_type_size = 8; 76801e04c3fSmrg 7699f464c52Smaya if (is_byte_conversion(devinfo, inst)) { 7709f464c52Smaya /* From the BDW+ PRM, Volume 2a, Command Reference, Instructions - MOV: 7719f464c52Smaya * 7729f464c52Smaya * "There is no direct conversion from B/UB to DF or DF to B/UB. 7739f464c52Smaya * There is no direct conversion from B/UB to Q/UQ or Q/UQ to B/UB." 7749f464c52Smaya * 7759f464c52Smaya * Even if these restrictions are listed for the MOV instruction, we 7769f464c52Smaya * validate this more generally, since there is the possibility 7779f464c52Smaya * of implicit conversions from other instructions. 7789f464c52Smaya */ 7799f464c52Smaya enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst); 7809f464c52Smaya enum brw_reg_type src1_type = num_sources > 1 ? 7819f464c52Smaya brw_inst_src1_type(devinfo, inst) : 0; 7829f464c52Smaya 7839f464c52Smaya ERROR_IF(type_sz(dst_type) == 1 && 7849f464c52Smaya (type_sz(src0_type) == 8 || 7859f464c52Smaya (num_sources > 1 && type_sz(src1_type) == 8)), 7869f464c52Smaya "There are no direct conversions between 64-bit types and B/UB"); 7879f464c52Smaya 7889f464c52Smaya ERROR_IF(type_sz(dst_type) == 8 && 7899f464c52Smaya (type_sz(src0_type) == 1 || 7909f464c52Smaya (num_sources > 1 && type_sz(src1_type) == 1)), 7919f464c52Smaya "There are no direct conversions between 64-bit types and B/UB"); 7929f464c52Smaya } 7939f464c52Smaya 7949f464c52Smaya if (is_half_float_conversion(devinfo, inst)) { 7959f464c52Smaya /** 7969f464c52Smaya * A helper to validate used in the validation of the following restriction 7979f464c52Smaya * from the BDW+ PRM, Volume 2a, Command Reference, Instructions - MOV: 7989f464c52Smaya * 7999f464c52Smaya * "There is no direct conversion from HF to DF or DF to HF. 8009f464c52Smaya * There is no direct conversion from HF to Q/UQ or Q/UQ to HF." 
8019f464c52Smaya * 8029f464c52Smaya * Even if these restrictions are listed for the MOV instruction, we 8039f464c52Smaya * validate this more generally, since there is the possibility 8049f464c52Smaya * of implicit conversions from other instructions, such us implicit 8059f464c52Smaya * conversion from integer to HF with the ADD instruction in SKL+. 8069f464c52Smaya */ 8079f464c52Smaya enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst); 8089f464c52Smaya enum brw_reg_type src1_type = num_sources > 1 ? 8099f464c52Smaya brw_inst_src1_type(devinfo, inst) : 0; 8109f464c52Smaya ERROR_IF(dst_type == BRW_REGISTER_TYPE_HF && 8119f464c52Smaya (type_sz(src0_type) == 8 || 8129f464c52Smaya (num_sources > 1 && type_sz(src1_type) == 8)), 8139f464c52Smaya "There are no direct conversions between 64-bit types and HF"); 8149f464c52Smaya 8159f464c52Smaya ERROR_IF(type_sz(dst_type) == 8 && 8169f464c52Smaya (src0_type == BRW_REGISTER_TYPE_HF || 8179f464c52Smaya (num_sources > 1 && src1_type == BRW_REGISTER_TYPE_HF)), 8189f464c52Smaya "There are no direct conversions between 64-bit types and HF"); 8199f464c52Smaya 8209f464c52Smaya /* From the BDW+ PRM: 8219f464c52Smaya * 8229f464c52Smaya * "Conversion between Integer and HF (Half Float) must be 8239f464c52Smaya * DWord-aligned and strided by a DWord on the destination." 8249f464c52Smaya * 8259f464c52Smaya * Also, the above restrictions seems to be expanded on CHV and SKL+ by: 8269f464c52Smaya * 8279f464c52Smaya * "There is a relaxed alignment rule for word destinations. When 8289f464c52Smaya * the destination type is word (UW, W, HF), destination data types 8299f464c52Smaya * can be aligned to either the lowest word or the second lowest 8309f464c52Smaya * word of the execution channel. This means the destination data 8319f464c52Smaya * words can be either all in the even word locations or all in the 8329f464c52Smaya * odd word locations." 
8339f464c52Smaya * 8349f464c52Smaya * We do not implement the second rule as is though, since empirical 8359f464c52Smaya * testing shows inconsistencies: 8369f464c52Smaya * - It suggests that packed 16-bit is not allowed, which is not true. 8379f464c52Smaya * - It suggests that conversions from Q/DF to W (which need to be 8389f464c52Smaya * 64-bit aligned on the destination) are not possible, which is 8399f464c52Smaya * not true. 8409f464c52Smaya * 8419f464c52Smaya * So from this rule we only validate the implication that conversions 8429f464c52Smaya * from F to HF need to be DWord strided (except in Align1 mixed 8439f464c52Smaya * float mode where packed fp16 destination is allowed so long as the 8449f464c52Smaya * destination is oword-aligned). 8459f464c52Smaya * 8469f464c52Smaya * Finally, we only validate this for Align1 because Align16 always 8479f464c52Smaya * requires packed destinations, so these restrictions can't possibly 8489f464c52Smaya * apply to Align16 mode. 8499f464c52Smaya */ 8509f464c52Smaya if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { 8519f464c52Smaya if ((dst_type == BRW_REGISTER_TYPE_HF && 8529f464c52Smaya (brw_reg_type_is_integer(src0_type) || 8539f464c52Smaya (num_sources > 1 && brw_reg_type_is_integer(src1_type)))) || 8549f464c52Smaya (brw_reg_type_is_integer(dst_type) && 8559f464c52Smaya (src0_type == BRW_REGISTER_TYPE_HF || 8569f464c52Smaya (num_sources > 1 && src1_type == BRW_REGISTER_TYPE_HF)))) { 8579f464c52Smaya ERROR_IF(dst_stride * dst_type_size != 4, 8589f464c52Smaya "Conversions between integer and half-float must be " 8599f464c52Smaya "strided by a DWord on the destination"); 8609f464c52Smaya 8619f464c52Smaya unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst); 8629f464c52Smaya ERROR_IF(subreg % 4 != 0, 8639f464c52Smaya "Conversions between integer and half-float must be " 8649f464c52Smaya "aligned to a DWord on the destination"); 8657ec681f3Smrg } else if ((devinfo->is_cherryview || devinfo->ver >= 9) && 
8669f464c52Smaya dst_type == BRW_REGISTER_TYPE_HF) { 8679f464c52Smaya unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst); 8689f464c52Smaya ERROR_IF(dst_stride != 2 && 8699f464c52Smaya !(is_mixed_float(devinfo, inst) && 8709f464c52Smaya dst_stride == 1 && subreg % 16 == 0), 8719f464c52Smaya "Conversions to HF must have either all words in even " 8729f464c52Smaya "word locations or all words in odd word locations or " 8739f464c52Smaya "be mixed-float with Oword-aligned packed destination"); 8749f464c52Smaya } 8759f464c52Smaya } 8769f464c52Smaya } 8779f464c52Smaya 8789f464c52Smaya /* There are special regioning rules for mixed-float mode in CHV and SKL that 8799f464c52Smaya * override the general rule for the ratio of sizes of the destination type 8809f464c52Smaya * and the execution type. We will add validation for those in a later patch. 8819f464c52Smaya */ 8829f464c52Smaya bool validate_dst_size_and_exec_size_ratio = 8839f464c52Smaya !is_mixed_float(devinfo, inst) || 8847ec681f3Smrg !(devinfo->is_cherryview || devinfo->ver >= 9); 8859f464c52Smaya 8869f464c52Smaya if (validate_dst_size_and_exec_size_ratio && 8879f464c52Smaya exec_type_size > dst_type_size) { 88801e04c3fSmrg if (!(dst_type_is_byte && inst_is_raw_move(devinfo, inst))) { 88901e04c3fSmrg ERROR_IF(dst_stride * dst_type_size != exec_type_size, 89001e04c3fSmrg "Destination stride must be equal to the ratio of the sizes " 89101e04c3fSmrg "of the execution data type to the destination type"); 89201e04c3fSmrg } 89301e04c3fSmrg 89401e04c3fSmrg unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst); 89501e04c3fSmrg 89601e04c3fSmrg if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1 && 89701e04c3fSmrg brw_inst_dst_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT) { 89801e04c3fSmrg /* The i965 PRM says: 89901e04c3fSmrg * 90001e04c3fSmrg * Implementation Restriction: The relaxed alignment rule for byte 90101e04c3fSmrg * destination (#10.5) is not supported. 
90201e04c3fSmrg */ 9037ec681f3Smrg if ((devinfo->ver > 4 || devinfo->is_g4x) && dst_type_is_byte) { 90401e04c3fSmrg ERROR_IF(subreg % exec_type_size != 0 && 90501e04c3fSmrg subreg % exec_type_size != 1, 90601e04c3fSmrg "Destination subreg must be aligned to the size of the " 90701e04c3fSmrg "execution data type (or to the next lowest byte for byte " 90801e04c3fSmrg "destinations)"); 90901e04c3fSmrg } else { 91001e04c3fSmrg ERROR_IF(subreg % exec_type_size != 0, 91101e04c3fSmrg "Destination subreg must be aligned to the size of the " 91201e04c3fSmrg "execution data type"); 91301e04c3fSmrg } 91401e04c3fSmrg } 91501e04c3fSmrg } 91601e04c3fSmrg 91701e04c3fSmrg return error_msg; 91801e04c3fSmrg} 91901e04c3fSmrg 92001e04c3fSmrg/** 92101e04c3fSmrg * Checks restrictions listed in "General Restrictions on Regioning Parameters" 92201e04c3fSmrg * in the "Register Region Restrictions" section. 92301e04c3fSmrg */ 92401e04c3fSmrgstatic struct string 9257ec681f3Smrggeneral_restrictions_on_region_parameters(const struct intel_device_info *devinfo, 92601e04c3fSmrg const brw_inst *inst) 92701e04c3fSmrg{ 92801e04c3fSmrg const struct opcode_desc *desc = 92901e04c3fSmrg brw_opcode_desc(devinfo, brw_inst_opcode(devinfo, inst)); 93001e04c3fSmrg unsigned num_sources = num_sources_from_inst(devinfo, inst); 93101e04c3fSmrg unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst); 93201e04c3fSmrg struct string error_msg = { .str = NULL, .len = 0 }; 93301e04c3fSmrg 93401e04c3fSmrg if (num_sources == 3) 93501e04c3fSmrg return (struct string){}; 93601e04c3fSmrg 9379f464c52Smaya /* Split sends don't have the bits in the instruction to encode regions so 9389f464c52Smaya * there's nothing to check. 
9399f464c52Smaya */ 9409f464c52Smaya if (inst_is_split_send(devinfo, inst)) 9419f464c52Smaya return (struct string){}; 9429f464c52Smaya 94301e04c3fSmrg if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16) { 94401e04c3fSmrg if (desc->ndst != 0 && !dst_is_null(devinfo, inst)) 94501e04c3fSmrg ERROR_IF(brw_inst_dst_hstride(devinfo, inst) != BRW_HORIZONTAL_STRIDE_1, 94601e04c3fSmrg "Destination Horizontal Stride must be 1"); 94701e04c3fSmrg 94801e04c3fSmrg if (num_sources >= 1) { 9497ec681f3Smrg if (devinfo->verx10 >= 75) { 95001e04c3fSmrg ERROR_IF(brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE && 95101e04c3fSmrg brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 && 95201e04c3fSmrg brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_2 && 95301e04c3fSmrg brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4, 95401e04c3fSmrg "In Align16 mode, only VertStride of 0, 2, or 4 is allowed"); 95501e04c3fSmrg } else { 95601e04c3fSmrg ERROR_IF(brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE && 95701e04c3fSmrg brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 && 95801e04c3fSmrg brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4, 95901e04c3fSmrg "In Align16 mode, only VertStride of 0 or 4 is allowed"); 96001e04c3fSmrg } 96101e04c3fSmrg } 96201e04c3fSmrg 96301e04c3fSmrg if (num_sources == 2) { 9647ec681f3Smrg if (devinfo->verx10 >= 75) { 96501e04c3fSmrg ERROR_IF(brw_inst_src1_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE && 96601e04c3fSmrg brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 && 96701e04c3fSmrg brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_2 && 96801e04c3fSmrg brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4, 96901e04c3fSmrg "In Align16 mode, only VertStride of 0, 2, or 4 is allowed"); 97001e04c3fSmrg } else { 97101e04c3fSmrg ERROR_IF(brw_inst_src1_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE && 97201e04c3fSmrg brw_inst_src1_vstride(devinfo, inst) != 
BRW_VERTICAL_STRIDE_0 && 97301e04c3fSmrg brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4, 97401e04c3fSmrg "In Align16 mode, only VertStride of 0 or 4 is allowed"); 97501e04c3fSmrg } 97601e04c3fSmrg } 97701e04c3fSmrg 97801e04c3fSmrg return error_msg; 97901e04c3fSmrg } 98001e04c3fSmrg 98101e04c3fSmrg for (unsigned i = 0; i < num_sources; i++) { 98201e04c3fSmrg unsigned vstride, width, hstride, element_size, subreg; 98301e04c3fSmrg enum brw_reg_type type; 98401e04c3fSmrg 98501e04c3fSmrg#define DO_SRC(n) \ 98601e04c3fSmrg if (brw_inst_src ## n ## _reg_file(devinfo, inst) == \ 98701e04c3fSmrg BRW_IMMEDIATE_VALUE) \ 98801e04c3fSmrg continue; \ 98901e04c3fSmrg \ 99001e04c3fSmrg vstride = STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst)); \ 99101e04c3fSmrg width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst)); \ 99201e04c3fSmrg hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst)); \ 99301e04c3fSmrg type = brw_inst_src ## n ## _type(devinfo, inst); \ 99401e04c3fSmrg element_size = brw_reg_type_to_size(type); \ 99501e04c3fSmrg subreg = brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst) 99601e04c3fSmrg 99701e04c3fSmrg if (i == 0) { 99801e04c3fSmrg DO_SRC(0); 99901e04c3fSmrg } else { 100001e04c3fSmrg DO_SRC(1); 100101e04c3fSmrg } 100201e04c3fSmrg#undef DO_SRC 100301e04c3fSmrg 100401e04c3fSmrg /* On IVB/BYT, region parameters and execution size for DF are in terms of 100501e04c3fSmrg * 32-bit elements, so they are doubled. For evaluating the validity of an 100601e04c3fSmrg * instruction, we halve them. 100701e04c3fSmrg */ 10087ec681f3Smrg if (devinfo->verx10 == 70 && 100901e04c3fSmrg element_size == 8) 101001e04c3fSmrg element_size = 4; 101101e04c3fSmrg 101201e04c3fSmrg /* ExecSize must be greater than or equal to Width. 
*/ 101301e04c3fSmrg ERROR_IF(exec_size < width, "ExecSize must be greater than or equal " 101401e04c3fSmrg "to Width"); 101501e04c3fSmrg 101601e04c3fSmrg /* If ExecSize = Width and HorzStride ≠ 0, 101701e04c3fSmrg * VertStride must be set to Width * HorzStride. 101801e04c3fSmrg */ 101901e04c3fSmrg if (exec_size == width && hstride != 0) { 102001e04c3fSmrg ERROR_IF(vstride != width * hstride, 102101e04c3fSmrg "If ExecSize = Width and HorzStride ≠ 0, " 102201e04c3fSmrg "VertStride must be set to Width * HorzStride"); 102301e04c3fSmrg } 102401e04c3fSmrg 102501e04c3fSmrg /* If Width = 1, HorzStride must be 0 regardless of the values of 102601e04c3fSmrg * ExecSize and VertStride. 102701e04c3fSmrg */ 102801e04c3fSmrg if (width == 1) { 102901e04c3fSmrg ERROR_IF(hstride != 0, 103001e04c3fSmrg "If Width = 1, HorzStride must be 0 regardless " 103101e04c3fSmrg "of the values of ExecSize and VertStride"); 103201e04c3fSmrg } 103301e04c3fSmrg 103401e04c3fSmrg /* If ExecSize = Width = 1, both VertStride and HorzStride must be 0. */ 103501e04c3fSmrg if (exec_size == 1 && width == 1) { 103601e04c3fSmrg ERROR_IF(vstride != 0 || hstride != 0, 103701e04c3fSmrg "If ExecSize = Width = 1, both VertStride " 103801e04c3fSmrg "and HorzStride must be 0"); 103901e04c3fSmrg } 104001e04c3fSmrg 104101e04c3fSmrg /* If VertStride = HorzStride = 0, Width must be 1 regardless of the 104201e04c3fSmrg * value of ExecSize. 104301e04c3fSmrg */ 104401e04c3fSmrg if (vstride == 0 && hstride == 0) { 104501e04c3fSmrg ERROR_IF(width != 1, 104601e04c3fSmrg "If VertStride = HorzStride = 0, Width must be " 104701e04c3fSmrg "1 regardless of the value of ExecSize"); 104801e04c3fSmrg } 104901e04c3fSmrg 105001e04c3fSmrg /* VertStride must be used to cross GRF register boundaries. This rule 105101e04c3fSmrg * implies that elements within a 'Width' cannot cross GRF boundaries. 
105201e04c3fSmrg */ 105301e04c3fSmrg const uint64_t mask = (1ULL << element_size) - 1; 105401e04c3fSmrg unsigned rowbase = subreg; 105501e04c3fSmrg 105601e04c3fSmrg for (int y = 0; y < exec_size / width; y++) { 105701e04c3fSmrg uint64_t access_mask = 0; 105801e04c3fSmrg unsigned offset = rowbase; 105901e04c3fSmrg 106001e04c3fSmrg for (int x = 0; x < width; x++) { 10617ec681f3Smrg access_mask |= mask << (offset % 64); 106201e04c3fSmrg offset += hstride * element_size; 106301e04c3fSmrg } 106401e04c3fSmrg 106501e04c3fSmrg rowbase += vstride * element_size; 106601e04c3fSmrg 106701e04c3fSmrg if ((uint32_t)access_mask != 0 && (access_mask >> 32) != 0) { 106801e04c3fSmrg ERROR("VertStride must be used to cross GRF register boundaries"); 106901e04c3fSmrg break; 107001e04c3fSmrg } 107101e04c3fSmrg } 107201e04c3fSmrg } 107301e04c3fSmrg 107401e04c3fSmrg /* Dst.HorzStride must not be 0. */ 107501e04c3fSmrg if (desc->ndst != 0 && !dst_is_null(devinfo, inst)) { 107601e04c3fSmrg ERROR_IF(brw_inst_dst_hstride(devinfo, inst) == BRW_HORIZONTAL_STRIDE_0, 107701e04c3fSmrg "Destination Horizontal Stride must not be 0"); 107801e04c3fSmrg } 107901e04c3fSmrg 108001e04c3fSmrg return error_msg; 108101e04c3fSmrg} 108201e04c3fSmrg 10839f464c52Smayastatic struct string 10847ec681f3Smrgspecial_restrictions_for_mixed_float_mode(const struct intel_device_info *devinfo, 10859f464c52Smaya const brw_inst *inst) 10869f464c52Smaya{ 10879f464c52Smaya struct string error_msg = { .str = NULL, .len = 0 }; 10889f464c52Smaya 10899f464c52Smaya const unsigned opcode = brw_inst_opcode(devinfo, inst); 10909f464c52Smaya const unsigned num_sources = num_sources_from_inst(devinfo, inst); 10919f464c52Smaya if (num_sources >= 3) 10929f464c52Smaya return error_msg; 10939f464c52Smaya 10949f464c52Smaya if (!is_mixed_float(devinfo, inst)) 10959f464c52Smaya return error_msg; 10969f464c52Smaya 10979f464c52Smaya unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst); 10989f464c52Smaya bool is_align16 = 
brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16; 10999f464c52Smaya 11009f464c52Smaya enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst); 11019f464c52Smaya enum brw_reg_type src1_type = num_sources > 1 ? 11029f464c52Smaya brw_inst_src1_type(devinfo, inst) : 0; 11039f464c52Smaya enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst); 11049f464c52Smaya 11059f464c52Smaya unsigned dst_stride = STRIDE(brw_inst_dst_hstride(devinfo, inst)); 11069f464c52Smaya bool dst_is_packed = is_packed(exec_size * dst_stride, exec_size, dst_stride); 11079f464c52Smaya 11089f464c52Smaya /* From the SKL PRM, Special Restrictions for Handling Mixed Mode 11099f464c52Smaya * Float Operations: 11109f464c52Smaya * 11119f464c52Smaya * "Indirect addressing on source is not supported when source and 11129f464c52Smaya * destination data types are mixed float." 11139f464c52Smaya */ 11149f464c52Smaya ERROR_IF(brw_inst_src0_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT || 11159f464c52Smaya (num_sources > 1 && 11169f464c52Smaya brw_inst_src1_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT), 11179f464c52Smaya "Indirect addressing on source is not supported when source and " 11189f464c52Smaya "destination data types are mixed float"); 11199f464c52Smaya 11209f464c52Smaya /* From the SKL PRM, Special Restrictions for Handling Mixed Mode 11219f464c52Smaya * Float Operations: 11229f464c52Smaya * 11239f464c52Smaya * "No SIMD16 in mixed mode when destination is f32. Instruction 11249f464c52Smaya * execution size must be no more than 8." 
11259f464c52Smaya */ 11269f464c52Smaya ERROR_IF(exec_size > 8 && dst_type == BRW_REGISTER_TYPE_F, 11279f464c52Smaya "Mixed float mode with 32-bit float destination is limited " 11289f464c52Smaya "to SIMD8"); 11299f464c52Smaya 11309f464c52Smaya if (is_align16) { 11319f464c52Smaya /* From the SKL PRM, Special Restrictions for Handling Mixed Mode 11329f464c52Smaya * Float Operations: 11339f464c52Smaya * 11349f464c52Smaya * "In Align16 mode, when half float and float data types are mixed 11359f464c52Smaya * between source operands OR between source and destination operands, 11369f464c52Smaya * the register content are assumed to be packed." 11379f464c52Smaya * 11389f464c52Smaya * Since Align16 doesn't have a concept of horizontal stride (or width), 11399f464c52Smaya * it means that vertical stride must always be 4, since 0 and 2 would 11409f464c52Smaya * lead to replicated data, and any other value is disallowed in Align16. 11419f464c52Smaya */ 11429f464c52Smaya ERROR_IF(brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4, 11439f464c52Smaya "Align16 mixed float mode assumes packed data (vstride must be 4"); 11449f464c52Smaya 11459f464c52Smaya ERROR_IF(num_sources >= 2 && 11469f464c52Smaya brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4, 11479f464c52Smaya "Align16 mixed float mode assumes packed data (vstride must be 4"); 11489f464c52Smaya 11499f464c52Smaya /* From the SKL PRM, Special Restrictions for Handling Mixed Mode 11509f464c52Smaya * Float Operations: 11519f464c52Smaya * 11529f464c52Smaya * "For Align16 mixed mode, both input and output packed f16 data 11539f464c52Smaya * must be oword aligned, no oword crossing in packed f16." 
11549f464c52Smaya * 11559f464c52Smaya * The previous rule requires that Align16 operands are always packed, 11569f464c52Smaya * and since there is only one bit for Align16 subnr, which represents 11579f464c52Smaya * offsets 0B and 16B, this rule is always enforced and we don't need to 11589f464c52Smaya * validate it. 11599f464c52Smaya */ 11609f464c52Smaya 11619f464c52Smaya /* From the SKL PRM, Special Restrictions for Handling Mixed Mode 11629f464c52Smaya * Float Operations: 11639f464c52Smaya * 11649f464c52Smaya * "No SIMD16 in mixed mode when destination is packed f16 for both 11659f464c52Smaya * Align1 and Align16." 11669f464c52Smaya * 11679f464c52Smaya * And: 11689f464c52Smaya * 11699f464c52Smaya * "In Align16 mode, when half float and float data types are mixed 11709f464c52Smaya * between source operands OR between source and destination operands, 11719f464c52Smaya * the register content are assumed to be packed." 11729f464c52Smaya * 11739f464c52Smaya * Which implies that SIMD16 is not available in Align16. This is further 11749f464c52Smaya * confirmed by: 11759f464c52Smaya * 11769f464c52Smaya * "For Align16 mixed mode, both input and output packed f16 data 11779f464c52Smaya * must be oword aligned, no oword crossing in packed f16" 11789f464c52Smaya * 11799f464c52Smaya * Since oword-aligned packed f16 data would cross oword boundaries when 11809f464c52Smaya * the execution size is larger than 8. 11819f464c52Smaya */ 11829f464c52Smaya ERROR_IF(exec_size > 8, "Align16 mixed float mode is limited to SIMD8"); 11839f464c52Smaya 11849f464c52Smaya /* From the SKL PRM, Special Restrictions for Handling Mixed Mode 11859f464c52Smaya * Float Operations: 11869f464c52Smaya * 11879f464c52Smaya * "No accumulator read access for Align16 mixed float." 
11889f464c52Smaya */ 11899f464c52Smaya ERROR_IF(inst_uses_src_acc(devinfo, inst), 11909f464c52Smaya "No accumulator read access for Align16 mixed float"); 11919f464c52Smaya } else { 11929f464c52Smaya assert(!is_align16); 11939f464c52Smaya 11949f464c52Smaya /* From the SKL PRM, Special Restrictions for Handling Mixed Mode 11959f464c52Smaya * Float Operations: 11969f464c52Smaya * 11979f464c52Smaya * "No SIMD16 in mixed mode when destination is packed f16 for both 11989f464c52Smaya * Align1 and Align16." 11999f464c52Smaya */ 12009f464c52Smaya ERROR_IF(exec_size > 8 && dst_is_packed && 12019f464c52Smaya dst_type == BRW_REGISTER_TYPE_HF, 12029f464c52Smaya "Align1 mixed float mode is limited to SIMD8 when destination " 12039f464c52Smaya "is packed half-float"); 12049f464c52Smaya 12059f464c52Smaya /* From the SKL PRM, Special Restrictions for Handling Mixed Mode 12069f464c52Smaya * Float Operations: 12079f464c52Smaya * 12089f464c52Smaya * "Math operations for mixed mode: 12099f464c52Smaya * - In Align1, f16 inputs need to be strided" 12109f464c52Smaya */ 12119f464c52Smaya if (opcode == BRW_OPCODE_MATH) { 12129f464c52Smaya if (src0_type == BRW_REGISTER_TYPE_HF) { 12139f464c52Smaya ERROR_IF(STRIDE(brw_inst_src0_hstride(devinfo, inst)) <= 1, 12149f464c52Smaya "Align1 mixed mode math needs strided half-float inputs"); 12159f464c52Smaya } 12169f464c52Smaya 12179f464c52Smaya if (num_sources >= 2 && src1_type == BRW_REGISTER_TYPE_HF) { 12189f464c52Smaya ERROR_IF(STRIDE(brw_inst_src1_hstride(devinfo, inst)) <= 1, 12199f464c52Smaya "Align1 mixed mode math needs strided half-float inputs"); 12209f464c52Smaya } 12219f464c52Smaya } 12229f464c52Smaya 12239f464c52Smaya if (dst_type == BRW_REGISTER_TYPE_HF && dst_stride == 1) { 12249f464c52Smaya /* From the SKL PRM, Special Restrictions for Handling Mixed Mode 12259f464c52Smaya * Float Operations: 12269f464c52Smaya * 12279f464c52Smaya * "In Align1, destination stride can be smaller than execution 12289f464c52Smaya * type. 
When destination is stride of 1, 16 bit packed data is 12299f464c52Smaya * updated on the destination. However, output packed f16 data 12309f464c52Smaya * must be oword aligned, no oword crossing in packed f16." 12319f464c52Smaya * 12329f464c52Smaya * The requirement of not crossing oword boundaries for 16-bit oword 12339f464c52Smaya * aligned data means that execution size is limited to 8. 12349f464c52Smaya */ 12359f464c52Smaya unsigned subreg; 12369f464c52Smaya if (brw_inst_dst_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT) 12379f464c52Smaya subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst); 12389f464c52Smaya else 12399f464c52Smaya subreg = brw_inst_dst_ia_subreg_nr(devinfo, inst); 12409f464c52Smaya ERROR_IF(subreg % 16 != 0, 12419f464c52Smaya "Align1 mixed mode packed half-float output must be " 12429f464c52Smaya "oword aligned"); 12439f464c52Smaya ERROR_IF(exec_size > 8, 12449f464c52Smaya "Align1 mixed mode packed half-float output must not " 12459f464c52Smaya "cross oword boundaries (max exec size is 8)"); 12469f464c52Smaya 12479f464c52Smaya /* From the SKL PRM, Special Restrictions for Handling Mixed Mode 12489f464c52Smaya * Float Operations: 12499f464c52Smaya * 12509f464c52Smaya * "When source is float or half float from accumulator register and 12519f464c52Smaya * destination is half float with a stride of 1, the source must 12529f464c52Smaya * register aligned. i.e., source must have offset zero." 12539f464c52Smaya * 12549f464c52Smaya * Align16 mixed float mode doesn't allow accumulator access on sources, 12559f464c52Smaya * so we only need to check this for Align1. 
12569f464c52Smaya */ 12579f464c52Smaya if (src0_is_acc(devinfo, inst) && 12589f464c52Smaya (src0_type == BRW_REGISTER_TYPE_F || 12599f464c52Smaya src0_type == BRW_REGISTER_TYPE_HF)) { 12609f464c52Smaya ERROR_IF(brw_inst_src0_da1_subreg_nr(devinfo, inst) != 0, 12619f464c52Smaya "Mixed float mode requires register-aligned accumulator " 12629f464c52Smaya "source reads when destination is packed half-float"); 12639f464c52Smaya 12649f464c52Smaya } 12659f464c52Smaya 12669f464c52Smaya if (num_sources > 1 && 12679f464c52Smaya src1_is_acc(devinfo, inst) && 12689f464c52Smaya (src1_type == BRW_REGISTER_TYPE_F || 12699f464c52Smaya src1_type == BRW_REGISTER_TYPE_HF)) { 12709f464c52Smaya ERROR_IF(brw_inst_src1_da1_subreg_nr(devinfo, inst) != 0, 12719f464c52Smaya "Mixed float mode requires register-aligned accumulator " 12729f464c52Smaya "source reads when destination is packed half-float"); 12739f464c52Smaya } 12749f464c52Smaya } 12759f464c52Smaya 12769f464c52Smaya /* From the SKL PRM, Special Restrictions for Handling Mixed Mode 12779f464c52Smaya * Float Operations: 12789f464c52Smaya * 12799f464c52Smaya * "No swizzle is allowed when an accumulator is used as an implicit 12809f464c52Smaya * source or an explicit source in an instruction. i.e. when 12819f464c52Smaya * destination is half float with an implicit accumulator source, 12829f464c52Smaya * destination stride needs to be 2." 12839f464c52Smaya * 12849f464c52Smaya * FIXME: it is not quite clear what the first sentence actually means 12859f464c52Smaya * or its link to the implication described after it, so we only 12869f464c52Smaya * validate the explicit implication, which is clearly described. 
12879f464c52Smaya */ 12889f464c52Smaya if (dst_type == BRW_REGISTER_TYPE_HF && 12899f464c52Smaya inst_uses_src_acc(devinfo, inst)) { 12909f464c52Smaya ERROR_IF(dst_stride != 2, 12919f464c52Smaya "Mixed float mode with implicit/explicit accumulator " 12929f464c52Smaya "source and half-float destination requires a stride " 12939f464c52Smaya "of 2 on the destination"); 12949f464c52Smaya } 12959f464c52Smaya } 12969f464c52Smaya 12979f464c52Smaya return error_msg; 12989f464c52Smaya} 12999f464c52Smaya 130001e04c3fSmrg/** 130101e04c3fSmrg * Creates an \p access_mask for an \p exec_size, \p element_size, and a region 130201e04c3fSmrg * 130301e04c3fSmrg * An \p access_mask is a 32-element array of uint64_t, where each uint64_t is 130401e04c3fSmrg * a bitmask of bytes accessed by the region. 130501e04c3fSmrg * 130601e04c3fSmrg * For instance the access mask of the source gX.1<4,2,2>F in an exec_size = 4 130701e04c3fSmrg * instruction would be 130801e04c3fSmrg * 130901e04c3fSmrg * access_mask[0] = 0x00000000000000F0 131001e04c3fSmrg * access_mask[1] = 0x000000000000F000 131101e04c3fSmrg * access_mask[2] = 0x0000000000F00000 131201e04c3fSmrg * access_mask[3] = 0x00000000F0000000 131301e04c3fSmrg * access_mask[4-31] = 0 131401e04c3fSmrg * 131501e04c3fSmrg * because the first execution channel accesses bytes 7-4 and the second 131601e04c3fSmrg * execution channel accesses bytes 15-12, etc. 
131701e04c3fSmrg */ 131801e04c3fSmrgstatic void 131901e04c3fSmrgalign1_access_mask(uint64_t access_mask[static 32], 132001e04c3fSmrg unsigned exec_size, unsigned element_size, unsigned subreg, 132101e04c3fSmrg unsigned vstride, unsigned width, unsigned hstride) 132201e04c3fSmrg{ 132301e04c3fSmrg const uint64_t mask = (1ULL << element_size) - 1; 132401e04c3fSmrg unsigned rowbase = subreg; 132501e04c3fSmrg unsigned element = 0; 132601e04c3fSmrg 132701e04c3fSmrg for (int y = 0; y < exec_size / width; y++) { 132801e04c3fSmrg unsigned offset = rowbase; 132901e04c3fSmrg 133001e04c3fSmrg for (int x = 0; x < width; x++) { 13317ec681f3Smrg access_mask[element++] = mask << (offset % 64); 133201e04c3fSmrg offset += hstride * element_size; 133301e04c3fSmrg } 133401e04c3fSmrg 133501e04c3fSmrg rowbase += vstride * element_size; 133601e04c3fSmrg } 133701e04c3fSmrg 133801e04c3fSmrg assert(element == 0 || element == exec_size); 133901e04c3fSmrg} 134001e04c3fSmrg 134101e04c3fSmrg/** 134201e04c3fSmrg * Returns the number of registers accessed according to the \p access_mask 134301e04c3fSmrg */ 134401e04c3fSmrgstatic int 134501e04c3fSmrgregisters_read(const uint64_t access_mask[static 32]) 134601e04c3fSmrg{ 134701e04c3fSmrg int regs_read = 0; 134801e04c3fSmrg 134901e04c3fSmrg for (unsigned i = 0; i < 32; i++) { 135001e04c3fSmrg if (access_mask[i] > 0xFFFFFFFF) { 135101e04c3fSmrg return 2; 135201e04c3fSmrg } else if (access_mask[i]) { 135301e04c3fSmrg regs_read = 1; 135401e04c3fSmrg } 135501e04c3fSmrg } 135601e04c3fSmrg 135701e04c3fSmrg return regs_read; 135801e04c3fSmrg} 135901e04c3fSmrg 136001e04c3fSmrg/** 136101e04c3fSmrg * Checks restrictions listed in "Region Alignment Rules" in the "Register 136201e04c3fSmrg * Region Restrictions" section. 
136301e04c3fSmrg */ 136401e04c3fSmrgstatic struct string 13657ec681f3Smrgregion_alignment_rules(const struct intel_device_info *devinfo, 136601e04c3fSmrg const brw_inst *inst) 136701e04c3fSmrg{ 136801e04c3fSmrg const struct opcode_desc *desc = 136901e04c3fSmrg brw_opcode_desc(devinfo, brw_inst_opcode(devinfo, inst)); 137001e04c3fSmrg unsigned num_sources = num_sources_from_inst(devinfo, inst); 137101e04c3fSmrg unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst); 137201e04c3fSmrg uint64_t dst_access_mask[32], src0_access_mask[32], src1_access_mask[32]; 137301e04c3fSmrg struct string error_msg = { .str = NULL, .len = 0 }; 137401e04c3fSmrg 137501e04c3fSmrg if (num_sources == 3) 137601e04c3fSmrg return (struct string){}; 137701e04c3fSmrg 137801e04c3fSmrg if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16) 137901e04c3fSmrg return (struct string){}; 138001e04c3fSmrg 138101e04c3fSmrg if (inst_is_send(devinfo, inst)) 138201e04c3fSmrg return (struct string){}; 138301e04c3fSmrg 138401e04c3fSmrg memset(dst_access_mask, 0, sizeof(dst_access_mask)); 138501e04c3fSmrg memset(src0_access_mask, 0, sizeof(src0_access_mask)); 138601e04c3fSmrg memset(src1_access_mask, 0, sizeof(src1_access_mask)); 138701e04c3fSmrg 138801e04c3fSmrg for (unsigned i = 0; i < num_sources; i++) { 138901e04c3fSmrg unsigned vstride, width, hstride, element_size, subreg; 139001e04c3fSmrg enum brw_reg_type type; 139101e04c3fSmrg 139201e04c3fSmrg /* In Direct Addressing mode, a source cannot span more than 2 adjacent 139301e04c3fSmrg * GRF registers. 
139401e04c3fSmrg */ 139501e04c3fSmrg 139601e04c3fSmrg#define DO_SRC(n) \ 139701e04c3fSmrg if (brw_inst_src ## n ## _address_mode(devinfo, inst) != \ 139801e04c3fSmrg BRW_ADDRESS_DIRECT) \ 139901e04c3fSmrg continue; \ 140001e04c3fSmrg \ 140101e04c3fSmrg if (brw_inst_src ## n ## _reg_file(devinfo, inst) == \ 140201e04c3fSmrg BRW_IMMEDIATE_VALUE) \ 140301e04c3fSmrg continue; \ 140401e04c3fSmrg \ 140501e04c3fSmrg vstride = STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst)); \ 140601e04c3fSmrg width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst)); \ 140701e04c3fSmrg hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst)); \ 140801e04c3fSmrg type = brw_inst_src ## n ## _type(devinfo, inst); \ 140901e04c3fSmrg element_size = brw_reg_type_to_size(type); \ 141001e04c3fSmrg subreg = brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst); \ 141101e04c3fSmrg align1_access_mask(src ## n ## _access_mask, \ 141201e04c3fSmrg exec_size, element_size, subreg, \ 141301e04c3fSmrg vstride, width, hstride) 141401e04c3fSmrg 141501e04c3fSmrg if (i == 0) { 141601e04c3fSmrg DO_SRC(0); 141701e04c3fSmrg } else { 141801e04c3fSmrg DO_SRC(1); 141901e04c3fSmrg } 142001e04c3fSmrg#undef DO_SRC 142101e04c3fSmrg 142201e04c3fSmrg unsigned num_vstride = exec_size / width; 142301e04c3fSmrg unsigned num_hstride = width; 142401e04c3fSmrg unsigned vstride_elements = (num_vstride - 1) * vstride; 142501e04c3fSmrg unsigned hstride_elements = (num_hstride - 1) * hstride; 142601e04c3fSmrg unsigned offset = (vstride_elements + hstride_elements) * element_size + 142701e04c3fSmrg subreg; 142801e04c3fSmrg ERROR_IF(offset >= 64, 142901e04c3fSmrg "A source cannot span more than 2 adjacent GRF registers"); 143001e04c3fSmrg } 143101e04c3fSmrg 143201e04c3fSmrg if (desc->ndst == 0 || dst_is_null(devinfo, inst)) 143301e04c3fSmrg return error_msg; 143401e04c3fSmrg 143501e04c3fSmrg unsigned stride = STRIDE(brw_inst_dst_hstride(devinfo, inst)); 14367ec681f3Smrg enum brw_reg_type dst_type = inst_dst_type(devinfo, 
inst); 143701e04c3fSmrg unsigned element_size = brw_reg_type_to_size(dst_type); 143801e04c3fSmrg unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst); 143901e04c3fSmrg unsigned offset = ((exec_size - 1) * stride * element_size) + subreg; 144001e04c3fSmrg ERROR_IF(offset >= 64, 144101e04c3fSmrg "A destination cannot span more than 2 adjacent GRF registers"); 144201e04c3fSmrg 144301e04c3fSmrg if (error_msg.str) 144401e04c3fSmrg return error_msg; 144501e04c3fSmrg 144601e04c3fSmrg /* On IVB/BYT, region parameters and execution size for DF are in terms of 144701e04c3fSmrg * 32-bit elements, so they are doubled. For evaluating the validity of an 144801e04c3fSmrg * instruction, we halve them. 144901e04c3fSmrg */ 14507ec681f3Smrg if (devinfo->verx10 == 70 && 145101e04c3fSmrg element_size == 8) 145201e04c3fSmrg element_size = 4; 145301e04c3fSmrg 145401e04c3fSmrg align1_access_mask(dst_access_mask, exec_size, element_size, subreg, 145501e04c3fSmrg exec_size == 1 ? 0 : exec_size * stride, 145601e04c3fSmrg exec_size == 1 ? 1 : exec_size, 145701e04c3fSmrg exec_size == 1 ? 0 : stride); 145801e04c3fSmrg 145901e04c3fSmrg unsigned dst_regs = registers_read(dst_access_mask); 146001e04c3fSmrg unsigned src0_regs = registers_read(src0_access_mask); 146101e04c3fSmrg unsigned src1_regs = registers_read(src1_access_mask); 146201e04c3fSmrg 146301e04c3fSmrg /* The SNB, IVB, HSW, BDW, and CHV PRMs say: 146401e04c3fSmrg * 146501e04c3fSmrg * When an instruction has a source region spanning two registers and a 146601e04c3fSmrg * destination region contained in one register, the number of elements 146701e04c3fSmrg * must be the same between two sources and one of the following must be 146801e04c3fSmrg * true: 146901e04c3fSmrg * 147001e04c3fSmrg * 1. The destination region is entirely contained in the lower OWord 147101e04c3fSmrg * of a register. 147201e04c3fSmrg * 2. The destination region is entirely contained in the upper OWord 147301e04c3fSmrg * of a register. 147401e04c3fSmrg * 3. 
The destination elements are evenly split between the two OWords 147501e04c3fSmrg * of a register. 147601e04c3fSmrg */ 14777ec681f3Smrg if (devinfo->ver <= 8) { 147801e04c3fSmrg if (dst_regs == 1 && (src0_regs == 2 || src1_regs == 2)) { 147901e04c3fSmrg unsigned upper_oword_writes = 0, lower_oword_writes = 0; 148001e04c3fSmrg 148101e04c3fSmrg for (unsigned i = 0; i < exec_size; i++) { 148201e04c3fSmrg if (dst_access_mask[i] > 0x0000FFFF) { 148301e04c3fSmrg upper_oword_writes++; 148401e04c3fSmrg } else { 148501e04c3fSmrg assert(dst_access_mask[i] != 0); 148601e04c3fSmrg lower_oword_writes++; 148701e04c3fSmrg } 148801e04c3fSmrg } 148901e04c3fSmrg 149001e04c3fSmrg ERROR_IF(lower_oword_writes != 0 && 149101e04c3fSmrg upper_oword_writes != 0 && 149201e04c3fSmrg upper_oword_writes != lower_oword_writes, 149301e04c3fSmrg "Writes must be to only one OWord or " 149401e04c3fSmrg "evenly split between OWords"); 149501e04c3fSmrg } 149601e04c3fSmrg } 149701e04c3fSmrg 149801e04c3fSmrg /* The IVB and HSW PRMs say: 149901e04c3fSmrg * 150001e04c3fSmrg * When an instruction has a source region that spans two registers and 150101e04c3fSmrg * the destination spans two registers, the destination elements must be 150201e04c3fSmrg * evenly split between the two registers [...] 150301e04c3fSmrg * 150401e04c3fSmrg * The SNB PRM contains similar wording (but written in a much more 150501e04c3fSmrg * confusing manner). 150601e04c3fSmrg * 150701e04c3fSmrg * The BDW PRM says: 150801e04c3fSmrg * 150901e04c3fSmrg * When destination spans two registers, the source may be one or two 151001e04c3fSmrg * registers. The destination elements must be evenly split between the 151101e04c3fSmrg * two registers. 151201e04c3fSmrg * 151301e04c3fSmrg * The SKL PRM says: 151401e04c3fSmrg * 151501e04c3fSmrg * When destination of MATH instruction spans two registers, the 151601e04c3fSmrg * destination elements must be evenly split between the two registers. 
151701e04c3fSmrg * 151801e04c3fSmrg * It is not known whether this restriction applies to KBL other Gens after 151901e04c3fSmrg * SKL. 152001e04c3fSmrg */ 15217ec681f3Smrg if (devinfo->ver <= 8 || 152201e04c3fSmrg brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MATH) { 152301e04c3fSmrg 152401e04c3fSmrg /* Nothing explicitly states that on Gen < 8 elements must be evenly 152501e04c3fSmrg * split between two destination registers in the two exceptional 152601e04c3fSmrg * source-region-spans-one-register cases, but since Broadwell requires 152701e04c3fSmrg * evenly split writes regardless of source region, we assume that it was 152801e04c3fSmrg * an oversight and require it. 152901e04c3fSmrg */ 153001e04c3fSmrg if (dst_regs == 2) { 153101e04c3fSmrg unsigned upper_reg_writes = 0, lower_reg_writes = 0; 153201e04c3fSmrg 153301e04c3fSmrg for (unsigned i = 0; i < exec_size; i++) { 153401e04c3fSmrg if (dst_access_mask[i] > 0xFFFFFFFF) { 153501e04c3fSmrg upper_reg_writes++; 153601e04c3fSmrg } else { 153701e04c3fSmrg assert(dst_access_mask[i] != 0); 153801e04c3fSmrg lower_reg_writes++; 153901e04c3fSmrg } 154001e04c3fSmrg } 154101e04c3fSmrg 154201e04c3fSmrg ERROR_IF(upper_reg_writes != lower_reg_writes, 154301e04c3fSmrg "Writes must be evenly split between the two " 154401e04c3fSmrg "destination registers"); 154501e04c3fSmrg } 154601e04c3fSmrg } 154701e04c3fSmrg 154801e04c3fSmrg /* The IVB and HSW PRMs say: 154901e04c3fSmrg * 155001e04c3fSmrg * When an instruction has a source region that spans two registers and 155101e04c3fSmrg * the destination spans two registers, the destination elements must be 155201e04c3fSmrg * evenly split between the two registers and each destination register 155301e04c3fSmrg * must be entirely derived from one source register. 155401e04c3fSmrg * 155501e04c3fSmrg * Note: In such cases, the regioning parameters must ensure that the 155601e04c3fSmrg * offset from the two source registers is the same. 
155701e04c3fSmrg * 155801e04c3fSmrg * The SNB PRM contains similar wording (but written in a much more 155901e04c3fSmrg * confusing manner). 156001e04c3fSmrg * 156101e04c3fSmrg * There are effectively three rules stated here: 156201e04c3fSmrg * 156301e04c3fSmrg * For an instruction with a source and a destination spanning two 156401e04c3fSmrg * registers, 156501e04c3fSmrg * 156601e04c3fSmrg * (1) destination elements must be evenly split between the two 156701e04c3fSmrg * registers 156801e04c3fSmrg * (2) all destination elements in a register must be derived 156901e04c3fSmrg * from one source register 157001e04c3fSmrg * (3) the offset (i.e. the starting location in each of the two 157101e04c3fSmrg * registers spanned by a region) must be the same in the two 157201e04c3fSmrg * registers spanned by a region 157301e04c3fSmrg * 157401e04c3fSmrg * It is impossible to violate rule (1) without violating (2) or (3), so we 157501e04c3fSmrg * do not attempt to validate it. 157601e04c3fSmrg */ 15777ec681f3Smrg if (devinfo->ver <= 7 && dst_regs == 2) { 157801e04c3fSmrg for (unsigned i = 0; i < num_sources; i++) { 157901e04c3fSmrg#define DO_SRC(n) \ 158001e04c3fSmrg if (src ## n ## _regs <= 1) \ 158101e04c3fSmrg continue; \ 158201e04c3fSmrg \ 158301e04c3fSmrg for (unsigned i = 0; i < exec_size; i++) { \ 158401e04c3fSmrg if ((dst_access_mask[i] > 0xFFFFFFFF) != \ 158501e04c3fSmrg (src ## n ## _access_mask[i] > 0xFFFFFFFF)) { \ 158601e04c3fSmrg ERROR("Each destination register must be entirely derived " \ 158701e04c3fSmrg "from one source register"); \ 158801e04c3fSmrg break; \ 158901e04c3fSmrg } \ 159001e04c3fSmrg } \ 159101e04c3fSmrg \ 159201e04c3fSmrg unsigned offset_0 = \ 159301e04c3fSmrg brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst); \ 159401e04c3fSmrg unsigned offset_1 = offset_0; \ 159501e04c3fSmrg \ 159601e04c3fSmrg for (unsigned i = 0; i < exec_size; i++) { \ 159701e04c3fSmrg if (src ## n ## _access_mask[i] > 0xFFFFFFFF) { \ 159801e04c3fSmrg offset_1 = 
__builtin_ctzll(src ## n ## _access_mask[i]) - 32; \ 159901e04c3fSmrg break; \ 160001e04c3fSmrg } \ 160101e04c3fSmrg } \ 160201e04c3fSmrg \ 160301e04c3fSmrg ERROR_IF(num_sources == 2 && offset_0 != offset_1, \ 160401e04c3fSmrg "The offset from the two source registers " \ 160501e04c3fSmrg "must be the same") 160601e04c3fSmrg 160701e04c3fSmrg if (i == 0) { 160801e04c3fSmrg DO_SRC(0); 160901e04c3fSmrg } else { 161001e04c3fSmrg DO_SRC(1); 161101e04c3fSmrg } 161201e04c3fSmrg#undef DO_SRC 161301e04c3fSmrg } 161401e04c3fSmrg } 161501e04c3fSmrg 161601e04c3fSmrg /* The IVB and HSW PRMs say: 161701e04c3fSmrg * 161801e04c3fSmrg * When destination spans two registers, the source MUST span two 161901e04c3fSmrg * registers. The exception to the above rule: 162001e04c3fSmrg * 1. When source is scalar, the source registers are not 162101e04c3fSmrg * incremented. 162201e04c3fSmrg * 2. When source is packed integer Word and destination is packed 162301e04c3fSmrg * integer DWord, the source register is not incremented by the 162401e04c3fSmrg * source sub register is incremented. 162501e04c3fSmrg * 162601e04c3fSmrg * The SNB PRM does not contain this rule, but the internal documentation 162701e04c3fSmrg * indicates that it applies to SNB as well. We assume that the rule applies 162801e04c3fSmrg * to Gen <= 5 although their PRMs do not state it. 162901e04c3fSmrg * 163001e04c3fSmrg * While the documentation explicitly says in exception (2) that the 163101e04c3fSmrg * destination must be an integer DWord, the hardware allows at least a 163201e04c3fSmrg * float destination type as well. We emit such instructions from 163301e04c3fSmrg * 16347ec681f3Smrg * fs_visitor::emit_interpolation_setup_gfx6 163501e04c3fSmrg * fs_visitor::emit_fragcoord_interpolation 163601e04c3fSmrg * 163701e04c3fSmrg * and have for years with no ill effects. 
163801e04c3fSmrg * 163901e04c3fSmrg * Additionally the simulator source code indicates that the real condition 164001e04c3fSmrg * is that the size of the destination type is 4 bytes. 164101e04c3fSmrg */ 16427ec681f3Smrg if (devinfo->ver <= 7 && dst_regs == 2) { 16437ec681f3Smrg enum brw_reg_type dst_type = inst_dst_type(devinfo, inst); 164401e04c3fSmrg bool dst_is_packed_dword = 164501e04c3fSmrg is_packed(exec_size * stride, exec_size, stride) && 164601e04c3fSmrg brw_reg_type_to_size(dst_type) == 4; 164701e04c3fSmrg 164801e04c3fSmrg for (unsigned i = 0; i < num_sources; i++) { 164901e04c3fSmrg#define DO_SRC(n) \ 165001e04c3fSmrg unsigned vstride, width, hstride; \ 165101e04c3fSmrg vstride = STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst)); \ 165201e04c3fSmrg width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst)); \ 165301e04c3fSmrg hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst)); \ 165401e04c3fSmrg bool src ## n ## _is_packed_word = \ 165501e04c3fSmrg is_packed(vstride, width, hstride) && \ 165601e04c3fSmrg (brw_inst_src ## n ## _type(devinfo, inst) == BRW_REGISTER_TYPE_W || \ 165701e04c3fSmrg brw_inst_src ## n ## _type(devinfo, inst) == BRW_REGISTER_TYPE_UW); \ 165801e04c3fSmrg \ 165901e04c3fSmrg ERROR_IF(src ## n ## _regs == 1 && \ 166001e04c3fSmrg !src ## n ## _has_scalar_region(devinfo, inst) && \ 166101e04c3fSmrg !(dst_is_packed_dword && src ## n ## _is_packed_word), \ 166201e04c3fSmrg "When the destination spans two registers, the source must " \ 166301e04c3fSmrg "span two registers\n" ERROR_INDENT "(exceptions for scalar " \ 166401e04c3fSmrg "source and packed-word to packed-dword expansion)") 166501e04c3fSmrg 166601e04c3fSmrg if (i == 0) { 166701e04c3fSmrg DO_SRC(0); 166801e04c3fSmrg } else { 166901e04c3fSmrg DO_SRC(1); 167001e04c3fSmrg } 167101e04c3fSmrg#undef DO_SRC 167201e04c3fSmrg } 167301e04c3fSmrg } 167401e04c3fSmrg 167501e04c3fSmrg return error_msg; 167601e04c3fSmrg} 167701e04c3fSmrg 167801e04c3fSmrgstatic struct string 
16797ec681f3Smrgvector_immediate_restrictions(const struct intel_device_info *devinfo, 168001e04c3fSmrg const brw_inst *inst) 168101e04c3fSmrg{ 168201e04c3fSmrg unsigned num_sources = num_sources_from_inst(devinfo, inst); 168301e04c3fSmrg struct string error_msg = { .str = NULL, .len = 0 }; 168401e04c3fSmrg 168501e04c3fSmrg if (num_sources == 3 || num_sources == 0) 168601e04c3fSmrg return (struct string){}; 168701e04c3fSmrg 168801e04c3fSmrg unsigned file = num_sources == 1 ? 168901e04c3fSmrg brw_inst_src0_reg_file(devinfo, inst) : 169001e04c3fSmrg brw_inst_src1_reg_file(devinfo, inst); 169101e04c3fSmrg if (file != BRW_IMMEDIATE_VALUE) 169201e04c3fSmrg return (struct string){}; 169301e04c3fSmrg 16947ec681f3Smrg enum brw_reg_type dst_type = inst_dst_type(devinfo, inst); 169501e04c3fSmrg unsigned dst_type_size = brw_reg_type_to_size(dst_type); 169601e04c3fSmrg unsigned dst_subreg = brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1 ? 169701e04c3fSmrg brw_inst_dst_da1_subreg_nr(devinfo, inst) : 0; 169801e04c3fSmrg unsigned dst_stride = STRIDE(brw_inst_dst_hstride(devinfo, inst)); 169901e04c3fSmrg enum brw_reg_type type = num_sources == 1 ? 170001e04c3fSmrg brw_inst_src0_type(devinfo, inst) : 170101e04c3fSmrg brw_inst_src1_type(devinfo, inst); 170201e04c3fSmrg 170301e04c3fSmrg /* The PRMs say: 170401e04c3fSmrg * 170501e04c3fSmrg * When an immediate vector is used in an instruction, the destination 170601e04c3fSmrg * must be 128-bit aligned with destination horizontal stride equivalent 170701e04c3fSmrg * to a word for an immediate integer vector (v) and equivalent to a 170801e04c3fSmrg * DWord for an immediate float vector (vf). 170901e04c3fSmrg * 171001e04c3fSmrg * The text has not been updated for the addition of the immediate unsigned 171101e04c3fSmrg * integer vector type (uv) on SNB, but presumably the same restriction 171201e04c3fSmrg * applies. 
171301e04c3fSmrg */ 171401e04c3fSmrg switch (type) { 171501e04c3fSmrg case BRW_REGISTER_TYPE_V: 171601e04c3fSmrg case BRW_REGISTER_TYPE_UV: 171701e04c3fSmrg case BRW_REGISTER_TYPE_VF: 171801e04c3fSmrg ERROR_IF(dst_subreg % (128 / 8) != 0, 171901e04c3fSmrg "Destination must be 128-bit aligned in order to use immediate " 172001e04c3fSmrg "vector types"); 172101e04c3fSmrg 172201e04c3fSmrg if (type == BRW_REGISTER_TYPE_VF) { 172301e04c3fSmrg ERROR_IF(dst_type_size * dst_stride != 4, 172401e04c3fSmrg "Destination must have stride equivalent to dword in order " 172501e04c3fSmrg "to use the VF type"); 172601e04c3fSmrg } else { 172701e04c3fSmrg ERROR_IF(dst_type_size * dst_stride != 2, 172801e04c3fSmrg "Destination must have stride equivalent to word in order " 172901e04c3fSmrg "to use the V or UV type"); 173001e04c3fSmrg } 173101e04c3fSmrg break; 173201e04c3fSmrg default: 173301e04c3fSmrg break; 173401e04c3fSmrg } 173501e04c3fSmrg 173601e04c3fSmrg return error_msg; 173701e04c3fSmrg} 173801e04c3fSmrg 173901e04c3fSmrgstatic struct string 174001e04c3fSmrgspecial_requirements_for_handling_double_precision_data_types( 17417ec681f3Smrg const struct intel_device_info *devinfo, 174201e04c3fSmrg const brw_inst *inst) 174301e04c3fSmrg{ 174401e04c3fSmrg unsigned num_sources = num_sources_from_inst(devinfo, inst); 174501e04c3fSmrg struct string error_msg = { .str = NULL, .len = 0 }; 174601e04c3fSmrg 174701e04c3fSmrg if (num_sources == 3 || num_sources == 0) 174801e04c3fSmrg return (struct string){}; 174901e04c3fSmrg 17509f464c52Smaya /* Split sends don't have types so there's no doubles there. 
*/ 17519f464c52Smaya if (inst_is_split_send(devinfo, inst)) 17529f464c52Smaya return (struct string){}; 17539f464c52Smaya 175401e04c3fSmrg enum brw_reg_type exec_type = execution_type(devinfo, inst); 175501e04c3fSmrg unsigned exec_type_size = brw_reg_type_to_size(exec_type); 175601e04c3fSmrg 175701e04c3fSmrg enum brw_reg_file dst_file = brw_inst_dst_reg_file(devinfo, inst); 17587ec681f3Smrg enum brw_reg_type dst_type = inst_dst_type(devinfo, inst); 175901e04c3fSmrg unsigned dst_type_size = brw_reg_type_to_size(dst_type); 176001e04c3fSmrg unsigned dst_hstride = STRIDE(brw_inst_dst_hstride(devinfo, inst)); 176101e04c3fSmrg unsigned dst_reg = brw_inst_dst_da_reg_nr(devinfo, inst); 176201e04c3fSmrg unsigned dst_subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst); 176301e04c3fSmrg unsigned dst_address_mode = brw_inst_dst_address_mode(devinfo, inst); 176401e04c3fSmrg 176501e04c3fSmrg bool is_integer_dword_multiply = 17667ec681f3Smrg devinfo->ver >= 8 && 176701e04c3fSmrg brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MUL && 176801e04c3fSmrg (brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_D || 176901e04c3fSmrg brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_UD) && 177001e04c3fSmrg (brw_inst_src1_type(devinfo, inst) == BRW_REGISTER_TYPE_D || 177101e04c3fSmrg brw_inst_src1_type(devinfo, inst) == BRW_REGISTER_TYPE_UD); 177201e04c3fSmrg 17737ec681f3Smrg const bool is_double_precision = 17747ec681f3Smrg dst_type_size == 8 || exec_type_size == 8 || is_integer_dword_multiply; 177501e04c3fSmrg 177601e04c3fSmrg for (unsigned i = 0; i < num_sources; i++) { 177701e04c3fSmrg unsigned vstride, width, hstride, type_size, reg, subreg, address_mode; 177801e04c3fSmrg bool is_scalar_region; 177901e04c3fSmrg enum brw_reg_file file; 178001e04c3fSmrg enum brw_reg_type type; 178101e04c3fSmrg 178201e04c3fSmrg#define DO_SRC(n) \ 178301e04c3fSmrg if (brw_inst_src ## n ## _reg_file(devinfo, inst) == \ 178401e04c3fSmrg BRW_IMMEDIATE_VALUE) \ 178501e04c3fSmrg continue; \ 178601e04c3fSmrg 
\ 178701e04c3fSmrg is_scalar_region = src ## n ## _has_scalar_region(devinfo, inst); \ 178801e04c3fSmrg vstride = STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst)); \ 178901e04c3fSmrg width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst)); \ 179001e04c3fSmrg hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst)); \ 179101e04c3fSmrg file = brw_inst_src ## n ## _reg_file(devinfo, inst); \ 179201e04c3fSmrg type = brw_inst_src ## n ## _type(devinfo, inst); \ 179301e04c3fSmrg type_size = brw_reg_type_to_size(type); \ 179401e04c3fSmrg reg = brw_inst_src ## n ## _da_reg_nr(devinfo, inst); \ 179501e04c3fSmrg subreg = brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst); \ 179601e04c3fSmrg address_mode = brw_inst_src ## n ## _address_mode(devinfo, inst) 179701e04c3fSmrg 179801e04c3fSmrg if (i == 0) { 179901e04c3fSmrg DO_SRC(0); 180001e04c3fSmrg } else { 180101e04c3fSmrg DO_SRC(1); 180201e04c3fSmrg } 180301e04c3fSmrg#undef DO_SRC 180401e04c3fSmrg 18057ec681f3Smrg const unsigned src_stride = hstride * type_size; 18067ec681f3Smrg const unsigned dst_stride = dst_hstride * dst_type_size; 18077ec681f3Smrg 180801e04c3fSmrg /* The PRMs say that for CHV, BXT: 180901e04c3fSmrg * 181001e04c3fSmrg * When source or destination datatype is 64b or operation is integer 181101e04c3fSmrg * DWord multiply, regioning in Align1 must follow these rules: 181201e04c3fSmrg * 181301e04c3fSmrg * 1. Source and Destination horizontal stride must be aligned to the 181401e04c3fSmrg * same qword. 181501e04c3fSmrg * 2. Regioning must ensure Src.Vstride = Src.Width * Src.Hstride. 181601e04c3fSmrg * 3. Source and Destination offset must be the same, except the case 181701e04c3fSmrg * of scalar source. 181801e04c3fSmrg * 181901e04c3fSmrg * We assume that the restriction applies to GLK as well. 
182001e04c3fSmrg */ 18217ec681f3Smrg if (is_double_precision && 18227ec681f3Smrg brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1 && 18237ec681f3Smrg (devinfo->is_cherryview || intel_device_info_is_9lp(devinfo))) { 182401e04c3fSmrg ERROR_IF(!is_scalar_region && 182501e04c3fSmrg (src_stride % 8 != 0 || 182601e04c3fSmrg dst_stride % 8 != 0 || 182701e04c3fSmrg src_stride != dst_stride), 182801e04c3fSmrg "Source and destination horizontal stride must equal and a " 182901e04c3fSmrg "multiple of a qword when the execution type is 64-bit"); 183001e04c3fSmrg 183101e04c3fSmrg ERROR_IF(vstride != width * hstride, 183201e04c3fSmrg "Vstride must be Width * Hstride when the execution type is " 183301e04c3fSmrg "64-bit"); 183401e04c3fSmrg 183501e04c3fSmrg ERROR_IF(!is_scalar_region && dst_subreg != subreg, 183601e04c3fSmrg "Source and destination offset must be the same when the " 183701e04c3fSmrg "execution type is 64-bit"); 183801e04c3fSmrg } 183901e04c3fSmrg 184001e04c3fSmrg /* The PRMs say that for CHV, BXT: 184101e04c3fSmrg * 184201e04c3fSmrg * When source or destination datatype is 64b or operation is integer 184301e04c3fSmrg * DWord multiply, indirect addressing must not be used. 184401e04c3fSmrg * 184501e04c3fSmrg * We assume that the restriction applies to GLK as well. 184601e04c3fSmrg */ 18477ec681f3Smrg if (is_double_precision && 18487ec681f3Smrg (devinfo->is_cherryview || intel_device_info_is_9lp(devinfo))) { 184901e04c3fSmrg ERROR_IF(BRW_ADDRESS_REGISTER_INDIRECT_REGISTER == address_mode || 185001e04c3fSmrg BRW_ADDRESS_REGISTER_INDIRECT_REGISTER == dst_address_mode, 185101e04c3fSmrg "Indirect addressing is not allowed when the execution type " 185201e04c3fSmrg "is 64-bit"); 185301e04c3fSmrg } 185401e04c3fSmrg 185501e04c3fSmrg /* The PRMs say that for CHV, BXT: 185601e04c3fSmrg * 185701e04c3fSmrg * ARF registers must never be used with 64b datatype or when 185801e04c3fSmrg * operation is integer DWord multiply. 
185901e04c3fSmrg * 186001e04c3fSmrg * We assume that the restriction applies to GLK as well. 186101e04c3fSmrg * 186201e04c3fSmrg * We assume that the restriction does not apply to the null register. 186301e04c3fSmrg */ 18647ec681f3Smrg if (is_double_precision && 18657ec681f3Smrg (devinfo->is_cherryview || intel_device_info_is_9lp(devinfo))) { 186601e04c3fSmrg ERROR_IF(brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MAC || 186701e04c3fSmrg brw_inst_acc_wr_control(devinfo, inst) || 186801e04c3fSmrg (BRW_ARCHITECTURE_REGISTER_FILE == file && 186901e04c3fSmrg reg != BRW_ARF_NULL) || 187001e04c3fSmrg (BRW_ARCHITECTURE_REGISTER_FILE == dst_file && 187101e04c3fSmrg dst_reg != BRW_ARF_NULL), 187201e04c3fSmrg "Architecture registers cannot be used when the execution " 187301e04c3fSmrg "type is 64-bit"); 187401e04c3fSmrg } 18757ec681f3Smrg 18767ec681f3Smrg /* From the hardware spec section "Register Region Restrictions": 18777ec681f3Smrg * 18787ec681f3Smrg * "In case where source or destination datatype is 64b or operation is 18797ec681f3Smrg * integer DWord multiply [or in case where a floating point data type 18807ec681f3Smrg * is used as destination]: 18817ec681f3Smrg * 18827ec681f3Smrg * 1. Register Regioning patterns where register data bit locations 18837ec681f3Smrg * are changed between source and destination are not supported on 18847ec681f3Smrg * Src0 and Src1 except for broadcast of a scalar. 18857ec681f3Smrg * 18867ec681f3Smrg * 2. Explicit ARF registers except null and accumulator must not be 18877ec681f3Smrg * used." 
18887ec681f3Smrg */ 18897ec681f3Smrg if (devinfo->verx10 >= 125 && 18907ec681f3Smrg (brw_reg_type_is_floating_point(dst_type) || 18917ec681f3Smrg is_double_precision)) { 18927ec681f3Smrg ERROR_IF(!is_scalar_region && 18937ec681f3Smrg (vstride != width * hstride || 18947ec681f3Smrg src_stride != dst_stride || 18957ec681f3Smrg subreg != dst_subreg), 18967ec681f3Smrg "Register Regioning patterns where register data bit " 18977ec681f3Smrg "locations are changed between source and destination are not " 18987ec681f3Smrg "supported except for broadcast of a scalar."); 18997ec681f3Smrg 19007ec681f3Smrg ERROR_IF((file == BRW_ARCHITECTURE_REGISTER_FILE && 19017ec681f3Smrg reg != BRW_ARF_NULL && !(reg >= BRW_ARF_ACCUMULATOR && reg < BRW_ARF_FLAG)) || 19027ec681f3Smrg (dst_file == BRW_ARCHITECTURE_REGISTER_FILE && 19037ec681f3Smrg dst_reg != BRW_ARF_NULL && dst_reg != BRW_ARF_ACCUMULATOR), 19047ec681f3Smrg "Explicit ARF registers except null and accumulator must not " 19057ec681f3Smrg "be used."); 19067ec681f3Smrg } 19077ec681f3Smrg 19087ec681f3Smrg /* From the hardware spec section "Register Region Restrictions": 19097ec681f3Smrg * 19107ec681f3Smrg * "Vx1 and VxH indirect addressing for Float, Half-Float, Double-Float and 19117ec681f3Smrg * Quad-Word data must not be used." 
19127ec681f3Smrg */ 19137ec681f3Smrg if (devinfo->verx10 >= 125 && 19147ec681f3Smrg (brw_reg_type_is_floating_point(type) || type_sz(type) == 8)) { 19157ec681f3Smrg ERROR_IF(address_mode == BRW_ADDRESS_REGISTER_INDIRECT_REGISTER && 19167ec681f3Smrg vstride == BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL, 19177ec681f3Smrg "Vx1 and VxH indirect addressing for Float, Half-Float, " 19187ec681f3Smrg "Double-Float and Quad-Word data must not be used"); 19197ec681f3Smrg } 192001e04c3fSmrg } 192101e04c3fSmrg 192201e04c3fSmrg /* The PRMs say that for BDW, SKL: 192301e04c3fSmrg * 192401e04c3fSmrg * If Align16 is required for an operation with QW destination and non-QW 192501e04c3fSmrg * source datatypes, the execution size cannot exceed 2. 192601e04c3fSmrg * 19277ec681f3Smrg * We assume that the restriction applies to all Gfx8+ parts. 192801e04c3fSmrg */ 19297ec681f3Smrg if (is_double_precision && devinfo->ver >= 8) { 193001e04c3fSmrg enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst); 193101e04c3fSmrg enum brw_reg_type src1_type = 193201e04c3fSmrg num_sources > 1 ? brw_inst_src1_type(devinfo, inst) : src0_type; 193301e04c3fSmrg unsigned src0_type_size = brw_reg_type_to_size(src0_type); 193401e04c3fSmrg unsigned src1_type_size = brw_reg_type_to_size(src1_type); 193501e04c3fSmrg 193601e04c3fSmrg ERROR_IF(brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16 && 193701e04c3fSmrg dst_type_size == 8 && 193801e04c3fSmrg (src0_type_size != 8 || src1_type_size != 8) && 193901e04c3fSmrg brw_inst_exec_size(devinfo, inst) > BRW_EXECUTE_2, 194001e04c3fSmrg "In Align16 exec size cannot exceed 2 with a QWord destination " 194101e04c3fSmrg "and a non-QWord source"); 194201e04c3fSmrg } 194301e04c3fSmrg 194401e04c3fSmrg /* The PRMs say that for CHV, BXT: 194501e04c3fSmrg * 194601e04c3fSmrg * When source or destination datatype is 64b or operation is integer 194701e04c3fSmrg * DWord multiply, DepCtrl must not be used. 
194801e04c3fSmrg * 194901e04c3fSmrg * We assume that the restriction applies to GLK as well. 195001e04c3fSmrg */ 19517ec681f3Smrg if (is_double_precision && 19527ec681f3Smrg (devinfo->is_cherryview || intel_device_info_is_9lp(devinfo))) { 195301e04c3fSmrg ERROR_IF(brw_inst_no_dd_check(devinfo, inst) || 195401e04c3fSmrg brw_inst_no_dd_clear(devinfo, inst), 195501e04c3fSmrg "DepCtrl is not allowed when the execution type is 64-bit"); 195601e04c3fSmrg } 195701e04c3fSmrg 195801e04c3fSmrg return error_msg; 195901e04c3fSmrg} 196001e04c3fSmrg

/**
 * Opcode-specific checks for MUL, CMP/CMPN, MATH and DP4A.
 *
 * Every other opcode passes through untouched.  The returned string holds
 * whatever the ERROR_IF() checks below appended to the initially empty
 * error_msg.
 */
static struct string
instruction_restrictions(const struct intel_device_info *devinfo,
                         const brw_inst *inst)
{
   /* ERROR_IF() refers to this local by name. */
   struct string error_msg = { .str = NULL, .len = 0 };

   switch (brw_inst_opcode(devinfo, inst)) {
   case BRW_OPCODE_MUL: {
      /* From Wa_1604601757:
       *
       * "When multiplying a DW and any lower precision integer, source modifier
       *  is not supported."
       *
       * Only checked on ver 12 and later.
       */
      if (devinfo->ver < 12)
         break;

      enum brw_reg_type mul_exec_type = execution_type(devinfo, inst);

      /* A source is a problem when it is narrower or wider than a DW, is
       * not an immediate, and carries a negate or abs modifier.
       */
      const bool src0_bad_modifier =
         type_sz(brw_inst_src0_type(devinfo, inst)) != 4 &&
         brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE &&
         (brw_inst_src0_negate(devinfo, inst) ||
          brw_inst_src0_abs(devinfo, inst));
      const bool src1_bad_modifier =
         type_sz(brw_inst_src1_type(devinfo, inst)) != 4 &&
         brw_inst_src1_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE &&
         (brw_inst_src1_negate(devinfo, inst) ||
          brw_inst_src1_abs(devinfo, inst));

      ERROR_IF(!brw_reg_type_is_floating_point(mul_exec_type) &&
               type_sz(mul_exec_type) == 4 &&
               (src0_bad_modifier || src1_bad_modifier),
               "When multiplying a DW and any lower precision integer, source "
               "modifier is not supported.");
      break;
   }

   case BRW_OPCODE_CMP:
   case BRW_OPCODE_CMPN:
      /* Page 166 of the Ivy Bridge PRM Volume 4 part 3 (Execution Unit
       * ISA) says:
       *
       *    Accumulator cannot be destination, implicit or explicit. The
       *    destination must be a general register or the null register.
       *
       * Page 77 of the Haswell PRM Volume 2b contains the same text.  The
       * 965G PRMs contain similar text.
       *
       * Page 864 (page 880 of the PDF) of the Broadwell PRM Volume 7 says:
       *
       *    For the cmp and cmpn instructions, remove the accumulator
       *    restrictions.
       *
       * Hence the check is limited to ver 7 and earlier.
       */
      ERROR_IF(devinfo->ver <= 7 &&
               brw_inst_dst_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
               brw_inst_dst_da_reg_nr(devinfo, inst) != BRW_ARF_NULL,
               "Accumulator cannot be destination, implicit or explicit.");

      /* Page 166 of the Ivy Bridge PRM Volume 4 part 3 (Execution Unit ISA)
       * says:
       *
       *    If the destination is the null register, the {Switch} instruction
       *    option must be used.
       *
       * Page 77 of the Haswell PRM Volume 2b contains the same text.
       */
      ERROR_IF(devinfo->ver == 7 &&
               dst_is_null(devinfo, inst) &&
               brw_inst_thread_control(devinfo, inst) != BRW_THREAD_SWITCH,
               "If the destination is the null register, the {Switch} "
               "instruction option must be used.");
      break;

   case BRW_OPCODE_MATH: {
      const unsigned math_fn = brw_inst_math_function(devinfo, inst);

      if (math_fn == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER ||
          math_fn == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
          math_fn == BRW_MATH_FUNCTION_INT_DIV_REMAINDER) {
         /* Page 442 of the Broadwell PRM Volume 2a "Extended Math Function"
          * says:
          *
          *    INT DIV function does not support source modifiers.
          *
          * Bspec 6647 extends it back to Ivy Bridge.
          */
         const bool src0_has_modifier = brw_inst_src0_negate(devinfo, inst) ||
                                        brw_inst_src0_abs(devinfo, inst);
         const bool src1_has_modifier = brw_inst_src1_negate(devinfo, inst) ||
                                        brw_inst_src1_abs(devinfo, inst);

         ERROR_IF(src0_has_modifier || src1_has_modifier,
                  "INT DIV function does not support source modifiers.");
      }
      break;
   }

   case BRW_OPCODE_DP4A:
      /* Page 396 (page 412 of the PDF) of the DG1 PRM volume 2a says:
       *
       *    Only one of src0 or src1 operand may be an the (sic) accumulator
       *    register (acc#).
       */
      ERROR_IF(src0_is_acc(devinfo, inst) && src1_is_acc(devinfo, inst),
               "Only one of src0 or src1 operand may be an accumulator "
               "register (acc#).");
      break;

   default:
      break;
   }

   return error_msg;
}

/**
 * Checks restrictions on the message descriptor of send instructions.
 *
 * Nothing is checked unless the instruction is a (split) send whose
 * descriptor is an immediate, because a descriptor held in a register cannot
 * be inspected here.
 */
static struct string
send_descriptor_restrictions(const struct intel_device_info *devinfo,
                             const brw_inst *inst)
{
   /* ERROR_IF() refers to this local by name. */
   struct string error_msg = { .str = NULL, .len = 0 };

   /* We can only validate immediate descriptors */
   bool has_immediate_desc = false;
   if (inst_is_split_send(devinfo, inst)) {
      has_immediate_desc = !brw_inst_send_sel_reg32_desc(devinfo, inst);
   } else if (inst_is_send(devinfo, inst)) {
      has_immediate_desc =
         brw_inst_src1_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE;
   }

   if (!has_immediate_desc)
      return error_msg;

   const uint32_t desc = brw_inst_send_desc(devinfo, inst);

   /* Only the TGM, SLM and UGM shared functions have checks below. */
   switch (brw_inst_sfid(devinfo, inst)) {
   case GFX12_SFID_TGM:
   case GFX12_SFID_SLM:
   case GFX12_SFID_UGM:
      break;

   default:
      return error_msg;
   }

   ERROR_IF(!devinfo->has_lsc, "Platform does not support LSC");

   ERROR_IF(lsc_opcode_has_transpose(lsc_msg_desc_opcode(devinfo, desc)) &&
            lsc_msg_desc_transpose(devinfo, desc) &&
            brw_inst_exec_size(devinfo, inst) != BRW_EXECUTE_1,
            "Transposed vectors are restricted to Exec_Mask = 1.");

   return error_msg;
}
21047ec681f3Smrg 21057ec681f3Smrgbool 21067ec681f3Smrgbrw_validate_instruction(const struct intel_device_info *devinfo, 21077ec681f3Smrg const brw_inst *inst, int offset, 21087ec681f3Smrg struct disasm_info *disasm) 21097ec681f3Smrg{ 21107ec681f3Smrg struct string error_msg = { .str = NULL, .len = 0 }; 21117ec681f3Smrg 21127ec681f3Smrg if (is_unsupported_inst(devinfo, inst)) { 21137ec681f3Smrg ERROR("Instruction not supported on this Gen"); 21147ec681f3Smrg } else { 21157ec681f3Smrg CHECK(invalid_values); 21167ec681f3Smrg 21177ec681f3Smrg if (error_msg.str == NULL) { 211801e04c3fSmrg CHECK(sources_not_null); 211901e04c3fSmrg CHECK(send_restrictions); 21209f464c52Smaya CHECK(alignment_supported); 212101e04c3fSmrg CHECK(general_restrictions_based_on_operand_types); 212201e04c3fSmrg CHECK(general_restrictions_on_region_parameters); 21239f464c52Smaya CHECK(special_restrictions_for_mixed_float_mode); 212401e04c3fSmrg CHECK(region_alignment_rules); 212501e04c3fSmrg CHECK(vector_immediate_restrictions); 212601e04c3fSmrg CHECK(special_requirements_for_handling_double_precision_data_types); 21277ec681f3Smrg CHECK(instruction_restrictions); 21287ec681f3Smrg CHECK(send_descriptor_restrictions); 212901e04c3fSmrg } 21307ec681f3Smrg } 213101e04c3fSmrg 21327ec681f3Smrg if (error_msg.str && disasm) { 21337ec681f3Smrg disasm_insert_error(disasm, offset, error_msg.str); 21347ec681f3Smrg } 21357ec681f3Smrg free(error_msg.str); 21367ec681f3Smrg 21377ec681f3Smrg return error_msg.len == 0; 21387ec681f3Smrg} 21397ec681f3Smrg 21407ec681f3Smrgbool 21417ec681f3Smrgbrw_validate_instructions(const struct intel_device_info *devinfo, 21427ec681f3Smrg const void *assembly, int start_offset, int end_offset, 21437ec681f3Smrg struct disasm_info *disasm) 21447ec681f3Smrg{ 21457ec681f3Smrg bool valid = true; 21467ec681f3Smrg 21477ec681f3Smrg for (int src_offset = start_offset; src_offset < end_offset;) { 21487ec681f3Smrg const brw_inst *inst = assembly + src_offset; 21497ec681f3Smrg bool is_compact = 
brw_inst_cmpt_control(devinfo, inst); 21507ec681f3Smrg unsigned inst_size = is_compact ? sizeof(brw_compact_inst) 21517ec681f3Smrg : sizeof(brw_inst); 21527ec681f3Smrg brw_inst uncompacted; 215301e04c3fSmrg 215401e04c3fSmrg if (is_compact) { 21557ec681f3Smrg brw_compact_inst *compacted = (void *)inst; 21567ec681f3Smrg brw_uncompact_instruction(devinfo, &uncompacted, compacted); 21577ec681f3Smrg inst = &uncompacted; 215801e04c3fSmrg } 21597ec681f3Smrg 21607ec681f3Smrg bool v = brw_validate_instruction(devinfo, inst, src_offset, disasm); 21617ec681f3Smrg valid = valid && v; 21627ec681f3Smrg 21637ec681f3Smrg src_offset += inst_size; 216401e04c3fSmrg } 216501e04c3fSmrg 216601e04c3fSmrg return valid; 216701e04c3fSmrg} 2168