101e04c3fSmrg/* 201e04c3fSmrg Copyright (C) Intel Corp. 2006. All Rights Reserved. 301e04c3fSmrg Intel funded Tungsten Graphics to 401e04c3fSmrg develop this 3D driver. 501e04c3fSmrg 601e04c3fSmrg Permission is hereby granted, free of charge, to any person obtaining 701e04c3fSmrg a copy of this software and associated documentation files (the 801e04c3fSmrg "Software"), to deal in the Software without restriction, including 901e04c3fSmrg without limitation the rights to use, copy, modify, merge, publish, 1001e04c3fSmrg distribute, sublicense, and/or sell copies of the Software, and to 1101e04c3fSmrg permit persons to whom the Software is furnished to do so, subject to 1201e04c3fSmrg the following conditions: 1301e04c3fSmrg 1401e04c3fSmrg The above copyright notice and this permission notice (including the 1501e04c3fSmrg next paragraph) shall be included in all copies or substantial 1601e04c3fSmrg portions of the Software. 1701e04c3fSmrg 1801e04c3fSmrg THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 1901e04c3fSmrg EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 2001e04c3fSmrg MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 2101e04c3fSmrg IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 2201e04c3fSmrg LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 2301e04c3fSmrg OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 2401e04c3fSmrg WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 

 **********************************************************************/

/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */


#include "brw_eu_defines.h"
#include "brw_eu.h"

#include "util/ralloc.h"

/**
 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
 * registers, implicitly moving the operand to a message register.
 *
 * On Sandybridge, this is no longer the case.  This function performs the
 * explicit move; it should be called before emitting a SEND instruction.
 *
 * On success, *src is rewritten to point at the message register so the
 * caller can encode the SEND against it.
 */
void
gfx6_resolve_implied_move(struct brw_codegen *p,
                          struct brw_reg *src,
                          unsigned msg_reg_nr)
{
   const struct intel_device_info *devinfo = p->devinfo;
   /* Pre-Gfx6 hardware performs the move implicitly; nothing to do. */
   if (devinfo->ver < 6)
      return;

   /* Already an MRF source: no copy needed. */
   if (src->file == BRW_MESSAGE_REGISTER_FILE)
      return;

   /* Anything other than the null register needs an explicit UD-typed copy
    * into the message register.  The null register is skipped since there is
    * no data to move.
    */
   if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) {
      assert(devinfo->ver < 12);
      brw_push_insn_state(p);
      brw_set_default_exec_size(p, BRW_EXECUTE_8);
      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
      brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
      brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD),
              retype(*src, BRW_REGISTER_TYPE_UD));
      brw_pop_insn_state(p);
   }
   *src = brw_message_reg(msg_reg_nr);
}

/**
 * On Gfx7+, MRFs no longer exist; remap a MESSAGE_REGISTER_FILE register
 * onto the high GRF range reserved for message payloads.
 */
static void
gfx7_convert_mrf_to_grf(struct brw_codegen *p, struct brw_reg *reg)
{
   /* From the Ivybridge PRM, Volume 4 Part 3, page 218 ("send"):
    * "The send with EOT should use register space R112-R127 for <src>. This is
    * to enable loading of a new thread into the same slot while the message
    * with EOT for current thread is pending dispatch."
    *
    * Since we're pretending to have 16 MRFs anyway, we may as well use the
    * registers required for messages with EOT.
    */
   const struct intel_device_info *devinfo = p->devinfo;
   if (devinfo->ver >= 7 && reg->file == BRW_MESSAGE_REGISTER_FILE) {
      reg->file = BRW_GENERAL_REGISTER_FILE;
      reg->nr += GFX7_MRF_HACK_START;
   }
}

/**
 * Encode the destination operand fields of \p inst from \p dest.
 *
 * Handles the three encoding layouts: Gfx12+ SEND/SENDC, pre-Gfx12
 * split-send (SENDS/SENDSC), and the ordinary dst encoding (direct or
 * indirect, align1 or align16).  May also shrink the instruction's exec
 * size to match a small destination when p->automatic_exec_sizes is set.
 */
void
brw_set_dest(struct brw_codegen *p, brw_inst *inst, struct brw_reg dest)
{
   const struct intel_device_info *devinfo = p->devinfo;

   if (dest.file == BRW_MESSAGE_REGISTER_FILE)
      assert((dest.nr & ~BRW_MRF_COMPR4) < BRW_MAX_MRF(devinfo->ver));
   else if (dest.file == BRW_GENERAL_REGISTER_FILE)
      assert(dest.nr < 128);

   /* The hardware has a restriction where a destination of size Byte with
    * a stride of 1 is only allowed for a packed byte MOV. For any other
    * instruction, the stride must be at least 2, even when the destination
    * is the NULL register.
    */
   if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
       dest.nr == BRW_ARF_NULL &&
       type_sz(dest.type) == 1 &&
       dest.hstride == BRW_HORIZONTAL_STRIDE_1) {
      dest.hstride = BRW_HORIZONTAL_STRIDE_2;
   }

   gfx7_convert_mrf_to_grf(p, &dest);

   if (devinfo->ver >= 12 &&
       (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND ||
        brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC)) {
      /* Gfx12+ SEND destination: direct GRF/ARF only, no subregister,
       * no source modifiers, contiguous region.
       */
      assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
             dest.file == BRW_ARCHITECTURE_REGISTER_FILE);
      assert(dest.address_mode == BRW_ADDRESS_DIRECT);
      assert(dest.subnr == 0);
      assert(brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1 ||
             (dest.hstride == BRW_HORIZONTAL_STRIDE_1 &&
              dest.vstride == dest.width + 1));
      assert(!dest.negate && !dest.abs);
      brw_inst_set_dst_reg_file(devinfo, inst, dest.file);
      brw_inst_set_dst_da_reg_nr(devinfo, inst, dest.nr);

   } else if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS ||
              brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC) {
      /* Pre-Gfx12 split-send: dedicated compact dst encoding with a
       * 16-byte-aligned subregister.
       */
      assert(devinfo->ver < 12);
      assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
             dest.file == BRW_ARCHITECTURE_REGISTER_FILE);
      assert(dest.address_mode == BRW_ADDRESS_DIRECT);
      assert(dest.subnr % 16 == 0);
      assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1 &&
             dest.vstride == dest.width + 1);
      assert(!dest.negate && !dest.abs);
      brw_inst_set_dst_da_reg_nr(devinfo, inst, dest.nr);
      brw_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16);
      brw_inst_set_send_dst_reg_file(devinfo, inst, dest.file);
   } else {
      /* Ordinary destination encoding. */
      brw_inst_set_dst_file_type(devinfo, inst, dest.file, dest.type);
      brw_inst_set_dst_address_mode(devinfo, inst, dest.address_mode);

      if (dest.address_mode == BRW_ADDRESS_DIRECT) {
         brw_inst_set_dst_da_reg_nr(devinfo, inst, dest.nr);

         if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
            brw_inst_set_dst_da1_subreg_nr(devinfo, inst, dest.subnr);
            /* Stride 0 is not encodable for a destination; promote to 1. */
            if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
               dest.hstride = BRW_HORIZONTAL_STRIDE_1;
            brw_inst_set_dst_hstride(devinfo, inst, dest.hstride);
         } else {
            brw_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16);
            brw_inst_set_da16_writemask(devinfo, inst, dest.writemask);
            if (dest.file == BRW_GENERAL_REGISTER_FILE ||
                dest.file == BRW_MESSAGE_REGISTER_FILE) {
               assert(dest.writemask != 0);
            }
            /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1:
             *    Although Dst.HorzStride is a don't care for Align16, HW needs
             *    this to be programmed as "01".
             */
            brw_inst_set_dst_hstride(devinfo, inst, 1);
         }
      } else {
         brw_inst_set_dst_ia_subreg_nr(devinfo, inst, dest.subnr);

         /* These are different sizes in align1 vs align16:
          */
         if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
            brw_inst_set_dst_ia1_addr_imm(devinfo, inst,
                                          dest.indirect_offset);
            /* Stride 0 is not encodable for a destination; promote to 1. */
            if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
               dest.hstride = BRW_HORIZONTAL_STRIDE_1;
            brw_inst_set_dst_hstride(devinfo, inst, dest.hstride);
         } else {
            brw_inst_set_dst_ia16_addr_imm(devinfo, inst,
                                           dest.indirect_offset);
            /* even ignored in da16, still need to set as '01' */
            brw_inst_set_dst_hstride(devinfo, inst, 1);
         }
      }
   }

   /* Generators should set a default exec_size of either 8 (SIMD4x2 or SIMD8)
    * or 16 (SIMD16), as that's normally correct.  However, when dealing with
    * small registers, it can be useful for us to automatically reduce it to
    * match the register size.
    */
   if (p->automatic_exec_sizes) {
      /*
       * In platforms that support fp64 we can emit instructions with a width
       * of 4 that need two SIMD8 registers and an exec_size of 8 or 16. In
       * these cases we need to make sure that these instructions have their
       * exec sizes set properly when they are emitted and we can't rely on
       * this code to fix it.
       */
      bool fix_exec_size;
      if (devinfo->ver >= 6)
         fix_exec_size = dest.width < BRW_EXECUTE_4;
      else
         fix_exec_size = dest.width < BRW_EXECUTE_8;

      if (fix_exec_size)
         brw_inst_set_exec_size(devinfo, inst, dest.width);
   }
}

/**
 * Encode the first source operand fields of \p inst from \p reg.
 *
 * Like brw_set_dest(), this dispatches between the Gfx12+ SEND layout,
 * the pre-Gfx12 split-send layout, and the ordinary src0 encoding
 * (immediate, direct, or indirect; align1 or align16).
 */
void
brw_set_src0(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
{
   const struct intel_device_info *devinfo = p->devinfo;

   if (reg.file == BRW_MESSAGE_REGISTER_FILE)
      assert((reg.nr & ~BRW_MRF_COMPR4) < BRW_MAX_MRF(devinfo->ver));
   else if (reg.file == BRW_GENERAL_REGISTER_FILE)
      assert(reg.nr < 128);

   gfx7_convert_mrf_to_grf(p, &reg);

   if (devinfo->ver >= 6 &&
       (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND ||
        brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC ||
        brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS ||
        brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC)) {
      /* Any source modifiers or regions will be ignored, since this just
       * identifies the MRF/GRF to start reading the message contents from.
       * Check for some likely failures.
       */
      assert(!reg.negate);
      assert(!reg.abs);
      assert(reg.address_mode == BRW_ADDRESS_DIRECT);
   }

   if (devinfo->ver >= 12 &&
       (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND ||
        brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC)) {
      /* Gfx12+ SEND src0: direct, unmodified, subnr 0, scalar or
       * contiguous region.
       */
      assert(reg.file != BRW_IMMEDIATE_VALUE);
      assert(reg.address_mode == BRW_ADDRESS_DIRECT);
      assert(reg.subnr == 0);
      assert(has_scalar_region(reg) ||
             (reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
              reg.vstride == reg.width + 1));
      assert(!reg.negate && !reg.abs);
      brw_inst_set_send_src0_reg_file(devinfo, inst, reg.file);
      brw_inst_set_src0_da_reg_nr(devinfo, inst, reg.nr);

   } else if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS ||
              brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC) {
      /* Pre-Gfx12 split-send src0: GRF only, 16-byte-aligned subregister. */
      assert(reg.file == BRW_GENERAL_REGISTER_FILE);
      assert(reg.address_mode == BRW_ADDRESS_DIRECT);
      assert(reg.subnr % 16 == 0);
      assert(has_scalar_region(reg) ||
             (reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
              reg.vstride == reg.width + 1));
      assert(!reg.negate && !reg.abs);
      brw_inst_set_src0_da_reg_nr(devinfo, inst, reg.nr);
      brw_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
   } else {
      brw_inst_set_src0_file_type(devinfo, inst, reg.file, reg.type);
      brw_inst_set_src0_abs(devinfo, inst, reg.abs);
      brw_inst_set_src0_negate(devinfo, inst, reg.negate);
      brw_inst_set_src0_address_mode(devinfo, inst, reg.address_mode);

      if (reg.file == BRW_IMMEDIATE_VALUE) {
         /* Pick the immediate encoding width from the type (DF, 64-bit
          * integer, or 32-bit).
          */
         if (reg.type == BRW_REGISTER_TYPE_DF ||
             brw_inst_opcode(devinfo, inst) == BRW_OPCODE_DIM)
            brw_inst_set_imm_df(devinfo, inst, reg.df);
         else if (reg.type == BRW_REGISTER_TYPE_UQ ||
                  reg.type == BRW_REGISTER_TYPE_Q)
            brw_inst_set_imm_uq(devinfo, inst, reg.u64);
         else
            brw_inst_set_imm_ud(devinfo, inst, reg.ud);

         /* For a 32-bit immediate, mirror the type into src1's fields
          * (pre-Gfx12 encoding requirement; a 64-bit immediate occupies
          * the src1 slot itself).
          */
         if (devinfo->ver < 12 && type_sz(reg.type) < 8) {
            brw_inst_set_src1_reg_file(devinfo, inst,
                                       BRW_ARCHITECTURE_REGISTER_FILE);
            brw_inst_set_src1_reg_hw_type(devinfo, inst,
                                          brw_inst_src0_reg_hw_type(devinfo, inst));
         }
      } else {
         if (reg.address_mode == BRW_ADDRESS_DIRECT) {
            brw_inst_set_src0_da_reg_nr(devinfo, inst, reg.nr);
            if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
               brw_inst_set_src0_da1_subreg_nr(devinfo, inst, reg.subnr);
            } else {
               brw_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
            }
         } else {
            brw_inst_set_src0_ia_subreg_nr(devinfo, inst, reg.subnr);

            if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
               brw_inst_set_src0_ia1_addr_imm(devinfo, inst, reg.indirect_offset);
            } else {
               brw_inst_set_src0_ia16_addr_imm(devinfo, inst, reg.indirect_offset);
            }
         }

         if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
            /* A <1;1,0> scalar region for SIMD1; otherwise use the region
             * as given.
             */
            if (reg.width == BRW_WIDTH_1 &&
                brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) {
               brw_inst_set_src0_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0);
               brw_inst_set_src0_width(devinfo, inst, BRW_WIDTH_1);
               brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0);
            } else {
               brw_inst_set_src0_hstride(devinfo, inst, reg.hstride);
               brw_inst_set_src0_width(devinfo, inst, reg.width);
               brw_inst_set_src0_vstride(devinfo, inst, reg.vstride);
            }
         } else {
            brw_inst_set_src0_da16_swiz_x(devinfo, inst,
                                          BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X));
            brw_inst_set_src0_da16_swiz_y(devinfo, inst,
                                          BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y));
            brw_inst_set_src0_da16_swiz_z(devinfo, inst,
                                          BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z));
            brw_inst_set_src0_da16_swiz_w(devinfo, inst,
                                          BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W));

            if (reg.vstride == BRW_VERTICAL_STRIDE_8) {
               /* This is an oddity of the fact we're using the same
                * descriptions for registers in align_16 as align_1:
                */
               brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
            } else if (devinfo->verx10 == 70 &&
                       reg.type == BRW_REGISTER_TYPE_DF &&
                       reg.vstride == BRW_VERTICAL_STRIDE_2) {
               /* From SNB PRM:
                *
                * "For Align16 access mode, only encodings of 0000 and 0011
                *  are allowed. Other codes are reserved."
                *
                * Presumably the DevSNB behavior applies to IVB as well.
                */
               brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
            } else {
               brw_inst_set_src0_vstride(devinfo, inst, reg.vstride);
            }
         }
      }
   }
}


/**
 * Encode the second source operand fields of \p inst from \p reg.
 *
 * SENDS/SENDSC (and Gfx12+ SEND/SENDC) use the dedicated extended-
 * descriptor src1 encoding; all other opcodes use the ordinary src1
 * encoding, which only supports direct addressing and 32-bit immediates.
 */
void
brw_set_src1(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
{
   const struct intel_device_info *devinfo = p->devinfo;

   if (reg.file == BRW_GENERAL_REGISTER_FILE)
      assert(reg.nr < 128);

   if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS ||
       brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC ||
       (devinfo->ver >= 12 &&
        (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND ||
         brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC))) {
      assert(reg.file == BRW_GENERAL_REGISTER_FILE ||
             reg.file == BRW_ARCHITECTURE_REGISTER_FILE);
      assert(reg.address_mode == BRW_ADDRESS_DIRECT);
      assert(reg.subnr == 0);
      assert(has_scalar_region(reg) ||
             (reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
              reg.vstride == reg.width + 1));
      assert(!reg.negate && !reg.abs);
      brw_inst_set_send_src1_reg_nr(devinfo, inst, reg.nr);
      brw_inst_set_send_src1_reg_file(devinfo, inst, reg.file);
   } else {
      /* From the IVB PRM Vol. 4, Pt. 3, Section 3.3.3.5:
       *
       *    "Accumulator registers may be accessed explicitly as src0
       *     operands only."
       */
      assert(reg.file != BRW_ARCHITECTURE_REGISTER_FILE ||
             reg.nr != BRW_ARF_ACCUMULATOR);

      gfx7_convert_mrf_to_grf(p, &reg);
      assert(reg.file != BRW_MESSAGE_REGISTER_FILE);

      brw_inst_set_src1_file_type(devinfo, inst, reg.file, reg.type);
      brw_inst_set_src1_abs(devinfo, inst, reg.abs);
      brw_inst_set_src1_negate(devinfo, inst, reg.negate);

      /* Only src1 can be immediate in two-argument instructions.
       */
      assert(brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE);

      if (reg.file == BRW_IMMEDIATE_VALUE) {
         /* two-argument instructions can only use 32-bit immediates */
         assert(type_sz(reg.type) < 8);
         brw_inst_set_imm_ud(devinfo, inst, reg.ud);
      } else {
         /* This is a hardware restriction, which may or may not be lifted
          * in the future:
          */
         assert (reg.address_mode == BRW_ADDRESS_DIRECT);
         /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */

         brw_inst_set_src1_da_reg_nr(devinfo, inst, reg.nr);
         if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
            brw_inst_set_src1_da1_subreg_nr(devinfo, inst, reg.subnr);
         } else {
            brw_inst_set_src1_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
         }

         if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
            /* A <1;1,0> scalar region for SIMD1; otherwise use the region
             * as given.
             */
            if (reg.width == BRW_WIDTH_1 &&
                brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) {
               brw_inst_set_src1_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0);
               brw_inst_set_src1_width(devinfo, inst, BRW_WIDTH_1);
               brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0);
            } else {
               brw_inst_set_src1_hstride(devinfo, inst, reg.hstride);
               brw_inst_set_src1_width(devinfo, inst, reg.width);
               brw_inst_set_src1_vstride(devinfo, inst, reg.vstride);
            }
         } else {
            brw_inst_set_src1_da16_swiz_x(devinfo, inst,
                                          BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X));
            brw_inst_set_src1_da16_swiz_y(devinfo, inst,
                                          BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y));
            brw_inst_set_src1_da16_swiz_z(devinfo, inst,
                                          BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z));
            brw_inst_set_src1_da16_swiz_w(devinfo, inst,
                                          BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W));

            if (reg.vstride == BRW_VERTICAL_STRIDE_8) {
               /* This is an oddity of the fact we're using the same
                * descriptions for registers in align_16 as align_1:
                */
               brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
            } else if (devinfo->verx10 == 70 &&
                       reg.type == BRW_REGISTER_TYPE_DF &&
                       reg.vstride == BRW_VERTICAL_STRIDE_2) {
               /* From SNB PRM:
                *
                * "For Align16 access mode, only encodings of 0000 and 0011
                *  are allowed. Other codes are reserved."
                *
                * Presumably the DevSNB behavior applies to IVB as well.
                */
               brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
            } else {
               brw_inst_set_src1_vstride(devinfo, inst, reg.vstride);
            }
         }
      }
   }
}

/**
 * Specify the descriptor and extended descriptor immediate for a SEND(C)
 * message instruction.
 */
void
brw_set_desc_ex(struct brw_codegen *p, brw_inst *inst,
                unsigned desc, unsigned ex_desc)
{
   const struct intel_device_info *devinfo = p->devinfo;
   assert(brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND ||
          brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC);
   /* Pre-Gfx12, an immediate descriptor lives in the src1 slot and needs
    * its file/type fields set accordingly.
    */
   if (devinfo->ver < 12)
      brw_inst_set_src1_file_type(devinfo, inst,
                                  BRW_IMMEDIATE_VALUE, BRW_REGISTER_TYPE_UD);
   brw_inst_set_send_desc(devinfo, inst, desc);
   if (devinfo->ver >= 9)
      brw_inst_set_send_ex_desc(devinfo, inst, ex_desc);
}

/**
 * Fill in the message descriptor fields for an extended-math (SFID MATH)
 * message.  Message and response lengths are inferred from the function:
 * POW and the integer-divide functions take two operands; SINCOS and
 * INT_DIV_QUOTIENT_AND_REMAINDER return two results.
 *
 * Saturation is moved from the instruction into the message descriptor.
 */
static void brw_set_math_message( struct brw_codegen *p,
                                  brw_inst *inst,
                                  unsigned function,
                                  unsigned integer_type,
                                  bool low_precision,
                                  unsigned dataType )
{
   const struct intel_device_info *devinfo = p->devinfo;
   unsigned msg_length;
   unsigned response_length;

   /* Infer message length from the function */
   switch (function) {
   case BRW_MATH_FUNCTION_POW:
   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT:
   case BRW_MATH_FUNCTION_INT_DIV_REMAINDER:
   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
      msg_length = 2;
      break;
   default:
      msg_length = 1;
      break;
   }

   /* Infer response length from the function */
   switch (function) {
   case BRW_MATH_FUNCTION_SINCOS:
   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
      response_length = 2;
      break;
   default:
      response_length = 1;
      break;
   }

   brw_set_desc(p, inst, brw_message_desc(
                   devinfo, msg_length, response_length, false));

   brw_inst_set_sfid(devinfo, inst, BRW_SFID_MATH);
   brw_inst_set_math_msg_function(devinfo, inst, function);
   brw_inst_set_math_msg_signed_int(devinfo, inst, integer_type);
   brw_inst_set_math_msg_precision(devinfo, inst, low_precision);
   /* Saturation is expressed in the message, not the instruction. */
   brw_inst_set_math_msg_saturate(devinfo, inst, brw_inst_saturate(devinfo, inst));
   brw_inst_set_math_msg_data_type(devinfo, inst, dataType);
   brw_inst_set_saturate(devinfo, inst, 0);
}


/**
 * Fill in the descriptor fields for a URB FF_SYNC message (geometry
 * front-end handshake).  Message length is always 1.
 */
static void brw_set_ff_sync_message(struct brw_codegen *p,
                                    brw_inst *insn,
                                    bool allocate,
                                    unsigned response_length,
                                    bool end_of_thread)
{
   const struct intel_device_info *devinfo = p->devinfo;

   brw_set_desc(p, insn, brw_message_desc(
                   devinfo, 1, response_length, true));

   brw_inst_set_sfid(devinfo, insn, BRW_SFID_URB);
   brw_inst_set_eot(devinfo, insn, end_of_thread);
   brw_inst_set_urb_opcode(devinfo, insn, 1); /* FF_SYNC */
   brw_inst_set_urb_allocate(devinfo, insn, allocate);
   /* The following fields are not used by FF_SYNC: */
   brw_inst_set_urb_global_offset(devinfo, insn, 0);
   brw_inst_set_urb_swizzle_control(devinfo, insn, 0);
   brw_inst_set_urb_used(devinfo, insn, 0);
   brw_inst_set_urb_complete(devinfo, insn, 0);
}

/**
 * Fill in the descriptor fields for a URB write message, translating
 * brw_urb_write_flags into the per-generation URB descriptor layout.
 * Several flag/field combinations are only valid on certain generations,
 * enforced by the leading asserts.
 */
static void brw_set_urb_message( struct brw_codegen *p,
                                 brw_inst *insn,
                                 enum brw_urb_write_flags flags,
                                 unsigned msg_length,
                                 unsigned response_length,
                                 unsigned offset,
                                 unsigned swizzle_control )
{
   const struct intel_device_info *devinfo = p->devinfo;

   assert(devinfo->ver < 7 || swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE);
   assert(devinfo->ver < 7 || !(flags & BRW_URB_WRITE_ALLOCATE));
   assert(devinfo->ver >= 7 || !(flags & BRW_URB_WRITE_PER_SLOT_OFFSET));

   brw_set_desc(p, insn, brw_message_desc(
                   devinfo, msg_length, response_length, true));

   brw_inst_set_sfid(devinfo, insn, BRW_SFID_URB);
   brw_inst_set_eot(devinfo, insn, !!(flags & BRW_URB_WRITE_EOT));

   if (flags & BRW_URB_WRITE_OWORD) {
      assert(msg_length == 2); /* header + one OWORD of data */
      brw_inst_set_urb_opcode(devinfo, insn, BRW_URB_OPCODE_WRITE_OWORD);
   } else {
      brw_inst_set_urb_opcode(devinfo, insn, BRW_URB_OPCODE_WRITE_HWORD);
   }

   brw_inst_set_urb_global_offset(devinfo, insn, offset);
   brw_inst_set_urb_swizzle_control(devinfo, insn, swizzle_control);

   if (devinfo->ver < 8) {
      brw_inst_set_urb_complete(devinfo, insn, !!(flags & BRW_URB_WRITE_COMPLETE));
   }

   if (devinfo->ver < 7) {
      brw_inst_set_urb_allocate(devinfo, insn, !!(flags & BRW_URB_WRITE_ALLOCATE));
      brw_inst_set_urb_used(devinfo, insn, !(flags & BRW_URB_WRITE_UNUSED));
   } else {
      brw_inst_set_urb_per_slot_offset(devinfo, insn,
                                       !!(flags & BRW_URB_WRITE_PER_SLOT_OFFSET));
   }
}

/**
 * Fill in the descriptor fields for a Gfx7+ data-port scratch block
 * read/write message.  num_regs must be 1, 2, 4, or (Gfx8+) 8; the
 * block-size field is log2-encoded on Gfx8+ and (num_regs - 1) before.
 */
static void
gfx7_set_dp_scratch_message(struct brw_codegen *p,
                            brw_inst *inst,
                            bool write,
                            bool dword,
                            bool invalidate_after_read,
                            unsigned num_regs,
                            unsigned addr_offset,
                            unsigned mlen,
                            unsigned rlen,
                            bool header_present)
{
   const struct intel_device_info *devinfo = p->devinfo;
   assert(num_regs == 1 || num_regs == 2 || num_regs == 4 ||
          (devinfo->ver >= 8 && num_regs == 8));
   const unsigned block_size = (devinfo->ver >= 8 ? util_logbase2(num_regs) :
                                num_regs - 1);

   brw_set_desc(p, inst, brw_message_desc(
                   devinfo, mlen, rlen, header_present));

   brw_inst_set_sfid(devinfo, inst, GFX7_SFID_DATAPORT_DATA_CACHE);
   brw_inst_set_dp_category(devinfo, inst, 1); /* Scratch Block Read/Write msgs */
   brw_inst_set_scratch_read_write(devinfo, inst, write);
   brw_inst_set_scratch_type(devinfo, inst, dword);
   brw_inst_set_scratch_invalidate_after_read(devinfo, inst, invalidate_after_read);
   brw_inst_set_scratch_block_size(devinfo, inst, block_size);
   brw_inst_set_scratch_addr_offset(devinfo, inst, addr_offset);
}

/**
 * Apply the codegen default state (exec size, group, predication, flag
 * register, etc.) to a freshly-allocated instruction.  The flag register
 * number/subnumber fields live in different places for 3-src align16
 * instructions than for everything else.
 */
static void
brw_inst_set_state(const struct intel_device_info *devinfo,
                   brw_inst *insn,
                   const struct brw_insn_state *state)
{
   brw_inst_set_exec_size(devinfo, insn, state->exec_size);
   brw_inst_set_group(devinfo, insn, state->group);
   brw_inst_set_compression(devinfo, insn, state->compressed);
   brw_inst_set_access_mode(devinfo, insn, state->access_mode);
   brw_inst_set_mask_control(devinfo, insn, state->mask_control);
   if (devinfo->ver >= 12)
      brw_inst_set_swsb(devinfo, insn, tgl_swsb_encode(devinfo, state->swsb));
   brw_inst_set_saturate(devinfo, insn, state->saturate);
   brw_inst_set_pred_control(devinfo, insn, state->predicate);
   brw_inst_set_pred_inv(devinfo, insn, state->pred_inv);

   /* flag_subreg packs flag register and subregister: reg = subreg / 2,
    * subreg = subreg % 2.
    */
   if (is_3src(devinfo, brw_inst_opcode(devinfo, insn)) &&
       state->access_mode == BRW_ALIGN_16) {
      brw_inst_set_3src_a16_flag_subreg_nr(devinfo, insn, state->flag_subreg % 2);
      if (devinfo->ver >= 7)
         brw_inst_set_3src_a16_flag_reg_nr(devinfo, insn, state->flag_subreg / 2);
   } else {
      brw_inst_set_flag_subreg_nr(devinfo, insn, state->flag_subreg % 2);
      if (devinfo->ver >= 7)
         brw_inst_set_flag_reg_nr(devinfo, insn, state->flag_subreg / 2);
   }

   if (devinfo->ver >= 6)
      brw_inst_set_acc_wr_control(devinfo, insn, state->acc_wr_control);
}

/**
 * Reserve space for \p nr_insn instructions in the program store, aligned
 * to \p align bytes, growing the store as needed.  Returns a pointer to
 * the first reserved slot.  Alignment padding is zeroed.
 */
static brw_inst *
brw_append_insns(struct brw_codegen *p, unsigned nr_insn, unsigned align)
{
   assert(util_is_power_of_two_or_zero(sizeof(brw_inst)));
   assert(util_is_power_of_two_or_zero(align));
   const unsigned align_insn = MAX2(align / sizeof(brw_inst), 1);
   const unsigned start_insn = ALIGN(p->nr_insn, align_insn);
   const unsigned new_nr_insn = start_insn + nr_insn;

   /* NOTE(review): store_size is compared in instruction units but assigned
    * a byte-based value, so growth over-allocates; harmless but worth
    * confirming against upstream intent.
    */
   if (p->store_size < new_nr_insn) {
      p->store_size = util_next_power_of_two(new_nr_insn * sizeof(brw_inst));
      p->store = reralloc(p->mem_ctx, p->store, brw_inst, p->store_size);
   }

   /* Memset any padding due to alignment to 0. We don't want to be hashing
    * or caching a bunch of random bits we got from a memory allocation.
    */
   if (p->nr_insn < start_insn) {
      memset(&p->store[p->nr_insn], 0,
             (start_insn - p->nr_insn) * sizeof(brw_inst));
   }

   assert(p->next_insn_offset == p->nr_insn * sizeof(brw_inst));
   p->nr_insn = new_nr_insn;
   p->next_insn_offset = new_nr_insn * sizeof(brw_inst);

   return &p->store[start_insn];
}

/**
 * Align the next instruction slot to \p align bytes without emitting
 * anything.
 */
void
brw_realign(struct brw_codegen *p, unsigned align)
{
   brw_append_insns(p, 0, align);
}

/**
 * Append raw data (e.g. a constant table) into the instruction stream,
 * aligned to \p align bytes and padded with zeros up to a whole number of
 * instruction slots.  Returns the byte offset of the data within the
 * program store.
 */
int
brw_append_data(struct brw_codegen *p, void *data,
                unsigned size, unsigned align)
{
   unsigned nr_insn = DIV_ROUND_UP(size, sizeof(brw_inst));
   void *dst = brw_append_insns(p, nr_insn, align);
   memcpy(dst, data, size);

   /* If it's not a whole number of instructions, memset the end */
   if (size < nr_insn * sizeof(brw_inst))
      memset(dst + size, 0, nr_insn * sizeof(brw_inst) - size);

   return dst - (void *)p->store;
}

#define next_insn brw_next_insn
/**
 * Allocate the next instruction slot, zero it, set its opcode, and apply
 * the current default instruction state.
 */
brw_inst *
brw_next_insn(struct brw_codegen *p, unsigned opcode)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_inst *insn = brw_append_insns(p, 1, sizeof(brw_inst));

   memset(insn, 0, sizeof(*insn));
   brw_inst_set_opcode(devinfo, insn, opcode);

   /* Apply the default instruction state */
   brw_inst_set_state(devinfo, insn, p->current);

   return insn;
}

/**
 * Record a shader relocation at byte \p offset, growing the relocation
 * array geometrically (doubling, minimum 16 entries) as needed.
 */
void
brw_add_reloc(struct brw_codegen *p, uint32_t id,
              enum brw_shader_reloc_type type,
              uint32_t offset, uint32_t delta)
{
   if (p->num_relocs + 1 > p->reloc_array_size) {
      p->reloc_array_size = MAX2(16, p->reloc_array_size * 2);
      p->relocs = reralloc(p->mem_ctx, p->relocs,
                           struct brw_shader_reloc, p->reloc_array_size);
   }

   p->relocs[p->num_relocs++] = (struct brw_shader_reloc) {
      .id = id,
      .type = type,
      .offset = offset,
      .delta = delta,
   };
}

/* Emit a one-source ALU instruction with the given opcode. */
static brw_inst *
brw_alu1(struct brw_codegen *p, unsigned opcode,
         struct brw_reg dest, struct brw_reg src)
{
   brw_inst *insn = next_insn(p, opcode);
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src);
   return insn;
}

/* Emit a two-source ALU instruction with the given opcode. */
static brw_inst *
brw_alu2(struct brw_codegen *p, unsigned opcode,
         struct brw_reg dest, struct brw_reg src0, struct brw_reg src1)
{
   /* 64-bit immediates are only supported on 1-src instructions */
   assert(src0.file != BRW_IMMEDIATE_VALUE || type_sz(src0.type) <= 4);
   assert(src1.file != BRW_IMMEDIATE_VALUE || type_sz(src1.type) <= 4);

   brw_inst *insn = next_insn(p, opcode);
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);
   return insn;
}

static int
get_3src_subreg_nr(struct brw_reg reg)
{
   /* Normally, SubRegNum is in bytes (0..31.
However, 3-src instructions 75701e04c3fSmrg * use 32-bit units (components 0..7). Since they only support F/D/UD 75801e04c3fSmrg * types, this doesn't lose any flexibility, but uses fewer bits. 75901e04c3fSmrg */ 76001e04c3fSmrg return reg.subnr / 4; 76101e04c3fSmrg} 76201e04c3fSmrg 7637ec681f3Smrgstatic enum gfx10_align1_3src_vertical_stride 7647ec681f3Smrgto_3src_align1_vstride(const struct intel_device_info *devinfo, 7657ec681f3Smrg enum brw_vertical_stride vstride) 76601e04c3fSmrg{ 76701e04c3fSmrg switch (vstride) { 76801e04c3fSmrg case BRW_VERTICAL_STRIDE_0: 76901e04c3fSmrg return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_0; 7707ec681f3Smrg case BRW_VERTICAL_STRIDE_1: 7717ec681f3Smrg assert(devinfo->ver >= 12); 7727ec681f3Smrg return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_1; 77301e04c3fSmrg case BRW_VERTICAL_STRIDE_2: 7747ec681f3Smrg assert(devinfo->ver < 12); 77501e04c3fSmrg return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_2; 77601e04c3fSmrg case BRW_VERTICAL_STRIDE_4: 77701e04c3fSmrg return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_4; 77801e04c3fSmrg case BRW_VERTICAL_STRIDE_8: 77901e04c3fSmrg case BRW_VERTICAL_STRIDE_16: 78001e04c3fSmrg return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_8; 78101e04c3fSmrg default: 78201e04c3fSmrg unreachable("invalid vstride"); 78301e04c3fSmrg } 78401e04c3fSmrg} 78501e04c3fSmrg 78601e04c3fSmrg 7877ec681f3Smrgstatic enum gfx10_align1_3src_src_horizontal_stride 78801e04c3fSmrgto_3src_align1_hstride(enum brw_horizontal_stride hstride) 78901e04c3fSmrg{ 79001e04c3fSmrg switch (hstride) { 79101e04c3fSmrg case BRW_HORIZONTAL_STRIDE_0: 79201e04c3fSmrg return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_0; 79301e04c3fSmrg case BRW_HORIZONTAL_STRIDE_1: 79401e04c3fSmrg return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_1; 79501e04c3fSmrg case BRW_HORIZONTAL_STRIDE_2: 79601e04c3fSmrg return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_2; 79701e04c3fSmrg case BRW_HORIZONTAL_STRIDE_4: 79801e04c3fSmrg return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_4; 79901e04c3fSmrg default: 80001e04c3fSmrg 
unreachable("invalid hstride"); 80101e04c3fSmrg } 80201e04c3fSmrg} 80301e04c3fSmrg 80401e04c3fSmrgstatic brw_inst * 80501e04c3fSmrgbrw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest, 80601e04c3fSmrg struct brw_reg src0, struct brw_reg src1, struct brw_reg src2) 80701e04c3fSmrg{ 8087ec681f3Smrg const struct intel_device_info *devinfo = p->devinfo; 80901e04c3fSmrg brw_inst *inst = next_insn(p, opcode); 81001e04c3fSmrg 8117ec681f3Smrg gfx7_convert_mrf_to_grf(p, &dest); 81201e04c3fSmrg 81301e04c3fSmrg assert(dest.nr < 128); 8147ec681f3Smrg 8157ec681f3Smrg if (devinfo->ver >= 10) 8167ec681f3Smrg assert(!(src0.file == BRW_IMMEDIATE_VALUE && 8177ec681f3Smrg src2.file == BRW_IMMEDIATE_VALUE)); 8187ec681f3Smrg 8199f464c52Smaya assert(src0.file == BRW_IMMEDIATE_VALUE || src0.nr < 128); 8209f464c52Smaya assert(src1.file != BRW_IMMEDIATE_VALUE && src1.nr < 128); 8219f464c52Smaya assert(src2.file == BRW_IMMEDIATE_VALUE || src2.nr < 128); 82201e04c3fSmrg assert(dest.address_mode == BRW_ADDRESS_DIRECT); 82301e04c3fSmrg assert(src0.address_mode == BRW_ADDRESS_DIRECT); 82401e04c3fSmrg assert(src1.address_mode == BRW_ADDRESS_DIRECT); 82501e04c3fSmrg assert(src2.address_mode == BRW_ADDRESS_DIRECT); 82601e04c3fSmrg 82701e04c3fSmrg if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { 82801e04c3fSmrg assert(dest.file == BRW_GENERAL_REGISTER_FILE || 82901e04c3fSmrg dest.file == BRW_ARCHITECTURE_REGISTER_FILE); 83001e04c3fSmrg 8317ec681f3Smrg if (devinfo->ver >= 12) { 8327ec681f3Smrg brw_inst_set_3src_a1_dst_reg_file(devinfo, inst, dest.file); 83301e04c3fSmrg brw_inst_set_3src_dst_reg_nr(devinfo, inst, dest.nr); 8347ec681f3Smrg } else { 8357ec681f3Smrg if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE) { 8367ec681f3Smrg brw_inst_set_3src_a1_dst_reg_file(devinfo, inst, 8377ec681f3Smrg BRW_ALIGN1_3SRC_ACCUMULATOR); 8387ec681f3Smrg brw_inst_set_3src_dst_reg_nr(devinfo, inst, BRW_ARF_ACCUMULATOR); 8397ec681f3Smrg } else { 8407ec681f3Smrg 
brw_inst_set_3src_a1_dst_reg_file(devinfo, inst, 8417ec681f3Smrg BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE); 8427ec681f3Smrg brw_inst_set_3src_dst_reg_nr(devinfo, inst, dest.nr); 8437ec681f3Smrg } 84401e04c3fSmrg } 84501e04c3fSmrg brw_inst_set_3src_a1_dst_subreg_nr(devinfo, inst, dest.subnr / 8); 84601e04c3fSmrg 84701e04c3fSmrg brw_inst_set_3src_a1_dst_hstride(devinfo, inst, BRW_ALIGN1_3SRC_DST_HORIZONTAL_STRIDE_1); 84801e04c3fSmrg 84901e04c3fSmrg if (brw_reg_type_is_floating_point(dest.type)) { 85001e04c3fSmrg brw_inst_set_3src_a1_exec_type(devinfo, inst, 85101e04c3fSmrg BRW_ALIGN1_3SRC_EXEC_TYPE_FLOAT); 85201e04c3fSmrg } else { 85301e04c3fSmrg brw_inst_set_3src_a1_exec_type(devinfo, inst, 85401e04c3fSmrg BRW_ALIGN1_3SRC_EXEC_TYPE_INT); 85501e04c3fSmrg } 85601e04c3fSmrg 85701e04c3fSmrg brw_inst_set_3src_a1_dst_type(devinfo, inst, dest.type); 85801e04c3fSmrg brw_inst_set_3src_a1_src0_type(devinfo, inst, src0.type); 85901e04c3fSmrg brw_inst_set_3src_a1_src1_type(devinfo, inst, src1.type); 86001e04c3fSmrg brw_inst_set_3src_a1_src2_type(devinfo, inst, src2.type); 86101e04c3fSmrg 8627ec681f3Smrg if (src0.file == BRW_IMMEDIATE_VALUE) { 8637ec681f3Smrg brw_inst_set_3src_a1_src0_imm(devinfo, inst, src0.ud); 86401e04c3fSmrg } else { 8657ec681f3Smrg brw_inst_set_3src_a1_src0_vstride( 8667ec681f3Smrg devinfo, inst, to_3src_align1_vstride(devinfo, src0.vstride)); 8677ec681f3Smrg brw_inst_set_3src_a1_src0_hstride(devinfo, inst, 8687ec681f3Smrg to_3src_align1_hstride(src0.hstride)); 8697ec681f3Smrg brw_inst_set_3src_a1_src0_subreg_nr(devinfo, inst, src0.subnr); 8707ec681f3Smrg if (src0.type == BRW_REGISTER_TYPE_NF) { 8717ec681f3Smrg brw_inst_set_3src_src0_reg_nr(devinfo, inst, BRW_ARF_ACCUMULATOR); 8727ec681f3Smrg } else { 8737ec681f3Smrg brw_inst_set_3src_src0_reg_nr(devinfo, inst, src0.nr); 8747ec681f3Smrg } 8757ec681f3Smrg brw_inst_set_3src_src0_abs(devinfo, inst, src0.abs); 8767ec681f3Smrg brw_inst_set_3src_src0_negate(devinfo, inst, src0.negate); 87701e04c3fSmrg } 
8787ec681f3Smrg brw_inst_set_3src_a1_src1_vstride( 8797ec681f3Smrg devinfo, inst, to_3src_align1_vstride(devinfo, src1.vstride)); 8807ec681f3Smrg brw_inst_set_3src_a1_src1_hstride(devinfo, inst, 8817ec681f3Smrg to_3src_align1_hstride(src1.hstride)); 88201e04c3fSmrg 88301e04c3fSmrg brw_inst_set_3src_a1_src1_subreg_nr(devinfo, inst, src1.subnr); 88401e04c3fSmrg if (src1.file == BRW_ARCHITECTURE_REGISTER_FILE) { 88501e04c3fSmrg brw_inst_set_3src_src1_reg_nr(devinfo, inst, BRW_ARF_ACCUMULATOR); 88601e04c3fSmrg } else { 88701e04c3fSmrg brw_inst_set_3src_src1_reg_nr(devinfo, inst, src1.nr); 88801e04c3fSmrg } 88901e04c3fSmrg brw_inst_set_3src_src1_abs(devinfo, inst, src1.abs); 89001e04c3fSmrg brw_inst_set_3src_src1_negate(devinfo, inst, src1.negate); 89101e04c3fSmrg 8927ec681f3Smrg if (src2.file == BRW_IMMEDIATE_VALUE) { 8937ec681f3Smrg brw_inst_set_3src_a1_src2_imm(devinfo, inst, src2.ud); 8947ec681f3Smrg } else { 8957ec681f3Smrg brw_inst_set_3src_a1_src2_hstride(devinfo, inst, 8967ec681f3Smrg to_3src_align1_hstride(src2.hstride)); 8977ec681f3Smrg /* no vstride on src2 */ 8987ec681f3Smrg brw_inst_set_3src_a1_src2_subreg_nr(devinfo, inst, src2.subnr); 8997ec681f3Smrg brw_inst_set_3src_src2_reg_nr(devinfo, inst, src2.nr); 9007ec681f3Smrg brw_inst_set_3src_src2_abs(devinfo, inst, src2.abs); 9017ec681f3Smrg brw_inst_set_3src_src2_negate(devinfo, inst, src2.negate); 9027ec681f3Smrg } 90301e04c3fSmrg 90401e04c3fSmrg assert(src0.file == BRW_GENERAL_REGISTER_FILE || 90501e04c3fSmrg src0.file == BRW_IMMEDIATE_VALUE || 90601e04c3fSmrg (src0.file == BRW_ARCHITECTURE_REGISTER_FILE && 90701e04c3fSmrg src0.type == BRW_REGISTER_TYPE_NF)); 90801e04c3fSmrg assert(src1.file == BRW_GENERAL_REGISTER_FILE || 90901e04c3fSmrg src1.file == BRW_ARCHITECTURE_REGISTER_FILE); 91001e04c3fSmrg assert(src2.file == BRW_GENERAL_REGISTER_FILE || 91101e04c3fSmrg src2.file == BRW_IMMEDIATE_VALUE); 91201e04c3fSmrg 9137ec681f3Smrg if (devinfo->ver >= 12) { 9147ec681f3Smrg if (src0.file == 
BRW_IMMEDIATE_VALUE) { 9157ec681f3Smrg brw_inst_set_3src_a1_src0_is_imm(devinfo, inst, 1); 9167ec681f3Smrg } else { 9177ec681f3Smrg brw_inst_set_3src_a1_src0_reg_file(devinfo, inst, src0.file); 9187ec681f3Smrg } 9197ec681f3Smrg 9207ec681f3Smrg brw_inst_set_3src_a1_src1_reg_file(devinfo, inst, src1.file); 9217ec681f3Smrg 9227ec681f3Smrg if (src2.file == BRW_IMMEDIATE_VALUE) { 9237ec681f3Smrg brw_inst_set_3src_a1_src2_is_imm(devinfo, inst, 1); 9247ec681f3Smrg } else { 9257ec681f3Smrg brw_inst_set_3src_a1_src2_reg_file(devinfo, inst, src2.file); 9267ec681f3Smrg } 9277ec681f3Smrg } else { 9287ec681f3Smrg brw_inst_set_3src_a1_src0_reg_file(devinfo, inst, 9297ec681f3Smrg src0.file == BRW_GENERAL_REGISTER_FILE ? 9307ec681f3Smrg BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE : 9317ec681f3Smrg BRW_ALIGN1_3SRC_IMMEDIATE_VALUE); 9327ec681f3Smrg brw_inst_set_3src_a1_src1_reg_file(devinfo, inst, 9337ec681f3Smrg src1.file == BRW_GENERAL_REGISTER_FILE ? 9347ec681f3Smrg BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE : 9357ec681f3Smrg BRW_ALIGN1_3SRC_ACCUMULATOR); 9367ec681f3Smrg brw_inst_set_3src_a1_src2_reg_file(devinfo, inst, 9377ec681f3Smrg src2.file == BRW_GENERAL_REGISTER_FILE ? 
9387ec681f3Smrg BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE : 9397ec681f3Smrg BRW_ALIGN1_3SRC_IMMEDIATE_VALUE); 9407ec681f3Smrg } 9417ec681f3Smrg 94201e04c3fSmrg } else { 94301e04c3fSmrg assert(dest.file == BRW_GENERAL_REGISTER_FILE || 94401e04c3fSmrg dest.file == BRW_MESSAGE_REGISTER_FILE); 94501e04c3fSmrg assert(dest.type == BRW_REGISTER_TYPE_F || 94601e04c3fSmrg dest.type == BRW_REGISTER_TYPE_DF || 94701e04c3fSmrg dest.type == BRW_REGISTER_TYPE_D || 9489f464c52Smaya dest.type == BRW_REGISTER_TYPE_UD || 9497ec681f3Smrg (dest.type == BRW_REGISTER_TYPE_HF && devinfo->ver >= 8)); 9507ec681f3Smrg if (devinfo->ver == 6) { 95101e04c3fSmrg brw_inst_set_3src_a16_dst_reg_file(devinfo, inst, 95201e04c3fSmrg dest.file == BRW_MESSAGE_REGISTER_FILE); 95301e04c3fSmrg } 95401e04c3fSmrg brw_inst_set_3src_dst_reg_nr(devinfo, inst, dest.nr); 9557ec681f3Smrg brw_inst_set_3src_a16_dst_subreg_nr(devinfo, inst, dest.subnr / 4); 95601e04c3fSmrg brw_inst_set_3src_a16_dst_writemask(devinfo, inst, dest.writemask); 95701e04c3fSmrg 95801e04c3fSmrg assert(src0.file == BRW_GENERAL_REGISTER_FILE); 95901e04c3fSmrg brw_inst_set_3src_a16_src0_swizzle(devinfo, inst, src0.swizzle); 96001e04c3fSmrg brw_inst_set_3src_a16_src0_subreg_nr(devinfo, inst, get_3src_subreg_nr(src0)); 96101e04c3fSmrg brw_inst_set_3src_src0_reg_nr(devinfo, inst, src0.nr); 96201e04c3fSmrg brw_inst_set_3src_src0_abs(devinfo, inst, src0.abs); 96301e04c3fSmrg brw_inst_set_3src_src0_negate(devinfo, inst, src0.negate); 96401e04c3fSmrg brw_inst_set_3src_a16_src0_rep_ctrl(devinfo, inst, 96501e04c3fSmrg src0.vstride == BRW_VERTICAL_STRIDE_0); 96601e04c3fSmrg 96701e04c3fSmrg assert(src1.file == BRW_GENERAL_REGISTER_FILE); 96801e04c3fSmrg brw_inst_set_3src_a16_src1_swizzle(devinfo, inst, src1.swizzle); 96901e04c3fSmrg brw_inst_set_3src_a16_src1_subreg_nr(devinfo, inst, get_3src_subreg_nr(src1)); 97001e04c3fSmrg brw_inst_set_3src_src1_reg_nr(devinfo, inst, src1.nr); 97101e04c3fSmrg brw_inst_set_3src_src1_abs(devinfo, inst, src1.abs); 
97201e04c3fSmrg brw_inst_set_3src_src1_negate(devinfo, inst, src1.negate); 97301e04c3fSmrg brw_inst_set_3src_a16_src1_rep_ctrl(devinfo, inst, 97401e04c3fSmrg src1.vstride == BRW_VERTICAL_STRIDE_0); 97501e04c3fSmrg 97601e04c3fSmrg assert(src2.file == BRW_GENERAL_REGISTER_FILE); 97701e04c3fSmrg brw_inst_set_3src_a16_src2_swizzle(devinfo, inst, src2.swizzle); 97801e04c3fSmrg brw_inst_set_3src_a16_src2_subreg_nr(devinfo, inst, get_3src_subreg_nr(src2)); 97901e04c3fSmrg brw_inst_set_3src_src2_reg_nr(devinfo, inst, src2.nr); 98001e04c3fSmrg brw_inst_set_3src_src2_abs(devinfo, inst, src2.abs); 98101e04c3fSmrg brw_inst_set_3src_src2_negate(devinfo, inst, src2.negate); 98201e04c3fSmrg brw_inst_set_3src_a16_src2_rep_ctrl(devinfo, inst, 98301e04c3fSmrg src2.vstride == BRW_VERTICAL_STRIDE_0); 98401e04c3fSmrg 9857ec681f3Smrg if (devinfo->ver >= 7) { 98601e04c3fSmrg /* Set both the source and destination types based on dest.type, 98701e04c3fSmrg * ignoring the source register types. The MAD and LRP emitters ensure 98801e04c3fSmrg * that all four types are float. The BFE and BFI2 emitters, however, 98901e04c3fSmrg * may send us mixed D and UD types and want us to ignore that and use 99001e04c3fSmrg * the destination type. 99101e04c3fSmrg */ 99201e04c3fSmrg brw_inst_set_3src_a16_src_type(devinfo, inst, dest.type); 99301e04c3fSmrg brw_inst_set_3src_a16_dst_type(devinfo, inst, dest.type); 9949f464c52Smaya 9959f464c52Smaya /* From the Bspec, 3D Media GPGPU, Instruction fields, srcType: 9969f464c52Smaya * 9979f464c52Smaya * "Three source instructions can use operands with mixed-mode 9989f464c52Smaya * precision. When SrcType field is set to :f or :hf it defines 9999f464c52Smaya * precision for source 0 only, and fields Src1Type and Src2Type 10009f464c52Smaya * define precision for other source operands: 10019f464c52Smaya * 10029f464c52Smaya * 0b = :f. Single precision Float (32-bit). 10039f464c52Smaya * 1b = :hf. Half precision Float (16-bit)." 
10049f464c52Smaya */ 10059f464c52Smaya if (src1.type == BRW_REGISTER_TYPE_HF) 10069f464c52Smaya brw_inst_set_3src_a16_src1_type(devinfo, inst, 1); 10079f464c52Smaya 10089f464c52Smaya if (src2.type == BRW_REGISTER_TYPE_HF) 10099f464c52Smaya brw_inst_set_3src_a16_src2_type(devinfo, inst, 1); 101001e04c3fSmrg } 101101e04c3fSmrg } 101201e04c3fSmrg 101301e04c3fSmrg return inst; 101401e04c3fSmrg} 101501e04c3fSmrg 101601e04c3fSmrg 101701e04c3fSmrg/*********************************************************************** 101801e04c3fSmrg * Convenience routines. 101901e04c3fSmrg */ 102001e04c3fSmrg#define ALU1(OP) \ 102101e04c3fSmrgbrw_inst *brw_##OP(struct brw_codegen *p, \ 102201e04c3fSmrg struct brw_reg dest, \ 102301e04c3fSmrg struct brw_reg src0) \ 102401e04c3fSmrg{ \ 102501e04c3fSmrg return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \ 102601e04c3fSmrg} 102701e04c3fSmrg 102801e04c3fSmrg#define ALU2(OP) \ 102901e04c3fSmrgbrw_inst *brw_##OP(struct brw_codegen *p, \ 103001e04c3fSmrg struct brw_reg dest, \ 103101e04c3fSmrg struct brw_reg src0, \ 103201e04c3fSmrg struct brw_reg src1) \ 103301e04c3fSmrg{ \ 103401e04c3fSmrg return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \ 103501e04c3fSmrg} 103601e04c3fSmrg 103701e04c3fSmrg#define ALU3(OP) \ 103801e04c3fSmrgbrw_inst *brw_##OP(struct brw_codegen *p, \ 103901e04c3fSmrg struct brw_reg dest, \ 104001e04c3fSmrg struct brw_reg src0, \ 104101e04c3fSmrg struct brw_reg src1, \ 104201e04c3fSmrg struct brw_reg src2) \ 10439f464c52Smaya{ \ 10449f464c52Smaya if (p->current->access_mode == BRW_ALIGN_16) { \ 10459f464c52Smaya if (src0.vstride == BRW_VERTICAL_STRIDE_0) \ 10469f464c52Smaya src0.swizzle = BRW_SWIZZLE_XXXX; \ 10479f464c52Smaya if (src1.vstride == BRW_VERTICAL_STRIDE_0) \ 10489f464c52Smaya src1.swizzle = BRW_SWIZZLE_XXXX; \ 10499f464c52Smaya if (src2.vstride == BRW_VERTICAL_STRIDE_0) \ 10509f464c52Smaya src2.swizzle = BRW_SWIZZLE_XXXX; \ 10519f464c52Smaya } \ 105201e04c3fSmrg return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, 
src1, src2); \ 105301e04c3fSmrg} 105401e04c3fSmrg 105501e04c3fSmrg#define ALU3F(OP) \ 105601e04c3fSmrgbrw_inst *brw_##OP(struct brw_codegen *p, \ 105701e04c3fSmrg struct brw_reg dest, \ 105801e04c3fSmrg struct brw_reg src0, \ 105901e04c3fSmrg struct brw_reg src1, \ 106001e04c3fSmrg struct brw_reg src2) \ 106101e04c3fSmrg{ \ 106201e04c3fSmrg assert(dest.type == BRW_REGISTER_TYPE_F || \ 106301e04c3fSmrg dest.type == BRW_REGISTER_TYPE_DF); \ 106401e04c3fSmrg if (dest.type == BRW_REGISTER_TYPE_F) { \ 106501e04c3fSmrg assert(src0.type == BRW_REGISTER_TYPE_F); \ 106601e04c3fSmrg assert(src1.type == BRW_REGISTER_TYPE_F); \ 106701e04c3fSmrg assert(src2.type == BRW_REGISTER_TYPE_F); \ 106801e04c3fSmrg } else if (dest.type == BRW_REGISTER_TYPE_DF) { \ 106901e04c3fSmrg assert(src0.type == BRW_REGISTER_TYPE_DF); \ 107001e04c3fSmrg assert(src1.type == BRW_REGISTER_TYPE_DF); \ 107101e04c3fSmrg assert(src2.type == BRW_REGISTER_TYPE_DF); \ 10729f464c52Smaya } \ 10739f464c52Smaya \ 10749f464c52Smaya if (p->current->access_mode == BRW_ALIGN_16) { \ 10759f464c52Smaya if (src0.vstride == BRW_VERTICAL_STRIDE_0) \ 10769f464c52Smaya src0.swizzle = BRW_SWIZZLE_XXXX; \ 10779f464c52Smaya if (src1.vstride == BRW_VERTICAL_STRIDE_0) \ 10789f464c52Smaya src1.swizzle = BRW_SWIZZLE_XXXX; \ 10799f464c52Smaya if (src2.vstride == BRW_VERTICAL_STRIDE_0) \ 10809f464c52Smaya src2.swizzle = BRW_SWIZZLE_XXXX; \ 108101e04c3fSmrg } \ 108201e04c3fSmrg return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \ 108301e04c3fSmrg} 108401e04c3fSmrg 108501e04c3fSmrgALU2(SEL) 108601e04c3fSmrgALU1(NOT) 108701e04c3fSmrgALU2(AND) 108801e04c3fSmrgALU2(OR) 108901e04c3fSmrgALU2(XOR) 109001e04c3fSmrgALU2(SHR) 109101e04c3fSmrgALU2(SHL) 109201e04c3fSmrgALU1(DIM) 109301e04c3fSmrgALU2(ASR) 10947ec681f3SmrgALU2(ROL) 10957ec681f3SmrgALU2(ROR) 109601e04c3fSmrgALU3(CSEL) 109701e04c3fSmrgALU1(FRC) 109801e04c3fSmrgALU1(RNDD) 10997ec681f3SmrgALU1(RNDE) 11007ec681f3SmrgALU1(RNDU) 11017ec681f3SmrgALU1(RNDZ) 
110201e04c3fSmrgALU2(MAC) 110301e04c3fSmrgALU2(MACH) 110401e04c3fSmrgALU1(LZD) 110501e04c3fSmrgALU2(DP4) 110601e04c3fSmrgALU2(DPH) 110701e04c3fSmrgALU2(DP3) 110801e04c3fSmrgALU2(DP2) 11097ec681f3SmrgALU3(DP4A) 111001e04c3fSmrgALU3(MAD) 111101e04c3fSmrgALU3F(LRP) 111201e04c3fSmrgALU1(BFREV) 111301e04c3fSmrgALU3(BFE) 111401e04c3fSmrgALU2(BFI1) 111501e04c3fSmrgALU3(BFI2) 111601e04c3fSmrgALU1(FBH) 111701e04c3fSmrgALU1(FBL) 111801e04c3fSmrgALU1(CBIT) 111901e04c3fSmrgALU2(ADDC) 112001e04c3fSmrgALU2(SUBB) 11217ec681f3SmrgALU3(ADD3) 112201e04c3fSmrg 112301e04c3fSmrgbrw_inst * 112401e04c3fSmrgbrw_MOV(struct brw_codegen *p, struct brw_reg dest, struct brw_reg src0) 112501e04c3fSmrg{ 11267ec681f3Smrg const struct intel_device_info *devinfo = p->devinfo; 112701e04c3fSmrg 112801e04c3fSmrg /* When converting F->DF on IVB/BYT, every odd source channel is ignored. 112901e04c3fSmrg * To avoid the problems that causes, we use an <X,2,0> source region to 113001e04c3fSmrg * read each element twice. 113101e04c3fSmrg */ 11327ec681f3Smrg if (devinfo->verx10 == 70 && 113301e04c3fSmrg brw_get_default_access_mode(p) == BRW_ALIGN_1 && 113401e04c3fSmrg dest.type == BRW_REGISTER_TYPE_DF && 113501e04c3fSmrg (src0.type == BRW_REGISTER_TYPE_F || 113601e04c3fSmrg src0.type == BRW_REGISTER_TYPE_D || 113701e04c3fSmrg src0.type == BRW_REGISTER_TYPE_UD) && 113801e04c3fSmrg !has_scalar_region(src0)) { 113901e04c3fSmrg assert(src0.vstride == src0.width + src0.hstride); 114001e04c3fSmrg src0.vstride = src0.hstride; 114101e04c3fSmrg src0.width = BRW_WIDTH_2; 114201e04c3fSmrg src0.hstride = BRW_HORIZONTAL_STRIDE_0; 114301e04c3fSmrg } 114401e04c3fSmrg 114501e04c3fSmrg return brw_alu1(p, BRW_OPCODE_MOV, dest, src0); 114601e04c3fSmrg} 114701e04c3fSmrg 114801e04c3fSmrgbrw_inst * 114901e04c3fSmrgbrw_ADD(struct brw_codegen *p, struct brw_reg dest, 115001e04c3fSmrg struct brw_reg src0, struct brw_reg src1) 115101e04c3fSmrg{ 115201e04c3fSmrg /* 6.2.2: add */ 115301e04c3fSmrg if (src0.type == BRW_REGISTER_TYPE_F || 
115401e04c3fSmrg (src0.file == BRW_IMMEDIATE_VALUE && 115501e04c3fSmrg src0.type == BRW_REGISTER_TYPE_VF)) { 115601e04c3fSmrg assert(src1.type != BRW_REGISTER_TYPE_UD); 115701e04c3fSmrg assert(src1.type != BRW_REGISTER_TYPE_D); 115801e04c3fSmrg } 115901e04c3fSmrg 116001e04c3fSmrg if (src1.type == BRW_REGISTER_TYPE_F || 116101e04c3fSmrg (src1.file == BRW_IMMEDIATE_VALUE && 116201e04c3fSmrg src1.type == BRW_REGISTER_TYPE_VF)) { 116301e04c3fSmrg assert(src0.type != BRW_REGISTER_TYPE_UD); 116401e04c3fSmrg assert(src0.type != BRW_REGISTER_TYPE_D); 116501e04c3fSmrg } 116601e04c3fSmrg 116701e04c3fSmrg return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1); 116801e04c3fSmrg} 116901e04c3fSmrg 117001e04c3fSmrgbrw_inst * 117101e04c3fSmrgbrw_AVG(struct brw_codegen *p, struct brw_reg dest, 117201e04c3fSmrg struct brw_reg src0, struct brw_reg src1) 117301e04c3fSmrg{ 117401e04c3fSmrg assert(dest.type == src0.type); 117501e04c3fSmrg assert(src0.type == src1.type); 117601e04c3fSmrg switch (src0.type) { 117701e04c3fSmrg case BRW_REGISTER_TYPE_B: 117801e04c3fSmrg case BRW_REGISTER_TYPE_UB: 117901e04c3fSmrg case BRW_REGISTER_TYPE_W: 118001e04c3fSmrg case BRW_REGISTER_TYPE_UW: 118101e04c3fSmrg case BRW_REGISTER_TYPE_D: 118201e04c3fSmrg case BRW_REGISTER_TYPE_UD: 118301e04c3fSmrg break; 118401e04c3fSmrg default: 118501e04c3fSmrg unreachable("Bad type for brw_AVG"); 118601e04c3fSmrg } 118701e04c3fSmrg 118801e04c3fSmrg return brw_alu2(p, BRW_OPCODE_AVG, dest, src0, src1); 118901e04c3fSmrg} 119001e04c3fSmrg 119101e04c3fSmrgbrw_inst * 119201e04c3fSmrgbrw_MUL(struct brw_codegen *p, struct brw_reg dest, 119301e04c3fSmrg struct brw_reg src0, struct brw_reg src1) 119401e04c3fSmrg{ 119501e04c3fSmrg /* 6.32.38: mul */ 119601e04c3fSmrg if (src0.type == BRW_REGISTER_TYPE_D || 119701e04c3fSmrg src0.type == BRW_REGISTER_TYPE_UD || 119801e04c3fSmrg src1.type == BRW_REGISTER_TYPE_D || 119901e04c3fSmrg src1.type == BRW_REGISTER_TYPE_UD) { 120001e04c3fSmrg assert(dest.type != BRW_REGISTER_TYPE_F); 
120101e04c3fSmrg } 120201e04c3fSmrg 120301e04c3fSmrg if (src0.type == BRW_REGISTER_TYPE_F || 120401e04c3fSmrg (src0.file == BRW_IMMEDIATE_VALUE && 120501e04c3fSmrg src0.type == BRW_REGISTER_TYPE_VF)) { 120601e04c3fSmrg assert(src1.type != BRW_REGISTER_TYPE_UD); 120701e04c3fSmrg assert(src1.type != BRW_REGISTER_TYPE_D); 120801e04c3fSmrg } 120901e04c3fSmrg 121001e04c3fSmrg if (src1.type == BRW_REGISTER_TYPE_F || 121101e04c3fSmrg (src1.file == BRW_IMMEDIATE_VALUE && 121201e04c3fSmrg src1.type == BRW_REGISTER_TYPE_VF)) { 121301e04c3fSmrg assert(src0.type != BRW_REGISTER_TYPE_UD); 121401e04c3fSmrg assert(src0.type != BRW_REGISTER_TYPE_D); 121501e04c3fSmrg } 121601e04c3fSmrg 121701e04c3fSmrg assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE || 121801e04c3fSmrg src0.nr != BRW_ARF_ACCUMULATOR); 121901e04c3fSmrg assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE || 122001e04c3fSmrg src1.nr != BRW_ARF_ACCUMULATOR); 122101e04c3fSmrg 122201e04c3fSmrg return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1); 122301e04c3fSmrg} 122401e04c3fSmrg 122501e04c3fSmrgbrw_inst * 122601e04c3fSmrgbrw_LINE(struct brw_codegen *p, struct brw_reg dest, 122701e04c3fSmrg struct brw_reg src0, struct brw_reg src1) 122801e04c3fSmrg{ 122901e04c3fSmrg src0.vstride = BRW_VERTICAL_STRIDE_0; 123001e04c3fSmrg src0.width = BRW_WIDTH_1; 123101e04c3fSmrg src0.hstride = BRW_HORIZONTAL_STRIDE_0; 123201e04c3fSmrg return brw_alu2(p, BRW_OPCODE_LINE, dest, src0, src1); 123301e04c3fSmrg} 123401e04c3fSmrg 123501e04c3fSmrgbrw_inst * 123601e04c3fSmrgbrw_PLN(struct brw_codegen *p, struct brw_reg dest, 123701e04c3fSmrg struct brw_reg src0, struct brw_reg src1) 123801e04c3fSmrg{ 123901e04c3fSmrg src0.vstride = BRW_VERTICAL_STRIDE_0; 124001e04c3fSmrg src0.width = BRW_WIDTH_1; 124101e04c3fSmrg src0.hstride = BRW_HORIZONTAL_STRIDE_0; 124201e04c3fSmrg src1.vstride = BRW_VERTICAL_STRIDE_8; 124301e04c3fSmrg src1.width = BRW_WIDTH_8; 124401e04c3fSmrg src1.hstride = BRW_HORIZONTAL_STRIDE_1; 124501e04c3fSmrg return brw_alu2(p, 
BRW_OPCODE_PLN, dest, src0, src1); 124601e04c3fSmrg} 124701e04c3fSmrg 124801e04c3fSmrgbrw_inst * 124901e04c3fSmrgbrw_F32TO16(struct brw_codegen *p, struct brw_reg dst, struct brw_reg src) 125001e04c3fSmrg{ 12517ec681f3Smrg const struct intel_device_info *devinfo = p->devinfo; 125201e04c3fSmrg const bool align16 = brw_get_default_access_mode(p) == BRW_ALIGN_16; 125301e04c3fSmrg /* The F32TO16 instruction doesn't support 32-bit destination types in 12547ec681f3Smrg * Align1 mode, and neither does the Gfx8 implementation in terms of a 12557ec681f3Smrg * converting MOV. Gfx7 does zero out the high 16 bits in Align16 mode as 125601e04c3fSmrg * an undocumented feature. 125701e04c3fSmrg */ 125801e04c3fSmrg const bool needs_zero_fill = (dst.type == BRW_REGISTER_TYPE_UD && 12597ec681f3Smrg (!align16 || devinfo->ver >= 8)); 126001e04c3fSmrg brw_inst *inst; 126101e04c3fSmrg 126201e04c3fSmrg if (align16) { 126301e04c3fSmrg assert(dst.type == BRW_REGISTER_TYPE_UD); 126401e04c3fSmrg } else { 126501e04c3fSmrg assert(dst.type == BRW_REGISTER_TYPE_UD || 126601e04c3fSmrg dst.type == BRW_REGISTER_TYPE_W || 126701e04c3fSmrg dst.type == BRW_REGISTER_TYPE_UW || 126801e04c3fSmrg dst.type == BRW_REGISTER_TYPE_HF); 126901e04c3fSmrg } 127001e04c3fSmrg 127101e04c3fSmrg brw_push_insn_state(p); 127201e04c3fSmrg 127301e04c3fSmrg if (needs_zero_fill) { 127401e04c3fSmrg brw_set_default_access_mode(p, BRW_ALIGN_1); 127501e04c3fSmrg dst = spread(retype(dst, BRW_REGISTER_TYPE_W), 2); 127601e04c3fSmrg } 127701e04c3fSmrg 12787ec681f3Smrg if (devinfo->ver >= 8) { 127901e04c3fSmrg inst = brw_MOV(p, retype(dst, BRW_REGISTER_TYPE_HF), src); 128001e04c3fSmrg } else { 12817ec681f3Smrg assert(devinfo->ver == 7); 128201e04c3fSmrg inst = brw_alu1(p, BRW_OPCODE_F32TO16, dst, src); 128301e04c3fSmrg } 128401e04c3fSmrg 128501e04c3fSmrg if (needs_zero_fill) { 12867ec681f3Smrg if (devinfo->ver < 12) 12877ec681f3Smrg brw_inst_set_no_dd_clear(devinfo, inst, true); 12887ec681f3Smrg brw_set_default_swsb(p, 
tgl_swsb_null()); 128901e04c3fSmrg inst = brw_MOV(p, suboffset(dst, 1), brw_imm_w(0)); 12907ec681f3Smrg if (devinfo->ver < 12) 12917ec681f3Smrg brw_inst_set_no_dd_check(devinfo, inst, true); 129201e04c3fSmrg } 129301e04c3fSmrg 129401e04c3fSmrg brw_pop_insn_state(p); 129501e04c3fSmrg return inst; 129601e04c3fSmrg} 129701e04c3fSmrg 129801e04c3fSmrgbrw_inst * 129901e04c3fSmrgbrw_F16TO32(struct brw_codegen *p, struct brw_reg dst, struct brw_reg src) 130001e04c3fSmrg{ 13017ec681f3Smrg const struct intel_device_info *devinfo = p->devinfo; 130201e04c3fSmrg bool align16 = brw_get_default_access_mode(p) == BRW_ALIGN_16; 130301e04c3fSmrg 130401e04c3fSmrg if (align16) { 130501e04c3fSmrg assert(src.type == BRW_REGISTER_TYPE_UD); 130601e04c3fSmrg } else { 130701e04c3fSmrg /* From the Ivybridge PRM, Vol4, Part3, Section 6.26 f16to32: 130801e04c3fSmrg * 130901e04c3fSmrg * Because this instruction does not have a 16-bit floating-point 131001e04c3fSmrg * type, the source data type must be Word (W). The destination type 131101e04c3fSmrg * must be F (Float). 
131201e04c3fSmrg */ 131301e04c3fSmrg if (src.type == BRW_REGISTER_TYPE_UD) 131401e04c3fSmrg src = spread(retype(src, BRW_REGISTER_TYPE_W), 2); 131501e04c3fSmrg 131601e04c3fSmrg assert(src.type == BRW_REGISTER_TYPE_W || 131701e04c3fSmrg src.type == BRW_REGISTER_TYPE_UW || 131801e04c3fSmrg src.type == BRW_REGISTER_TYPE_HF); 131901e04c3fSmrg } 132001e04c3fSmrg 13217ec681f3Smrg if (devinfo->ver >= 8) { 132201e04c3fSmrg return brw_MOV(p, dst, retype(src, BRW_REGISTER_TYPE_HF)); 132301e04c3fSmrg } else { 13247ec681f3Smrg assert(devinfo->ver == 7); 132501e04c3fSmrg return brw_alu1(p, BRW_OPCODE_F16TO32, dst, src); 132601e04c3fSmrg } 132701e04c3fSmrg} 132801e04c3fSmrg 132901e04c3fSmrg 133001e04c3fSmrgvoid brw_NOP(struct brw_codegen *p) 133101e04c3fSmrg{ 133201e04c3fSmrg brw_inst *insn = next_insn(p, BRW_OPCODE_NOP); 133301e04c3fSmrg memset(insn, 0, sizeof(*insn)); 133401e04c3fSmrg brw_inst_set_opcode(p->devinfo, insn, BRW_OPCODE_NOP); 133501e04c3fSmrg} 133601e04c3fSmrg 13377ec681f3Smrgvoid brw_SYNC(struct brw_codegen *p, enum tgl_sync_function func) 13387ec681f3Smrg{ 13397ec681f3Smrg brw_inst *insn = next_insn(p, BRW_OPCODE_SYNC); 13407ec681f3Smrg brw_inst_set_cond_modifier(p->devinfo, insn, func); 13417ec681f3Smrg} 134201e04c3fSmrg 134301e04c3fSmrg/*********************************************************************** 134401e04c3fSmrg * Comparisons, if/else/endif 134501e04c3fSmrg */ 134601e04c3fSmrg 134701e04c3fSmrgbrw_inst * 134801e04c3fSmrgbrw_JMPI(struct brw_codegen *p, struct brw_reg index, 134901e04c3fSmrg unsigned predicate_control) 135001e04c3fSmrg{ 13517ec681f3Smrg const struct intel_device_info *devinfo = p->devinfo; 135201e04c3fSmrg struct brw_reg ip = brw_ip_reg(); 135301e04c3fSmrg brw_inst *inst = brw_alu2(p, BRW_OPCODE_JMPI, ip, ip, index); 135401e04c3fSmrg 135501e04c3fSmrg brw_inst_set_exec_size(devinfo, inst, BRW_EXECUTE_1); 135601e04c3fSmrg brw_inst_set_qtr_control(devinfo, inst, BRW_COMPRESSION_NONE); 135701e04c3fSmrg brw_inst_set_mask_control(devinfo, 
inst, BRW_MASK_DISABLE); 135801e04c3fSmrg brw_inst_set_pred_control(devinfo, inst, predicate_control); 135901e04c3fSmrg 136001e04c3fSmrg return inst; 136101e04c3fSmrg} 136201e04c3fSmrg 136301e04c3fSmrgstatic void 136401e04c3fSmrgpush_if_stack(struct brw_codegen *p, brw_inst *inst) 136501e04c3fSmrg{ 136601e04c3fSmrg p->if_stack[p->if_stack_depth] = inst - p->store; 136701e04c3fSmrg 136801e04c3fSmrg p->if_stack_depth++; 136901e04c3fSmrg if (p->if_stack_array_size <= p->if_stack_depth) { 137001e04c3fSmrg p->if_stack_array_size *= 2; 137101e04c3fSmrg p->if_stack = reralloc(p->mem_ctx, p->if_stack, int, 137201e04c3fSmrg p->if_stack_array_size); 137301e04c3fSmrg } 137401e04c3fSmrg} 137501e04c3fSmrg 137601e04c3fSmrgstatic brw_inst * 137701e04c3fSmrgpop_if_stack(struct brw_codegen *p) 137801e04c3fSmrg{ 137901e04c3fSmrg p->if_stack_depth--; 138001e04c3fSmrg return &p->store[p->if_stack[p->if_stack_depth]]; 138101e04c3fSmrg} 138201e04c3fSmrg 138301e04c3fSmrgstatic void 138401e04c3fSmrgpush_loop_stack(struct brw_codegen *p, brw_inst *inst) 138501e04c3fSmrg{ 138601e04c3fSmrg if (p->loop_stack_array_size <= (p->loop_stack_depth + 1)) { 138701e04c3fSmrg p->loop_stack_array_size *= 2; 138801e04c3fSmrg p->loop_stack = reralloc(p->mem_ctx, p->loop_stack, int, 138901e04c3fSmrg p->loop_stack_array_size); 139001e04c3fSmrg p->if_depth_in_loop = reralloc(p->mem_ctx, p->if_depth_in_loop, int, 139101e04c3fSmrg p->loop_stack_array_size); 139201e04c3fSmrg } 139301e04c3fSmrg 139401e04c3fSmrg p->loop_stack[p->loop_stack_depth] = inst - p->store; 139501e04c3fSmrg p->loop_stack_depth++; 139601e04c3fSmrg p->if_depth_in_loop[p->loop_stack_depth] = 0; 139701e04c3fSmrg} 139801e04c3fSmrg 139901e04c3fSmrgstatic brw_inst * 140001e04c3fSmrgget_inner_do_insn(struct brw_codegen *p) 140101e04c3fSmrg{ 140201e04c3fSmrg return &p->store[p->loop_stack[p->loop_stack_depth - 1]]; 140301e04c3fSmrg} 140401e04c3fSmrg 140501e04c3fSmrg/* EU takes the value from the flag register and pushes it onto some 
140601e04c3fSmrg * sort of a stack (presumably merging with any flag value already on 140701e04c3fSmrg * the stack). Within an if block, the flags at the top of the stack 140801e04c3fSmrg * control execution on each channel of the unit, eg. on each of the 140901e04c3fSmrg * 16 pixel values in our wm programs. 141001e04c3fSmrg * 141101e04c3fSmrg * When the matching 'else' instruction is reached (presumably by 141201e04c3fSmrg * countdown of the instruction count patched in by our ELSE/ENDIF 141301e04c3fSmrg * functions), the relevant flags are inverted. 141401e04c3fSmrg * 141501e04c3fSmrg * When the matching 'endif' instruction is reached, the flags are 141601e04c3fSmrg * popped off. If the stack is now empty, normal execution resumes. 141701e04c3fSmrg */ 141801e04c3fSmrgbrw_inst * 141901e04c3fSmrgbrw_IF(struct brw_codegen *p, unsigned execute_size) 142001e04c3fSmrg{ 14217ec681f3Smrg const struct intel_device_info *devinfo = p->devinfo; 142201e04c3fSmrg brw_inst *insn; 142301e04c3fSmrg 142401e04c3fSmrg insn = next_insn(p, BRW_OPCODE_IF); 142501e04c3fSmrg 142601e04c3fSmrg /* Override the defaults for this instruction: 142701e04c3fSmrg */ 14287ec681f3Smrg if (devinfo->ver < 6) { 142901e04c3fSmrg brw_set_dest(p, insn, brw_ip_reg()); 143001e04c3fSmrg brw_set_src0(p, insn, brw_ip_reg()); 143101e04c3fSmrg brw_set_src1(p, insn, brw_imm_d(0x0)); 14327ec681f3Smrg } else if (devinfo->ver == 6) { 143301e04c3fSmrg brw_set_dest(p, insn, brw_imm_w(0)); 14347ec681f3Smrg brw_inst_set_gfx6_jump_count(devinfo, insn, 0); 143501e04c3fSmrg brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D))); 143601e04c3fSmrg brw_set_src1(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D))); 14377ec681f3Smrg } else if (devinfo->ver == 7) { 143801e04c3fSmrg brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D))); 143901e04c3fSmrg brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D))); 144001e04c3fSmrg brw_set_src1(p, insn, brw_imm_w(0)); 
144101e04c3fSmrg brw_inst_set_jip(devinfo, insn, 0); 144201e04c3fSmrg brw_inst_set_uip(devinfo, insn, 0); 144301e04c3fSmrg } else { 144401e04c3fSmrg brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D))); 14457ec681f3Smrg if (devinfo->ver < 12) 14467ec681f3Smrg brw_set_src0(p, insn, brw_imm_d(0)); 144701e04c3fSmrg brw_inst_set_jip(devinfo, insn, 0); 144801e04c3fSmrg brw_inst_set_uip(devinfo, insn, 0); 144901e04c3fSmrg } 145001e04c3fSmrg 145101e04c3fSmrg brw_inst_set_exec_size(devinfo, insn, execute_size); 145201e04c3fSmrg brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE); 145301e04c3fSmrg brw_inst_set_pred_control(devinfo, insn, BRW_PREDICATE_NORMAL); 145401e04c3fSmrg brw_inst_set_mask_control(devinfo, insn, BRW_MASK_ENABLE); 14557ec681f3Smrg if (!p->single_program_flow && devinfo->ver < 6) 145601e04c3fSmrg brw_inst_set_thread_control(devinfo, insn, BRW_THREAD_SWITCH); 145701e04c3fSmrg 145801e04c3fSmrg push_if_stack(p, insn); 145901e04c3fSmrg p->if_depth_in_loop[p->loop_stack_depth]++; 146001e04c3fSmrg return insn; 146101e04c3fSmrg} 146201e04c3fSmrg 14637ec681f3Smrg/* This function is only used for gfx6-style IF instructions with an 14647ec681f3Smrg * embedded comparison (conditional modifier). It is not used on gfx7. 
146501e04c3fSmrg */ 146601e04c3fSmrgbrw_inst * 14677ec681f3Smrggfx6_IF(struct brw_codegen *p, enum brw_conditional_mod conditional, 146801e04c3fSmrg struct brw_reg src0, struct brw_reg src1) 146901e04c3fSmrg{ 14707ec681f3Smrg const struct intel_device_info *devinfo = p->devinfo; 147101e04c3fSmrg brw_inst *insn; 147201e04c3fSmrg 147301e04c3fSmrg insn = next_insn(p, BRW_OPCODE_IF); 147401e04c3fSmrg 147501e04c3fSmrg brw_set_dest(p, insn, brw_imm_w(0)); 147601e04c3fSmrg brw_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p)); 14777ec681f3Smrg brw_inst_set_gfx6_jump_count(devinfo, insn, 0); 147801e04c3fSmrg brw_set_src0(p, insn, src0); 147901e04c3fSmrg brw_set_src1(p, insn, src1); 148001e04c3fSmrg 148101e04c3fSmrg assert(brw_inst_qtr_control(devinfo, insn) == BRW_COMPRESSION_NONE); 148201e04c3fSmrg assert(brw_inst_pred_control(devinfo, insn) == BRW_PREDICATE_NONE); 148301e04c3fSmrg brw_inst_set_cond_modifier(devinfo, insn, conditional); 148401e04c3fSmrg 148501e04c3fSmrg push_if_stack(p, insn); 148601e04c3fSmrg return insn; 148701e04c3fSmrg} 148801e04c3fSmrg 148901e04c3fSmrg/** 149001e04c3fSmrg * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs. 
149101e04c3fSmrg */ 149201e04c3fSmrgstatic void 149301e04c3fSmrgconvert_IF_ELSE_to_ADD(struct brw_codegen *p, 149401e04c3fSmrg brw_inst *if_inst, brw_inst *else_inst) 149501e04c3fSmrg{ 14967ec681f3Smrg const struct intel_device_info *devinfo = p->devinfo; 149701e04c3fSmrg 149801e04c3fSmrg /* The next instruction (where the ENDIF would be, if it existed) */ 149901e04c3fSmrg brw_inst *next_inst = &p->store[p->nr_insn]; 150001e04c3fSmrg 150101e04c3fSmrg assert(p->single_program_flow); 150201e04c3fSmrg assert(if_inst != NULL && brw_inst_opcode(devinfo, if_inst) == BRW_OPCODE_IF); 150301e04c3fSmrg assert(else_inst == NULL || brw_inst_opcode(devinfo, else_inst) == BRW_OPCODE_ELSE); 150401e04c3fSmrg assert(brw_inst_exec_size(devinfo, if_inst) == BRW_EXECUTE_1); 150501e04c3fSmrg 150601e04c3fSmrg /* Convert IF to an ADD instruction that moves the instruction pointer 150701e04c3fSmrg * to the first instruction of the ELSE block. If there is no ELSE 150801e04c3fSmrg * block, point to where ENDIF would be. Reverse the predicate. 150901e04c3fSmrg * 151001e04c3fSmrg * There's no need to execute an ENDIF since we don't need to do any 151101e04c3fSmrg * stack operations, and if we're currently executing, we just want to 151201e04c3fSmrg * continue normally. 151301e04c3fSmrg */ 151401e04c3fSmrg brw_inst_set_opcode(devinfo, if_inst, BRW_OPCODE_ADD); 151501e04c3fSmrg brw_inst_set_pred_inv(devinfo, if_inst, true); 151601e04c3fSmrg 151701e04c3fSmrg if (else_inst != NULL) { 151801e04c3fSmrg /* Convert ELSE to an ADD instruction that points where the ENDIF 151901e04c3fSmrg * would be. 
152001e04c3fSmrg */ 152101e04c3fSmrg brw_inst_set_opcode(devinfo, else_inst, BRW_OPCODE_ADD); 152201e04c3fSmrg 152301e04c3fSmrg brw_inst_set_imm_ud(devinfo, if_inst, (else_inst - if_inst + 1) * 16); 152401e04c3fSmrg brw_inst_set_imm_ud(devinfo, else_inst, (next_inst - else_inst) * 16); 152501e04c3fSmrg } else { 152601e04c3fSmrg brw_inst_set_imm_ud(devinfo, if_inst, (next_inst - if_inst) * 16); 152701e04c3fSmrg } 152801e04c3fSmrg} 152901e04c3fSmrg 153001e04c3fSmrg/** 153101e04c3fSmrg * Patch IF and ELSE instructions with appropriate jump targets. 153201e04c3fSmrg */ 153301e04c3fSmrgstatic void 153401e04c3fSmrgpatch_IF_ELSE(struct brw_codegen *p, 153501e04c3fSmrg brw_inst *if_inst, brw_inst *else_inst, brw_inst *endif_inst) 153601e04c3fSmrg{ 15377ec681f3Smrg const struct intel_device_info *devinfo = p->devinfo; 153801e04c3fSmrg 153901e04c3fSmrg /* We shouldn't be patching IF and ELSE instructions in single program flow 154001e04c3fSmrg * mode when gen < 6, because in single program flow mode on those 154101e04c3fSmrg * platforms, we convert flow control instructions to conditional ADDs that 154201e04c3fSmrg * operate on IP (see brw_ENDIF). 154301e04c3fSmrg * 15447ec681f3Smrg * However, on Gfx6, writing to IP doesn't work in single program flow mode 154501e04c3fSmrg * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may 154601e04c3fSmrg * not be updated by non-flow control instructions."). And on later 154701e04c3fSmrg * platforms, there is no significant benefit to converting control flow 154801e04c3fSmrg * instructions to conditional ADDs. So we do patch IF and ELSE 154901e04c3fSmrg * instructions in single program flow mode on those platforms. 
155001e04c3fSmrg */ 15517ec681f3Smrg if (devinfo->ver < 6) 155201e04c3fSmrg assert(!p->single_program_flow); 155301e04c3fSmrg 155401e04c3fSmrg assert(if_inst != NULL && brw_inst_opcode(devinfo, if_inst) == BRW_OPCODE_IF); 155501e04c3fSmrg assert(endif_inst != NULL); 155601e04c3fSmrg assert(else_inst == NULL || brw_inst_opcode(devinfo, else_inst) == BRW_OPCODE_ELSE); 155701e04c3fSmrg 155801e04c3fSmrg unsigned br = brw_jump_scale(devinfo); 155901e04c3fSmrg 156001e04c3fSmrg assert(brw_inst_opcode(devinfo, endif_inst) == BRW_OPCODE_ENDIF); 156101e04c3fSmrg brw_inst_set_exec_size(devinfo, endif_inst, brw_inst_exec_size(devinfo, if_inst)); 156201e04c3fSmrg 156301e04c3fSmrg if (else_inst == NULL) { 156401e04c3fSmrg /* Patch IF -> ENDIF */ 15657ec681f3Smrg if (devinfo->ver < 6) { 156601e04c3fSmrg /* Turn it into an IFF, which means no mask stack operations for 156701e04c3fSmrg * all-false and jumping past the ENDIF. 156801e04c3fSmrg */ 156901e04c3fSmrg brw_inst_set_opcode(devinfo, if_inst, BRW_OPCODE_IFF); 15707ec681f3Smrg brw_inst_set_gfx4_jump_count(devinfo, if_inst, 157101e04c3fSmrg br * (endif_inst - if_inst + 1)); 15727ec681f3Smrg brw_inst_set_gfx4_pop_count(devinfo, if_inst, 0); 15737ec681f3Smrg } else if (devinfo->ver == 6) { 15747ec681f3Smrg /* As of gfx6, there is no IFF and IF must point to the ENDIF. 
*/ 15757ec681f3Smrg brw_inst_set_gfx6_jump_count(devinfo, if_inst, br*(endif_inst - if_inst)); 157601e04c3fSmrg } else { 157701e04c3fSmrg brw_inst_set_uip(devinfo, if_inst, br * (endif_inst - if_inst)); 157801e04c3fSmrg brw_inst_set_jip(devinfo, if_inst, br * (endif_inst - if_inst)); 157901e04c3fSmrg } 158001e04c3fSmrg } else { 158101e04c3fSmrg brw_inst_set_exec_size(devinfo, else_inst, brw_inst_exec_size(devinfo, if_inst)); 158201e04c3fSmrg 158301e04c3fSmrg /* Patch IF -> ELSE */ 15847ec681f3Smrg if (devinfo->ver < 6) { 15857ec681f3Smrg brw_inst_set_gfx4_jump_count(devinfo, if_inst, 158601e04c3fSmrg br * (else_inst - if_inst)); 15877ec681f3Smrg brw_inst_set_gfx4_pop_count(devinfo, if_inst, 0); 15887ec681f3Smrg } else if (devinfo->ver == 6) { 15897ec681f3Smrg brw_inst_set_gfx6_jump_count(devinfo, if_inst, 159001e04c3fSmrg br * (else_inst - if_inst + 1)); 159101e04c3fSmrg } 159201e04c3fSmrg 159301e04c3fSmrg /* Patch ELSE -> ENDIF */ 15947ec681f3Smrg if (devinfo->ver < 6) { 15957ec681f3Smrg /* BRW_OPCODE_ELSE pre-gfx6 should point just past the 159601e04c3fSmrg * matching ENDIF. 159701e04c3fSmrg */ 15987ec681f3Smrg brw_inst_set_gfx4_jump_count(devinfo, else_inst, 159901e04c3fSmrg br * (endif_inst - else_inst + 1)); 16007ec681f3Smrg brw_inst_set_gfx4_pop_count(devinfo, else_inst, 1); 16017ec681f3Smrg } else if (devinfo->ver == 6) { 16027ec681f3Smrg /* BRW_OPCODE_ELSE on gfx6 should point to the matching ENDIF. 
*/ 16037ec681f3Smrg brw_inst_set_gfx6_jump_count(devinfo, else_inst, 160401e04c3fSmrg br * (endif_inst - else_inst)); 160501e04c3fSmrg } else { 160601e04c3fSmrg /* The IF instruction's JIP should point just past the ELSE */ 160701e04c3fSmrg brw_inst_set_jip(devinfo, if_inst, br * (else_inst - if_inst + 1)); 160801e04c3fSmrg /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */ 160901e04c3fSmrg brw_inst_set_uip(devinfo, if_inst, br * (endif_inst - if_inst)); 161001e04c3fSmrg brw_inst_set_jip(devinfo, else_inst, br * (endif_inst - else_inst)); 16117ec681f3Smrg if (devinfo->ver >= 8) { 161201e04c3fSmrg /* Since we don't set branch_ctrl, the ELSE's JIP and UIP both 161301e04c3fSmrg * should point to ENDIF. 161401e04c3fSmrg */ 161501e04c3fSmrg brw_inst_set_uip(devinfo, else_inst, br * (endif_inst - else_inst)); 161601e04c3fSmrg } 161701e04c3fSmrg } 161801e04c3fSmrg } 161901e04c3fSmrg} 162001e04c3fSmrg 162101e04c3fSmrgvoid 162201e04c3fSmrgbrw_ELSE(struct brw_codegen *p) 162301e04c3fSmrg{ 16247ec681f3Smrg const struct intel_device_info *devinfo = p->devinfo; 162501e04c3fSmrg brw_inst *insn; 162601e04c3fSmrg 162701e04c3fSmrg insn = next_insn(p, BRW_OPCODE_ELSE); 162801e04c3fSmrg 16297ec681f3Smrg if (devinfo->ver < 6) { 163001e04c3fSmrg brw_set_dest(p, insn, brw_ip_reg()); 163101e04c3fSmrg brw_set_src0(p, insn, brw_ip_reg()); 163201e04c3fSmrg brw_set_src1(p, insn, brw_imm_d(0x0)); 16337ec681f3Smrg } else if (devinfo->ver == 6) { 163401e04c3fSmrg brw_set_dest(p, insn, brw_imm_w(0)); 16357ec681f3Smrg brw_inst_set_gfx6_jump_count(devinfo, insn, 0); 163601e04c3fSmrg brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 163701e04c3fSmrg brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 16387ec681f3Smrg } else if (devinfo->ver == 7) { 163901e04c3fSmrg brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 164001e04c3fSmrg brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 164101e04c3fSmrg brw_set_src1(p, 
insn, brw_imm_w(0)); 164201e04c3fSmrg brw_inst_set_jip(devinfo, insn, 0); 164301e04c3fSmrg brw_inst_set_uip(devinfo, insn, 0); 164401e04c3fSmrg } else { 164501e04c3fSmrg brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 16467ec681f3Smrg if (devinfo->ver < 12) 16477ec681f3Smrg brw_set_src0(p, insn, brw_imm_d(0)); 164801e04c3fSmrg brw_inst_set_jip(devinfo, insn, 0); 164901e04c3fSmrg brw_inst_set_uip(devinfo, insn, 0); 165001e04c3fSmrg } 165101e04c3fSmrg 165201e04c3fSmrg brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE); 165301e04c3fSmrg brw_inst_set_mask_control(devinfo, insn, BRW_MASK_ENABLE); 16547ec681f3Smrg if (!p->single_program_flow && devinfo->ver < 6) 165501e04c3fSmrg brw_inst_set_thread_control(devinfo, insn, BRW_THREAD_SWITCH); 165601e04c3fSmrg 165701e04c3fSmrg push_if_stack(p, insn); 165801e04c3fSmrg} 165901e04c3fSmrg 166001e04c3fSmrgvoid 166101e04c3fSmrgbrw_ENDIF(struct brw_codegen *p) 166201e04c3fSmrg{ 16637ec681f3Smrg const struct intel_device_info *devinfo = p->devinfo; 166401e04c3fSmrg brw_inst *insn = NULL; 166501e04c3fSmrg brw_inst *else_inst = NULL; 166601e04c3fSmrg brw_inst *if_inst = NULL; 166701e04c3fSmrg brw_inst *tmp; 166801e04c3fSmrg bool emit_endif = true; 166901e04c3fSmrg 167001e04c3fSmrg /* In single program flow mode, we can express IF and ELSE instructions 167101e04c3fSmrg * equivalently as ADD instructions that operate on IP. On platforms prior 16727ec681f3Smrg * to Gfx6, flow control instructions cause an implied thread switch, so 167301e04c3fSmrg * this is a significant savings. 167401e04c3fSmrg * 16757ec681f3Smrg * However, on Gfx6, writing to IP doesn't work in single program flow mode 167601e04c3fSmrg * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may 167701e04c3fSmrg * not be updated by non-flow control instructions."). And on later 167801e04c3fSmrg * platforms, there is no significant benefit to converting control flow 16797ec681f3Smrg * instructions to conditional ADDs. 
So we only do this trick on Gfx4 and 16807ec681f3Smrg * Gfx5. 168101e04c3fSmrg */ 16827ec681f3Smrg if (devinfo->ver < 6 && p->single_program_flow) 168301e04c3fSmrg emit_endif = false; 168401e04c3fSmrg 168501e04c3fSmrg /* 168601e04c3fSmrg * A single next_insn() may change the base address of instruction store 168701e04c3fSmrg * memory(p->store), so call it first before referencing the instruction 168801e04c3fSmrg * store pointer from an index 168901e04c3fSmrg */ 169001e04c3fSmrg if (emit_endif) 169101e04c3fSmrg insn = next_insn(p, BRW_OPCODE_ENDIF); 169201e04c3fSmrg 169301e04c3fSmrg /* Pop the IF and (optional) ELSE instructions from the stack */ 169401e04c3fSmrg p->if_depth_in_loop[p->loop_stack_depth]--; 169501e04c3fSmrg tmp = pop_if_stack(p); 169601e04c3fSmrg if (brw_inst_opcode(devinfo, tmp) == BRW_OPCODE_ELSE) { 169701e04c3fSmrg else_inst = tmp; 169801e04c3fSmrg tmp = pop_if_stack(p); 169901e04c3fSmrg } 170001e04c3fSmrg if_inst = tmp; 170101e04c3fSmrg 170201e04c3fSmrg if (!emit_endif) { 170301e04c3fSmrg /* ENDIF is useless; don't bother emitting it. 
*/ 170401e04c3fSmrg convert_IF_ELSE_to_ADD(p, if_inst, else_inst); 170501e04c3fSmrg return; 170601e04c3fSmrg } 170701e04c3fSmrg 17087ec681f3Smrg if (devinfo->ver < 6) { 170901e04c3fSmrg brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 171001e04c3fSmrg brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 171101e04c3fSmrg brw_set_src1(p, insn, brw_imm_d(0x0)); 17127ec681f3Smrg } else if (devinfo->ver == 6) { 171301e04c3fSmrg brw_set_dest(p, insn, brw_imm_w(0)); 171401e04c3fSmrg brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 171501e04c3fSmrg brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 17167ec681f3Smrg } else if (devinfo->ver == 7) { 171701e04c3fSmrg brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 171801e04c3fSmrg brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 171901e04c3fSmrg brw_set_src1(p, insn, brw_imm_w(0)); 172001e04c3fSmrg } else { 172101e04c3fSmrg brw_set_src0(p, insn, brw_imm_d(0)); 172201e04c3fSmrg } 172301e04c3fSmrg 172401e04c3fSmrg brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE); 172501e04c3fSmrg brw_inst_set_mask_control(devinfo, insn, BRW_MASK_ENABLE); 17267ec681f3Smrg if (devinfo->ver < 6) 172701e04c3fSmrg brw_inst_set_thread_control(devinfo, insn, BRW_THREAD_SWITCH); 172801e04c3fSmrg 172901e04c3fSmrg /* Also pop item off the stack in the endif instruction: */ 17307ec681f3Smrg if (devinfo->ver < 6) { 17317ec681f3Smrg brw_inst_set_gfx4_jump_count(devinfo, insn, 0); 17327ec681f3Smrg brw_inst_set_gfx4_pop_count(devinfo, insn, 1); 17337ec681f3Smrg } else if (devinfo->ver == 6) { 17347ec681f3Smrg brw_inst_set_gfx6_jump_count(devinfo, insn, 2); 173501e04c3fSmrg } else { 173601e04c3fSmrg brw_inst_set_jip(devinfo, insn, 2); 173701e04c3fSmrg } 173801e04c3fSmrg patch_IF_ELSE(p, if_inst, else_inst, insn); 173901e04c3fSmrg} 174001e04c3fSmrg 174101e04c3fSmrgbrw_inst * 174201e04c3fSmrgbrw_BREAK(struct brw_codegen *p) 174301e04c3fSmrg{ 
17447ec681f3Smrg const struct intel_device_info *devinfo = p->devinfo; 174501e04c3fSmrg brw_inst *insn; 174601e04c3fSmrg 174701e04c3fSmrg insn = next_insn(p, BRW_OPCODE_BREAK); 17487ec681f3Smrg if (devinfo->ver >= 8) { 174901e04c3fSmrg brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 175001e04c3fSmrg brw_set_src0(p, insn, brw_imm_d(0x0)); 17517ec681f3Smrg } else if (devinfo->ver >= 6) { 175201e04c3fSmrg brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 175301e04c3fSmrg brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 175401e04c3fSmrg brw_set_src1(p, insn, brw_imm_d(0x0)); 175501e04c3fSmrg } else { 175601e04c3fSmrg brw_set_dest(p, insn, brw_ip_reg()); 175701e04c3fSmrg brw_set_src0(p, insn, brw_ip_reg()); 175801e04c3fSmrg brw_set_src1(p, insn, brw_imm_d(0x0)); 17597ec681f3Smrg brw_inst_set_gfx4_pop_count(devinfo, insn, 176001e04c3fSmrg p->if_depth_in_loop[p->loop_stack_depth]); 176101e04c3fSmrg } 176201e04c3fSmrg brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE); 176301e04c3fSmrg brw_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p)); 176401e04c3fSmrg 176501e04c3fSmrg return insn; 176601e04c3fSmrg} 176701e04c3fSmrg 176801e04c3fSmrgbrw_inst * 176901e04c3fSmrgbrw_CONT(struct brw_codegen *p) 177001e04c3fSmrg{ 17717ec681f3Smrg const struct intel_device_info *devinfo = p->devinfo; 177201e04c3fSmrg brw_inst *insn; 177301e04c3fSmrg 177401e04c3fSmrg insn = next_insn(p, BRW_OPCODE_CONTINUE); 177501e04c3fSmrg brw_set_dest(p, insn, brw_ip_reg()); 17767ec681f3Smrg if (devinfo->ver >= 8) { 177701e04c3fSmrg brw_set_src0(p, insn, brw_imm_d(0x0)); 177801e04c3fSmrg } else { 177901e04c3fSmrg brw_set_src0(p, insn, brw_ip_reg()); 178001e04c3fSmrg brw_set_src1(p, insn, brw_imm_d(0x0)); 178101e04c3fSmrg } 178201e04c3fSmrg 17837ec681f3Smrg if (devinfo->ver < 6) { 17847ec681f3Smrg brw_inst_set_gfx4_pop_count(devinfo, insn, 178501e04c3fSmrg p->if_depth_in_loop[p->loop_stack_depth]); 178601e04c3fSmrg } 
178701e04c3fSmrg brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE); 178801e04c3fSmrg brw_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p)); 178901e04c3fSmrg return insn; 179001e04c3fSmrg} 179101e04c3fSmrg 179201e04c3fSmrgbrw_inst * 17937ec681f3Smrgbrw_HALT(struct brw_codegen *p) 179401e04c3fSmrg{ 17957ec681f3Smrg const struct intel_device_info *devinfo = p->devinfo; 179601e04c3fSmrg brw_inst *insn; 179701e04c3fSmrg 179801e04c3fSmrg insn = next_insn(p, BRW_OPCODE_HALT); 179901e04c3fSmrg brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 18007ec681f3Smrg if (devinfo->ver < 6) { 18017ec681f3Smrg /* From the Gfx4 PRM: 18027ec681f3Smrg * 18037ec681f3Smrg * "IP register must be put (for example, by the assembler) at <dst> 18047ec681f3Smrg * and <src0> locations. 18057ec681f3Smrg */ 18067ec681f3Smrg brw_set_dest(p, insn, brw_ip_reg()); 18077ec681f3Smrg brw_set_src0(p, insn, brw_ip_reg()); 18087ec681f3Smrg brw_set_src1(p, insn, brw_imm_d(0x0)); /* exitcode updated later. */ 18097ec681f3Smrg } else if (devinfo->ver < 8) { 181001e04c3fSmrg brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 181101e04c3fSmrg brw_set_src1(p, insn, brw_imm_d(0x0)); /* UIP and JIP, updated later. */ 18127ec681f3Smrg } else if (devinfo->ver < 12) { 18137ec681f3Smrg brw_set_src0(p, insn, brw_imm_d(0x0)); 181401e04c3fSmrg } 181501e04c3fSmrg 181601e04c3fSmrg brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE); 181701e04c3fSmrg brw_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p)); 181801e04c3fSmrg return insn; 181901e04c3fSmrg} 182001e04c3fSmrg 182101e04c3fSmrg/* DO/WHILE loop: 182201e04c3fSmrg * 182301e04c3fSmrg * The DO/WHILE is just an unterminated loop -- break or continue are 182401e04c3fSmrg * used for control within the loop. We have a few ways they can be 182501e04c3fSmrg * done. 
182601e04c3fSmrg * 182701e04c3fSmrg * For uniform control flow, the WHILE is just a jump, so ADD ip, ip, 182801e04c3fSmrg * jip and no DO instruction. 182901e04c3fSmrg * 18307ec681f3Smrg * For non-uniform control flow pre-gfx6, there's a DO instruction to 183101e04c3fSmrg * push the mask, and a WHILE to jump back, and BREAK to get out and 183201e04c3fSmrg * pop the mask. 183301e04c3fSmrg * 18347ec681f3Smrg * For gfx6, there's no more mask stack, so no need for DO. WHILE 183501e04c3fSmrg * just points back to the first instruction of the loop. 183601e04c3fSmrg */ 183701e04c3fSmrgbrw_inst * 183801e04c3fSmrgbrw_DO(struct brw_codegen *p, unsigned execute_size) 183901e04c3fSmrg{ 18407ec681f3Smrg const struct intel_device_info *devinfo = p->devinfo; 184101e04c3fSmrg 18427ec681f3Smrg if (devinfo->ver >= 6 || p->single_program_flow) { 184301e04c3fSmrg push_loop_stack(p, &p->store[p->nr_insn]); 184401e04c3fSmrg return &p->store[p->nr_insn]; 184501e04c3fSmrg } else { 184601e04c3fSmrg brw_inst *insn = next_insn(p, BRW_OPCODE_DO); 184701e04c3fSmrg 184801e04c3fSmrg push_loop_stack(p, insn); 184901e04c3fSmrg 185001e04c3fSmrg /* Override the defaults for this instruction: 185101e04c3fSmrg */ 185201e04c3fSmrg brw_set_dest(p, insn, brw_null_reg()); 185301e04c3fSmrg brw_set_src0(p, insn, brw_null_reg()); 185401e04c3fSmrg brw_set_src1(p, insn, brw_null_reg()); 185501e04c3fSmrg 185601e04c3fSmrg brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE); 185701e04c3fSmrg brw_inst_set_exec_size(devinfo, insn, execute_size); 185801e04c3fSmrg brw_inst_set_pred_control(devinfo, insn, BRW_PREDICATE_NONE); 185901e04c3fSmrg 186001e04c3fSmrg return insn; 186101e04c3fSmrg } 186201e04c3fSmrg} 186301e04c3fSmrg 186401e04c3fSmrg/** 18657ec681f3Smrg * For pre-gfx6, we patch BREAK/CONT instructions to point at the WHILE 186601e04c3fSmrg * instruction here. 
186701e04c3fSmrg * 18687ec681f3Smrg * For gfx6+, see brw_set_uip_jip(), which doesn't care so much about the loop 186901e04c3fSmrg * nesting, since it can always just point to the end of the block/current loop. 187001e04c3fSmrg */ 187101e04c3fSmrgstatic void 187201e04c3fSmrgbrw_patch_break_cont(struct brw_codegen *p, brw_inst *while_inst) 187301e04c3fSmrg{ 18747ec681f3Smrg const struct intel_device_info *devinfo = p->devinfo; 187501e04c3fSmrg brw_inst *do_inst = get_inner_do_insn(p); 187601e04c3fSmrg brw_inst *inst; 187701e04c3fSmrg unsigned br = brw_jump_scale(devinfo); 187801e04c3fSmrg 18797ec681f3Smrg assert(devinfo->ver < 6); 188001e04c3fSmrg 188101e04c3fSmrg for (inst = while_inst - 1; inst != do_inst; inst--) { 188201e04c3fSmrg /* If the jump count is != 0, that means that this instruction has already 188301e04c3fSmrg * been patched because it's part of a loop inside of the one we're 188401e04c3fSmrg * patching. 188501e04c3fSmrg */ 188601e04c3fSmrg if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_BREAK && 18877ec681f3Smrg brw_inst_gfx4_jump_count(devinfo, inst) == 0) { 18887ec681f3Smrg brw_inst_set_gfx4_jump_count(devinfo, inst, br*((while_inst - inst) + 1)); 188901e04c3fSmrg } else if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_CONTINUE && 18907ec681f3Smrg brw_inst_gfx4_jump_count(devinfo, inst) == 0) { 18917ec681f3Smrg brw_inst_set_gfx4_jump_count(devinfo, inst, br * (while_inst - inst)); 189201e04c3fSmrg } 189301e04c3fSmrg } 189401e04c3fSmrg} 189501e04c3fSmrg 189601e04c3fSmrgbrw_inst * 189701e04c3fSmrgbrw_WHILE(struct brw_codegen *p) 189801e04c3fSmrg{ 18997ec681f3Smrg const struct intel_device_info *devinfo = p->devinfo; 190001e04c3fSmrg brw_inst *insn, *do_insn; 190101e04c3fSmrg unsigned br = brw_jump_scale(devinfo); 190201e04c3fSmrg 19037ec681f3Smrg if (devinfo->ver >= 6) { 190401e04c3fSmrg insn = next_insn(p, BRW_OPCODE_WHILE); 190501e04c3fSmrg do_insn = get_inner_do_insn(p); 190601e04c3fSmrg 19077ec681f3Smrg if (devinfo->ver >= 8) { 190801e04c3fSmrg 
brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 19097ec681f3Smrg if (devinfo->ver < 12) 19107ec681f3Smrg brw_set_src0(p, insn, brw_imm_d(0)); 191101e04c3fSmrg brw_inst_set_jip(devinfo, insn, br * (do_insn - insn)); 19127ec681f3Smrg } else if (devinfo->ver == 7) { 191301e04c3fSmrg brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 191401e04c3fSmrg brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 191501e04c3fSmrg brw_set_src1(p, insn, brw_imm_w(0)); 191601e04c3fSmrg brw_inst_set_jip(devinfo, insn, br * (do_insn - insn)); 191701e04c3fSmrg } else { 191801e04c3fSmrg brw_set_dest(p, insn, brw_imm_w(0)); 19197ec681f3Smrg brw_inst_set_gfx6_jump_count(devinfo, insn, br * (do_insn - insn)); 192001e04c3fSmrg brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 192101e04c3fSmrg brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 192201e04c3fSmrg } 192301e04c3fSmrg 192401e04c3fSmrg brw_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p)); 192501e04c3fSmrg 192601e04c3fSmrg } else { 192701e04c3fSmrg if (p->single_program_flow) { 192801e04c3fSmrg insn = next_insn(p, BRW_OPCODE_ADD); 192901e04c3fSmrg do_insn = get_inner_do_insn(p); 193001e04c3fSmrg 193101e04c3fSmrg brw_set_dest(p, insn, brw_ip_reg()); 193201e04c3fSmrg brw_set_src0(p, insn, brw_ip_reg()); 193301e04c3fSmrg brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16)); 193401e04c3fSmrg brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1); 193501e04c3fSmrg } else { 193601e04c3fSmrg insn = next_insn(p, BRW_OPCODE_WHILE); 193701e04c3fSmrg do_insn = get_inner_do_insn(p); 193801e04c3fSmrg 193901e04c3fSmrg assert(brw_inst_opcode(devinfo, do_insn) == BRW_OPCODE_DO); 194001e04c3fSmrg 194101e04c3fSmrg brw_set_dest(p, insn, brw_ip_reg()); 194201e04c3fSmrg brw_set_src0(p, insn, brw_ip_reg()); 194301e04c3fSmrg brw_set_src1(p, insn, brw_imm_d(0)); 194401e04c3fSmrg 194501e04c3fSmrg brw_inst_set_exec_size(devinfo, insn, 
brw_inst_exec_size(devinfo, do_insn)); 19467ec681f3Smrg brw_inst_set_gfx4_jump_count(devinfo, insn, br * (do_insn - insn + 1)); 19477ec681f3Smrg brw_inst_set_gfx4_pop_count(devinfo, insn, 0); 194801e04c3fSmrg 194901e04c3fSmrg brw_patch_break_cont(p, insn); 195001e04c3fSmrg } 195101e04c3fSmrg } 195201e04c3fSmrg brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE); 195301e04c3fSmrg 195401e04c3fSmrg p->loop_stack_depth--; 195501e04c3fSmrg 195601e04c3fSmrg return insn; 195701e04c3fSmrg} 195801e04c3fSmrg 195901e04c3fSmrg/* FORWARD JUMPS: 196001e04c3fSmrg */ 196101e04c3fSmrgvoid brw_land_fwd_jump(struct brw_codegen *p, int jmp_insn_idx) 196201e04c3fSmrg{ 19637ec681f3Smrg const struct intel_device_info *devinfo = p->devinfo; 196401e04c3fSmrg brw_inst *jmp_insn = &p->store[jmp_insn_idx]; 196501e04c3fSmrg unsigned jmpi = 1; 196601e04c3fSmrg 19677ec681f3Smrg if (devinfo->ver >= 5) 196801e04c3fSmrg jmpi = 2; 196901e04c3fSmrg 197001e04c3fSmrg assert(brw_inst_opcode(devinfo, jmp_insn) == BRW_OPCODE_JMPI); 197101e04c3fSmrg assert(brw_inst_src1_reg_file(devinfo, jmp_insn) == BRW_IMMEDIATE_VALUE); 197201e04c3fSmrg 19737ec681f3Smrg brw_inst_set_gfx4_jump_count(devinfo, jmp_insn, 197401e04c3fSmrg jmpi * (p->nr_insn - jmp_insn_idx - 1)); 197501e04c3fSmrg} 197601e04c3fSmrg 197701e04c3fSmrg/* To integrate with the above, it makes sense that the comparison 197801e04c3fSmrg * instruction should populate the flag register. It might be simpler 197901e04c3fSmrg * just to use the flag reg for most WM tasks? 
 */
void brw_CMP(struct brw_codegen *p,
             struct brw_reg dest,
             unsigned conditional,
             struct brw_reg src0,
             struct brw_reg src1)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_inst *insn = next_insn(p, BRW_OPCODE_CMP);

   brw_inst_set_cond_modifier(devinfo, insn, conditional);
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);

   /* Item WaCMPInstNullDstForcesThreadSwitch in the Haswell Bspec workarounds
    * page says:
    *    "Any CMP instruction with a null destination must use a {switch}."
    *
    * It also applies to other Gfx7 platforms (IVB, BYT) even though it isn't
    * mentioned on their work-arounds pages.
    */
   if (devinfo->ver == 7) {
      if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
          dest.nr == BRW_ARF_NULL) {
         brw_inst_set_thread_control(devinfo, insn, BRW_THREAD_SWITCH);
      }
   }
}

/**
 * Emit a CMPN instruction.  Same shape as brw_CMP above, and it shares the
 * same Gfx7 null-destination {Switch} requirement (see the PRM citation in
 * the body).
 */
void brw_CMPN(struct brw_codegen *p,
              struct brw_reg dest,
              unsigned conditional,
              struct brw_reg src0,
              struct brw_reg src1)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_inst *insn = next_insn(p, BRW_OPCODE_CMPN);

   brw_inst_set_cond_modifier(devinfo, insn, conditional);
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);

   /* Page 166 of the Ivy Bridge PRM Volume 4 part 3 (Execution Unit ISA)
    * says:
    *
    *    If the destination is the null register, the {Switch} instruction
    *    option must be used.
    *
    * Page 77 of the Haswell PRM Volume 2b contains the same text.
    */
   if (devinfo->ver == 7) {
      if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
          dest.nr == BRW_ARF_NULL) {
         brw_inst_set_thread_control(devinfo, insn, BRW_THREAD_SWITCH);
      }
   }
}

/***********************************************************************
 * Helpers for the various SEND message types:
 */

/** Extended math function, float[8].
 *
 * Pre-Gfx6 only (asserted below): math is a SEND message to the shared
 * math function unit rather than a native MATH instruction.
 */
void gfx4_math(struct brw_codegen *p,
               struct brw_reg dest,
               unsigned function,
               unsigned msg_reg_nr,
               struct brw_reg src,
               unsigned precision )
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
   unsigned data_type;
   /* Scalar-region sources use the scalar math data type; everything else
    * is treated as a vector operand.
    */
   if (has_scalar_region(src)) {
      data_type = BRW_MATH_DATA_SCALAR;
   } else {
      data_type = BRW_MATH_DATA_VECTOR;
   }

   assert(devinfo->ver < 6);

   /* Example code doesn't set predicate_control for send
    * instructions.
    */
   brw_inst_set_pred_control(devinfo, insn, 0);
   brw_inst_set_base_mrf(devinfo, insn, msg_reg_nr);

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src);
   brw_set_math_message(p,
                        insn,
                        function,
                        src.type == BRW_REGISTER_TYPE_D,
                        precision,
                        data_type);
}

/**
 * Native MATH instruction (Gfx6+), replacing the pre-Gfx6 math SEND.
 *
 * Validates the per-generation operand restrictions with asserts:
 * INT DIV variants take integer sources with no source modifiers, all
 * other functions take F (or HF on Gfx9+) sources, and Gfx6 additionally
 * forbids source modifiers and strided sources entirely.
 */
void gfx6_math(struct brw_codegen *p,
               struct brw_reg dest,
               unsigned function,
               struct brw_reg src0,
               struct brw_reg src1)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_inst *insn = next_insn(p, BRW_OPCODE_MATH);

   assert(devinfo->ver >= 6);

   assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
          (devinfo->ver >= 7 && dest.file == BRW_MESSAGE_REGISTER_FILE));

   assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
   if (devinfo->ver == 6) {
      assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
      assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
   }

   if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
       function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
       function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
      assert(src0.type != BRW_REGISTER_TYPE_F);
      assert(src1.type != BRW_REGISTER_TYPE_F);
      assert(src1.file == BRW_GENERAL_REGISTER_FILE ||
             (devinfo->ver >= 8 && src1.file == BRW_IMMEDIATE_VALUE));
      /* From BSpec 6647/47428 "[Instruction] Extended Math Function":
       *    INT DIV function does not support source modifiers.
       */
      assert(!src0.negate);
      assert(!src0.abs);
      assert(!src1.negate);
      assert(!src1.abs);
   } else {
      assert(src0.type == BRW_REGISTER_TYPE_F ||
             (src0.type == BRW_REGISTER_TYPE_HF && devinfo->ver >= 9));
      assert(src1.type == BRW_REGISTER_TYPE_F ||
             (src1.type == BRW_REGISTER_TYPE_HF && devinfo->ver >= 9));
   }

   /* Source modifiers are ignored for extended math instructions on Gfx6. */
   if (devinfo->ver == 6) {
      assert(!src0.negate);
      assert(!src0.abs);
      assert(!src1.negate);
      assert(!src1.abs);
   }

   brw_inst_set_math_function(devinfo, insn, function);

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);
}

/**
 * Return the right surface index to access the thread scratch space using
 * stateless dataport messages.
 */
unsigned
brw_scratch_surface_idx(const struct brw_codegen *p)
{
   /* The scratch space is thread-local so IA coherency is unnecessary. */
   if (p->devinfo->ver >= 8)
      return GFX8_BTI_STATELESS_NON_COHERENT;
   else
      return BRW_BTI_STATELESS;
}

/**
 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
 * using a constant offset per channel.
 *
 * The offset must be aligned to oword size (16 bytes).  Used for
 * register spilling.
 */
void brw_oword_block_write_scratch(struct brw_codegen *p,
                                   struct brw_reg mrf,
                                   int num_regs,
                                   unsigned offset)
{
   const struct intel_device_info *devinfo = p->devinfo;
   /* Target cache moved across generations: data cache on Gfx7+, render
    * cache on Gfx6, legacy dataport write before that.
    */
   const unsigned target_cache =
      (devinfo->ver >= 7 ? GFX7_SFID_DATAPORT_DATA_CACHE :
       devinfo->ver >= 6 ? GFX6_SFID_DATAPORT_RENDER_CACHE :
       BRW_SFID_DATAPORT_WRITE);
   const struct tgl_swsb swsb = brw_get_default_swsb(p);
   uint32_t msg_type;

   /* On Gfx6+ the message header offset field is in owords, not bytes. */
   if (devinfo->ver >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   /* One header register plus the payload registers. */
   const unsigned mlen = 1 + num_regs;

   /* Set up the message header.  This is g0, with g0.2 filled with
    * the offset.  We don't want to leave our offset around in g0 or
    * it'll screw up texture samples, so set it up inside the message
    * reg.
    */
   {
      brw_push_insn_state(p);
      brw_set_default_exec_size(p, BRW_EXECUTE_8);
      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
      brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
      brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_set_default_exec_size(p, BRW_EXECUTE_1);
      brw_set_default_swsb(p, tgl_swsb_null());
      brw_MOV(p,
              retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
                                  mrf.nr,
                                  2), BRW_REGISTER_TYPE_UD),
              brw_imm_ud(offset));

      brw_pop_insn_state(p);
      brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
   }

   {
      struct brw_reg dest;
      brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
      int send_commit_msg;
      struct brw_reg src_header = retype(brw_vec8_grf(0, 0),
                                         BRW_REGISTER_TYPE_UW);

      brw_inst_set_sfid(devinfo, insn, target_cache);
      brw_inst_set_compression(devinfo, insn, false);

      if (brw_inst_exec_size(devinfo, insn) >= 16)
         src_header = vec16(src_header);

      assert(brw_inst_pred_control(devinfo, insn) == BRW_PREDICATE_NONE);
      if (devinfo->ver < 6)
         brw_inst_set_base_mrf(devinfo, insn, mrf.nr);

      /* Until gfx6, writes followed by reads from the same location
       * are not guaranteed to be ordered unless write_commit is set.
       * If set, then a no-op write is issued to the destination
       * register to set a dependency, and a read from the destination
       * can be used to ensure the ordering.
       *
       * For gfx6, only writes between different threads need ordering
       * protection.  Our use of DP writes is all about register
       * spilling within a thread.
       */
      if (devinfo->ver >= 6) {
         dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
         send_commit_msg = 0;
      } else {
         dest = src_header;
         send_commit_msg = 1;
      }

      brw_set_dest(p, insn, dest);
      if (devinfo->ver >= 6) {
         brw_set_src0(p, insn, mrf);
      } else {
         brw_set_src0(p, insn, brw_null_reg());
      }

      if (devinfo->ver >= 6)
         msg_type = GFX6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
      else
         msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;

      brw_set_desc(p, insn,
                   brw_message_desc(devinfo, mlen, send_commit_msg, true) |
                   brw_dp_write_desc(devinfo, brw_scratch_surface_idx(p),
                                     BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_regs * 8),
                                     msg_type, send_commit_msg));
   }
}


/**
 * Read a block of owords (half a GRF each) from the scratch buffer
 * using a constant index per channel.
 *
 * Offset must be aligned to oword size (16 bytes).  Used for register
 * spilling.
 */
void
brw_oword_block_read_scratch(struct brw_codegen *p,
                             struct brw_reg dest,
                             struct brw_reg mrf,
                             int num_regs,
                             unsigned offset)
{
   const struct intel_device_info *devinfo = p->devinfo;
   const struct tgl_swsb swsb = brw_get_default_swsb(p);

   /* On Gfx6+ the message header offset field is in owords, not bytes. */
   if (devinfo->ver >= 6)
      offset /= 16;

   if (p->devinfo->ver >= 7) {
      /* On gen 7 and above, we no longer have message registers and we can
       * send from any register we want.  By using the destination register
       * for the message, we guarantee that the implied message write won't
       * accidentally overwrite anything.  This has been a problem because
       * the MRF registers and source for the final FB write are both fixed
       * and may overlap.
       */
      mrf = retype(dest, BRW_REGISTER_TYPE_UD);
   } else {
      mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
   }
   dest = retype(dest, BRW_REGISTER_TYPE_UW);

   const unsigned rlen = num_regs;
   const unsigned target_cache =
      (devinfo->ver >= 7 ? GFX7_SFID_DATAPORT_DATA_CACHE :
       devinfo->ver >= 6 ? GFX6_SFID_DATAPORT_RENDER_CACHE :
       BRW_SFID_DATAPORT_READ);

   /* Build the message header (g0 copy with the offset in element 2). */
   {
      brw_push_insn_state(p);
      brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));
      brw_set_default_exec_size(p, BRW_EXECUTE_8);
      brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
      brw_set_default_mask_control(p, BRW_MASK_DISABLE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_set_default_exec_size(p, BRW_EXECUTE_1);
      brw_set_default_swsb(p, tgl_swsb_null());
      brw_MOV(p, get_element_ud(mrf, 2), brw_imm_ud(offset));

      brw_pop_insn_state(p);
      brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
   }

   /* Emit the actual oword block read SEND. */
   {
      brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);

      brw_inst_set_sfid(devinfo, insn, target_cache);
      assert(brw_inst_pred_control(devinfo, insn) == 0);
      brw_inst_set_compression(devinfo, insn, false);

      brw_set_dest(p, insn, dest); /* UW? */
      if (devinfo->ver >= 6) {
         brw_set_src0(p, insn, mrf);
      } else {
         brw_set_src0(p, insn, brw_null_reg());
         brw_inst_set_base_mrf(devinfo, insn, mrf.nr);
      }

      brw_set_desc(p, insn,
                   brw_message_desc(devinfo, 1, rlen, true) |
                   brw_dp_read_desc(devinfo, brw_scratch_surface_idx(p),
                                    BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_regs * 8),
                                    BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
                                    BRW_DATAPORT_READ_TARGET_RENDER_CACHE));
   }
}

/**
 * Read a block of registers from scratch space using the Gfx7+ HWord
 * scratch-block dataport message (no explicit header setup needed beyond
 * the implicit g0 header).
 */
void
gfx7_block_read_scratch(struct brw_codegen *p,
                        struct brw_reg dest,
                        int num_regs,
                        unsigned offset)
{
   brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
   assert(brw_inst_pred_control(p->devinfo, insn) == BRW_PREDICATE_NONE);

   brw_set_dest(p, insn, retype(dest, BRW_REGISTER_TYPE_UW));

   /* The HW requires that the header is present; this is to get the g0.5
    * scratch offset.
    */
   brw_set_src0(p, insn, brw_vec8_grf(0, 0));

   /* According to the docs, offset is "A 12-bit HWord offset into the memory
    * Immediate Memory buffer as specified by binding table 0xFF."  An HWORD
    * is 32 bytes, which happens to be the size of a register.
    */
   offset /= REG_SIZE;
   assert(offset < (1 << 12));

   gfx7_set_dp_scratch_message(p, insn,
                               false, /* scratch read */
                               false, /* OWords */
                               false, /* invalidate after read */
                               num_regs,
                               offset,
                               1,        /* mlen: just g0 */
                               num_regs, /* rlen */
                               true);    /* header present */
}

/**
 * Read float[4] vectors from the data port constant cache.
 * Location (in buffer) should be a multiple of 16.
 * Used for fetching shader constants.
 */
void brw_oword_block_read(struct brw_codegen *p,
                          struct brw_reg dest,
                          struct brw_reg mrf,
                          uint32_t offset,
                          uint32_t bind_table_index)
{
   const struct intel_device_info *devinfo = p->devinfo;
   const unsigned target_cache =
      (devinfo->ver >= 6 ? GFX6_SFID_DATAPORT_CONSTANT_CACHE :
       BRW_SFID_DATAPORT_READ);
   const unsigned exec_size = 1 << brw_get_default_exec_size(p);
   const struct tgl_swsb swsb = brw_get_default_swsb(p);

   /* On newer hardware, offset is in units of owords. */
   if (devinfo->ver >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   brw_push_insn_state(p);
   brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
   brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_default_mask_control(p, BRW_MASK_DISABLE);

   /* Build the message header: a g0 copy with the oword offset in g0.2. */
   brw_push_insn_state(p);
   brw_set_default_exec_size(p, BRW_EXECUTE_8);
   brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));
   brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

   /* set message header global offset field (reg 0, element 2) */
   brw_set_default_exec_size(p, BRW_EXECUTE_1);
   brw_set_default_swsb(p, tgl_swsb_null());
   brw_MOV(p,
           retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
                               mrf.nr,
                               2), BRW_REGISTER_TYPE_UD),
           brw_imm_ud(offset));
   brw_pop_insn_state(p);

   brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));

   brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);

   brw_inst_set_sfid(devinfo, insn, target_cache);

   /* cast dest to a uword[8] vector */
   dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);

   brw_set_dest(p, insn, dest);
   if (devinfo->ver >= 6) {
      brw_set_src0(p, insn, mrf);
   } else {
      brw_set_src0(p, insn, brw_null_reg());
      brw_inst_set_base_mrf(devinfo, insn, mrf.nr);
   }

   brw_set_desc(p, insn,
                brw_message_desc(devinfo, 1, DIV_ROUND_UP(exec_size, 8), true) |
                brw_dp_read_desc(devinfo, bind_table_index,
                                 BRW_DATAPORT_OWORD_BLOCK_DWORDS(exec_size),
                                 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
                                 BRW_DATAPORT_READ_TARGET_DATA_CACHE));

   brw_pop_insn_state(p);
}

/**
 * Emit a framebuffer (render target) write message.
 *
 * Uses SENDC on Gfx6+ so the write is ordered against prior renders;
 * pre-Gfx6 goes through the MRF-based SEND path with an implied header.
 * Returns the emitted instruction so callers can tweak it further.
 */
brw_inst *
brw_fb_WRITE(struct brw_codegen *p,
             struct brw_reg payload,
             struct brw_reg implied_header,
             unsigned msg_control,
             unsigned binding_table_index,
             unsigned msg_length,
             unsigned response_length,
             bool eot,
             bool last_render_target,
             bool header_present)
{
   const struct intel_device_info *devinfo = p->devinfo;
   const unsigned target_cache =
      (devinfo->ver >= 6 ? GFX6_SFID_DATAPORT_RENDER_CACHE :
       BRW_SFID_DATAPORT_WRITE);
   brw_inst *insn;
   struct brw_reg dest, src0;

   if (brw_get_default_exec_size(p) >= BRW_EXECUTE_16)
      dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
   else
      dest = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);

   if (devinfo->ver >= 6) {
      insn = next_insn(p, BRW_OPCODE_SENDC);
   } else {
      insn = next_insn(p, BRW_OPCODE_SEND);
   }
   brw_inst_set_sfid(devinfo, insn, target_cache);
   brw_inst_set_compression(devinfo, insn, false);

   if (devinfo->ver >= 6) {
      /* headerless version, just submit color payload */
      src0 = payload;
   } else {
      assert(payload.file == BRW_MESSAGE_REGISTER_FILE);
      brw_inst_set_base_mrf(devinfo, insn, payload.nr);
      src0 = implied_header;
   }

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_desc(p, insn,
                brw_message_desc(devinfo, msg_length, response_length,
                                 header_present) |
                brw_fb_write_desc(devinfo, binding_table_index, msg_control,
                                  last_render_target,
                                  false /* coarse_write */));
   brw_inst_set_eot(devinfo, insn, eot);

   return insn;
}

/**
 * Emit a Gfx9+ render-target read (framebuffer fetch) message via SENDC
 * to the render cache.  Returns the emitted instruction.
 */
brw_inst *
gfx9_fb_READ(struct brw_codegen *p,
             struct brw_reg dst,
             struct brw_reg payload,
             unsigned binding_table_index,
             unsigned msg_length,
             unsigned response_length,
             bool per_sample)
{
   const struct intel_device_info *devinfo = p->devinfo;
   assert(devinfo->ver >= 9);
   brw_inst *insn = next_insn(p, BRW_OPCODE_SENDC);

   brw_inst_set_sfid(devinfo, insn, GFX6_SFID_DATAPORT_RENDER_CACHE);
   brw_set_dest(p, insn, dst);
   brw_set_src0(p, insn, payload);
   brw_set_desc(
      p, insn,
      brw_message_desc(devinfo, msg_length, response_length, true) |
      brw_fb_read_desc(devinfo, binding_table_index, 0 /* msg_control */,
                       1 << brw_get_default_exec_size(p), per_sample));
   brw_inst_set_rt_slot_group(devinfo, insn, brw_get_default_group(p) / 16);

   return insn;
}

/**
 * Texture sample instruction.
 * Note: the msg_type plus msg_length values determine exactly what kind
 * of sampling operation is performed.  See volume 4, page 161 of docs.
 */
void brw_SAMPLE(struct brw_codegen *p,
                struct brw_reg dest,
                unsigned msg_reg_nr,
                struct brw_reg src0,
                unsigned binding_table_index,
                unsigned sampler,
                unsigned msg_type,
                unsigned response_length,
                unsigned msg_length,
                unsigned header_present,
                unsigned simd_mode,
                unsigned return_format)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_inst *insn;

   /* msg_reg_nr == -1 means the caller has already placed the payload;
    * otherwise move src0 into the message register first (pre-Gfx6 SEND
    * did this implicitly).
    */
   if (msg_reg_nr != -1)
      gfx6_resolve_implied_move(p, &src0, msg_reg_nr);

   insn = next_insn(p, BRW_OPCODE_SEND);
   brw_inst_set_sfid(devinfo, insn, BRW_SFID_SAMPLER);
   brw_inst_set_pred_control(devinfo, insn, BRW_PREDICATE_NONE); /* XXX */

   /* From the 965 PRM (volume 4, part 1, section 14.2.41):
    *
    *    "Instruction compression is not allowed for this instruction (that
    *     is, send).  The hardware behavior is undefined if this instruction
    *     is set as compressed.  However, compress control can be set to
    *     "SecHalf" to affect the EMask generation."
    *
    * No similar wording is found in later PRMs, but there are examples
    * utilizing send with SecHalf.  More importantly, SIMD8 sampler messages
    * are allowed in SIMD16 mode and they could not work without SecHalf.  For
    * these reasons, we allow BRW_COMPRESSION_2NDHALF here.
    */
   brw_inst_set_compression(devinfo, insn, false);

   if (devinfo->ver < 6)
      brw_inst_set_base_mrf(devinfo, insn, msg_reg_nr);

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_desc(p, insn,
                brw_message_desc(devinfo, msg_length, response_length,
                                 header_present) |
                brw_sampler_desc(devinfo, binding_table_index, sampler,
                                 msg_type, simd_mode, return_format));
}

/* Adjust the message header's sampler state pointer to
 * select the correct group of 16 samplers.
 */
void brw_adjust_sampler_state_pointer(struct brw_codegen *p,
                                      struct brw_reg header,
                                      struct brw_reg sampler_index)
{
   /* The "Sampler Index" field can only store values between 0 and 15.
    * However, we can add an offset to the "Sampler State Pointer"
    * field, effectively selecting a different set of 16 samplers.
    *
    * The "Sampler State Pointer" needs to be aligned to a 32-byte
    * offset, and each sampler state is only 16-bytes, so we can't
    * exclusively use the offset - we have to use both.
    */

   const struct intel_device_info *devinfo = p->devinfo;

   if (sampler_index.file == BRW_IMMEDIATE_VALUE) {
      const int sampler_state_size = 16; /* 16 bytes */
      uint32_t sampler = sampler_index.ud;

      if (sampler >= 16) {
         /* Samplers >= 16 require Haswell or later (verx10 >= 75). */
         assert(devinfo->verx10 >= 75);
         brw_ADD(p,
                 get_element_ud(header, 3),
                 get_element_ud(brw_vec8_grf(0, 0), 3),
                 brw_imm_ud(16 * (sampler / 16) * sampler_state_size));
      }
   } else {
      /* Non-const sampler array indexing case */
      if (devinfo->verx10 <= 70) {
         return;
      }

      struct brw_reg temp = get_element_ud(header, 3);

      brw_push_insn_state(p);
      /* Mask off the group-of-16 bits of the sampler index, scale to a
       * sampler-state-pointer offset (x16 via the AND mask plus SHL 4),
       * and add it to the base pointer from g0.3.
       */
      brw_AND(p, temp, get_element_ud(sampler_index, 0), brw_imm_ud(0x0f0));
      brw_set_default_swsb(p, tgl_swsb_regdist(1));
      brw_SHL(p, temp, temp, brw_imm_ud(4));
      brw_ADD(p,
              get_element_ud(header, 3),
              get_element_ud(brw_vec8_grf(0, 0), 3),
              temp);
      brw_pop_insn_state(p);
   }
}

/* All these variables are pretty confusing - we might be better off
 * using bitmasks and macros for this, in the old style.  Or perhaps
 * just having the caller instantiate the fields in dword3 itself.
 */
void brw_urb_WRITE(struct brw_codegen *p,
                   struct brw_reg dest,
                   unsigned msg_reg_nr,
                   struct brw_reg src0,
                   enum brw_urb_write_flags flags,
                   unsigned msg_length,
                   unsigned response_length,
                   unsigned offset,
                   unsigned swizzle)
{
   const struct intel_device_info *devinfo = p->devinfo;
   brw_inst *insn;

   /* On Gfx6+ SEND cannot take a non-MRF source implicitly; do the move. */
   gfx6_resolve_implied_move(p, &src0, msg_reg_nr);

   if (devinfo->ver >= 7 && !(flags & BRW_URB_WRITE_USE_CHANNEL_MASKS)) {
      /* Enable Channel Masks in the URB_WRITE_HWORD message header */
      brw_push_insn_state(p);
      brw_set_default_access_mode(p, BRW_ALIGN_1);
      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
      brw_set_default_exec_size(p, BRW_EXECUTE_1);
      brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5),
                       BRW_REGISTER_TYPE_UD),
             retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
             brw_imm_ud(0xff00));
      brw_pop_insn_state(p);
   }

   insn = next_insn(p, BRW_OPCODE_SEND);

   assert(msg_length < BRW_MAX_MRF(devinfo->ver));

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, brw_imm_d(0));

   if (devinfo->ver < 6)
      brw_inst_set_base_mrf(devinfo, insn, msg_reg_nr);

   brw_set_urb_message(p,
                       insn,
                       flags,
                       msg_length,
                       response_length,
                       offset,
                       swizzle);
}

/**
 * Emit a SEND with either an immediate or an indirect (register) message
 * descriptor.  For the indirect case the descriptor is OR'd with desc_imm
 * into address register a0.0 first; on Gfx12+ the instruction selects the
 * 32-bit register descriptor directly instead of reading src1.
 */
void
brw_send_indirect_message(struct brw_codegen *p,
                          unsigned sfid,
                          struct brw_reg dst,
                          struct brw_reg payload,
                          struct brw_reg desc,
                          unsigned desc_imm,
                          bool eot)
{
   const struct intel_device_info *devinfo = p->devinfo;
   struct brw_inst *send;

   dst = retype(dst, BRW_REGISTER_TYPE_UW);

   assert(desc.type == BRW_REGISTER_TYPE_UD);

   if (desc.file == BRW_IMMEDIATE_VALUE) {
      send = next_insn(p, BRW_OPCODE_SEND);
      brw_set_src0(p, send, retype(payload, BRW_REGISTER_TYPE_UD));
      brw_set_desc(p, send, desc.ud | desc_imm);
   } else {
      const struct tgl_swsb swsb = brw_get_default_swsb(p);
      struct brw_reg addr = retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD);

      brw_push_insn_state(p);
      brw_set_default_access_mode(p, BRW_ALIGN_1);
      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
      brw_set_default_exec_size(p, BRW_EXECUTE_1);
      brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
      brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));

      /* Load the indirect descriptor to an address register using OR so the
       * caller can specify additional descriptor bits with the desc_imm
       * immediate.
       */
      brw_OR(p, addr, desc, brw_imm_ud(desc_imm));

      brw_pop_insn_state(p);

      brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
      send = next_insn(p, BRW_OPCODE_SEND);
      brw_set_src0(p, send, retype(payload, BRW_REGISTER_TYPE_UD));

      if (devinfo->ver >= 12)
         brw_inst_set_send_sel_reg32_desc(devinfo, send, true);
      else
         brw_set_src1(p, send, addr);
   }

   brw_set_dest(p, send, dst);
   brw_inst_set_sfid(devinfo, send, sfid);
   brw_inst_set_eot(devinfo, send, eot);
}

void
brw_send_indirect_split_message(struct brw_codegen *p,
                                unsigned sfid,
                                struct brw_reg dst,
                                struct brw_reg payload0,
                                struct brw_reg payload1,
                                struct brw_reg desc,
                                unsigned desc_imm,
                                struct brw_reg ex_desc,
                                unsigned ex_desc_imm,
                                bool eot)
{
   const struct intel_device_info *devinfo = p->devinfo;
   struct brw_inst *send;

   dst = retype(dst, BRW_REGISTER_TYPE_UW);

   assert(desc.type == BRW_REGISTER_TYPE_UD);

   if (desc.file == BRW_IMMEDIATE_VALUE) {
      desc.ud |= desc_imm;
   } else {
      const struct tgl_swsb swsb = brw_get_default_swsb(p);
      struct brw_reg addr = retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD);

      brw_push_insn_state(p);
      brw_set_default_access_mode(p, BRW_ALIGN_1);
      brw_set_default_mask_control(p,
BRW_MASK_DISABLE); 27649f464c52Smaya brw_set_default_exec_size(p, BRW_EXECUTE_1); 27659f464c52Smaya brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); 27667ec681f3Smrg brw_set_default_swsb(p, tgl_swsb_src_dep(swsb)); 27679f464c52Smaya 27689f464c52Smaya /* Load the indirect descriptor to an address register using OR so the 27699f464c52Smaya * caller can specify additional descriptor bits with the desc_imm 27709f464c52Smaya * immediate. 27719f464c52Smaya */ 27729f464c52Smaya brw_OR(p, addr, desc, brw_imm_ud(desc_imm)); 27739f464c52Smaya 27749f464c52Smaya brw_pop_insn_state(p); 27759f464c52Smaya desc = addr; 27767ec681f3Smrg 27777ec681f3Smrg brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1)); 27789f464c52Smaya } 277901e04c3fSmrg 27807ec681f3Smrg if (ex_desc.file == BRW_IMMEDIATE_VALUE && 27817ec681f3Smrg (devinfo->ver >= 12 || 27827ec681f3Smrg ((ex_desc.ud | ex_desc_imm) & INTEL_MASK(15, 12)) == 0)) { 27839f464c52Smaya ex_desc.ud |= ex_desc_imm; 27849f464c52Smaya } else { 27857ec681f3Smrg const struct tgl_swsb swsb = brw_get_default_swsb(p); 27869f464c52Smaya struct brw_reg addr = retype(brw_address_reg(2), BRW_REGISTER_TYPE_UD); 27879f464c52Smaya 27889f464c52Smaya brw_push_insn_state(p); 27899f464c52Smaya brw_set_default_access_mode(p, BRW_ALIGN_1); 27909f464c52Smaya brw_set_default_mask_control(p, BRW_MASK_DISABLE); 27919f464c52Smaya brw_set_default_exec_size(p, BRW_EXECUTE_1); 27929f464c52Smaya brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); 27937ec681f3Smrg brw_set_default_swsb(p, tgl_swsb_src_dep(swsb)); 27949f464c52Smaya 27959f464c52Smaya /* Load the indirect extended descriptor to an address register using OR 27969f464c52Smaya * so the caller can specify additional descriptor bits with the 27979f464c52Smaya * desc_imm immediate. 
27989f464c52Smaya * 27999f464c52Smaya * Even though the instruction dispatcher always pulls the SFID and EOT 28009f464c52Smaya * fields from the instruction itself, actual external unit which 28019f464c52Smaya * processes the message gets the SFID and EOT from the extended 28029f464c52Smaya * descriptor which comes from the address register. If we don't OR 28039f464c52Smaya * those two bits in, the external unit may get confused and hang. 28049f464c52Smaya */ 28057ec681f3Smrg unsigned imm_part = ex_desc_imm | sfid | eot << 5; 28067ec681f3Smrg 28077ec681f3Smrg if (ex_desc.file == BRW_IMMEDIATE_VALUE) { 28087ec681f3Smrg /* ex_desc bits 15:12 don't exist in the instruction encoding prior 28097ec681f3Smrg * to Gfx12, so we may have fallen back to an indirect extended 28107ec681f3Smrg * descriptor. 28117ec681f3Smrg */ 28127ec681f3Smrg brw_MOV(p, addr, brw_imm_ud(ex_desc.ud | imm_part)); 28137ec681f3Smrg } else { 28147ec681f3Smrg brw_OR(p, addr, ex_desc, brw_imm_ud(imm_part)); 28157ec681f3Smrg } 28169f464c52Smaya 28179f464c52Smaya brw_pop_insn_state(p); 28189f464c52Smaya ex_desc = addr; 28197ec681f3Smrg 28207ec681f3Smrg brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1)); 28219f464c52Smaya } 28229f464c52Smaya 28237ec681f3Smrg send = next_insn(p, devinfo->ver >= 12 ? 
BRW_OPCODE_SEND : BRW_OPCODE_SENDS); 282401e04c3fSmrg brw_set_dest(p, send, dst); 28259f464c52Smaya brw_set_src0(p, send, retype(payload0, BRW_REGISTER_TYPE_UD)); 28269f464c52Smaya brw_set_src1(p, send, retype(payload1, BRW_REGISTER_TYPE_UD)); 28279f464c52Smaya 28289f464c52Smaya if (desc.file == BRW_IMMEDIATE_VALUE) { 28299f464c52Smaya brw_inst_set_send_sel_reg32_desc(devinfo, send, 0); 28309f464c52Smaya brw_inst_set_send_desc(devinfo, send, desc.ud); 28319f464c52Smaya } else { 28329f464c52Smaya assert(desc.file == BRW_ARCHITECTURE_REGISTER_FILE); 28339f464c52Smaya assert(desc.nr == BRW_ARF_ADDRESS); 28349f464c52Smaya assert(desc.subnr == 0); 28359f464c52Smaya brw_inst_set_send_sel_reg32_desc(devinfo, send, 1); 28369f464c52Smaya } 28379f464c52Smaya 28389f464c52Smaya if (ex_desc.file == BRW_IMMEDIATE_VALUE) { 28399f464c52Smaya brw_inst_set_send_sel_reg32_ex_desc(devinfo, send, 0); 28407ec681f3Smrg brw_inst_set_sends_ex_desc(devinfo, send, ex_desc.ud); 28419f464c52Smaya } else { 28429f464c52Smaya assert(ex_desc.file == BRW_ARCHITECTURE_REGISTER_FILE); 28439f464c52Smaya assert(ex_desc.nr == BRW_ARF_ADDRESS); 28449f464c52Smaya assert((ex_desc.subnr & 0x3) == 0); 28459f464c52Smaya brw_inst_set_send_sel_reg32_ex_desc(devinfo, send, 1); 28469f464c52Smaya brw_inst_set_send_ex_desc_ia_subreg_nr(devinfo, send, ex_desc.subnr >> 2); 28479f464c52Smaya } 28489f464c52Smaya 284901e04c3fSmrg brw_inst_set_sfid(devinfo, send, sfid); 28509f464c52Smaya brw_inst_set_eot(devinfo, send, eot); 285101e04c3fSmrg} 285201e04c3fSmrg 285301e04c3fSmrgstatic void 285401e04c3fSmrgbrw_send_indirect_surface_message(struct brw_codegen *p, 285501e04c3fSmrg unsigned sfid, 285601e04c3fSmrg struct brw_reg dst, 285701e04c3fSmrg struct brw_reg payload, 285801e04c3fSmrg struct brw_reg surface, 285901e04c3fSmrg unsigned desc_imm) 286001e04c3fSmrg{ 286101e04c3fSmrg if (surface.file != BRW_IMMEDIATE_VALUE) { 28627ec681f3Smrg const struct tgl_swsb swsb = brw_get_default_swsb(p); 286301e04c3fSmrg struct brw_reg 
addr = retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD); 286401e04c3fSmrg 286501e04c3fSmrg brw_push_insn_state(p); 286601e04c3fSmrg brw_set_default_access_mode(p, BRW_ALIGN_1); 286701e04c3fSmrg brw_set_default_mask_control(p, BRW_MASK_DISABLE); 286801e04c3fSmrg brw_set_default_exec_size(p, BRW_EXECUTE_1); 286901e04c3fSmrg brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); 28707ec681f3Smrg brw_set_default_swsb(p, tgl_swsb_src_dep(swsb)); 287101e04c3fSmrg 287201e04c3fSmrg /* Mask out invalid bits from the surface index to avoid hangs e.g. when 287301e04c3fSmrg * some surface array is accessed out of bounds. 287401e04c3fSmrg */ 287501e04c3fSmrg brw_AND(p, addr, 287601e04c3fSmrg suboffset(vec1(retype(surface, BRW_REGISTER_TYPE_UD)), 287701e04c3fSmrg BRW_GET_SWZ(surface.swizzle, 0)), 287801e04c3fSmrg brw_imm_ud(0xff)); 287901e04c3fSmrg 288001e04c3fSmrg brw_pop_insn_state(p); 288101e04c3fSmrg 288201e04c3fSmrg surface = addr; 28837ec681f3Smrg brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1)); 288401e04c3fSmrg } 288501e04c3fSmrg 28869f464c52Smaya brw_send_indirect_message(p, sfid, dst, payload, surface, desc_imm, false); 288701e04c3fSmrg} 288801e04c3fSmrg 288901e04c3fSmrgstatic bool 28907ec681f3Smrgwhile_jumps_before_offset(const struct intel_device_info *devinfo, 289101e04c3fSmrg brw_inst *insn, int while_offset, int start_offset) 289201e04c3fSmrg{ 289301e04c3fSmrg int scale = 16 / brw_jump_scale(devinfo); 28947ec681f3Smrg int jip = devinfo->ver == 6 ? 
brw_inst_gfx6_jump_count(devinfo, insn) 289501e04c3fSmrg : brw_inst_jip(devinfo, insn); 289601e04c3fSmrg assert(jip < 0); 289701e04c3fSmrg return while_offset + jip * scale <= start_offset; 289801e04c3fSmrg} 289901e04c3fSmrg 290001e04c3fSmrg 290101e04c3fSmrgstatic int 290201e04c3fSmrgbrw_find_next_block_end(struct brw_codegen *p, int start_offset) 290301e04c3fSmrg{ 290401e04c3fSmrg int offset; 290501e04c3fSmrg void *store = p->store; 29067ec681f3Smrg const struct intel_device_info *devinfo = p->devinfo; 290701e04c3fSmrg 290801e04c3fSmrg int depth = 0; 290901e04c3fSmrg 291001e04c3fSmrg for (offset = next_offset(devinfo, store, start_offset); 291101e04c3fSmrg offset < p->next_insn_offset; 291201e04c3fSmrg offset = next_offset(devinfo, store, offset)) { 291301e04c3fSmrg brw_inst *insn = store + offset; 291401e04c3fSmrg 291501e04c3fSmrg switch (brw_inst_opcode(devinfo, insn)) { 291601e04c3fSmrg case BRW_OPCODE_IF: 291701e04c3fSmrg depth++; 291801e04c3fSmrg break; 291901e04c3fSmrg case BRW_OPCODE_ENDIF: 292001e04c3fSmrg if (depth == 0) 292101e04c3fSmrg return offset; 292201e04c3fSmrg depth--; 292301e04c3fSmrg break; 292401e04c3fSmrg case BRW_OPCODE_WHILE: 292501e04c3fSmrg /* If the while doesn't jump before our instruction, it's the end 292601e04c3fSmrg * of a sibling do...while loop. Ignore it. 
292701e04c3fSmrg */ 292801e04c3fSmrg if (!while_jumps_before_offset(devinfo, insn, offset, start_offset)) 292901e04c3fSmrg continue; 29307ec681f3Smrg FALLTHROUGH; 293101e04c3fSmrg case BRW_OPCODE_ELSE: 293201e04c3fSmrg case BRW_OPCODE_HALT: 293301e04c3fSmrg if (depth == 0) 293401e04c3fSmrg return offset; 29357ec681f3Smrg break; 29367ec681f3Smrg default: 29377ec681f3Smrg break; 293801e04c3fSmrg } 293901e04c3fSmrg } 294001e04c3fSmrg 294101e04c3fSmrg return 0; 294201e04c3fSmrg} 294301e04c3fSmrg 29447ec681f3Smrg/* There is no DO instruction on gfx6, so to find the end of the loop 294501e04c3fSmrg * we have to see if the loop is jumping back before our start 294601e04c3fSmrg * instruction. 294701e04c3fSmrg */ 294801e04c3fSmrgstatic int 294901e04c3fSmrgbrw_find_loop_end(struct brw_codegen *p, int start_offset) 295001e04c3fSmrg{ 29517ec681f3Smrg const struct intel_device_info *devinfo = p->devinfo; 295201e04c3fSmrg int offset; 295301e04c3fSmrg void *store = p->store; 295401e04c3fSmrg 29557ec681f3Smrg assert(devinfo->ver >= 6); 295601e04c3fSmrg 295701e04c3fSmrg /* Always start after the instruction (such as a WHILE) we're trying to fix 295801e04c3fSmrg * up. 295901e04c3fSmrg */ 296001e04c3fSmrg for (offset = next_offset(devinfo, store, start_offset); 296101e04c3fSmrg offset < p->next_insn_offset; 296201e04c3fSmrg offset = next_offset(devinfo, store, offset)) { 296301e04c3fSmrg brw_inst *insn = store + offset; 296401e04c3fSmrg 296501e04c3fSmrg if (brw_inst_opcode(devinfo, insn) == BRW_OPCODE_WHILE) { 296601e04c3fSmrg if (while_jumps_before_offset(devinfo, insn, offset, start_offset)) 296701e04c3fSmrg return offset; 296801e04c3fSmrg } 296901e04c3fSmrg } 297001e04c3fSmrg assert(!"not reached"); 297101e04c3fSmrg return start_offset; 297201e04c3fSmrg} 297301e04c3fSmrg 297401e04c3fSmrg/* After program generation, go back and update the UIP and JIP of 297501e04c3fSmrg * BREAK, CONT, and HALT instructions to their correct locations. 
297601e04c3fSmrg */ 297701e04c3fSmrgvoid 297801e04c3fSmrgbrw_set_uip_jip(struct brw_codegen *p, int start_offset) 297901e04c3fSmrg{ 29807ec681f3Smrg const struct intel_device_info *devinfo = p->devinfo; 298101e04c3fSmrg int offset; 298201e04c3fSmrg int br = brw_jump_scale(devinfo); 298301e04c3fSmrg int scale = 16 / br; 298401e04c3fSmrg void *store = p->store; 298501e04c3fSmrg 29867ec681f3Smrg if (devinfo->ver < 6) 298701e04c3fSmrg return; 298801e04c3fSmrg 298901e04c3fSmrg for (offset = start_offset; offset < p->next_insn_offset; offset += 16) { 299001e04c3fSmrg brw_inst *insn = store + offset; 299101e04c3fSmrg assert(brw_inst_cmpt_control(devinfo, insn) == 0); 299201e04c3fSmrg 299301e04c3fSmrg int block_end_offset = brw_find_next_block_end(p, offset); 299401e04c3fSmrg switch (brw_inst_opcode(devinfo, insn)) { 299501e04c3fSmrg case BRW_OPCODE_BREAK: 299601e04c3fSmrg assert(block_end_offset != 0); 299701e04c3fSmrg brw_inst_set_jip(devinfo, insn, (block_end_offset - offset) / scale); 29987ec681f3Smrg /* Gfx7 UIP points to WHILE; Gfx6 points just after it */ 299901e04c3fSmrg brw_inst_set_uip(devinfo, insn, 300001e04c3fSmrg (brw_find_loop_end(p, offset) - offset + 30017ec681f3Smrg (devinfo->ver == 6 ? 16 : 0)) / scale); 300201e04c3fSmrg break; 300301e04c3fSmrg case BRW_OPCODE_CONTINUE: 300401e04c3fSmrg assert(block_end_offset != 0); 300501e04c3fSmrg brw_inst_set_jip(devinfo, insn, (block_end_offset - offset) / scale); 300601e04c3fSmrg brw_inst_set_uip(devinfo, insn, 300701e04c3fSmrg (brw_find_loop_end(p, offset) - offset) / scale); 300801e04c3fSmrg 300901e04c3fSmrg assert(brw_inst_uip(devinfo, insn) != 0); 301001e04c3fSmrg assert(brw_inst_jip(devinfo, insn) != 0); 301101e04c3fSmrg break; 301201e04c3fSmrg 301301e04c3fSmrg case BRW_OPCODE_ENDIF: { 301401e04c3fSmrg int32_t jump = (block_end_offset == 0) ? 
301501e04c3fSmrg 1 * br : (block_end_offset - offset) / scale; 30167ec681f3Smrg if (devinfo->ver >= 7) 301701e04c3fSmrg brw_inst_set_jip(devinfo, insn, jump); 301801e04c3fSmrg else 30197ec681f3Smrg brw_inst_set_gfx6_jump_count(devinfo, insn, jump); 302001e04c3fSmrg break; 302101e04c3fSmrg } 302201e04c3fSmrg 302301e04c3fSmrg case BRW_OPCODE_HALT: 302401e04c3fSmrg /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19): 302501e04c3fSmrg * 302601e04c3fSmrg * "In case of the halt instruction not inside any conditional 302701e04c3fSmrg * code block, the value of <JIP> and <UIP> should be the 302801e04c3fSmrg * same. In case of the halt instruction inside conditional code 302901e04c3fSmrg * block, the <UIP> should be the end of the program, and the 303001e04c3fSmrg * <JIP> should be end of the most inner conditional code block." 303101e04c3fSmrg * 303201e04c3fSmrg * The uip will have already been set by whoever set up the 303301e04c3fSmrg * instruction. 303401e04c3fSmrg */ 303501e04c3fSmrg if (block_end_offset == 0) { 303601e04c3fSmrg brw_inst_set_jip(devinfo, insn, brw_inst_uip(devinfo, insn)); 303701e04c3fSmrg } else { 303801e04c3fSmrg brw_inst_set_jip(devinfo, insn, (block_end_offset - offset) / scale); 303901e04c3fSmrg } 304001e04c3fSmrg assert(brw_inst_uip(devinfo, insn) != 0); 304101e04c3fSmrg assert(brw_inst_jip(devinfo, insn) != 0); 304201e04c3fSmrg break; 30437ec681f3Smrg 30447ec681f3Smrg default: 30457ec681f3Smrg break; 304601e04c3fSmrg } 304701e04c3fSmrg } 304801e04c3fSmrg} 304901e04c3fSmrg 305001e04c3fSmrgvoid brw_ff_sync(struct brw_codegen *p, 305101e04c3fSmrg struct brw_reg dest, 305201e04c3fSmrg unsigned msg_reg_nr, 305301e04c3fSmrg struct brw_reg src0, 305401e04c3fSmrg bool allocate, 305501e04c3fSmrg unsigned response_length, 305601e04c3fSmrg bool eot) 305701e04c3fSmrg{ 30587ec681f3Smrg const struct intel_device_info *devinfo = p->devinfo; 305901e04c3fSmrg brw_inst *insn; 306001e04c3fSmrg 30617ec681f3Smrg gfx6_resolve_implied_move(p, &src0, 
msg_reg_nr); 306201e04c3fSmrg 306301e04c3fSmrg insn = next_insn(p, BRW_OPCODE_SEND); 306401e04c3fSmrg brw_set_dest(p, insn, dest); 306501e04c3fSmrg brw_set_src0(p, insn, src0); 306601e04c3fSmrg brw_set_src1(p, insn, brw_imm_d(0)); 306701e04c3fSmrg 30687ec681f3Smrg if (devinfo->ver < 6) 306901e04c3fSmrg brw_inst_set_base_mrf(devinfo, insn, msg_reg_nr); 307001e04c3fSmrg 307101e04c3fSmrg brw_set_ff_sync_message(p, 307201e04c3fSmrg insn, 307301e04c3fSmrg allocate, 307401e04c3fSmrg response_length, 307501e04c3fSmrg eot); 307601e04c3fSmrg} 307701e04c3fSmrg 307801e04c3fSmrg/** 30797ec681f3Smrg * Emit the SEND instruction necessary to generate stream output data on Gfx6 308001e04c3fSmrg * (for transform feedback). 308101e04c3fSmrg * 308201e04c3fSmrg * If send_commit_msg is true, this is the last piece of stream output data 308301e04c3fSmrg * from this thread, so send the data as a committed write. According to the 308401e04c3fSmrg * Sandy Bridge PRM (volume 2 part 1, section 4.5.1): 308501e04c3fSmrg * 308601e04c3fSmrg * "Prior to End of Thread with a URB_WRITE, the kernel must ensure all 308701e04c3fSmrg * writes are complete by sending the final write as a committed write." 
308801e04c3fSmrg */ 308901e04c3fSmrgvoid 309001e04c3fSmrgbrw_svb_write(struct brw_codegen *p, 309101e04c3fSmrg struct brw_reg dest, 309201e04c3fSmrg unsigned msg_reg_nr, 309301e04c3fSmrg struct brw_reg src0, 309401e04c3fSmrg unsigned binding_table_index, 309501e04c3fSmrg bool send_commit_msg) 309601e04c3fSmrg{ 30977ec681f3Smrg const struct intel_device_info *devinfo = p->devinfo; 30987ec681f3Smrg assert(devinfo->ver == 6); 30997ec681f3Smrg const unsigned target_cache = GFX6_SFID_DATAPORT_RENDER_CACHE; 310001e04c3fSmrg brw_inst *insn; 310101e04c3fSmrg 31027ec681f3Smrg gfx6_resolve_implied_move(p, &src0, msg_reg_nr); 310301e04c3fSmrg 310401e04c3fSmrg insn = next_insn(p, BRW_OPCODE_SEND); 310501e04c3fSmrg brw_inst_set_sfid(devinfo, insn, target_cache); 310601e04c3fSmrg brw_set_dest(p, insn, dest); 310701e04c3fSmrg brw_set_src0(p, insn, src0); 310801e04c3fSmrg brw_set_desc(p, insn, 310901e04c3fSmrg brw_message_desc(devinfo, 1, send_commit_msg, true) | 311001e04c3fSmrg brw_dp_write_desc(devinfo, binding_table_index, 311101e04c3fSmrg 0, /* msg_control: ignored */ 31127ec681f3Smrg GFX6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE, 311301e04c3fSmrg send_commit_msg)); /* send_commit_msg */ 311401e04c3fSmrg} 311501e04c3fSmrg 311601e04c3fSmrgstatic unsigned 31177ec681f3Smrgbrw_surface_payload_size(unsigned num_channels, 31189f464c52Smaya unsigned exec_size /**< 0 for SIMD4x2 */) 311901e04c3fSmrg{ 31209f464c52Smaya if (exec_size == 0) 31219f464c52Smaya return 1; /* SIMD4x2 */ 31229f464c52Smaya else if (exec_size <= 8) 312301e04c3fSmrg return num_channels; 31249f464c52Smaya else 31259f464c52Smaya return 2 * num_channels; 312601e04c3fSmrg} 312701e04c3fSmrg 312801e04c3fSmrgvoid 312901e04c3fSmrgbrw_untyped_atomic(struct brw_codegen *p, 313001e04c3fSmrg struct brw_reg dst, 313101e04c3fSmrg struct brw_reg payload, 313201e04c3fSmrg struct brw_reg surface, 313301e04c3fSmrg unsigned atomic_op, 313401e04c3fSmrg unsigned msg_length, 313501e04c3fSmrg bool response_expected, 313601e04c3fSmrg 
bool header_present) 313701e04c3fSmrg{ 31387ec681f3Smrg const struct intel_device_info *devinfo = p->devinfo; 31397ec681f3Smrg const unsigned sfid = (devinfo->verx10 >= 75 ? 314001e04c3fSmrg HSW_SFID_DATAPORT_DATA_CACHE_1 : 31417ec681f3Smrg GFX7_SFID_DATAPORT_DATA_CACHE); 31429f464c52Smaya const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1; 31439f464c52Smaya /* SIMD4x2 untyped atomic instructions only exist on HSW+ */ 31447ec681f3Smrg const bool has_simd4x2 = devinfo->verx10 >= 75; 31459f464c52Smaya const unsigned exec_size = align1 ? 1 << brw_get_default_exec_size(p) : 31469f464c52Smaya has_simd4x2 ? 0 : 8; 31479f464c52Smaya const unsigned response_length = 31487ec681f3Smrg brw_surface_payload_size(response_expected, exec_size); 314901e04c3fSmrg const unsigned desc = 315001e04c3fSmrg brw_message_desc(devinfo, msg_length, response_length, header_present) | 31519f464c52Smaya brw_dp_untyped_atomic_desc(devinfo, exec_size, atomic_op, 31529f464c52Smaya response_expected); 315301e04c3fSmrg /* Mask out unused components -- This is especially important in Align16 315401e04c3fSmrg * mode on generations that don't have native support for SIMD4x2 atomics, 315501e04c3fSmrg * because unused but enabled components will cause the dataport to perform 315601e04c3fSmrg * additional atomic operations on the addresses that happen to be in the 315701e04c3fSmrg * uninitialized Y, Z and W coordinates of the payload. 315801e04c3fSmrg */ 315901e04c3fSmrg const unsigned mask = align1 ? 
WRITEMASK_XYZW : WRITEMASK_X; 316001e04c3fSmrg 316101e04c3fSmrg brw_send_indirect_surface_message(p, sfid, brw_writemask(dst, mask), 316201e04c3fSmrg payload, surface, desc); 316301e04c3fSmrg} 316401e04c3fSmrg 316501e04c3fSmrgvoid 316601e04c3fSmrgbrw_untyped_surface_read(struct brw_codegen *p, 316701e04c3fSmrg struct brw_reg dst, 316801e04c3fSmrg struct brw_reg payload, 316901e04c3fSmrg struct brw_reg surface, 317001e04c3fSmrg unsigned msg_length, 317101e04c3fSmrg unsigned num_channels) 317201e04c3fSmrg{ 31737ec681f3Smrg const struct intel_device_info *devinfo = p->devinfo; 31747ec681f3Smrg const unsigned sfid = (devinfo->verx10 >= 75 ? 317501e04c3fSmrg HSW_SFID_DATAPORT_DATA_CACHE_1 : 31767ec681f3Smrg GFX7_SFID_DATAPORT_DATA_CACHE); 31779f464c52Smaya const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1; 31789f464c52Smaya const unsigned exec_size = align1 ? 1 << brw_get_default_exec_size(p) : 0; 317901e04c3fSmrg const unsigned response_length = 31807ec681f3Smrg brw_surface_payload_size(num_channels, exec_size); 318101e04c3fSmrg const unsigned desc = 318201e04c3fSmrg brw_message_desc(devinfo, msg_length, response_length, false) | 31839f464c52Smaya brw_dp_untyped_surface_rw_desc(devinfo, exec_size, num_channels, false); 318401e04c3fSmrg 318501e04c3fSmrg brw_send_indirect_surface_message(p, sfid, dst, payload, surface, desc); 318601e04c3fSmrg} 318701e04c3fSmrg 318801e04c3fSmrgvoid 318901e04c3fSmrgbrw_untyped_surface_write(struct brw_codegen *p, 319001e04c3fSmrg struct brw_reg payload, 319101e04c3fSmrg struct brw_reg surface, 319201e04c3fSmrg unsigned msg_length, 319301e04c3fSmrg unsigned num_channels, 319401e04c3fSmrg bool header_present) 319501e04c3fSmrg{ 31967ec681f3Smrg const struct intel_device_info *devinfo = p->devinfo; 31977ec681f3Smrg const unsigned sfid = (devinfo->verx10 >= 75 ? 
319801e04c3fSmrg HSW_SFID_DATAPORT_DATA_CACHE_1 : 31997ec681f3Smrg GFX7_SFID_DATAPORT_DATA_CACHE); 320001e04c3fSmrg const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1; 32019f464c52Smaya /* SIMD4x2 untyped surface write instructions only exist on HSW+ */ 32027ec681f3Smrg const bool has_simd4x2 = devinfo->verx10 >= 75; 32039f464c52Smaya const unsigned exec_size = align1 ? 1 << brw_get_default_exec_size(p) : 32049f464c52Smaya has_simd4x2 ? 0 : 8; 320501e04c3fSmrg const unsigned desc = 320601e04c3fSmrg brw_message_desc(devinfo, msg_length, 0, header_present) | 32079f464c52Smaya brw_dp_untyped_surface_rw_desc(devinfo, exec_size, num_channels, true); 320801e04c3fSmrg /* Mask out unused components -- See comment in brw_untyped_atomic(). */ 32099f464c52Smaya const unsigned mask = !has_simd4x2 && !align1 ? WRITEMASK_X : WRITEMASK_XYZW; 321001e04c3fSmrg 321101e04c3fSmrg brw_send_indirect_surface_message(p, sfid, brw_writemask(brw_null_reg(), mask), 321201e04c3fSmrg payload, surface, desc); 321301e04c3fSmrg} 321401e04c3fSmrg 321501e04c3fSmrgstatic void 321601e04c3fSmrgbrw_set_memory_fence_message(struct brw_codegen *p, 321701e04c3fSmrg struct brw_inst *insn, 321801e04c3fSmrg enum brw_message_target sfid, 32197ec681f3Smrg bool commit_enable, 32207ec681f3Smrg unsigned bti) 322101e04c3fSmrg{ 32227ec681f3Smrg const struct intel_device_info *devinfo = p->devinfo; 322301e04c3fSmrg 322401e04c3fSmrg brw_set_desc(p, insn, brw_message_desc( 322501e04c3fSmrg devinfo, 1, (commit_enable ? 
1 : 0), true)); 322601e04c3fSmrg 322701e04c3fSmrg brw_inst_set_sfid(devinfo, insn, sfid); 322801e04c3fSmrg 322901e04c3fSmrg switch (sfid) { 32307ec681f3Smrg case GFX6_SFID_DATAPORT_RENDER_CACHE: 32317ec681f3Smrg brw_inst_set_dp_msg_type(devinfo, insn, GFX7_DATAPORT_RC_MEMORY_FENCE); 323201e04c3fSmrg break; 32337ec681f3Smrg case GFX7_SFID_DATAPORT_DATA_CACHE: 32347ec681f3Smrg brw_inst_set_dp_msg_type(devinfo, insn, GFX7_DATAPORT_DC_MEMORY_FENCE); 323501e04c3fSmrg break; 323601e04c3fSmrg default: 323701e04c3fSmrg unreachable("Not reached"); 323801e04c3fSmrg } 323901e04c3fSmrg 324001e04c3fSmrg if (commit_enable) 324101e04c3fSmrg brw_inst_set_dp_msg_control(devinfo, insn, 1 << 5); 32427ec681f3Smrg 32437ec681f3Smrg assert(devinfo->ver >= 11 || bti == 0); 32447ec681f3Smrg brw_inst_set_binding_table_index(devinfo, insn, bti); 32457ec681f3Smrg} 32467ec681f3Smrg 32477ec681f3Smrgstatic void 32487ec681f3Smrggfx12_set_memory_fence_message(struct brw_codegen *p, 32497ec681f3Smrg struct brw_inst *insn, 32507ec681f3Smrg enum brw_message_target sfid) 32517ec681f3Smrg{ 32527ec681f3Smrg const unsigned mlen = 1; /* g0 header */ 32537ec681f3Smrg /* Completion signaled by write to register. No data returned. 
*/ 32547ec681f3Smrg const unsigned rlen = 1; 32557ec681f3Smrg 32567ec681f3Smrg brw_inst_set_sfid(p->devinfo, insn, sfid); 32577ec681f3Smrg 32587ec681f3Smrg if (sfid == BRW_SFID_URB) { 32597ec681f3Smrg brw_set_desc(p, insn, brw_urb_fence_desc(p->devinfo) | 32607ec681f3Smrg brw_message_desc(p->devinfo, mlen, rlen, false)); 32617ec681f3Smrg } else { 32627ec681f3Smrg enum lsc_fence_scope scope = LSC_FENCE_THREADGROUP; 32637ec681f3Smrg enum lsc_flush_type flush_type = LSC_FLUSH_TYPE_NONE; 32647ec681f3Smrg 32657ec681f3Smrg if (sfid == GFX12_SFID_TGM) { 32667ec681f3Smrg scope = LSC_FENCE_TILE; 32677ec681f3Smrg flush_type = LSC_FLUSH_TYPE_EVICT; 32687ec681f3Smrg } 32697ec681f3Smrg 32707ec681f3Smrg brw_set_desc(p, insn, lsc_fence_msg_desc(p->devinfo, scope, 32717ec681f3Smrg flush_type, false) | 32727ec681f3Smrg brw_message_desc(p->devinfo, mlen, rlen, false)); 32737ec681f3Smrg } 327401e04c3fSmrg} 327501e04c3fSmrg 327601e04c3fSmrgvoid 327701e04c3fSmrgbrw_memory_fence(struct brw_codegen *p, 327801e04c3fSmrg struct brw_reg dst, 32799f464c52Smaya struct brw_reg src, 32809f464c52Smaya enum opcode send_op, 32817ec681f3Smrg enum brw_message_target sfid, 32827ec681f3Smrg bool commit_enable, 32837ec681f3Smrg unsigned bti) 328401e04c3fSmrg{ 32857ec681f3Smrg const struct intel_device_info *devinfo = p->devinfo; 328601e04c3fSmrg 32879f464c52Smaya dst = retype(vec1(dst), BRW_REGISTER_TYPE_UW); 32889f464c52Smaya src = retype(vec1(src), BRW_REGISTER_TYPE_UD); 328901e04c3fSmrg 329001e04c3fSmrg /* Set dst as destination for dependency tracking, the MEMORY_FENCE 329101e04c3fSmrg * message doesn't write anything back. 
329201e04c3fSmrg */ 32937ec681f3Smrg struct brw_inst *insn = next_insn(p, send_op); 32947ec681f3Smrg brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE); 32957ec681f3Smrg brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1); 329601e04c3fSmrg brw_set_dest(p, insn, dst); 32979f464c52Smaya brw_set_src0(p, insn, src); 32989f464c52Smaya 32997ec681f3Smrg /* All DG2 hardware requires LSC for fence messages, even A-step */ 33007ec681f3Smrg if (devinfo->has_lsc) 33017ec681f3Smrg gfx12_set_memory_fence_message(p, insn, sfid); 33027ec681f3Smrg else 33037ec681f3Smrg brw_set_memory_fence_message(p, insn, sfid, commit_enable, bti); 330401e04c3fSmrg} 330501e04c3fSmrg 330601e04c3fSmrgvoid 330701e04c3fSmrgbrw_pixel_interpolator_query(struct brw_codegen *p, 330801e04c3fSmrg struct brw_reg dest, 330901e04c3fSmrg struct brw_reg mrf, 331001e04c3fSmrg bool noperspective, 33117ec681f3Smrg bool coarse_pixel_rate, 331201e04c3fSmrg unsigned mode, 331301e04c3fSmrg struct brw_reg data, 331401e04c3fSmrg unsigned msg_length, 331501e04c3fSmrg unsigned response_length) 331601e04c3fSmrg{ 33177ec681f3Smrg const struct intel_device_info *devinfo = p->devinfo; 331801e04c3fSmrg const uint16_t exec_size = brw_get_default_exec_size(p); 331901e04c3fSmrg const unsigned slot_group = brw_get_default_group(p) / 16; 332001e04c3fSmrg const unsigned simd_mode = (exec_size == BRW_EXECUTE_16); 332101e04c3fSmrg const unsigned desc = 332201e04c3fSmrg brw_message_desc(devinfo, msg_length, response_length, false) | 33237ec681f3Smrg brw_pixel_interp_desc(devinfo, mode, noperspective, coarse_pixel_rate, 33247ec681f3Smrg simd_mode, slot_group); 332501e04c3fSmrg 332601e04c3fSmrg /* brw_send_indirect_message will automatically use a direct send message 332701e04c3fSmrg * if data is actually immediate. 
332801e04c3fSmrg */ 332901e04c3fSmrg brw_send_indirect_message(p, 33307ec681f3Smrg GFX7_SFID_PIXEL_INTERPOLATOR, 333101e04c3fSmrg dest, 333201e04c3fSmrg mrf, 333301e04c3fSmrg vec1(data), 33349f464c52Smaya desc, 33359f464c52Smaya false); 333601e04c3fSmrg} 333701e04c3fSmrg 333801e04c3fSmrgvoid 333901e04c3fSmrgbrw_find_live_channel(struct brw_codegen *p, struct brw_reg dst, 334001e04c3fSmrg struct brw_reg mask) 334101e04c3fSmrg{ 33427ec681f3Smrg const struct intel_device_info *devinfo = p->devinfo; 334301e04c3fSmrg const unsigned exec_size = 1 << brw_get_default_exec_size(p); 334401e04c3fSmrg const unsigned qtr_control = brw_get_default_group(p) / 8; 334501e04c3fSmrg brw_inst *inst; 334601e04c3fSmrg 33477ec681f3Smrg assert(devinfo->ver >= 7); 334801e04c3fSmrg assert(mask.type == BRW_REGISTER_TYPE_UD); 334901e04c3fSmrg 335001e04c3fSmrg brw_push_insn_state(p); 335101e04c3fSmrg 33527ec681f3Smrg /* The flag register is only used on Gfx7 in align1 mode, so avoid setting 33539f464c52Smaya * unnecessary bits in the instruction words, get the information we need 33549f464c52Smaya * and reset the default flag register. This allows more instructions to be 33559f464c52Smaya * compacted. 33569f464c52Smaya */ 33579f464c52Smaya const unsigned flag_subreg = p->current->flag_subreg; 33589f464c52Smaya brw_set_default_flag_reg(p, 0, 0); 33599f464c52Smaya 336001e04c3fSmrg if (brw_get_default_access_mode(p) == BRW_ALIGN_1) { 336101e04c3fSmrg brw_set_default_mask_control(p, BRW_MASK_DISABLE); 336201e04c3fSmrg 33637ec681f3Smrg if (devinfo->ver >= 8) { 33647ec681f3Smrg /* Getting the first active channel index is easy on Gfx8: Just find 336501e04c3fSmrg * the first bit set in the execution mask. The register exists on 336601e04c3fSmrg * HSW already but it reads back as all ones when the current 336701e04c3fSmrg * instruction has execution masking disabled, so it's kind of 336801e04c3fSmrg * useless. 
336901e04c3fSmrg */ 337001e04c3fSmrg struct brw_reg exec_mask = 337101e04c3fSmrg retype(brw_mask_reg(0), BRW_REGISTER_TYPE_UD); 337201e04c3fSmrg 337301e04c3fSmrg brw_set_default_exec_size(p, BRW_EXECUTE_1); 337401e04c3fSmrg if (mask.file != BRW_IMMEDIATE_VALUE || mask.ud != 0xffffffff) { 337501e04c3fSmrg /* Unfortunately, ce0 does not take into account the thread 337601e04c3fSmrg * dispatch mask, which may be a problem in cases where it's not 337701e04c3fSmrg * tightly packed (i.e. it doesn't have the form '2^n - 1' for 337801e04c3fSmrg * some n). Combine ce0 with the given dispatch (or vector) mask 337901e04c3fSmrg * to mask off those channels which were never dispatched by the 338001e04c3fSmrg * hardware. 338101e04c3fSmrg */ 338201e04c3fSmrg brw_SHR(p, vec1(dst), mask, brw_imm_ud(qtr_control * 8)); 33837ec681f3Smrg brw_set_default_swsb(p, tgl_swsb_regdist(1)); 338401e04c3fSmrg brw_AND(p, vec1(dst), exec_mask, vec1(dst)); 338501e04c3fSmrg exec_mask = vec1(dst); 338601e04c3fSmrg } 338701e04c3fSmrg 338801e04c3fSmrg /* Quarter control has the effect of magically shifting the value of 338901e04c3fSmrg * ce0 so you'll get the first active channel relative to the 339001e04c3fSmrg * specified quarter control as result. 339101e04c3fSmrg */ 339201e04c3fSmrg inst = brw_FBL(p, vec1(dst), exec_mask); 339301e04c3fSmrg } else { 33949f464c52Smaya const struct brw_reg flag = brw_flag_subreg(flag_subreg); 339501e04c3fSmrg 339601e04c3fSmrg brw_set_default_exec_size(p, BRW_EXECUTE_1); 339701e04c3fSmrg brw_MOV(p, retype(flag, BRW_REGISTER_TYPE_UD), brw_imm_ud(0)); 339801e04c3fSmrg 339901e04c3fSmrg /* Run enough instructions returning zero with execution masking and 340001e04c3fSmrg * a conditional modifier enabled in order to get the full execution 340101e04c3fSmrg * mask in f1.0. 
We could use a single 32-wide move here if it 340201e04c3fSmrg * weren't because of the hardware bug that causes channel enables to 340301e04c3fSmrg * be applied incorrectly to the second half of 32-wide instructions 34047ec681f3Smrg * on Gfx7. 340501e04c3fSmrg */ 340601e04c3fSmrg const unsigned lower_size = MIN2(16, exec_size); 340701e04c3fSmrg for (unsigned i = 0; i < exec_size / lower_size; i++) { 340801e04c3fSmrg inst = brw_MOV(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UW), 340901e04c3fSmrg brw_imm_uw(0)); 341001e04c3fSmrg brw_inst_set_mask_control(devinfo, inst, BRW_MASK_ENABLE); 341101e04c3fSmrg brw_inst_set_group(devinfo, inst, lower_size * i + 8 * qtr_control); 341201e04c3fSmrg brw_inst_set_cond_modifier(devinfo, inst, BRW_CONDITIONAL_Z); 341301e04c3fSmrg brw_inst_set_exec_size(devinfo, inst, cvt(lower_size) - 1); 34149f464c52Smaya brw_inst_set_flag_reg_nr(devinfo, inst, flag_subreg / 2); 34159f464c52Smaya brw_inst_set_flag_subreg_nr(devinfo, inst, flag_subreg % 2); 341601e04c3fSmrg } 341701e04c3fSmrg 341801e04c3fSmrg /* Find the first bit set in the exec_size-wide portion of the flag 341901e04c3fSmrg * register that was updated by the last sequence of MOV 342001e04c3fSmrg * instructions. 342101e04c3fSmrg */ 342201e04c3fSmrg const enum brw_reg_type type = brw_int_type(exec_size / 8, false); 342301e04c3fSmrg brw_set_default_exec_size(p, BRW_EXECUTE_1); 342401e04c3fSmrg brw_FBL(p, vec1(dst), byte_offset(retype(flag, type), qtr_control)); 342501e04c3fSmrg } 342601e04c3fSmrg } else { 342701e04c3fSmrg brw_set_default_mask_control(p, BRW_MASK_DISABLE); 342801e04c3fSmrg 34297ec681f3Smrg if (devinfo->ver >= 8 && 343001e04c3fSmrg mask.file == BRW_IMMEDIATE_VALUE && mask.ud == 0xffffffff) { 343101e04c3fSmrg /* In SIMD4x2 mode the first active channel index is just the 343201e04c3fSmrg * negation of the first bit of the mask register. 
Note that ce0 34337ec681f3Smrg * doesn't take into account the dispatch mask, so the Gfx7 path 343401e04c3fSmrg * should be used instead unless you have the guarantee that the 343501e04c3fSmrg * dispatch mask is tightly packed (i.e. it has the form '2^n - 1' 343601e04c3fSmrg * for some n). 343701e04c3fSmrg */ 343801e04c3fSmrg inst = brw_AND(p, brw_writemask(dst, WRITEMASK_X), 343901e04c3fSmrg negate(retype(brw_mask_reg(0), BRW_REGISTER_TYPE_UD)), 344001e04c3fSmrg brw_imm_ud(1)); 344101e04c3fSmrg 344201e04c3fSmrg } else { 344301e04c3fSmrg /* Overwrite the destination without and with execution masking to 344401e04c3fSmrg * find out which of the channels is active. 344501e04c3fSmrg */ 344601e04c3fSmrg brw_push_insn_state(p); 344701e04c3fSmrg brw_set_default_exec_size(p, BRW_EXECUTE_4); 344801e04c3fSmrg brw_MOV(p, brw_writemask(vec4(dst), WRITEMASK_X), 344901e04c3fSmrg brw_imm_ud(1)); 345001e04c3fSmrg 345101e04c3fSmrg inst = brw_MOV(p, brw_writemask(vec4(dst), WRITEMASK_X), 345201e04c3fSmrg brw_imm_ud(0)); 345301e04c3fSmrg brw_pop_insn_state(p); 345401e04c3fSmrg brw_inst_set_mask_control(devinfo, inst, BRW_MASK_ENABLE); 345501e04c3fSmrg } 345601e04c3fSmrg } 345701e04c3fSmrg 345801e04c3fSmrg brw_pop_insn_state(p); 345901e04c3fSmrg} 346001e04c3fSmrg 346101e04c3fSmrgvoid 346201e04c3fSmrgbrw_broadcast(struct brw_codegen *p, 346301e04c3fSmrg struct brw_reg dst, 346401e04c3fSmrg struct brw_reg src, 346501e04c3fSmrg struct brw_reg idx) 346601e04c3fSmrg{ 34677ec681f3Smrg const struct intel_device_info *devinfo = p->devinfo; 346801e04c3fSmrg const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1; 346901e04c3fSmrg brw_inst *inst; 347001e04c3fSmrg 347101e04c3fSmrg brw_push_insn_state(p); 347201e04c3fSmrg brw_set_default_mask_control(p, BRW_MASK_DISABLE); 347301e04c3fSmrg brw_set_default_exec_size(p, align1 ? 
BRW_EXECUTE_1 : BRW_EXECUTE_4);
347401e04c3fSmrg
347501e04c3fSmrg assert(src.file == BRW_GENERAL_REGISTER_FILE &&
347601e04c3fSmrg src.address_mode == BRW_ADDRESS_DIRECT);
347701e04c3fSmrg assert(!src.abs && !src.negate);
347801e04c3fSmrg assert(src.type == dst.type);
347901e04c3fSmrg
348001e04c3fSmrg if ((src.vstride == 0 && (src.hstride == 0 || !align1)) ||
348101e04c3fSmrg idx.file == BRW_IMMEDIATE_VALUE) {
348201e04c3fSmrg /* Trivial, the source is already uniform or the index is a constant.
348301e04c3fSmrg * We will typically not get here if the optimizer is doing its job, but
348401e04c3fSmrg * asserting would be mean.
348501e04c3fSmrg */
348601e04c3fSmrg const unsigned i = idx.file == BRW_IMMEDIATE_VALUE ? idx.ud : 0;
34877ec681f3Smrg src = align1 ? stride(suboffset(src, i), 0, 1, 0) :
34887ec681f3Smrg stride(suboffset(src, 4 * i), 0, 4, 1);
34897ec681f3Smrg
   /* 64-bit types on hardware without 64-bit float support are split into
    * two dword moves of the low and high halves.
    */
34907ec681f3Smrg if (type_sz(src.type) > 4 && !devinfo->has_64bit_float) {
34917ec681f3Smrg brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 0),
34927ec681f3Smrg subscript(src, BRW_REGISTER_TYPE_D, 0));
34937ec681f3Smrg brw_set_default_swsb(p, tgl_swsb_null());
34947ec681f3Smrg brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 1),
34957ec681f3Smrg subscript(src, BRW_REGISTER_TYPE_D, 1));
34967ec681f3Smrg } else {
34977ec681f3Smrg brw_MOV(p, dst, src);
34987ec681f3Smrg }
349901e04c3fSmrg } else {
350001e04c3fSmrg /* From the Haswell PRM section "Register Region Restrictions":
350101e04c3fSmrg *
350201e04c3fSmrg * "The lower bits of the AddressImmediate must not overflow to
350301e04c3fSmrg * change the register address. The lower 5 bits of Address
350401e04c3fSmrg * Immediate when added to lower 5 bits of address register gives
350501e04c3fSmrg * the sub-register offset. The upper bits of Address Immediate
350601e04c3fSmrg * when added to upper bits of address register gives the register
350701e04c3fSmrg * address. Any overflow from sub-register offset is dropped."
350801e04c3fSmrg *
350901e04c3fSmrg * Fortunately, for broadcast, we never have a sub-register offset so
351001e04c3fSmrg * this isn't an issue.
351101e04c3fSmrg */
351201e04c3fSmrg assert(src.subnr == 0);
351301e04c3fSmrg
351401e04c3fSmrg if (align1) {
351501e04c3fSmrg const struct brw_reg addr =
351601e04c3fSmrg retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD);
351701e04c3fSmrg unsigned offset = src.nr * REG_SIZE + src.subnr;
351801e04c3fSmrg /* Limit in bytes of the signed indirect addressing immediate. */
351901e04c3fSmrg const unsigned limit = 512;
352001e04c3fSmrg
352101e04c3fSmrg brw_push_insn_state(p);
352201e04c3fSmrg brw_set_default_mask_control(p, BRW_MASK_DISABLE);
352301e04c3fSmrg brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
352401e04c3fSmrg
352501e04c3fSmrg /* Take into account the component size and horizontal stride. */
   /* NOTE(review): vstride/width/hstride here are the log2-encoded field
    * values of the region descriptor, so this assertion checks that the
    * region is row-contiguous (vstride == width * hstride in real
    * elements) — confirm against the brw_reg encoding.
    */
352601e04c3fSmrg assert(src.vstride == src.hstride + src.width);
352701e04c3fSmrg brw_SHL(p, addr, vec1(idx),
35287ec681f3Smrg brw_imm_ud(util_logbase2(type_sz(src.type)) +
352901e04c3fSmrg src.hstride - 1));
353001e04c3fSmrg
353101e04c3fSmrg /* We can only address up to limit bytes using the indirect
353201e04c3fSmrg * addressing immediate, account for the difference if the source
353301e04c3fSmrg * register is above this limit.
353401e04c3fSmrg */
353501e04c3fSmrg if (offset >= limit) {
35367ec681f3Smrg brw_set_default_swsb(p, tgl_swsb_regdist(1));
353701e04c3fSmrg brw_ADD(p, addr, addr, brw_imm_ud(offset - offset % limit));
353801e04c3fSmrg offset = offset % limit;
353901e04c3fSmrg }
354001e04c3fSmrg
354101e04c3fSmrg brw_pop_insn_state(p);
354201e04c3fSmrg
35437ec681f3Smrg brw_set_default_swsb(p, tgl_swsb_regdist(1));
35447ec681f3Smrg
354501e04c3fSmrg /* Use indirect addressing to fetch the specified component.
*/
354601e04c3fSmrg if (type_sz(src.type) > 4 &&
35477ec681f3Smrg (devinfo->is_cherryview || intel_device_info_is_9lp(devinfo) ||
35487ec681f3Smrg !devinfo->has_64bit_float)) {
354901e04c3fSmrg /* From the Cherryview PRM Vol 7. "Register Region Restrictions":
355001e04c3fSmrg *
355101e04c3fSmrg * "When source or destination datatype is 64b or operation is
355201e04c3fSmrg * integer DWord multiply, indirect addressing must not be
355301e04c3fSmrg * used."
355401e04c3fSmrg *
355501e04c3fSmrg * To work around both of these issues, we do two integer MOVs
355601e04c3fSmrg * instead of one 64-bit MOV. Because no double value should ever
355701e04c3fSmrg * cross a register boundary, it's safe to use the immediate
355801e04c3fSmrg * offset in the indirect here to handle adding 4 bytes to the
355901e04c3fSmrg * offset and avoid the extra ADD to the register file.
356001e04c3fSmrg */
356101e04c3fSmrg brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 0),
356201e04c3fSmrg retype(brw_vec1_indirect(addr.subnr, offset),
356301e04c3fSmrg BRW_REGISTER_TYPE_D));
35647ec681f3Smrg brw_set_default_swsb(p, tgl_swsb_null());
356501e04c3fSmrg brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 1),
356601e04c3fSmrg retype(brw_vec1_indirect(addr.subnr, offset + 4),
356701e04c3fSmrg BRW_REGISTER_TYPE_D));
356801e04c3fSmrg } else {
356901e04c3fSmrg brw_MOV(p, dst,
357001e04c3fSmrg retype(brw_vec1_indirect(addr.subnr, offset), src.type));
357101e04c3fSmrg }
357201e04c3fSmrg } else {
357301e04c3fSmrg /* In SIMD4x2 mode the index can be either zero or one, replicate it
357401e04c3fSmrg * to all bits of a flag register.
357501e04c3fSmrg */
357601e04c3fSmrg inst = brw_MOV(p,
357701e04c3fSmrg brw_null_reg(),
357801e04c3fSmrg stride(brw_swizzle(idx, BRW_SWIZZLE_XXXX), 4, 4, 1));
357901e04c3fSmrg brw_inst_set_pred_control(devinfo, inst, BRW_PREDICATE_NONE);
358001e04c3fSmrg brw_inst_set_cond_modifier(devinfo, inst, BRW_CONDITIONAL_NZ);
358101e04c3fSmrg brw_inst_set_flag_reg_nr(devinfo, inst, 1);
358201e04c3fSmrg
358301e04c3fSmrg /* and use predicated SEL to pick the right channel. */
358401e04c3fSmrg inst = brw_SEL(p, dst,
358501e04c3fSmrg stride(suboffset(src, 4), 4, 4, 1),
358601e04c3fSmrg stride(src, 4, 4, 1));
358701e04c3fSmrg brw_inst_set_pred_control(devinfo, inst, BRW_PREDICATE_NORMAL);
358801e04c3fSmrg brw_inst_set_flag_reg_nr(devinfo, inst, 1);
358901e04c3fSmrg }
359001e04c3fSmrg }
359101e04c3fSmrg
359201e04c3fSmrg brw_pop_insn_state(p);
359301e04c3fSmrg}
359401e04c3fSmrg
359501e04c3fSmrg/**
359601e04c3fSmrg * This instruction is generated as a single-channel align1 instruction by
359701e04c3fSmrg * both the VS and FS stages when using INTEL_DEBUG=shader_time.
359801e04c3fSmrg *
359901e04c3fSmrg * We can't use the typed atomic op in the FS because that has the execution
360001e04c3fSmrg * mask ANDed with the pixel mask, but we just want to write the one dword for
360101e04c3fSmrg * all the pixels.
360201e04c3fSmrg *
360301e04c3fSmrg * We don't use the SIMD4x2 atomic ops in the VS because we want to just write
360401e04c3fSmrg * one u32. So we use the same untyped atomic write message as the pixel
360501e04c3fSmrg * shader.
360601e04c3fSmrg *
360701e04c3fSmrg * The untyped atomic operation requires a BUFFER surface type with RAW
360801e04c3fSmrg * format, and is only accessible through the legacy DATA_CACHE dataport
360901e04c3fSmrg * messages.
361001e04c3fSmrg */
361101e04c3fSmrgvoid brw_shader_time_add(struct brw_codegen *p,
361201e04c3fSmrg struct brw_reg payload,
361301e04c3fSmrg uint32_t surf_index)
361401e04c3fSmrg{
36157ec681f3Smrg const struct intel_device_info *devinfo = p->devinfo;
36167ec681f3Smrg const unsigned sfid = (devinfo->verx10 >= 75 ?
361701e04c3fSmrg HSW_SFID_DATAPORT_DATA_CACHE_1 :
36187ec681f3Smrg GFX7_SFID_DATAPORT_DATA_CACHE);
36197ec681f3Smrg assert(devinfo->ver >= 7);
362001e04c3fSmrg
362101e04c3fSmrg brw_push_insn_state(p);
362201e04c3fSmrg brw_set_default_access_mode(p, BRW_ALIGN_1);
362301e04c3fSmrg brw_set_default_mask_control(p, BRW_MASK_DISABLE);
362401e04c3fSmrg brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
362501e04c3fSmrg brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND);
362601e04c3fSmrg
362701e04c3fSmrg /* We use brw_vec1_reg and unmasked because we want to increment the given
362801e04c3fSmrg * offset only once.
362901e04c3fSmrg */
363001e04c3fSmrg brw_set_dest(p, send, brw_vec1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
363101e04c3fSmrg BRW_ARF_NULL, 0));
363201e04c3fSmrg brw_set_src0(p, send, brw_vec1_reg(payload.file,
363301e04c3fSmrg payload.nr, 0));
   /* Message length 2, response length 0: a single-channel untyped atomic
    * ADD with no return value requested.
    */
363401e04c3fSmrg brw_set_desc(p, send, (brw_message_desc(devinfo, 2, 0, false) |
36359f464c52Smaya brw_dp_untyped_atomic_desc(devinfo, 1, BRW_AOP_ADD,
36369f464c52Smaya false)));
363701e04c3fSmrg
363801e04c3fSmrg brw_inst_set_sfid(devinfo, send, sfid);
363901e04c3fSmrg brw_inst_set_binding_table_index(devinfo, send, surf_index);
364001e04c3fSmrg
364101e04c3fSmrg brw_pop_insn_state(p);
364201e04c3fSmrg}
364301e04c3fSmrg
364401e04c3fSmrg
364501e04c3fSmrg/**
364601e04c3fSmrg * Emit the SEND message for a barrier
364701e04c3fSmrg */
364801e04c3fSmrgvoid
364901e04c3fSmrgbrw_barrier(struct brw_codegen *p, struct brw_reg src)
365001e04c3fSmrg{
36517ec681f3Smrg const struct intel_device_info *devinfo = p->devinfo;
365201e04c3fSmrg struct brw_inst *inst;
365301e04c3fSmrg
36547ec681f3Smrg assert(devinfo->ver >= 7);
365501e04c3fSmrg
365601e04c3fSmrg brw_push_insn_state(p);
365701e04c3fSmrg brw_set_default_access_mode(p, BRW_ALIGN_1);
365801e04c3fSmrg inst = next_insn(p, BRW_OPCODE_SEND);
365901e04c3fSmrg brw_set_dest(p, inst, retype(brw_null_reg(), BRW_REGISTER_TYPE_UW));
366001e04c3fSmrg brw_set_src0(p, inst, src);
366101e04c3fSmrg brw_set_src1(p, inst, brw_null_reg());
   /* One-register message, no response expected from the gateway. */
366201e04c3fSmrg brw_set_desc(p, inst, brw_message_desc(devinfo, 1, 0, false));
366301e04c3fSmrg
366401e04c3fSmrg brw_inst_set_sfid(devinfo, inst, BRW_SFID_MESSAGE_GATEWAY);
366501e04c3fSmrg brw_inst_set_gateway_subfuncid(devinfo, inst,
366601e04c3fSmrg BRW_MESSAGE_GATEWAY_SFID_BARRIER_MSG);
366701e04c3fSmrg
   /* Disable execution masking so the barrier message is sent exactly once
    * regardless of which channels are active.
    */
366801e04c3fSmrg brw_inst_set_mask_control(devinfo, inst, BRW_MASK_DISABLE);
366901e04c3fSmrg brw_pop_insn_state(p);
367001e04c3fSmrg}
367101e04c3fSmrg
367201e04c3fSmrg
367301e04c3fSmrg/**
367401e04c3fSmrg * Emit the wait instruction for a barrier
367501e04c3fSmrg */
367601e04c3fSmrgvoid
367701e04c3fSmrgbrw_WAIT(struct brw_codegen *p)
367801e04c3fSmrg{
36797ec681f3Smrg const struct intel_device_info *devinfo = p->devinfo;
368001e04c3fSmrg struct brw_inst *insn;
368101e04c3fSmrg
   /* The WAIT instruction consumes the notification register, used as both
    * destination and source 0 below.
    */
368201e04c3fSmrg struct brw_reg src = brw_notification_reg();
368301e04c3fSmrg
368401e04c3fSmrg insn = next_insn(p, BRW_OPCODE_WAIT);
368501e04c3fSmrg brw_set_dest(p, insn, src);
368601e04c3fSmrg brw_set_src0(p, insn, src);
368701e04c3fSmrg brw_set_src1(p, insn, brw_null_reg());
368801e04c3fSmrg
368901e04c3fSmrg brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1);
369001e04c3fSmrg brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE);
369101e04c3fSmrg}
369201e04c3fSmrg
/* Update the floating-point control bits in cr0: clear the bits covered by
 * 'mask', then OR in the bits of 'mode'.
 */
369301e04c3fSmrgvoid
36947ec681f3Smrgbrw_float_controls_mode(struct brw_codegen *p,
36957ec681f3Smrg unsigned mode, unsigned mask)
36967ec681f3Smrg{
36977ec681f3Smrg /* From the Skylake PRM, Volume 7, page 760:
36987ec681f3Smrg * "Implementation Restriction on Register Access: When the control
36997ec681f3Smrg * register is used as an explicit source and/or destination, hardware
37007ec681f3Smrg * does not ensure execution pipeline coherency. Software must set the
37017ec681f3Smrg * thread control field to ‘switch’ for an instruction that uses
37027ec681f3Smrg * control register as an explicit operand."
37037ec681f3Smrg *
37047ec681f3Smrg * On Gfx12+ this is implemented in terms of SWSB annotations instead.
37057ec681f3Smrg */
37067ec681f3Smrg brw_set_default_swsb(p, tgl_swsb_regdist(1));
370701e04c3fSmrg
   /* First AND away the bits covered by the mask; the requested mode bits
    * are ORed back in below.
    */
37087ec681f3Smrg brw_inst *inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0),
37097ec681f3Smrg brw_imm_ud(~mask));
37107ec681f3Smrg brw_inst_set_exec_size(p->devinfo, inst, BRW_EXECUTE_1);
37117ec681f3Smrg if (p->devinfo->ver < 12)
371201e04c3fSmrg brw_inst_set_thread_control(p->devinfo, inst, BRW_THREAD_SWITCH);
37137ec681f3Smrg
37147ec681f3Smrg if (mode) {
37157ec681f3Smrg brw_inst *inst_or = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0),
37167ec681f3Smrg brw_imm_ud(mode));
37177ec681f3Smrg brw_inst_set_exec_size(p->devinfo, inst_or, BRW_EXECUTE_1);
37187ec681f3Smrg if (p->devinfo->ver < 12)
37197ec681f3Smrg brw_inst_set_thread_control(p->devinfo, inst_or, BRW_THREAD_SWITCH);
372001e04c3fSmrg }
37217ec681f3Smrg
37227ec681f3Smrg if (p->devinfo->ver >= 12)
37237ec681f3Smrg brw_SYNC(p, TGL_SYNC_NOP);
37247ec681f3Smrg}
37257ec681f3Smrg
/* Patch the 32-bit immediate of a previously emitted, non-compacted
 * MOV-immediate instruction in place with 'value'.
 */
37267ec681f3Smrgvoid
37277ec681f3Smrgbrw_update_reloc_imm(const struct intel_device_info *devinfo,
37287ec681f3Smrg brw_inst *inst,
37297ec681f3Smrg uint32_t value)
37307ec681f3Smrg{
37317ec681f3Smrg /* Sanity check that the instruction is a MOV of an immediate */
37327ec681f3Smrg assert(brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MOV);
37337ec681f3Smrg assert(brw_inst_src0_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE);
37347ec681f3Smrg
37357ec681f3Smrg /* If it was compacted, we can't safely rewrite */
37367ec681f3Smrg assert(brw_inst_cmpt_control(devinfo, inst) == 0);
37377ec681f3Smrg
37387ec681f3Smrg brw_inst_set_imm_ud(devinfo, inst, value);
37397ec681f3Smrg}
37407ec681f3Smrg
37417ec681f3Smrg/* A default value for constants that will be patched at run-time.
37427ec681f3Smrg * We pick an arbitrary value that prevents instruction compaction.
37437ec681f3Smrg */
37447ec681f3Smrg#define DEFAULT_PATCH_IMM 0x4a7cc037
37457ec681f3Smrg
/* Emit a MOV of the DEFAULT_PATCH_IMM placeholder into dst and record a
 * MOV_IMM relocation identified by 'id' at the current instruction offset,
 * so the real 32-bit value can be patched in later (see
 * brw_update_reloc_imm).  Both src_type and dst.type must be 4 bytes wide.
 */
37467ec681f3Smrgvoid
37477ec681f3Smrgbrw_MOV_reloc_imm(struct brw_codegen *p,
37487ec681f3Smrg struct brw_reg dst,
37497ec681f3Smrg enum brw_reg_type src_type,
37507ec681f3Smrg uint32_t id)
37517ec681f3Smrg{
37527ec681f3Smrg assert(type_sz(src_type) == 4);
37537ec681f3Smrg assert(type_sz(dst.type) == 4);
37547ec681f3Smrg
37557ec681f3Smrg brw_add_reloc(p, id, BRW_SHADER_RELOC_TYPE_MOV_IMM,
37567ec681f3Smrg p->next_insn_offset, 0);
37577ec681f3Smrg
37587ec681f3Smrg brw_MOV(p, dst, retype(brw_imm_ud(DEFAULT_PATCH_IMM), src_type));
375901e04c3fSmrg}
3760