/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25428d7b3dSmrg 26428d7b3dSmrg **********************************************************************/ 27428d7b3dSmrg/* 28428d7b3dSmrg * Authors: 29428d7b3dSmrg * Keith Whitwell <keith@tungstengraphics.com> 30428d7b3dSmrg */ 31428d7b3dSmrg 32428d7b3dSmrg#include "brw_eu.h" 33428d7b3dSmrg 34428d7b3dSmrg#include <string.h> 35428d7b3dSmrg#include <stdlib.h> 36428d7b3dSmrg 37428d7b3dSmrg#define ARRAY_SIZE(A) (sizeof(A)/sizeof(A[0])) 38428d7b3dSmrg 39428d7b3dSmrg/*********************************************************************** 40428d7b3dSmrg * Internal helper for constructing instructions 41428d7b3dSmrg */ 42428d7b3dSmrg 43428d7b3dSmrgstatic void guess_execution_size(struct brw_compile *p, 44428d7b3dSmrg struct brw_instruction *insn, 45428d7b3dSmrg struct brw_reg reg) 46428d7b3dSmrg{ 47428d7b3dSmrg if (reg.width == BRW_WIDTH_8 && p->compressed) 48428d7b3dSmrg insn->header.execution_size = BRW_EXECUTE_16; 49428d7b3dSmrg else 50428d7b3dSmrg insn->header.execution_size = reg.width; 51428d7b3dSmrg} 52428d7b3dSmrg 53428d7b3dSmrg 54428d7b3dSmrg/** 55428d7b3dSmrg * Prior to Sandybridge, the SEND instruction accepted non-MRF source 56428d7b3dSmrg * registers, implicitly moving the operand to a message register. 57428d7b3dSmrg * 58428d7b3dSmrg * On Sandybridge, this is no longer the case. This function performs the 59428d7b3dSmrg * explicit move; it should be called before emitting a SEND instruction. 
60428d7b3dSmrg */ 61428d7b3dSmrgvoid 62428d7b3dSmrggen6_resolve_implied_move(struct brw_compile *p, 63428d7b3dSmrg struct brw_reg *src, 64428d7b3dSmrg unsigned msg_reg_nr) 65428d7b3dSmrg{ 66428d7b3dSmrg if (p->gen < 060) 67428d7b3dSmrg return; 68428d7b3dSmrg 69428d7b3dSmrg if (src->file == BRW_MESSAGE_REGISTER_FILE) 70428d7b3dSmrg return; 71428d7b3dSmrg 72428d7b3dSmrg if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) { 73428d7b3dSmrg brw_push_insn_state(p); 74428d7b3dSmrg brw_set_mask_control(p, BRW_MASK_DISABLE); 75428d7b3dSmrg brw_set_compression_control(p, BRW_COMPRESSION_NONE); 76428d7b3dSmrg brw_MOV(p, __retype_ud(brw_message_reg(msg_reg_nr)), __retype_ud(*src)); 77428d7b3dSmrg brw_pop_insn_state(p); 78428d7b3dSmrg } 79428d7b3dSmrg *src = brw_message_reg(msg_reg_nr); 80428d7b3dSmrg} 81428d7b3dSmrg 82428d7b3dSmrgstatic void 83428d7b3dSmrggen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg) 84428d7b3dSmrg{ 85428d7b3dSmrg /* From the BSpec / ISA Reference / send - [DevIVB+]: 86428d7b3dSmrg * "The send with EOT should use register space R112-R127 for <src>. This is 87428d7b3dSmrg * to enable loading of a new thread into the same slot while the message 88428d7b3dSmrg * with EOT for current thread is pending dispatch." 89428d7b3dSmrg * 90428d7b3dSmrg * Since we're pretending to have 16 MRFs anyway, we may as well use the 91428d7b3dSmrg * registers required for messages with EOT. 
92428d7b3dSmrg */ 93428d7b3dSmrg if (p->gen >= 070 && reg->file == BRW_MESSAGE_REGISTER_FILE) { 94428d7b3dSmrg reg->file = BRW_GENERAL_REGISTER_FILE; 95428d7b3dSmrg reg->nr += 111; 96428d7b3dSmrg } 97428d7b3dSmrg} 98428d7b3dSmrg 99428d7b3dSmrgvoid 100428d7b3dSmrgbrw_set_dest(struct brw_compile *p, struct brw_instruction *insn, 101428d7b3dSmrg struct brw_reg dest) 102428d7b3dSmrg{ 103428d7b3dSmrg if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE && 104428d7b3dSmrg dest.file != BRW_MESSAGE_REGISTER_FILE) 105428d7b3dSmrg assert(dest.nr < 128); 106428d7b3dSmrg 107428d7b3dSmrg gen7_convert_mrf_to_grf(p, &dest); 108428d7b3dSmrg 109428d7b3dSmrg insn->bits1.da1.dest_reg_file = dest.file; 110428d7b3dSmrg insn->bits1.da1.dest_reg_type = dest.type; 111428d7b3dSmrg insn->bits1.da1.dest_address_mode = dest.address_mode; 112428d7b3dSmrg 113428d7b3dSmrg if (dest.address_mode == BRW_ADDRESS_DIRECT) { 114428d7b3dSmrg insn->bits1.da1.dest_reg_nr = dest.nr; 115428d7b3dSmrg 116428d7b3dSmrg if (insn->header.access_mode == BRW_ALIGN_1) { 117428d7b3dSmrg insn->bits1.da1.dest_subreg_nr = dest.subnr; 118428d7b3dSmrg if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) 119428d7b3dSmrg dest.hstride = BRW_HORIZONTAL_STRIDE_1; 120428d7b3dSmrg insn->bits1.da1.dest_horiz_stride = dest.hstride; 121428d7b3dSmrg } else { 122428d7b3dSmrg insn->bits1.da16.dest_subreg_nr = dest.subnr / 16; 123428d7b3dSmrg insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask; 124428d7b3dSmrg /* even ignored in da16, still need to set as '01' */ 125428d7b3dSmrg insn->bits1.da16.dest_horiz_stride = 1; 126428d7b3dSmrg } 127428d7b3dSmrg } else { 128428d7b3dSmrg insn->bits1.ia1.dest_subreg_nr = dest.subnr; 129428d7b3dSmrg 130428d7b3dSmrg /* These are different sizes in align1 vs align16: 131428d7b3dSmrg */ 132428d7b3dSmrg if (insn->header.access_mode == BRW_ALIGN_1) { 133428d7b3dSmrg insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset; 134428d7b3dSmrg if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) 135428d7b3dSmrg 
dest.hstride = BRW_HORIZONTAL_STRIDE_1; 136428d7b3dSmrg insn->bits1.ia1.dest_horiz_stride = dest.hstride; 137428d7b3dSmrg } 138428d7b3dSmrg else { 139428d7b3dSmrg insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset; 140428d7b3dSmrg /* even ignored in da16, still need to set as '01' */ 141428d7b3dSmrg insn->bits1.ia16.dest_horiz_stride = 1; 142428d7b3dSmrg } 143428d7b3dSmrg } 144428d7b3dSmrg 145428d7b3dSmrg guess_execution_size(p, insn, dest); 146428d7b3dSmrg} 147428d7b3dSmrg 148428d7b3dSmrgstatic const int reg_type_size[8] = { 149428d7b3dSmrg [0] = 4, 150428d7b3dSmrg [1] = 4, 151428d7b3dSmrg [2] = 2, 152428d7b3dSmrg [3] = 2, 153428d7b3dSmrg [4] = 1, 154428d7b3dSmrg [5] = 1, 155428d7b3dSmrg [7] = 4 156428d7b3dSmrg}; 157428d7b3dSmrg 158428d7b3dSmrgstatic void 159428d7b3dSmrgvalidate_reg(struct brw_instruction *insn, struct brw_reg reg) 160428d7b3dSmrg{ 161428d7b3dSmrg int hstride_for_reg[] = {0, 1, 2, 4}; 162428d7b3dSmrg int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256}; 163428d7b3dSmrg int width_for_reg[] = {1, 2, 4, 8, 16}; 164428d7b3dSmrg int execsize_for_reg[] = {1, 2, 4, 8, 16}; 165428d7b3dSmrg int width, hstride, vstride, execsize; 166428d7b3dSmrg 167428d7b3dSmrg if (reg.file == BRW_IMMEDIATE_VALUE) { 168428d7b3dSmrg /* 3.3.6: Region Parameters. Restriction: Immediate vectors 169428d7b3dSmrg * mean the destination has to be 128-bit aligned and the 170428d7b3dSmrg * destination horiz stride has to be a word. 
171428d7b3dSmrg */ 172428d7b3dSmrg if (reg.type == BRW_REGISTER_TYPE_V) { 173428d7b3dSmrg assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] * 174428d7b3dSmrg reg_type_size[insn->bits1.da1.dest_reg_type] == 2); 175428d7b3dSmrg } 176428d7b3dSmrg 177428d7b3dSmrg return; 178428d7b3dSmrg } 179428d7b3dSmrg 180428d7b3dSmrg if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE && 181428d7b3dSmrg reg.file == BRW_ARF_NULL) 182428d7b3dSmrg return; 183428d7b3dSmrg 184428d7b3dSmrg assert(reg.hstride >= 0 && reg.hstride < ARRAY_SIZE(hstride_for_reg)); 185428d7b3dSmrg assert(reg.vstride >= 0 && reg.vstride < ARRAY_SIZE(vstride_for_reg)); 186428d7b3dSmrg assert(reg.width >= 0 && reg.width < ARRAY_SIZE(width_for_reg)); 187428d7b3dSmrg assert(insn->header.execution_size >= 0 && insn->header.execution_size < ARRAY_SIZE(execsize_for_reg)); 188428d7b3dSmrg 189428d7b3dSmrg hstride = hstride_for_reg[reg.hstride]; 190428d7b3dSmrg 191428d7b3dSmrg if (reg.vstride == 0xf) { 192428d7b3dSmrg vstride = -1; 193428d7b3dSmrg } else { 194428d7b3dSmrg vstride = vstride_for_reg[reg.vstride]; 195428d7b3dSmrg } 196428d7b3dSmrg 197428d7b3dSmrg width = width_for_reg[reg.width]; 198428d7b3dSmrg 199428d7b3dSmrg execsize = execsize_for_reg[insn->header.execution_size]; 200428d7b3dSmrg 201428d7b3dSmrg /* Restrictions from 3.3.10: Register Region Restrictions. */ 202428d7b3dSmrg /* 3. */ 203428d7b3dSmrg assert(execsize >= width); 204428d7b3dSmrg 205428d7b3dSmrg /* 4. */ 206428d7b3dSmrg if (execsize == width && hstride != 0) { 207428d7b3dSmrg assert(vstride == -1 || vstride == width * hstride); 208428d7b3dSmrg } 209428d7b3dSmrg 210428d7b3dSmrg /* 5. */ 211428d7b3dSmrg if (execsize == width && hstride == 0) { 212428d7b3dSmrg /* no restriction on vstride. */ 213428d7b3dSmrg } 214428d7b3dSmrg 215428d7b3dSmrg /* 6. */ 216428d7b3dSmrg if (width == 1) { 217428d7b3dSmrg assert(hstride == 0); 218428d7b3dSmrg } 219428d7b3dSmrg 220428d7b3dSmrg /* 7. 
*/ 221428d7b3dSmrg if (execsize == 1 && width == 1) { 222428d7b3dSmrg assert(hstride == 0); 223428d7b3dSmrg assert(vstride == 0); 224428d7b3dSmrg } 225428d7b3dSmrg 226428d7b3dSmrg /* 8. */ 227428d7b3dSmrg if (vstride == 0 && hstride == 0) { 228428d7b3dSmrg assert(width == 1); 229428d7b3dSmrg } 230428d7b3dSmrg 231428d7b3dSmrg /* 10. Check destination issues. */ 232428d7b3dSmrg} 233428d7b3dSmrg 234428d7b3dSmrgvoid 235428d7b3dSmrgbrw_set_src0(struct brw_compile *p, struct brw_instruction *insn, 236428d7b3dSmrg struct brw_reg reg) 237428d7b3dSmrg{ 238428d7b3dSmrg if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE) 239428d7b3dSmrg assert(reg.nr < 128); 240428d7b3dSmrg 241428d7b3dSmrg gen7_convert_mrf_to_grf(p, ®); 242428d7b3dSmrg 243428d7b3dSmrg validate_reg(insn, reg); 244428d7b3dSmrg 245428d7b3dSmrg insn->bits1.da1.src0_reg_file = reg.file; 246428d7b3dSmrg insn->bits1.da1.src0_reg_type = reg.type; 247428d7b3dSmrg insn->bits2.da1.src0_abs = reg.abs; 248428d7b3dSmrg insn->bits2.da1.src0_negate = reg.negate; 249428d7b3dSmrg insn->bits2.da1.src0_address_mode = reg.address_mode; 250428d7b3dSmrg 251428d7b3dSmrg if (reg.file == BRW_IMMEDIATE_VALUE) { 252428d7b3dSmrg insn->bits3.ud = reg.dw1.ud; 253428d7b3dSmrg 254428d7b3dSmrg /* Required to set some fields in src1 as well: 255428d7b3dSmrg */ 256428d7b3dSmrg insn->bits1.da1.src1_reg_file = 0; /* arf */ 257428d7b3dSmrg insn->bits1.da1.src1_reg_type = reg.type; 258428d7b3dSmrg } else { 259428d7b3dSmrg if (reg.address_mode == BRW_ADDRESS_DIRECT) { 260428d7b3dSmrg if (insn->header.access_mode == BRW_ALIGN_1) { 261428d7b3dSmrg insn->bits2.da1.src0_subreg_nr = reg.subnr; 262428d7b3dSmrg insn->bits2.da1.src0_reg_nr = reg.nr; 263428d7b3dSmrg } else { 264428d7b3dSmrg insn->bits2.da16.src0_subreg_nr = reg.subnr / 16; 265428d7b3dSmrg insn->bits2.da16.src0_reg_nr = reg.nr; 266428d7b3dSmrg } 267428d7b3dSmrg } else { 268428d7b3dSmrg insn->bits2.ia1.src0_subreg_nr = reg.subnr; 269428d7b3dSmrg 270428d7b3dSmrg if (insn->header.access_mode == 
BRW_ALIGN_1) { 271428d7b3dSmrg insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset; 272428d7b3dSmrg } else { 273428d7b3dSmrg insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset; 274428d7b3dSmrg } 275428d7b3dSmrg } 276428d7b3dSmrg 277428d7b3dSmrg if (insn->header.access_mode == BRW_ALIGN_1) { 278428d7b3dSmrg if (reg.width == BRW_WIDTH_1 && 279428d7b3dSmrg insn->header.execution_size == BRW_EXECUTE_1) { 280428d7b3dSmrg insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0; 281428d7b3dSmrg insn->bits2.da1.src0_width = BRW_WIDTH_1; 282428d7b3dSmrg insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0; 283428d7b3dSmrg } else { 284428d7b3dSmrg insn->bits2.da1.src0_horiz_stride = reg.hstride; 285428d7b3dSmrg insn->bits2.da1.src0_width = reg.width; 286428d7b3dSmrg insn->bits2.da1.src0_vert_stride = reg.vstride; 287428d7b3dSmrg } 288428d7b3dSmrg } else { 289428d7b3dSmrg insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); 290428d7b3dSmrg insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); 291428d7b3dSmrg insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); 292428d7b3dSmrg insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); 293428d7b3dSmrg 294428d7b3dSmrg /* This is an oddity of the fact we're using the same 295428d7b3dSmrg * descriptions for registers in align_16 as align_1: 296428d7b3dSmrg */ 297428d7b3dSmrg if (reg.vstride == BRW_VERTICAL_STRIDE_8) 298428d7b3dSmrg insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4; 299428d7b3dSmrg else 300428d7b3dSmrg insn->bits2.da16.src0_vert_stride = reg.vstride; 301428d7b3dSmrg } 302428d7b3dSmrg } 303428d7b3dSmrg} 304428d7b3dSmrg 305428d7b3dSmrgvoid brw_set_src1(struct brw_compile *p, 306428d7b3dSmrg struct brw_instruction *insn, 307428d7b3dSmrg struct brw_reg reg) 308428d7b3dSmrg{ 309428d7b3dSmrg assert(reg.file != BRW_MESSAGE_REGISTER_FILE); 310428d7b3dSmrg assert(reg.nr < 128); 
311428d7b3dSmrg 312428d7b3dSmrg gen7_convert_mrf_to_grf(p, ®); 313428d7b3dSmrg 314428d7b3dSmrg validate_reg(insn, reg); 315428d7b3dSmrg 316428d7b3dSmrg insn->bits1.da1.src1_reg_file = reg.file; 317428d7b3dSmrg insn->bits1.da1.src1_reg_type = reg.type; 318428d7b3dSmrg insn->bits3.da1.src1_abs = reg.abs; 319428d7b3dSmrg insn->bits3.da1.src1_negate = reg.negate; 320428d7b3dSmrg 321428d7b3dSmrg /* Only src1 can be immediate in two-argument instructions. */ 322428d7b3dSmrg assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE); 323428d7b3dSmrg 324428d7b3dSmrg if (reg.file == BRW_IMMEDIATE_VALUE) { 325428d7b3dSmrg insn->bits3.ud = reg.dw1.ud; 326428d7b3dSmrg } else { 327428d7b3dSmrg /* This is a hardware restriction, which may or may not be lifted 328428d7b3dSmrg * in the future: 329428d7b3dSmrg */ 330428d7b3dSmrg assert (reg.address_mode == BRW_ADDRESS_DIRECT); 331428d7b3dSmrg /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */ 332428d7b3dSmrg 333428d7b3dSmrg if (insn->header.access_mode == BRW_ALIGN_1) { 334428d7b3dSmrg insn->bits3.da1.src1_subreg_nr = reg.subnr; 335428d7b3dSmrg insn->bits3.da1.src1_reg_nr = reg.nr; 336428d7b3dSmrg } else { 337428d7b3dSmrg insn->bits3.da16.src1_subreg_nr = reg.subnr / 16; 338428d7b3dSmrg insn->bits3.da16.src1_reg_nr = reg.nr; 339428d7b3dSmrg } 340428d7b3dSmrg 341428d7b3dSmrg if (insn->header.access_mode == BRW_ALIGN_1) { 342428d7b3dSmrg if (reg.width == BRW_WIDTH_1 && 343428d7b3dSmrg insn->header.execution_size == BRW_EXECUTE_1) { 344428d7b3dSmrg insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0; 345428d7b3dSmrg insn->bits3.da1.src1_width = BRW_WIDTH_1; 346428d7b3dSmrg insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0; 347428d7b3dSmrg } else { 348428d7b3dSmrg insn->bits3.da1.src1_horiz_stride = reg.hstride; 349428d7b3dSmrg insn->bits3.da1.src1_width = reg.width; 350428d7b3dSmrg insn->bits3.da1.src1_vert_stride = reg.vstride; 351428d7b3dSmrg } 352428d7b3dSmrg } else { 353428d7b3dSmrg insn->bits3.da16.src1_swz_x 
= BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); 354428d7b3dSmrg insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); 355428d7b3dSmrg insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); 356428d7b3dSmrg insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); 357428d7b3dSmrg 358428d7b3dSmrg /* This is an oddity of the fact we're using the same 359428d7b3dSmrg * descriptions for registers in align_16 as align_1: 360428d7b3dSmrg */ 361428d7b3dSmrg if (reg.vstride == BRW_VERTICAL_STRIDE_8) 362428d7b3dSmrg insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4; 363428d7b3dSmrg else 364428d7b3dSmrg insn->bits3.da16.src1_vert_stride = reg.vstride; 365428d7b3dSmrg } 366428d7b3dSmrg } 367428d7b3dSmrg} 368428d7b3dSmrg 369428d7b3dSmrg/** 370428d7b3dSmrg * Set the Message Descriptor and Extended Message Descriptor fields 371428d7b3dSmrg * for SEND messages. 372428d7b3dSmrg * 373428d7b3dSmrg * \note This zeroes out the Function Control bits, so it must be called 374428d7b3dSmrg * \b before filling out any message-specific data. Callers can 375428d7b3dSmrg * choose not to fill in irrelevant bits; they will be zero. 
376428d7b3dSmrg */ 377428d7b3dSmrgstatic void 378428d7b3dSmrgbrw_set_message_descriptor(struct brw_compile *p, 379428d7b3dSmrg struct brw_instruction *inst, 380428d7b3dSmrg enum brw_message_target sfid, 381428d7b3dSmrg unsigned msg_length, 382428d7b3dSmrg unsigned response_length, 383428d7b3dSmrg bool header_present, 384428d7b3dSmrg bool end_of_thread) 385428d7b3dSmrg{ 386428d7b3dSmrg brw_set_src1(p, inst, brw_imm_d(0)); 387428d7b3dSmrg 388428d7b3dSmrg if (p->gen >= 050) { 389428d7b3dSmrg inst->bits3.generic_gen5.header_present = header_present; 390428d7b3dSmrg inst->bits3.generic_gen5.response_length = response_length; 391428d7b3dSmrg inst->bits3.generic_gen5.msg_length = msg_length; 392428d7b3dSmrg inst->bits3.generic_gen5.end_of_thread = end_of_thread; 393428d7b3dSmrg 394428d7b3dSmrg if (p->gen >= 060) { 395428d7b3dSmrg /* On Gen6+ Message target/SFID goes in bits 27:24 of the header */ 396428d7b3dSmrg inst->header.destreg__conditionalmod = sfid; 397428d7b3dSmrg } else { 398428d7b3dSmrg /* Set Extended Message Descriptor (ex_desc) */ 399428d7b3dSmrg inst->bits2.send_gen5.sfid = sfid; 400428d7b3dSmrg inst->bits2.send_gen5.end_of_thread = end_of_thread; 401428d7b3dSmrg } 402428d7b3dSmrg } else { 403428d7b3dSmrg inst->bits3.generic.response_length = response_length; 404428d7b3dSmrg inst->bits3.generic.msg_length = msg_length; 405428d7b3dSmrg inst->bits3.generic.msg_target = sfid; 406428d7b3dSmrg inst->bits3.generic.end_of_thread = end_of_thread; 407428d7b3dSmrg } 408428d7b3dSmrg} 409428d7b3dSmrg 410428d7b3dSmrg 411428d7b3dSmrgstatic void brw_set_math_message(struct brw_compile *p, 412428d7b3dSmrg struct brw_instruction *insn, 413428d7b3dSmrg unsigned function, 414428d7b3dSmrg unsigned integer_type, 415428d7b3dSmrg bool low_precision, 416428d7b3dSmrg bool saturate, 417428d7b3dSmrg unsigned dataType) 418428d7b3dSmrg{ 419428d7b3dSmrg unsigned msg_length; 420428d7b3dSmrg unsigned response_length; 421428d7b3dSmrg 422428d7b3dSmrg /* Infer message length from the function 
*/ 423428d7b3dSmrg switch (function) { 424428d7b3dSmrg case BRW_MATH_FUNCTION_POW: 425428d7b3dSmrg case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT: 426428d7b3dSmrg case BRW_MATH_FUNCTION_INT_DIV_REMAINDER: 427428d7b3dSmrg case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER: 428428d7b3dSmrg msg_length = 2; 429428d7b3dSmrg break; 430428d7b3dSmrg default: 431428d7b3dSmrg msg_length = 1; 432428d7b3dSmrg break; 433428d7b3dSmrg } 434428d7b3dSmrg 435428d7b3dSmrg /* Infer response length from the function */ 436428d7b3dSmrg switch (function) { 437428d7b3dSmrg case BRW_MATH_FUNCTION_SINCOS: 438428d7b3dSmrg case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER: 439428d7b3dSmrg response_length = 2; 440428d7b3dSmrg break; 441428d7b3dSmrg default: 442428d7b3dSmrg response_length = 1; 443428d7b3dSmrg break; 444428d7b3dSmrg } 445428d7b3dSmrg 446428d7b3dSmrg brw_set_message_descriptor(p, insn, BRW_SFID_MATH, 447428d7b3dSmrg msg_length, response_length, 448428d7b3dSmrg false, false); 449428d7b3dSmrg if (p->gen == 050) { 450428d7b3dSmrg insn->bits3.math_gen5.function = function; 451428d7b3dSmrg insn->bits3.math_gen5.int_type = integer_type; 452428d7b3dSmrg insn->bits3.math_gen5.precision = low_precision; 453428d7b3dSmrg insn->bits3.math_gen5.saturate = saturate; 454428d7b3dSmrg insn->bits3.math_gen5.data_type = dataType; 455428d7b3dSmrg insn->bits3.math_gen5.snapshot = 0; 456428d7b3dSmrg } else { 457428d7b3dSmrg insn->bits3.math.function = function; 458428d7b3dSmrg insn->bits3.math.int_type = integer_type; 459428d7b3dSmrg insn->bits3.math.precision = low_precision; 460428d7b3dSmrg insn->bits3.math.saturate = saturate; 461428d7b3dSmrg insn->bits3.math.data_type = dataType; 462428d7b3dSmrg } 463428d7b3dSmrg} 464428d7b3dSmrg 465428d7b3dSmrgstatic void brw_set_ff_sync_message(struct brw_compile *p, 466428d7b3dSmrg struct brw_instruction *insn, 467428d7b3dSmrg bool allocate, 468428d7b3dSmrg unsigned response_length, 469428d7b3dSmrg bool end_of_thread) 470428d7b3dSmrg{ 471428d7b3dSmrg 
brw_set_message_descriptor(p, insn, BRW_SFID_URB, 472428d7b3dSmrg 1, response_length, 473428d7b3dSmrg true, end_of_thread); 474428d7b3dSmrg insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */ 475428d7b3dSmrg insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */ 476428d7b3dSmrg insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */ 477428d7b3dSmrg insn->bits3.urb_gen5.allocate = allocate; 478428d7b3dSmrg insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */ 479428d7b3dSmrg insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */ 480428d7b3dSmrg} 481428d7b3dSmrg 482428d7b3dSmrgstatic void brw_set_urb_message(struct brw_compile *p, 483428d7b3dSmrg struct brw_instruction *insn, 484428d7b3dSmrg bool allocate, 485428d7b3dSmrg bool used, 486428d7b3dSmrg unsigned msg_length, 487428d7b3dSmrg unsigned response_length, 488428d7b3dSmrg bool end_of_thread, 489428d7b3dSmrg bool complete, 490428d7b3dSmrg unsigned offset, 491428d7b3dSmrg unsigned swizzle_control) 492428d7b3dSmrg{ 493428d7b3dSmrg brw_set_message_descriptor(p, insn, BRW_SFID_URB, 494428d7b3dSmrg msg_length, response_length, true, end_of_thread); 495428d7b3dSmrg if (p->gen >= 070) { 496428d7b3dSmrg insn->bits3.urb_gen7.opcode = 0; /* URB_WRITE_HWORD */ 497428d7b3dSmrg insn->bits3.urb_gen7.offset = offset; 498428d7b3dSmrg assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE); 499428d7b3dSmrg insn->bits3.urb_gen7.swizzle_control = swizzle_control; 500428d7b3dSmrg /* per_slot_offset = 0 makes it ignore offsets in message header */ 501428d7b3dSmrg insn->bits3.urb_gen7.per_slot_offset = 0; 502428d7b3dSmrg insn->bits3.urb_gen7.complete = complete; 503428d7b3dSmrg } else if (p->gen >= 050) { 504428d7b3dSmrg insn->bits3.urb_gen5.opcode = 0; /* URB_WRITE */ 505428d7b3dSmrg insn->bits3.urb_gen5.offset = offset; 506428d7b3dSmrg insn->bits3.urb_gen5.swizzle_control = swizzle_control; 507428d7b3dSmrg insn->bits3.urb_gen5.allocate = allocate; 508428d7b3dSmrg insn->bits3.urb_gen5.used = used; /* ? 
*/ 509428d7b3dSmrg insn->bits3.urb_gen5.complete = complete; 510428d7b3dSmrg } else { 511428d7b3dSmrg insn->bits3.urb.opcode = 0; /* ? */ 512428d7b3dSmrg insn->bits3.urb.offset = offset; 513428d7b3dSmrg insn->bits3.urb.swizzle_control = swizzle_control; 514428d7b3dSmrg insn->bits3.urb.allocate = allocate; 515428d7b3dSmrg insn->bits3.urb.used = used; /* ? */ 516428d7b3dSmrg insn->bits3.urb.complete = complete; 517428d7b3dSmrg } 518428d7b3dSmrg} 519428d7b3dSmrg 520428d7b3dSmrgvoid 521428d7b3dSmrgbrw_set_dp_write_message(struct brw_compile *p, 522428d7b3dSmrg struct brw_instruction *insn, 523428d7b3dSmrg unsigned binding_table_index, 524428d7b3dSmrg unsigned msg_control, 525428d7b3dSmrg unsigned msg_type, 526428d7b3dSmrg unsigned msg_length, 527428d7b3dSmrg bool header_present, 528428d7b3dSmrg bool last_render_target, 529428d7b3dSmrg unsigned response_length, 530428d7b3dSmrg bool end_of_thread, 531428d7b3dSmrg bool send_commit_msg) 532428d7b3dSmrg{ 533428d7b3dSmrg unsigned sfid; 534428d7b3dSmrg 535428d7b3dSmrg if (p->gen >= 070) { 536428d7b3dSmrg /* Use the Render Cache for RT writes; otherwise use the Data Cache */ 537428d7b3dSmrg if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE) 538428d7b3dSmrg sfid = GEN6_SFID_DATAPORT_RENDER_CACHE; 539428d7b3dSmrg else 540428d7b3dSmrg sfid = GEN7_SFID_DATAPORT_DATA_CACHE; 541428d7b3dSmrg } else if (p->gen >= 060) { 542428d7b3dSmrg /* Use the render cache for all write messages. 
*/ 543428d7b3dSmrg sfid = GEN6_SFID_DATAPORT_RENDER_CACHE; 544428d7b3dSmrg } else { 545428d7b3dSmrg sfid = BRW_SFID_DATAPORT_WRITE; 546428d7b3dSmrg } 547428d7b3dSmrg 548428d7b3dSmrg brw_set_message_descriptor(p, insn, sfid, 549428d7b3dSmrg msg_length, response_length, 550428d7b3dSmrg header_present, end_of_thread); 551428d7b3dSmrg 552428d7b3dSmrg if (p->gen >= 070) { 553428d7b3dSmrg insn->bits3.gen7_dp.binding_table_index = binding_table_index; 554428d7b3dSmrg insn->bits3.gen7_dp.msg_control = msg_control; 555428d7b3dSmrg insn->bits3.gen7_dp.last_render_target = last_render_target; 556428d7b3dSmrg insn->bits3.gen7_dp.msg_type = msg_type; 557428d7b3dSmrg } else if (p->gen >= 060) { 558428d7b3dSmrg insn->bits3.gen6_dp.binding_table_index = binding_table_index; 559428d7b3dSmrg insn->bits3.gen6_dp.msg_control = msg_control; 560428d7b3dSmrg insn->bits3.gen6_dp.last_render_target = last_render_target; 561428d7b3dSmrg insn->bits3.gen6_dp.msg_type = msg_type; 562428d7b3dSmrg insn->bits3.gen6_dp.send_commit_msg = send_commit_msg; 563428d7b3dSmrg } else if (p->gen >= 050) { 564428d7b3dSmrg insn->bits3.dp_write_gen5.binding_table_index = binding_table_index; 565428d7b3dSmrg insn->bits3.dp_write_gen5.msg_control = msg_control; 566428d7b3dSmrg insn->bits3.dp_write_gen5.last_render_target = last_render_target; 567428d7b3dSmrg insn->bits3.dp_write_gen5.msg_type = msg_type; 568428d7b3dSmrg insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg; 569428d7b3dSmrg } else { 570428d7b3dSmrg insn->bits3.dp_write.binding_table_index = binding_table_index; 571428d7b3dSmrg insn->bits3.dp_write.msg_control = msg_control; 572428d7b3dSmrg insn->bits3.dp_write.last_render_target = last_render_target; 573428d7b3dSmrg insn->bits3.dp_write.msg_type = msg_type; 574428d7b3dSmrg insn->bits3.dp_write.send_commit_msg = send_commit_msg; 575428d7b3dSmrg } 576428d7b3dSmrg} 577428d7b3dSmrg 578428d7b3dSmrgvoid 579428d7b3dSmrgbrw_set_dp_read_message(struct brw_compile *p, 580428d7b3dSmrg struct 
brw_instruction *insn, 581428d7b3dSmrg unsigned binding_table_index, 582428d7b3dSmrg unsigned msg_control, 583428d7b3dSmrg unsigned msg_type, 584428d7b3dSmrg unsigned target_cache, 585428d7b3dSmrg unsigned msg_length, 586428d7b3dSmrg unsigned response_length) 587428d7b3dSmrg{ 588428d7b3dSmrg unsigned sfid; 589428d7b3dSmrg 590428d7b3dSmrg if (p->gen >= 070) { 591428d7b3dSmrg sfid = GEN7_SFID_DATAPORT_DATA_CACHE; 592428d7b3dSmrg } else if (p->gen >= 060) { 593428d7b3dSmrg if (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE) 594428d7b3dSmrg sfid = GEN6_SFID_DATAPORT_RENDER_CACHE; 595428d7b3dSmrg else 596428d7b3dSmrg sfid = GEN6_SFID_DATAPORT_SAMPLER_CACHE; 597428d7b3dSmrg } else { 598428d7b3dSmrg sfid = BRW_SFID_DATAPORT_READ; 599428d7b3dSmrg } 600428d7b3dSmrg 601428d7b3dSmrg brw_set_message_descriptor(p, insn, sfid, 602428d7b3dSmrg msg_length, response_length, 603428d7b3dSmrg true, false); 604428d7b3dSmrg 605428d7b3dSmrg if (p->gen >= 070) { 606428d7b3dSmrg insn->bits3.gen7_dp.binding_table_index = binding_table_index; 607428d7b3dSmrg insn->bits3.gen7_dp.msg_control = msg_control; 608428d7b3dSmrg insn->bits3.gen7_dp.last_render_target = 0; 609428d7b3dSmrg insn->bits3.gen7_dp.msg_type = msg_type; 610428d7b3dSmrg } else if (p->gen >= 060) { 611428d7b3dSmrg insn->bits3.gen6_dp.binding_table_index = binding_table_index; 612428d7b3dSmrg insn->bits3.gen6_dp.msg_control = msg_control; 613428d7b3dSmrg insn->bits3.gen6_dp.last_render_target = 0; 614428d7b3dSmrg insn->bits3.gen6_dp.msg_type = msg_type; 615428d7b3dSmrg insn->bits3.gen6_dp.send_commit_msg = 0; 616428d7b3dSmrg } else if (p->gen >= 050) { 617428d7b3dSmrg insn->bits3.dp_read_gen5.binding_table_index = binding_table_index; 618428d7b3dSmrg insn->bits3.dp_read_gen5.msg_control = msg_control; 619428d7b3dSmrg insn->bits3.dp_read_gen5.msg_type = msg_type; 620428d7b3dSmrg insn->bits3.dp_read_gen5.target_cache = target_cache; 621428d7b3dSmrg } else if (p->gen >= 045) { 622428d7b3dSmrg 
insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/ 623428d7b3dSmrg insn->bits3.dp_read_g4x.msg_control = msg_control; /*8:10*/ 624428d7b3dSmrg insn->bits3.dp_read_g4x.msg_type = msg_type; /*11:13*/ 625428d7b3dSmrg insn->bits3.dp_read_g4x.target_cache = target_cache; /*14:15*/ 626428d7b3dSmrg } else { 627428d7b3dSmrg insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/ 628428d7b3dSmrg insn->bits3.dp_read.msg_control = msg_control; /*8:11*/ 629428d7b3dSmrg insn->bits3.dp_read.msg_type = msg_type; /*12:13*/ 630428d7b3dSmrg insn->bits3.dp_read.target_cache = target_cache; /*14:15*/ 631428d7b3dSmrg } 632428d7b3dSmrg} 633428d7b3dSmrg 634428d7b3dSmrgstatic void brw_set_sampler_message(struct brw_compile *p, 635428d7b3dSmrg struct brw_instruction *insn, 636428d7b3dSmrg unsigned binding_table_index, 637428d7b3dSmrg unsigned sampler, 638428d7b3dSmrg unsigned msg_type, 639428d7b3dSmrg unsigned response_length, 640428d7b3dSmrg unsigned msg_length, 641428d7b3dSmrg bool header_present, 642428d7b3dSmrg unsigned simd_mode) 643428d7b3dSmrg{ 644428d7b3dSmrg brw_set_message_descriptor(p, insn, BRW_SFID_SAMPLER, 645428d7b3dSmrg msg_length, response_length, 646428d7b3dSmrg header_present, false); 647428d7b3dSmrg 648428d7b3dSmrg if (p->gen >= 070) { 649428d7b3dSmrg insn->bits3.sampler_gen7.binding_table_index = binding_table_index; 650428d7b3dSmrg insn->bits3.sampler_gen7.sampler = sampler; 651428d7b3dSmrg insn->bits3.sampler_gen7.msg_type = msg_type; 652428d7b3dSmrg insn->bits3.sampler_gen7.simd_mode = simd_mode; 653428d7b3dSmrg } else if (p->gen >= 050) { 654428d7b3dSmrg insn->bits3.sampler_gen5.binding_table_index = binding_table_index; 655428d7b3dSmrg insn->bits3.sampler_gen5.sampler = sampler; 656428d7b3dSmrg insn->bits3.sampler_gen5.msg_type = msg_type; 657428d7b3dSmrg insn->bits3.sampler_gen5.simd_mode = simd_mode; 658428d7b3dSmrg } else if (p->gen >= 045) { 659428d7b3dSmrg insn->bits3.sampler_g4x.binding_table_index = 
binding_table_index; 660428d7b3dSmrg insn->bits3.sampler_g4x.sampler = sampler; 661428d7b3dSmrg insn->bits3.sampler_g4x.msg_type = msg_type; 662428d7b3dSmrg } else { 663428d7b3dSmrg insn->bits3.sampler.binding_table_index = binding_table_index; 664428d7b3dSmrg insn->bits3.sampler.sampler = sampler; 665428d7b3dSmrg insn->bits3.sampler.msg_type = msg_type; 666428d7b3dSmrg insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32; 667428d7b3dSmrg } 668428d7b3dSmrg} 669428d7b3dSmrg 670428d7b3dSmrg 671428d7b3dSmrgvoid brw_NOP(struct brw_compile *p) 672428d7b3dSmrg{ 673428d7b3dSmrg struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_NOP); 674428d7b3dSmrg brw_set_dest(p, insn, __retype_ud(brw_vec4_grf(0,0))); 675428d7b3dSmrg brw_set_src0(p, insn, __retype_ud(brw_vec4_grf(0,0))); 676428d7b3dSmrg brw_set_src1(p, insn, brw_imm_ud(0x0)); 677428d7b3dSmrg} 678428d7b3dSmrg 679428d7b3dSmrg/*********************************************************************** 680428d7b3dSmrg * Comparisons, if/else/endif 681428d7b3dSmrg */ 682428d7b3dSmrg 683428d7b3dSmrgstatic void 684428d7b3dSmrgpush_if_stack(struct brw_compile *p, struct brw_instruction *inst) 685428d7b3dSmrg{ 686428d7b3dSmrg p->if_stack[p->if_stack_depth] = inst; 687428d7b3dSmrg 688428d7b3dSmrg p->if_stack_depth++; 689428d7b3dSmrg if (p->if_stack_array_size <= p->if_stack_depth) { 690428d7b3dSmrg p->if_stack_array_size *= 2; 691428d7b3dSmrg p->if_stack = realloc(p->if_stack, sizeof(struct brw_instruction *)*p->if_stack_array_size); 692428d7b3dSmrg } 693428d7b3dSmrg} 694428d7b3dSmrg 695428d7b3dSmrg/* EU takes the value from the flag register and pushes it onto some 696428d7b3dSmrg * sort of a stack (presumably merging with any flag value already on 697428d7b3dSmrg * the stack). Within an if block, the flags at the top of the stack 698428d7b3dSmrg * control execution on each channel of the unit, eg. on each of the 699428d7b3dSmrg * 16 pixel values in our wm programs. 
700428d7b3dSmrg * 701428d7b3dSmrg * When the matching 'else' instruction is reached (presumably by 702428d7b3dSmrg * countdown of the instruction count patched in by our ELSE/ENDIF 703428d7b3dSmrg * functions), the relevent flags are inverted. 704428d7b3dSmrg * 705428d7b3dSmrg * When the matching 'endif' instruction is reached, the flags are 706428d7b3dSmrg * popped off. If the stack is now empty, normal execution resumes. 707428d7b3dSmrg */ 708428d7b3dSmrgstruct brw_instruction * 709428d7b3dSmrgbrw_IF(struct brw_compile *p, unsigned execute_size) 710428d7b3dSmrg{ 711428d7b3dSmrg struct brw_instruction *insn; 712428d7b3dSmrg 713428d7b3dSmrg insn = brw_next_insn(p, BRW_OPCODE_IF); 714428d7b3dSmrg 715428d7b3dSmrg /* Override the defaults for this instruction: */ 716428d7b3dSmrg if (p->gen < 060) { 717428d7b3dSmrg brw_set_dest(p, insn, brw_ip_reg()); 718428d7b3dSmrg brw_set_src0(p, insn, brw_ip_reg()); 719428d7b3dSmrg brw_set_src1(p, insn, brw_imm_d(0x0)); 720428d7b3dSmrg } else if (p->gen < 070) { 721428d7b3dSmrg brw_set_dest(p, insn, brw_imm_w(0)); 722428d7b3dSmrg insn->bits1.branch_gen6.jump_count = 0; 723428d7b3dSmrg brw_set_src0(p, insn, __retype_d(brw_null_reg())); 724428d7b3dSmrg brw_set_src1(p, insn, __retype_d(brw_null_reg())); 725428d7b3dSmrg } else { 726428d7b3dSmrg brw_set_dest(p, insn, __retype_d(brw_null_reg())); 727428d7b3dSmrg brw_set_src0(p, insn, __retype_d(brw_null_reg())); 728428d7b3dSmrg brw_set_src1(p, insn, brw_imm_ud(0)); 729428d7b3dSmrg insn->bits3.break_cont.jip = 0; 730428d7b3dSmrg insn->bits3.break_cont.uip = 0; 731428d7b3dSmrg } 732428d7b3dSmrg 733428d7b3dSmrg insn->header.execution_size = execute_size; 734428d7b3dSmrg insn->header.compression_control = BRW_COMPRESSION_NONE; 735428d7b3dSmrg insn->header.predicate_control = BRW_PREDICATE_NORMAL; 736428d7b3dSmrg insn->header.mask_control = BRW_MASK_ENABLE; 737428d7b3dSmrg if (!p->single_program_flow) 738428d7b3dSmrg insn->header.thread_control = BRW_THREAD_SWITCH; 739428d7b3dSmrg 
740428d7b3dSmrg p->current->header.predicate_control = BRW_PREDICATE_NONE; 741428d7b3dSmrg 742428d7b3dSmrg push_if_stack(p, insn); 743428d7b3dSmrg return insn; 744428d7b3dSmrg} 745428d7b3dSmrg 746428d7b3dSmrg/* This function is only used for gen6-style IF instructions with an 747428d7b3dSmrg * embedded comparison (conditional modifier). It is not used on gen7. 748428d7b3dSmrg */ 749428d7b3dSmrgstruct brw_instruction * 750428d7b3dSmrggen6_IF(struct brw_compile *p, uint32_t conditional, 751428d7b3dSmrg struct brw_reg src0, struct brw_reg src1) 752428d7b3dSmrg{ 753428d7b3dSmrg struct brw_instruction *insn; 754428d7b3dSmrg 755428d7b3dSmrg insn = brw_next_insn(p, BRW_OPCODE_IF); 756428d7b3dSmrg 757428d7b3dSmrg brw_set_dest(p, insn, brw_imm_w(0)); 758428d7b3dSmrg if (p->compressed) { 759428d7b3dSmrg insn->header.execution_size = BRW_EXECUTE_16; 760428d7b3dSmrg } else { 761428d7b3dSmrg insn->header.execution_size = BRW_EXECUTE_8; 762428d7b3dSmrg } 763428d7b3dSmrg insn->bits1.branch_gen6.jump_count = 0; 764428d7b3dSmrg brw_set_src0(p, insn, src0); 765428d7b3dSmrg brw_set_src1(p, insn, src1); 766428d7b3dSmrg 767428d7b3dSmrg assert(insn->header.compression_control == BRW_COMPRESSION_NONE); 768428d7b3dSmrg assert(insn->header.predicate_control == BRW_PREDICATE_NONE); 769428d7b3dSmrg insn->header.destreg__conditionalmod = conditional; 770428d7b3dSmrg 771428d7b3dSmrg if (!p->single_program_flow) 772428d7b3dSmrg insn->header.thread_control = BRW_THREAD_SWITCH; 773428d7b3dSmrg 774428d7b3dSmrg push_if_stack(p, insn); 775428d7b3dSmrg return insn; 776428d7b3dSmrg} 777428d7b3dSmrg 778428d7b3dSmrg/** 779428d7b3dSmrg * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs. 
780428d7b3dSmrg */ 781428d7b3dSmrgstatic void 782428d7b3dSmrgconvert_IF_ELSE_to_ADD(struct brw_compile *p, 783428d7b3dSmrg struct brw_instruction *if_inst, 784428d7b3dSmrg struct brw_instruction *else_inst) 785428d7b3dSmrg{ 786428d7b3dSmrg /* The next instruction (where the ENDIF would be, if it existed) */ 787428d7b3dSmrg struct brw_instruction *next_inst = &p->store[p->nr_insn]; 788428d7b3dSmrg 789428d7b3dSmrg assert(p->single_program_flow); 790428d7b3dSmrg assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF); 791428d7b3dSmrg assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE); 792428d7b3dSmrg assert(if_inst->header.execution_size == BRW_EXECUTE_1); 793428d7b3dSmrg 794428d7b3dSmrg /* Convert IF to an ADD instruction that moves the instruction pointer 795428d7b3dSmrg * to the first instruction of the ELSE block. If there is no ELSE 796428d7b3dSmrg * block, point to where ENDIF would be. Reverse the predicate. 797428d7b3dSmrg * 798428d7b3dSmrg * There's no need to execute an ENDIF since we don't need to do any 799428d7b3dSmrg * stack operations, and if we're currently executing, we just want to 800428d7b3dSmrg * continue normally. 801428d7b3dSmrg */ 802428d7b3dSmrg if_inst->header.opcode = BRW_OPCODE_ADD; 803428d7b3dSmrg if_inst->header.predicate_inverse = 1; 804428d7b3dSmrg 805428d7b3dSmrg if (else_inst != NULL) { 806428d7b3dSmrg /* Convert ELSE to an ADD instruction that points where the ENDIF 807428d7b3dSmrg * would be. 808428d7b3dSmrg */ 809428d7b3dSmrg else_inst->header.opcode = BRW_OPCODE_ADD; 810428d7b3dSmrg 811428d7b3dSmrg if_inst->bits3.ud = (else_inst - if_inst + 1) * 16; 812428d7b3dSmrg else_inst->bits3.ud = (next_inst - else_inst) * 16; 813428d7b3dSmrg } else { 814428d7b3dSmrg if_inst->bits3.ud = (next_inst - if_inst) * 16; 815428d7b3dSmrg } 816428d7b3dSmrg} 817428d7b3dSmrg 818428d7b3dSmrg/** 819428d7b3dSmrg * Patch IF and ELSE instructions with appropriate jump targets. 
820428d7b3dSmrg */ 821428d7b3dSmrgstatic void 822428d7b3dSmrgpatch_IF_ELSE(struct brw_compile *p, 823428d7b3dSmrg struct brw_instruction *if_inst, 824428d7b3dSmrg struct brw_instruction *else_inst, 825428d7b3dSmrg struct brw_instruction *endif_inst) 826428d7b3dSmrg{ 827428d7b3dSmrg unsigned br = 1; 828428d7b3dSmrg 829428d7b3dSmrg assert(!p->single_program_flow); 830428d7b3dSmrg assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF); 831428d7b3dSmrg assert(endif_inst != NULL); 832428d7b3dSmrg assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE); 833428d7b3dSmrg 834428d7b3dSmrg /* Jump count is for 64bit data chunk each, so one 128bit instruction 835428d7b3dSmrg * requires 2 chunks. 836428d7b3dSmrg */ 837428d7b3dSmrg if (p->gen >= 050) 838428d7b3dSmrg br = 2; 839428d7b3dSmrg 840428d7b3dSmrg assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF); 841428d7b3dSmrg endif_inst->header.execution_size = if_inst->header.execution_size; 842428d7b3dSmrg 843428d7b3dSmrg if (else_inst == NULL) { 844428d7b3dSmrg /* Patch IF -> ENDIF */ 845428d7b3dSmrg if (p->gen < 060) { 846428d7b3dSmrg /* Turn it into an IFF, which means no mask stack operations for 847428d7b3dSmrg * all-false and jumping past the ENDIF. 848428d7b3dSmrg */ 849428d7b3dSmrg if_inst->header.opcode = BRW_OPCODE_IFF; 850428d7b3dSmrg if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1); 851428d7b3dSmrg if_inst->bits3.if_else.pop_count = 0; 852428d7b3dSmrg if_inst->bits3.if_else.pad0 = 0; 853428d7b3dSmrg } else if (p->gen < 070) { 854428d7b3dSmrg /* As of gen6, there is no IFF and IF must point to the ENDIF. 
*/ 855428d7b3dSmrg if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst); 856428d7b3dSmrg } else { 857428d7b3dSmrg if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst); 858428d7b3dSmrg if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst); 859428d7b3dSmrg } 860428d7b3dSmrg } else { 861428d7b3dSmrg else_inst->header.execution_size = if_inst->header.execution_size; 862428d7b3dSmrg 863428d7b3dSmrg /* Patch IF -> ELSE */ 864428d7b3dSmrg if (p->gen < 060) { 865428d7b3dSmrg if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst); 866428d7b3dSmrg if_inst->bits3.if_else.pop_count = 0; 867428d7b3dSmrg if_inst->bits3.if_else.pad0 = 0; 868428d7b3dSmrg } else if (p->gen <= 070) { 869428d7b3dSmrg if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1); 870428d7b3dSmrg } 871428d7b3dSmrg 872428d7b3dSmrg /* Patch ELSE -> ENDIF */ 873428d7b3dSmrg if (p->gen < 060) { 874428d7b3dSmrg /* BRW_OPCODE_ELSE pre-gen6 should point just past the 875428d7b3dSmrg * matching ENDIF. 876428d7b3dSmrg */ 877428d7b3dSmrg else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1); 878428d7b3dSmrg else_inst->bits3.if_else.pop_count = 1; 879428d7b3dSmrg else_inst->bits3.if_else.pad0 = 0; 880428d7b3dSmrg } else if (p->gen < 070) { 881428d7b3dSmrg /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. 
*/ 882428d7b3dSmrg else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst); 883428d7b3dSmrg } else { 884428d7b3dSmrg /* The IF instruction's JIP should point just past the ELSE */ 885428d7b3dSmrg if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1); 886428d7b3dSmrg /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */ 887428d7b3dSmrg if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst); 888428d7b3dSmrg else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst); 889428d7b3dSmrg } 890428d7b3dSmrg } 891428d7b3dSmrg} 892428d7b3dSmrg 893428d7b3dSmrgvoid 894428d7b3dSmrgbrw_ELSE(struct brw_compile *p) 895428d7b3dSmrg{ 896428d7b3dSmrg struct brw_instruction *insn; 897428d7b3dSmrg 898428d7b3dSmrg insn = brw_next_insn(p, BRW_OPCODE_ELSE); 899428d7b3dSmrg 900428d7b3dSmrg if (p->gen < 060) { 901428d7b3dSmrg brw_set_dest(p, insn, brw_ip_reg()); 902428d7b3dSmrg brw_set_src0(p, insn, brw_ip_reg()); 903428d7b3dSmrg brw_set_src1(p, insn, brw_imm_d(0x0)); 904428d7b3dSmrg } else if (p->gen < 070) { 905428d7b3dSmrg brw_set_dest(p, insn, brw_imm_w(0)); 906428d7b3dSmrg insn->bits1.branch_gen6.jump_count = 0; 907428d7b3dSmrg brw_set_src0(p, insn, __retype_d(brw_null_reg())); 908428d7b3dSmrg brw_set_src1(p, insn, __retype_d(brw_null_reg())); 909428d7b3dSmrg } else { 910428d7b3dSmrg brw_set_dest(p, insn, __retype_d(brw_null_reg())); 911428d7b3dSmrg brw_set_src0(p, insn, __retype_d(brw_null_reg())); 912428d7b3dSmrg brw_set_src1(p, insn, brw_imm_ud(0)); 913428d7b3dSmrg insn->bits3.break_cont.jip = 0; 914428d7b3dSmrg insn->bits3.break_cont.uip = 0; 915428d7b3dSmrg } 916428d7b3dSmrg 917428d7b3dSmrg insn->header.compression_control = BRW_COMPRESSION_NONE; 918428d7b3dSmrg insn->header.mask_control = BRW_MASK_ENABLE; 919428d7b3dSmrg if (!p->single_program_flow) 920428d7b3dSmrg insn->header.thread_control = BRW_THREAD_SWITCH; 921428d7b3dSmrg 922428d7b3dSmrg push_if_stack(p, insn); 923428d7b3dSmrg} 924428d7b3dSmrg 925428d7b3dSmrgvoid 
926428d7b3dSmrgbrw_ENDIF(struct brw_compile *p) 927428d7b3dSmrg{ 928428d7b3dSmrg struct brw_instruction *insn; 929428d7b3dSmrg struct brw_instruction *else_inst = NULL; 930428d7b3dSmrg struct brw_instruction *if_inst = NULL; 931428d7b3dSmrg 932428d7b3dSmrg /* Pop the IF and (optional) ELSE instructions from the stack */ 933428d7b3dSmrg p->if_stack_depth--; 934428d7b3dSmrg if (p->if_stack[p->if_stack_depth]->header.opcode == BRW_OPCODE_ELSE) { 935428d7b3dSmrg else_inst = p->if_stack[p->if_stack_depth]; 936428d7b3dSmrg p->if_stack_depth--; 937428d7b3dSmrg } 938428d7b3dSmrg if_inst = p->if_stack[p->if_stack_depth]; 939428d7b3dSmrg 940428d7b3dSmrg if (p->single_program_flow) { 941428d7b3dSmrg /* ENDIF is useless; don't bother emitting it. */ 942428d7b3dSmrg convert_IF_ELSE_to_ADD(p, if_inst, else_inst); 943428d7b3dSmrg return; 944428d7b3dSmrg } 945428d7b3dSmrg 946428d7b3dSmrg insn = brw_next_insn(p, BRW_OPCODE_ENDIF); 947428d7b3dSmrg 948428d7b3dSmrg if (p->gen < 060) { 949428d7b3dSmrg brw_set_dest(p, insn, __retype_ud(brw_vec4_grf(0,0))); 950428d7b3dSmrg brw_set_src0(p, insn, __retype_ud(brw_vec4_grf(0,0))); 951428d7b3dSmrg brw_set_src1(p, insn, brw_imm_d(0x0)); 952428d7b3dSmrg } else if (p->gen < 070) { 953428d7b3dSmrg brw_set_dest(p, insn, brw_imm_w(0)); 954428d7b3dSmrg brw_set_src0(p, insn, __retype_d(brw_null_reg())); 955428d7b3dSmrg brw_set_src1(p, insn, __retype_d(brw_null_reg())); 956428d7b3dSmrg } else { 957428d7b3dSmrg brw_set_dest(p, insn, __retype_d(brw_null_reg())); 958428d7b3dSmrg brw_set_src0(p, insn, __retype_d(brw_null_reg())); 959428d7b3dSmrg brw_set_src1(p, insn, brw_imm_ud(0)); 960428d7b3dSmrg } 961428d7b3dSmrg 962428d7b3dSmrg insn->header.compression_control = BRW_COMPRESSION_NONE; 963428d7b3dSmrg insn->header.mask_control = BRW_MASK_ENABLE; 964428d7b3dSmrg insn->header.thread_control = BRW_THREAD_SWITCH; 965428d7b3dSmrg 966428d7b3dSmrg /* Also pop item off the stack in the endif instruction: */ 967428d7b3dSmrg if (p->gen < 060) { 968428d7b3dSmrg 
insn->bits3.if_else.jump_count = 0; 969428d7b3dSmrg insn->bits3.if_else.pop_count = 1; 970428d7b3dSmrg insn->bits3.if_else.pad0 = 0; 971428d7b3dSmrg } else if (p->gen < 070) { 972428d7b3dSmrg insn->bits1.branch_gen6.jump_count = 2; 973428d7b3dSmrg } else { 974428d7b3dSmrg insn->bits3.break_cont.jip = 2; 975428d7b3dSmrg } 976428d7b3dSmrg patch_IF_ELSE(p, if_inst, else_inst, insn); 977428d7b3dSmrg} 978428d7b3dSmrg 979428d7b3dSmrgstruct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count) 980428d7b3dSmrg{ 981428d7b3dSmrg struct brw_instruction *insn; 982428d7b3dSmrg 983428d7b3dSmrg insn = brw_next_insn(p, BRW_OPCODE_BREAK); 984428d7b3dSmrg if (p->gen >= 060) { 985428d7b3dSmrg brw_set_dest(p, insn, __retype_d(brw_null_reg())); 986428d7b3dSmrg brw_set_src0(p, insn, __retype_d(brw_null_reg())); 987428d7b3dSmrg brw_set_src1(p, insn, brw_imm_d(0x0)); 988428d7b3dSmrg } else { 989428d7b3dSmrg brw_set_dest(p, insn, brw_ip_reg()); 990428d7b3dSmrg brw_set_src0(p, insn, brw_ip_reg()); 991428d7b3dSmrg brw_set_src1(p, insn, brw_imm_d(0x0)); 992428d7b3dSmrg insn->bits3.if_else.pad0 = 0; 993428d7b3dSmrg insn->bits3.if_else.pop_count = pop_count; 994428d7b3dSmrg } 995428d7b3dSmrg insn->header.compression_control = BRW_COMPRESSION_NONE; 996428d7b3dSmrg insn->header.execution_size = BRW_EXECUTE_8; 997428d7b3dSmrg 998428d7b3dSmrg return insn; 999428d7b3dSmrg} 1000428d7b3dSmrg 1001428d7b3dSmrgstruct brw_instruction *gen6_CONT(struct brw_compile *p, 1002428d7b3dSmrg struct brw_instruction *do_insn) 1003428d7b3dSmrg{ 1004428d7b3dSmrg struct brw_instruction *insn; 1005428d7b3dSmrg 1006428d7b3dSmrg insn = brw_next_insn(p, BRW_OPCODE_CONTINUE); 1007428d7b3dSmrg brw_set_dest(p, insn, __retype_d(brw_null_reg())); 1008428d7b3dSmrg brw_set_src0(p, insn, __retype_d(brw_null_reg())); 1009428d7b3dSmrg brw_set_dest(p, insn, brw_ip_reg()); 1010428d7b3dSmrg brw_set_src0(p, insn, brw_ip_reg()); 1011428d7b3dSmrg brw_set_src1(p, insn, brw_imm_d(0x0)); 1012428d7b3dSmrg 1013428d7b3dSmrg 
insn->header.compression_control = BRW_COMPRESSION_NONE; 1014428d7b3dSmrg insn->header.execution_size = BRW_EXECUTE_8; 1015428d7b3dSmrg return insn; 1016428d7b3dSmrg} 1017428d7b3dSmrg 1018428d7b3dSmrgstruct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count) 1019428d7b3dSmrg{ 1020428d7b3dSmrg struct brw_instruction *insn; 1021428d7b3dSmrg insn = brw_next_insn(p, BRW_OPCODE_CONTINUE); 1022428d7b3dSmrg brw_set_dest(p, insn, brw_ip_reg()); 1023428d7b3dSmrg brw_set_src0(p, insn, brw_ip_reg()); 1024428d7b3dSmrg brw_set_src1(p, insn, brw_imm_d(0x0)); 1025428d7b3dSmrg insn->header.compression_control = BRW_COMPRESSION_NONE; 1026428d7b3dSmrg insn->header.execution_size = BRW_EXECUTE_8; 1027428d7b3dSmrg /* insn->header.mask_control = BRW_MASK_DISABLE; */ 1028428d7b3dSmrg insn->bits3.if_else.pad0 = 0; 1029428d7b3dSmrg insn->bits3.if_else.pop_count = pop_count; 1030428d7b3dSmrg return insn; 1031428d7b3dSmrg} 1032428d7b3dSmrg 1033428d7b3dSmrg/* DO/WHILE loop: 1034428d7b3dSmrg * 1035428d7b3dSmrg * The DO/WHILE is just an unterminated loop -- break or continue are 1036428d7b3dSmrg * used for control within the loop. We have a few ways they can be 1037428d7b3dSmrg * done. 1038428d7b3dSmrg * 1039428d7b3dSmrg * For uniform control flow, the WHILE is just a jump, so ADD ip, ip, 1040428d7b3dSmrg * jip and no DO instruction. 1041428d7b3dSmrg * 1042428d7b3dSmrg * For non-uniform control flow pre-gen6, there's a DO instruction to 1043428d7b3dSmrg * push the mask, and a WHILE to jump back, and BREAK to get out and 1044428d7b3dSmrg * pop the mask. 1045428d7b3dSmrg * 1046428d7b3dSmrg * For gen6, there's no more mask stack, so no need for DO. WHILE 1047428d7b3dSmrg * just points back to the first instruction of the loop. 
1048428d7b3dSmrg */ 1049428d7b3dSmrgstruct brw_instruction *brw_DO(struct brw_compile *p, unsigned execute_size) 1050428d7b3dSmrg{ 1051428d7b3dSmrg if (p->gen >= 060 || p->single_program_flow) { 1052428d7b3dSmrg return &p->store[p->nr_insn]; 1053428d7b3dSmrg } else { 1054428d7b3dSmrg struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_DO); 1055428d7b3dSmrg 1056428d7b3dSmrg /* Override the defaults for this instruction: 1057428d7b3dSmrg */ 1058428d7b3dSmrg brw_set_dest(p, insn, brw_null_reg()); 1059428d7b3dSmrg brw_set_src0(p, insn, brw_null_reg()); 1060428d7b3dSmrg brw_set_src1(p, insn, brw_null_reg()); 1061428d7b3dSmrg 1062428d7b3dSmrg insn->header.compression_control = BRW_COMPRESSION_NONE; 1063428d7b3dSmrg insn->header.execution_size = execute_size; 1064428d7b3dSmrg insn->header.predicate_control = BRW_PREDICATE_NONE; 1065428d7b3dSmrg /* insn->header.mask_control = BRW_MASK_ENABLE; */ 1066428d7b3dSmrg /* insn->header.mask_control = BRW_MASK_DISABLE; */ 1067428d7b3dSmrg 1068428d7b3dSmrg return insn; 1069428d7b3dSmrg } 1070428d7b3dSmrg} 1071428d7b3dSmrg 1072428d7b3dSmrgstruct brw_instruction *brw_WHILE(struct brw_compile *p, 1073428d7b3dSmrg struct brw_instruction *do_insn) 1074428d7b3dSmrg{ 1075428d7b3dSmrg struct brw_instruction *insn; 1076428d7b3dSmrg unsigned br = 1; 1077428d7b3dSmrg 1078428d7b3dSmrg if (p->gen >= 050) 1079428d7b3dSmrg br = 2; 1080428d7b3dSmrg 1081428d7b3dSmrg if (p->gen >= 070) { 1082428d7b3dSmrg insn = brw_next_insn(p, BRW_OPCODE_WHILE); 1083428d7b3dSmrg 1084428d7b3dSmrg brw_set_dest(p, insn, __retype_d(brw_null_reg())); 1085428d7b3dSmrg brw_set_src0(p, insn, __retype_d(brw_null_reg())); 1086428d7b3dSmrg brw_set_src1(p, insn, brw_imm_ud(0)); 1087428d7b3dSmrg insn->bits3.break_cont.jip = br * (do_insn - insn); 1088428d7b3dSmrg 1089428d7b3dSmrg insn->header.execution_size = BRW_EXECUTE_8; 1090428d7b3dSmrg } else if (p->gen >= 060) { 1091428d7b3dSmrg insn = brw_next_insn(p, BRW_OPCODE_WHILE); 1092428d7b3dSmrg 1093428d7b3dSmrg 
brw_set_dest(p, insn, brw_imm_w(0)); 1094428d7b3dSmrg insn->bits1.branch_gen6.jump_count = br * (do_insn - insn); 1095428d7b3dSmrg brw_set_src0(p, insn, __retype_d(brw_null_reg())); 1096428d7b3dSmrg brw_set_src1(p, insn, __retype_d(brw_null_reg())); 1097428d7b3dSmrg 1098428d7b3dSmrg insn->header.execution_size = BRW_EXECUTE_8; 1099428d7b3dSmrg } else { 1100428d7b3dSmrg if (p->single_program_flow) { 1101428d7b3dSmrg insn = brw_next_insn(p, BRW_OPCODE_ADD); 1102428d7b3dSmrg 1103428d7b3dSmrg brw_set_dest(p, insn, brw_ip_reg()); 1104428d7b3dSmrg brw_set_src0(p, insn, brw_ip_reg()); 1105428d7b3dSmrg brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16)); 1106428d7b3dSmrg insn->header.execution_size = BRW_EXECUTE_1; 1107428d7b3dSmrg } else { 1108428d7b3dSmrg insn = brw_next_insn(p, BRW_OPCODE_WHILE); 1109428d7b3dSmrg 1110428d7b3dSmrg assert(do_insn->header.opcode == BRW_OPCODE_DO); 1111428d7b3dSmrg 1112428d7b3dSmrg brw_set_dest(p, insn, brw_ip_reg()); 1113428d7b3dSmrg brw_set_src0(p, insn, brw_ip_reg()); 1114428d7b3dSmrg brw_set_src1(p, insn, brw_imm_d(0)); 1115428d7b3dSmrg 1116428d7b3dSmrg insn->header.execution_size = do_insn->header.execution_size; 1117428d7b3dSmrg insn->bits3.if_else.jump_count = br * (do_insn - insn + 1); 1118428d7b3dSmrg insn->bits3.if_else.pop_count = 0; 1119428d7b3dSmrg insn->bits3.if_else.pad0 = 0; 1120428d7b3dSmrg } 1121428d7b3dSmrg } 1122428d7b3dSmrg insn->header.compression_control = BRW_COMPRESSION_NONE; 1123428d7b3dSmrg p->current->header.predicate_control = BRW_PREDICATE_NONE; 1124428d7b3dSmrg 1125428d7b3dSmrg return insn; 1126428d7b3dSmrg} 1127428d7b3dSmrg 1128428d7b3dSmrg/* FORWARD JUMPS: 1129428d7b3dSmrg */ 1130428d7b3dSmrgvoid brw_land_fwd_jump(struct brw_compile *p, 1131428d7b3dSmrg struct brw_instruction *jmp_insn) 1132428d7b3dSmrg{ 1133428d7b3dSmrg struct brw_instruction *landing = &p->store[p->nr_insn]; 1134428d7b3dSmrg unsigned jmpi = 1; 1135428d7b3dSmrg 1136428d7b3dSmrg if (p->gen >= 050) 1137428d7b3dSmrg jmpi = 2; 
1138428d7b3dSmrg 1139428d7b3dSmrg assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI); 1140428d7b3dSmrg assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE); 1141428d7b3dSmrg 1142428d7b3dSmrg jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1); 1143428d7b3dSmrg} 1144428d7b3dSmrg 1145428d7b3dSmrg 1146428d7b3dSmrg 1147428d7b3dSmrg/* To integrate with the above, it makes sense that the comparison 1148428d7b3dSmrg * instruction should populate the flag register. It might be simpler 1149428d7b3dSmrg * just to use the flag reg for most WM tasks? 1150428d7b3dSmrg */ 1151428d7b3dSmrgvoid brw_CMP(struct brw_compile *p, 1152428d7b3dSmrg struct brw_reg dest, 1153428d7b3dSmrg unsigned conditional, 1154428d7b3dSmrg struct brw_reg src0, 1155428d7b3dSmrg struct brw_reg src1) 1156428d7b3dSmrg{ 1157428d7b3dSmrg struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_CMP); 1158428d7b3dSmrg 1159428d7b3dSmrg insn->header.destreg__conditionalmod = conditional; 1160428d7b3dSmrg brw_set_dest(p, insn, dest); 1161428d7b3dSmrg brw_set_src0(p, insn, src0); 1162428d7b3dSmrg brw_set_src1(p, insn, src1); 1163428d7b3dSmrg 1164428d7b3dSmrg /* Make it so that future instructions will use the computed flag 1165428d7b3dSmrg * value until brw_set_predicate_control_flag_value() is called 1166428d7b3dSmrg * again. 1167428d7b3dSmrg */ 1168428d7b3dSmrg if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE && 1169428d7b3dSmrg dest.nr == 0) { 1170428d7b3dSmrg p->current->header.predicate_control = BRW_PREDICATE_NORMAL; 1171428d7b3dSmrg p->flag_value = 0xff; 1172428d7b3dSmrg } 1173428d7b3dSmrg} 1174428d7b3dSmrg 1175428d7b3dSmrg/* Issue 'wait' instruction for n1, host could program MMIO 1176428d7b3dSmrg to wake up thread. 
*/ 1177428d7b3dSmrgvoid brw_WAIT(struct brw_compile *p) 1178428d7b3dSmrg{ 1179428d7b3dSmrg struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_WAIT); 1180428d7b3dSmrg struct brw_reg src = brw_notification_1_reg(); 1181428d7b3dSmrg 1182428d7b3dSmrg brw_set_dest(p, insn, src); 1183428d7b3dSmrg brw_set_src0(p, insn, src); 1184428d7b3dSmrg brw_set_src1(p, insn, brw_null_reg()); 1185428d7b3dSmrg insn->header.execution_size = 0; /* must */ 1186428d7b3dSmrg insn->header.predicate_control = 0; 1187428d7b3dSmrg insn->header.compression_control = 0; 1188428d7b3dSmrg} 1189428d7b3dSmrg 1190428d7b3dSmrg/*********************************************************************** 1191428d7b3dSmrg * Helpers for the various SEND message types: 1192428d7b3dSmrg */ 1193428d7b3dSmrg 1194428d7b3dSmrg/** Extended math function, float[8]. 1195428d7b3dSmrg */ 1196428d7b3dSmrgvoid brw_math(struct brw_compile *p, 1197428d7b3dSmrg struct brw_reg dest, 1198428d7b3dSmrg unsigned function, 1199428d7b3dSmrg unsigned saturate, 1200428d7b3dSmrg unsigned msg_reg_nr, 1201428d7b3dSmrg struct brw_reg src, 1202428d7b3dSmrg unsigned data_type, 1203428d7b3dSmrg unsigned precision) 1204428d7b3dSmrg{ 1205428d7b3dSmrg if (p->gen >= 060) { 1206428d7b3dSmrg struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_MATH); 1207428d7b3dSmrg 1208428d7b3dSmrg assert(dest.file == BRW_GENERAL_REGISTER_FILE); 1209428d7b3dSmrg assert(src.file == BRW_GENERAL_REGISTER_FILE); 1210428d7b3dSmrg 1211428d7b3dSmrg assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1); 1212428d7b3dSmrg assert(src.hstride == BRW_HORIZONTAL_STRIDE_1); 1213428d7b3dSmrg 1214428d7b3dSmrg /* Source modifiers are ignored for extended math instructions. 
*/ 1215428d7b3dSmrg assert(!src.negate); 1216428d7b3dSmrg assert(!src.abs); 1217428d7b3dSmrg 1218428d7b3dSmrg if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT && 1219428d7b3dSmrg function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) { 1220428d7b3dSmrg assert(src.type == BRW_REGISTER_TYPE_F); 1221428d7b3dSmrg } 1222428d7b3dSmrg 1223428d7b3dSmrg /* Math is the same ISA format as other opcodes, except that CondModifier 1224428d7b3dSmrg * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. 1225428d7b3dSmrg */ 1226428d7b3dSmrg insn->header.destreg__conditionalmod = function; 1227428d7b3dSmrg insn->header.saturate = saturate; 1228428d7b3dSmrg 1229428d7b3dSmrg brw_set_dest(p, insn, dest); 1230428d7b3dSmrg brw_set_src0(p, insn, src); 1231428d7b3dSmrg brw_set_src1(p, insn, brw_null_reg()); 1232428d7b3dSmrg } else { 1233428d7b3dSmrg struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND); 1234428d7b3dSmrg /* Example code doesn't set predicate_control for send 1235428d7b3dSmrg * instructions. 1236428d7b3dSmrg */ 1237428d7b3dSmrg insn->header.predicate_control = 0; 1238428d7b3dSmrg insn->header.destreg__conditionalmod = msg_reg_nr; 1239428d7b3dSmrg 1240428d7b3dSmrg brw_set_dest(p, insn, dest); 1241428d7b3dSmrg brw_set_src0(p, insn, src); 1242428d7b3dSmrg brw_set_math_message(p, insn, function, 1243428d7b3dSmrg src.type == BRW_REGISTER_TYPE_D, 1244428d7b3dSmrg precision, 1245428d7b3dSmrg saturate, 1246428d7b3dSmrg data_type); 1247428d7b3dSmrg } 1248428d7b3dSmrg} 1249428d7b3dSmrg 1250428d7b3dSmrg/** Extended math function, float[8]. 
1251428d7b3dSmrg */ 1252428d7b3dSmrgvoid brw_math2(struct brw_compile *p, 1253428d7b3dSmrg struct brw_reg dest, 1254428d7b3dSmrg unsigned function, 1255428d7b3dSmrg struct brw_reg src0, 1256428d7b3dSmrg struct brw_reg src1) 1257428d7b3dSmrg{ 1258428d7b3dSmrg struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_MATH); 1259428d7b3dSmrg 1260428d7b3dSmrg assert(dest.file == BRW_GENERAL_REGISTER_FILE); 1261428d7b3dSmrg assert(src0.file == BRW_GENERAL_REGISTER_FILE); 1262428d7b3dSmrg assert(src1.file == BRW_GENERAL_REGISTER_FILE); 1263428d7b3dSmrg 1264428d7b3dSmrg assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1); 1265428d7b3dSmrg assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1); 1266428d7b3dSmrg assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1); 1267428d7b3dSmrg 1268428d7b3dSmrg if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT && 1269428d7b3dSmrg function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) { 1270428d7b3dSmrg assert(src0.type == BRW_REGISTER_TYPE_F); 1271428d7b3dSmrg assert(src1.type == BRW_REGISTER_TYPE_F); 1272428d7b3dSmrg } 1273428d7b3dSmrg 1274428d7b3dSmrg /* Source modifiers are ignored for extended math instructions. */ 1275428d7b3dSmrg assert(!src0.negate); 1276428d7b3dSmrg assert(!src0.abs); 1277428d7b3dSmrg assert(!src1.negate); 1278428d7b3dSmrg assert(!src1.abs); 1279428d7b3dSmrg 1280428d7b3dSmrg /* Math is the same ISA format as other opcodes, except that CondModifier 1281428d7b3dSmrg * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. 1282428d7b3dSmrg */ 1283428d7b3dSmrg insn->header.destreg__conditionalmod = function; 1284428d7b3dSmrg 1285428d7b3dSmrg brw_set_dest(p, insn, dest); 1286428d7b3dSmrg brw_set_src0(p, insn, src0); 1287428d7b3dSmrg brw_set_src1(p, insn, src1); 1288428d7b3dSmrg} 1289428d7b3dSmrg 1290428d7b3dSmrg/** 1291428d7b3dSmrg * Extended math function, float[16]. 1292428d7b3dSmrg * Use 2 send instructions. 
1293428d7b3dSmrg */ 1294428d7b3dSmrgvoid brw_math_16(struct brw_compile *p, 1295428d7b3dSmrg struct brw_reg dest, 1296428d7b3dSmrg unsigned function, 1297428d7b3dSmrg unsigned saturate, 1298428d7b3dSmrg unsigned msg_reg_nr, 1299428d7b3dSmrg struct brw_reg src, 1300428d7b3dSmrg unsigned precision) 1301428d7b3dSmrg{ 1302428d7b3dSmrg struct brw_instruction *insn; 1303428d7b3dSmrg 1304428d7b3dSmrg if (p->gen >= 060) { 1305428d7b3dSmrg insn = brw_next_insn(p, BRW_OPCODE_MATH); 1306428d7b3dSmrg 1307428d7b3dSmrg /* Math is the same ISA format as other opcodes, except that CondModifier 1308428d7b3dSmrg * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. 1309428d7b3dSmrg */ 1310428d7b3dSmrg insn->header.destreg__conditionalmod = function; 1311428d7b3dSmrg insn->header.saturate = saturate; 1312428d7b3dSmrg 1313428d7b3dSmrg /* Source modifiers are ignored for extended math instructions. */ 1314428d7b3dSmrg assert(!src.negate); 1315428d7b3dSmrg assert(!src.abs); 1316428d7b3dSmrg 1317428d7b3dSmrg brw_set_dest(p, insn, dest); 1318428d7b3dSmrg brw_set_src0(p, insn, src); 1319428d7b3dSmrg brw_set_src1(p, insn, brw_null_reg()); 1320428d7b3dSmrg return; 1321428d7b3dSmrg } 1322428d7b3dSmrg 1323428d7b3dSmrg /* First instruction: 1324428d7b3dSmrg */ 1325428d7b3dSmrg brw_push_insn_state(p); 1326428d7b3dSmrg brw_set_predicate_control_flag_value(p, 0xff); 1327428d7b3dSmrg brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1328428d7b3dSmrg 1329428d7b3dSmrg insn = brw_next_insn(p, BRW_OPCODE_SEND); 1330428d7b3dSmrg insn->header.destreg__conditionalmod = msg_reg_nr; 1331428d7b3dSmrg 1332428d7b3dSmrg brw_set_dest(p, insn, dest); 1333428d7b3dSmrg brw_set_src0(p, insn, src); 1334428d7b3dSmrg brw_set_math_message(p, insn, function, 1335428d7b3dSmrg BRW_MATH_INTEGER_UNSIGNED, 1336428d7b3dSmrg precision, 1337428d7b3dSmrg saturate, 1338428d7b3dSmrg BRW_MATH_DATA_VECTOR); 1339428d7b3dSmrg 1340428d7b3dSmrg /* Second instruction: 1341428d7b3dSmrg */ 1342428d7b3dSmrg insn = brw_next_insn(p, 
BRW_OPCODE_SEND); 1343428d7b3dSmrg insn->header.compression_control = BRW_COMPRESSION_2NDHALF; 1344428d7b3dSmrg insn->header.destreg__conditionalmod = msg_reg_nr+1; 1345428d7b3dSmrg 1346428d7b3dSmrg brw_set_dest(p, insn, __offset(dest,1)); 1347428d7b3dSmrg brw_set_src0(p, insn, src); 1348428d7b3dSmrg brw_set_math_message(p, insn, function, 1349428d7b3dSmrg BRW_MATH_INTEGER_UNSIGNED, 1350428d7b3dSmrg precision, 1351428d7b3dSmrg saturate, 1352428d7b3dSmrg BRW_MATH_DATA_VECTOR); 1353428d7b3dSmrg 1354428d7b3dSmrg brw_pop_insn_state(p); 1355428d7b3dSmrg} 1356428d7b3dSmrg 1357428d7b3dSmrg/** 1358428d7b3dSmrg * Write a block of OWORDs (half a GRF each) from the scratch buffer, 1359428d7b3dSmrg * using a constant offset per channel. 1360428d7b3dSmrg * 1361428d7b3dSmrg * The offset must be aligned to oword size (16 bytes). Used for 1362428d7b3dSmrg * register spilling. 1363428d7b3dSmrg */ 1364428d7b3dSmrgvoid brw_oword_block_write_scratch(struct brw_compile *p, 1365428d7b3dSmrg struct brw_reg mrf, 1366428d7b3dSmrg int num_regs, 1367428d7b3dSmrg unsigned offset) 1368428d7b3dSmrg{ 1369428d7b3dSmrg uint32_t msg_control, msg_type; 1370428d7b3dSmrg int mlen; 1371428d7b3dSmrg 1372428d7b3dSmrg if (p->gen >= 060) 1373428d7b3dSmrg offset /= 16; 1374428d7b3dSmrg 1375428d7b3dSmrg mrf = __retype_ud(mrf); 1376428d7b3dSmrg 1377428d7b3dSmrg if (num_regs == 1) { 1378428d7b3dSmrg msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS; 1379428d7b3dSmrg mlen = 2; 1380428d7b3dSmrg } else { 1381428d7b3dSmrg msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS; 1382428d7b3dSmrg mlen = 3; 1383428d7b3dSmrg } 1384428d7b3dSmrg 1385428d7b3dSmrg /* Set up the message header. This is g0, with g0.2 filled with 1386428d7b3dSmrg * the offset. We don't want to leave our offset around in g0 or 1387428d7b3dSmrg * it'll screw up texture samples, so set it up inside the message 1388428d7b3dSmrg * reg. 
1389428d7b3dSmrg */ 1390428d7b3dSmrg { 1391428d7b3dSmrg brw_push_insn_state(p); 1392428d7b3dSmrg brw_set_mask_control(p, BRW_MASK_DISABLE); 1393428d7b3dSmrg brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1394428d7b3dSmrg 1395428d7b3dSmrg brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0))); 1396428d7b3dSmrg 1397428d7b3dSmrg /* set message header global offset field (reg 0, element 2) */ 1398428d7b3dSmrg brw_MOV(p, 1399428d7b3dSmrg __retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, mrf.nr, 2)), 1400428d7b3dSmrg brw_imm_ud(offset)); 1401428d7b3dSmrg 1402428d7b3dSmrg brw_pop_insn_state(p); 1403428d7b3dSmrg } 1404428d7b3dSmrg 1405428d7b3dSmrg { 1406428d7b3dSmrg struct brw_reg dest; 1407428d7b3dSmrg struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND); 1408428d7b3dSmrg int send_commit_msg; 1409428d7b3dSmrg struct brw_reg src_header = __retype_uw(brw_vec8_grf(0, 0)); 1410428d7b3dSmrg 1411428d7b3dSmrg if (insn->header.compression_control != BRW_COMPRESSION_NONE) { 1412428d7b3dSmrg insn->header.compression_control = BRW_COMPRESSION_NONE; 1413428d7b3dSmrg src_header = vec16(src_header); 1414428d7b3dSmrg } 1415428d7b3dSmrg assert(insn->header.predicate_control == BRW_PREDICATE_NONE); 1416428d7b3dSmrg insn->header.destreg__conditionalmod = mrf.nr; 1417428d7b3dSmrg 1418428d7b3dSmrg /* Until gen6, writes followed by reads from the same location 1419428d7b3dSmrg * are not guaranteed to be ordered unless write_commit is set. 1420428d7b3dSmrg * If set, then a no-op write is issued to the destination 1421428d7b3dSmrg * register to set a dependency, and a read from the destination 1422428d7b3dSmrg * can be used to ensure the ordering. 1423428d7b3dSmrg * 1424428d7b3dSmrg * For gen6, only writes between different threads need ordering 1425428d7b3dSmrg * protection. Our use of DP writes is all about register 1426428d7b3dSmrg * spilling within a thread. 
1427428d7b3dSmrg */ 1428428d7b3dSmrg if (p->gen >= 060) { 1429428d7b3dSmrg dest = __retype_uw(vec16(brw_null_reg())); 1430428d7b3dSmrg send_commit_msg = 0; 1431428d7b3dSmrg } else { 1432428d7b3dSmrg dest = src_header; 1433428d7b3dSmrg send_commit_msg = 1; 1434428d7b3dSmrg } 1435428d7b3dSmrg 1436428d7b3dSmrg brw_set_dest(p, insn, dest); 1437428d7b3dSmrg if (p->gen >= 060) { 1438428d7b3dSmrg brw_set_src0(p, insn, mrf); 1439428d7b3dSmrg } else { 1440428d7b3dSmrg brw_set_src0(p, insn, brw_null_reg()); 1441428d7b3dSmrg } 1442428d7b3dSmrg 1443428d7b3dSmrg if (p->gen >= 060) 1444428d7b3dSmrg msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE; 1445428d7b3dSmrg else 1446428d7b3dSmrg msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE; 1447428d7b3dSmrg 1448428d7b3dSmrg brw_set_dp_write_message(p, 1449428d7b3dSmrg insn, 1450428d7b3dSmrg 255, /* binding table index (255=stateless) */ 1451428d7b3dSmrg msg_control, 1452428d7b3dSmrg msg_type, 1453428d7b3dSmrg mlen, 1454428d7b3dSmrg true, /* header_present */ 1455428d7b3dSmrg 0, /* pixel scoreboard */ 1456428d7b3dSmrg send_commit_msg, /* response_length */ 1457428d7b3dSmrg 0, /* eot */ 1458428d7b3dSmrg send_commit_msg); 1459428d7b3dSmrg } 1460428d7b3dSmrg} 1461428d7b3dSmrg 1462428d7b3dSmrg 1463428d7b3dSmrg/** 1464428d7b3dSmrg * Read a block of owords (half a GRF each) from the scratch buffer 1465428d7b3dSmrg * using a constant index per channel. 1466428d7b3dSmrg * 1467428d7b3dSmrg * Offset must be aligned to oword size (16 bytes). Used for register 1468428d7b3dSmrg * spilling. 
1469428d7b3dSmrg */ 1470428d7b3dSmrgvoid 1471428d7b3dSmrgbrw_oword_block_read_scratch(struct brw_compile *p, 1472428d7b3dSmrg struct brw_reg dest, 1473428d7b3dSmrg struct brw_reg mrf, 1474428d7b3dSmrg int num_regs, 1475428d7b3dSmrg unsigned offset) 1476428d7b3dSmrg{ 1477428d7b3dSmrg uint32_t msg_control; 1478428d7b3dSmrg int rlen; 1479428d7b3dSmrg 1480428d7b3dSmrg if (p->gen >= 060) 1481428d7b3dSmrg offset /= 16; 1482428d7b3dSmrg 1483428d7b3dSmrg mrf = __retype_ud(mrf); 1484428d7b3dSmrg dest = __retype_uw(dest); 1485428d7b3dSmrg 1486428d7b3dSmrg if (num_regs == 1) { 1487428d7b3dSmrg msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS; 1488428d7b3dSmrg rlen = 1; 1489428d7b3dSmrg } else { 1490428d7b3dSmrg msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS; 1491428d7b3dSmrg rlen = 2; 1492428d7b3dSmrg } 1493428d7b3dSmrg 1494428d7b3dSmrg { 1495428d7b3dSmrg brw_push_insn_state(p); 1496428d7b3dSmrg brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1497428d7b3dSmrg brw_set_mask_control(p, BRW_MASK_DISABLE); 1498428d7b3dSmrg 1499428d7b3dSmrg brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0))); 1500428d7b3dSmrg 1501428d7b3dSmrg /* set message header global offset field (reg 0, element 2) */ 1502428d7b3dSmrg brw_MOV(p, 1503428d7b3dSmrg __retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, mrf.nr, 2)), 1504428d7b3dSmrg brw_imm_ud(offset)); 1505428d7b3dSmrg 1506428d7b3dSmrg brw_pop_insn_state(p); 1507428d7b3dSmrg } 1508428d7b3dSmrg 1509428d7b3dSmrg { 1510428d7b3dSmrg struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND); 1511428d7b3dSmrg 1512428d7b3dSmrg assert(insn->header.predicate_control == 0); 1513428d7b3dSmrg insn->header.compression_control = BRW_COMPRESSION_NONE; 1514428d7b3dSmrg insn->header.destreg__conditionalmod = mrf.nr; 1515428d7b3dSmrg 1516428d7b3dSmrg brw_set_dest(p, insn, dest); /* UW? 
*/ 1517428d7b3dSmrg if (p->gen >= 060) { 1518428d7b3dSmrg brw_set_src0(p, insn, mrf); 1519428d7b3dSmrg } else { 1520428d7b3dSmrg brw_set_src0(p, insn, brw_null_reg()); 1521428d7b3dSmrg } 1522428d7b3dSmrg 1523428d7b3dSmrg brw_set_dp_read_message(p, 1524428d7b3dSmrg insn, 1525428d7b3dSmrg 255, /* binding table index (255=stateless) */ 1526428d7b3dSmrg msg_control, 1527428d7b3dSmrg BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ 1528428d7b3dSmrg BRW_DATAPORT_READ_TARGET_RENDER_CACHE, 1529428d7b3dSmrg 1, /* msg_length */ 1530428d7b3dSmrg rlen); 1531428d7b3dSmrg } 1532428d7b3dSmrg} 1533428d7b3dSmrg 1534428d7b3dSmrg/** 1535428d7b3dSmrg * Read a float[4] vector from the data port Data Cache (const buffer). 1536428d7b3dSmrg * Location (in buffer) should be a multiple of 16. 1537428d7b3dSmrg * Used for fetching shader constants. 1538428d7b3dSmrg */ 1539428d7b3dSmrgvoid brw_oword_block_read(struct brw_compile *p, 1540428d7b3dSmrg struct brw_reg dest, 1541428d7b3dSmrg struct brw_reg mrf, 1542428d7b3dSmrg uint32_t offset, 1543428d7b3dSmrg uint32_t bind_table_index) 1544428d7b3dSmrg{ 1545428d7b3dSmrg struct brw_instruction *insn; 1546428d7b3dSmrg 1547428d7b3dSmrg /* On newer hardware, offset is in units of owords. 
*/ 1548428d7b3dSmrg if (p->gen >= 060) 1549428d7b3dSmrg offset /= 16; 1550428d7b3dSmrg 1551428d7b3dSmrg mrf = __retype_ud(mrf); 1552428d7b3dSmrg 1553428d7b3dSmrg brw_push_insn_state(p); 1554428d7b3dSmrg brw_set_predicate_control(p, BRW_PREDICATE_NONE); 1555428d7b3dSmrg brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1556428d7b3dSmrg brw_set_mask_control(p, BRW_MASK_DISABLE); 1557428d7b3dSmrg 1558428d7b3dSmrg brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0))); 1559428d7b3dSmrg 1560428d7b3dSmrg /* set message header global offset field (reg 0, element 2) */ 1561428d7b3dSmrg brw_MOV(p, 1562428d7b3dSmrg __retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, mrf.nr, 2)), 1563428d7b3dSmrg brw_imm_ud(offset)); 1564428d7b3dSmrg 1565428d7b3dSmrg insn = brw_next_insn(p, BRW_OPCODE_SEND); 1566428d7b3dSmrg insn->header.destreg__conditionalmod = mrf.nr; 1567428d7b3dSmrg 1568428d7b3dSmrg /* cast dest to a uword[8] vector */ 1569428d7b3dSmrg dest = __retype_uw(vec8(dest)); 1570428d7b3dSmrg 1571428d7b3dSmrg brw_set_dest(p, insn, dest); 1572428d7b3dSmrg if (p->gen >= 060) { 1573428d7b3dSmrg brw_set_src0(p, insn, mrf); 1574428d7b3dSmrg } else { 1575428d7b3dSmrg brw_set_src0(p, insn, brw_null_reg()); 1576428d7b3dSmrg } 1577428d7b3dSmrg 1578428d7b3dSmrg brw_set_dp_read_message(p, 1579428d7b3dSmrg insn, 1580428d7b3dSmrg bind_table_index, 1581428d7b3dSmrg BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW, 1582428d7b3dSmrg BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, 1583428d7b3dSmrg BRW_DATAPORT_READ_TARGET_DATA_CACHE, 1584428d7b3dSmrg 1, /* msg_length */ 1585428d7b3dSmrg 1); /* response_length (1 reg, 2 owords!) */ 1586428d7b3dSmrg 1587428d7b3dSmrg brw_pop_insn_state(p); 1588428d7b3dSmrg} 1589428d7b3dSmrg 1590428d7b3dSmrg/** 1591428d7b3dSmrg * Read a set of dwords from the data port Data Cache (const buffer). 1592428d7b3dSmrg * 1593428d7b3dSmrg * Location (in buffer) appears as UD offsets in the register after 1594428d7b3dSmrg * the provided mrf header reg. 
1595428d7b3dSmrg */ 1596428d7b3dSmrgvoid brw_dword_scattered_read(struct brw_compile *p, 1597428d7b3dSmrg struct brw_reg dest, 1598428d7b3dSmrg struct brw_reg mrf, 1599428d7b3dSmrg uint32_t bind_table_index) 1600428d7b3dSmrg{ 1601428d7b3dSmrg struct brw_instruction *insn; 1602428d7b3dSmrg 1603428d7b3dSmrg mrf = __retype_ud(mrf); 1604428d7b3dSmrg 1605428d7b3dSmrg brw_push_insn_state(p); 1606428d7b3dSmrg brw_set_predicate_control(p, BRW_PREDICATE_NONE); 1607428d7b3dSmrg brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1608428d7b3dSmrg brw_set_mask_control(p, BRW_MASK_DISABLE); 1609428d7b3dSmrg brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0))); 1610428d7b3dSmrg brw_pop_insn_state(p); 1611428d7b3dSmrg 1612428d7b3dSmrg insn = brw_next_insn(p, BRW_OPCODE_SEND); 1613428d7b3dSmrg insn->header.destreg__conditionalmod = mrf.nr; 1614428d7b3dSmrg 1615428d7b3dSmrg /* cast dest to a uword[8] vector */ 1616428d7b3dSmrg dest = __retype_uw(vec8(dest)); 1617428d7b3dSmrg 1618428d7b3dSmrg brw_set_dest(p, insn, dest); 1619428d7b3dSmrg brw_set_src0(p, insn, brw_null_reg()); 1620428d7b3dSmrg 1621428d7b3dSmrg brw_set_dp_read_message(p, 1622428d7b3dSmrg insn, 1623428d7b3dSmrg bind_table_index, 1624428d7b3dSmrg BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS, 1625428d7b3dSmrg BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ, 1626428d7b3dSmrg BRW_DATAPORT_READ_TARGET_DATA_CACHE, 1627428d7b3dSmrg 2, /* msg_length */ 1628428d7b3dSmrg 1); /* response_length */ 1629428d7b3dSmrg} 1630428d7b3dSmrg 1631428d7b3dSmrg/** 1632428d7b3dSmrg * Read float[4] constant(s) from VS constant buffer. 1633428d7b3dSmrg * For relative addressing, two float[4] constants will be read into 'dest'. 1634428d7b3dSmrg * Otherwise, one float[4] constant will be read into the lower half of 'dest'. 
1635428d7b3dSmrg */ 1636428d7b3dSmrgvoid brw_dp_READ_4_vs(struct brw_compile *p, 1637428d7b3dSmrg struct brw_reg dest, 1638428d7b3dSmrg unsigned location, 1639428d7b3dSmrg unsigned bind_table_index) 1640428d7b3dSmrg{ 1641428d7b3dSmrg struct brw_instruction *insn; 1642428d7b3dSmrg unsigned msg_reg_nr = 1; 1643428d7b3dSmrg 1644428d7b3dSmrg if (p->gen >= 060) 1645428d7b3dSmrg location /= 16; 1646428d7b3dSmrg 1647428d7b3dSmrg /* Setup MRF[1] with location/offset into const buffer */ 1648428d7b3dSmrg brw_push_insn_state(p); 1649428d7b3dSmrg brw_set_access_mode(p, BRW_ALIGN_1); 1650428d7b3dSmrg brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1651428d7b3dSmrg brw_set_mask_control(p, BRW_MASK_DISABLE); 1652428d7b3dSmrg brw_set_predicate_control(p, BRW_PREDICATE_NONE); 1653428d7b3dSmrg brw_MOV(p, __retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 2)), 1654428d7b3dSmrg brw_imm_ud(location)); 1655428d7b3dSmrg brw_pop_insn_state(p); 1656428d7b3dSmrg 1657428d7b3dSmrg insn = brw_next_insn(p, BRW_OPCODE_SEND); 1658428d7b3dSmrg 1659428d7b3dSmrg insn->header.predicate_control = BRW_PREDICATE_NONE; 1660428d7b3dSmrg insn->header.compression_control = BRW_COMPRESSION_NONE; 1661428d7b3dSmrg insn->header.destreg__conditionalmod = msg_reg_nr; 1662428d7b3dSmrg insn->header.mask_control = BRW_MASK_DISABLE; 1663428d7b3dSmrg 1664428d7b3dSmrg brw_set_dest(p, insn, dest); 1665428d7b3dSmrg if (p->gen >= 060) { 1666428d7b3dSmrg brw_set_src0(p, insn, brw_message_reg(msg_reg_nr)); 1667428d7b3dSmrg } else { 1668428d7b3dSmrg brw_set_src0(p, insn, brw_null_reg()); 1669428d7b3dSmrg } 1670428d7b3dSmrg 1671428d7b3dSmrg brw_set_dp_read_message(p, 1672428d7b3dSmrg insn, 1673428d7b3dSmrg bind_table_index, 1674428d7b3dSmrg 0, 1675428d7b3dSmrg BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ 1676428d7b3dSmrg BRW_DATAPORT_READ_TARGET_DATA_CACHE, 1677428d7b3dSmrg 1, /* msg_length */ 1678428d7b3dSmrg 1); /* response_length (1 Oword) */ 1679428d7b3dSmrg} 1680428d7b3dSmrg 
1681428d7b3dSmrg/** 1682428d7b3dSmrg * Read a float[4] constant per vertex from VS constant buffer, with 1683428d7b3dSmrg * relative addressing. 1684428d7b3dSmrg */ 1685428d7b3dSmrgvoid brw_dp_READ_4_vs_relative(struct brw_compile *p, 1686428d7b3dSmrg struct brw_reg dest, 1687428d7b3dSmrg struct brw_reg addr_reg, 1688428d7b3dSmrg unsigned offset, 1689428d7b3dSmrg unsigned bind_table_index) 1690428d7b3dSmrg{ 1691428d7b3dSmrg struct brw_reg src = brw_vec8_grf(0, 0); 1692428d7b3dSmrg struct brw_instruction *insn; 1693428d7b3dSmrg int msg_type; 1694428d7b3dSmrg 1695428d7b3dSmrg /* Setup MRF[1] with offset into const buffer */ 1696428d7b3dSmrg brw_push_insn_state(p); 1697428d7b3dSmrg brw_set_access_mode(p, BRW_ALIGN_1); 1698428d7b3dSmrg brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1699428d7b3dSmrg brw_set_mask_control(p, BRW_MASK_DISABLE); 1700428d7b3dSmrg brw_set_predicate_control(p, BRW_PREDICATE_NONE); 1701428d7b3dSmrg 1702428d7b3dSmrg /* M1.0 is block offset 0, M1.4 is block offset 1, all other 1703428d7b3dSmrg * fields ignored. 
1704428d7b3dSmrg */ 1705428d7b3dSmrg brw_ADD(p, __retype_d(brw_message_reg(1)), 1706428d7b3dSmrg addr_reg, brw_imm_d(offset)); 1707428d7b3dSmrg brw_pop_insn_state(p); 1708428d7b3dSmrg 1709428d7b3dSmrg gen6_resolve_implied_move(p, &src, 0); 1710428d7b3dSmrg 1711428d7b3dSmrg insn = brw_next_insn(p, BRW_OPCODE_SEND); 1712428d7b3dSmrg insn->header.predicate_control = BRW_PREDICATE_NONE; 1713428d7b3dSmrg insn->header.compression_control = BRW_COMPRESSION_NONE; 1714428d7b3dSmrg insn->header.destreg__conditionalmod = 0; 1715428d7b3dSmrg insn->header.mask_control = BRW_MASK_DISABLE; 1716428d7b3dSmrg 1717428d7b3dSmrg brw_set_dest(p, insn, dest); 1718428d7b3dSmrg brw_set_src0(p, insn, src); 1719428d7b3dSmrg 1720428d7b3dSmrg if (p->gen >= 060) 1721428d7b3dSmrg msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; 1722428d7b3dSmrg else if (p->gen >= 045) 1723428d7b3dSmrg msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; 1724428d7b3dSmrg else 1725428d7b3dSmrg msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; 1726428d7b3dSmrg 1727428d7b3dSmrg brw_set_dp_read_message(p, 1728428d7b3dSmrg insn, 1729428d7b3dSmrg bind_table_index, 1730428d7b3dSmrg BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD, 1731428d7b3dSmrg msg_type, 1732428d7b3dSmrg BRW_DATAPORT_READ_TARGET_DATA_CACHE, 1733428d7b3dSmrg 2, /* msg_length */ 1734428d7b3dSmrg 1); /* response_length */ 1735428d7b3dSmrg} 1736428d7b3dSmrg 1737428d7b3dSmrgvoid brw_fb_WRITE(struct brw_compile *p, 1738428d7b3dSmrg int dispatch_width, 1739428d7b3dSmrg unsigned msg_reg_nr, 1740428d7b3dSmrg struct brw_reg src0, 1741428d7b3dSmrg unsigned msg_control, 1742428d7b3dSmrg unsigned binding_table_index, 1743428d7b3dSmrg unsigned msg_length, 1744428d7b3dSmrg unsigned response_length, 1745428d7b3dSmrg bool eot, 1746428d7b3dSmrg bool header_present) 1747428d7b3dSmrg{ 1748428d7b3dSmrg struct brw_instruction *insn; 1749428d7b3dSmrg unsigned msg_type; 1750428d7b3dSmrg struct brw_reg dest; 1751428d7b3dSmrg 1752428d7b3dSmrg if 
(dispatch_width == 16) 1753428d7b3dSmrg dest = __retype_uw(vec16(brw_null_reg())); 1754428d7b3dSmrg else 1755428d7b3dSmrg dest = __retype_uw(vec8(brw_null_reg())); 1756428d7b3dSmrg 1757428d7b3dSmrg if (p->gen >= 060 && binding_table_index == 0) { 1758428d7b3dSmrg insn = brw_next_insn(p, BRW_OPCODE_SENDC); 1759428d7b3dSmrg } else { 1760428d7b3dSmrg insn = brw_next_insn(p, BRW_OPCODE_SEND); 1761428d7b3dSmrg } 1762428d7b3dSmrg /* The execution mask is ignored for render target writes. */ 1763428d7b3dSmrg insn->header.predicate_control = 0; 1764428d7b3dSmrg insn->header.compression_control = BRW_COMPRESSION_NONE; 1765428d7b3dSmrg 1766428d7b3dSmrg if (p->gen >= 060) { 1767428d7b3dSmrg /* headerless version, just submit color payload */ 1768428d7b3dSmrg src0 = brw_message_reg(msg_reg_nr); 1769428d7b3dSmrg 1770428d7b3dSmrg msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE; 1771428d7b3dSmrg } else { 1772428d7b3dSmrg insn->header.destreg__conditionalmod = msg_reg_nr; 1773428d7b3dSmrg 1774428d7b3dSmrg msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE; 1775428d7b3dSmrg } 1776428d7b3dSmrg 1777428d7b3dSmrg brw_set_dest(p, insn, dest); 1778428d7b3dSmrg brw_set_src0(p, insn, src0); 1779428d7b3dSmrg brw_set_dp_write_message(p, 1780428d7b3dSmrg insn, 1781428d7b3dSmrg binding_table_index, 1782428d7b3dSmrg msg_control, 1783428d7b3dSmrg msg_type, 1784428d7b3dSmrg msg_length, 1785428d7b3dSmrg header_present, 1786428d7b3dSmrg eot, 1787428d7b3dSmrg response_length, 1788428d7b3dSmrg eot, 1789428d7b3dSmrg 0 /* send_commit_msg */); 1790428d7b3dSmrg} 1791428d7b3dSmrg 1792428d7b3dSmrg/** 1793428d7b3dSmrg * Texture sample instruction. 1794428d7b3dSmrg * Note: the msg_type plus msg_length values determine exactly what kind 1795428d7b3dSmrg * of sampling operation is performed. See volume 4, page 161 of docs. 
1796428d7b3dSmrg */ 1797428d7b3dSmrgvoid brw_SAMPLE(struct brw_compile *p, 1798428d7b3dSmrg struct brw_reg dest, 1799428d7b3dSmrg unsigned msg_reg_nr, 1800428d7b3dSmrg struct brw_reg src0, 1801428d7b3dSmrg unsigned binding_table_index, 1802428d7b3dSmrg unsigned sampler, 1803428d7b3dSmrg unsigned writemask, 1804428d7b3dSmrg unsigned msg_type, 1805428d7b3dSmrg unsigned response_length, 1806428d7b3dSmrg unsigned msg_length, 1807428d7b3dSmrg bool header_present, 1808428d7b3dSmrg unsigned simd_mode) 1809428d7b3dSmrg{ 1810428d7b3dSmrg assert(writemask); 1811428d7b3dSmrg 1812428d7b3dSmrg if (p->gen < 050 || writemask != WRITEMASK_XYZW) { 1813428d7b3dSmrg struct brw_reg m1 = brw_message_reg(msg_reg_nr); 1814428d7b3dSmrg 1815428d7b3dSmrg writemask = ~writemask & WRITEMASK_XYZW; 1816428d7b3dSmrg 1817428d7b3dSmrg brw_push_insn_state(p); 1818428d7b3dSmrg 1819428d7b3dSmrg brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1820428d7b3dSmrg brw_set_mask_control(p, BRW_MASK_DISABLE); 1821428d7b3dSmrg 1822428d7b3dSmrg brw_MOV(p, __retype_ud(m1), __retype_ud(brw_vec8_grf(0,0))); 1823428d7b3dSmrg brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(writemask << 12)); 1824428d7b3dSmrg 1825428d7b3dSmrg brw_pop_insn_state(p); 1826428d7b3dSmrg 1827428d7b3dSmrg src0 = __retype_uw(brw_null_reg()); 1828428d7b3dSmrg } 1829428d7b3dSmrg 1830428d7b3dSmrg { 1831428d7b3dSmrg struct brw_instruction *insn; 1832428d7b3dSmrg 1833428d7b3dSmrg gen6_resolve_implied_move(p, &src0, msg_reg_nr); 1834428d7b3dSmrg 1835428d7b3dSmrg insn = brw_next_insn(p, BRW_OPCODE_SEND); 1836428d7b3dSmrg insn->header.predicate_control = 0; /* XXX */ 1837428d7b3dSmrg insn->header.compression_control = BRW_COMPRESSION_NONE; 1838428d7b3dSmrg if (p->gen < 060) 1839428d7b3dSmrg insn->header.destreg__conditionalmod = msg_reg_nr; 1840428d7b3dSmrg 1841428d7b3dSmrg brw_set_dest(p, insn, dest); 1842428d7b3dSmrg brw_set_src0(p, insn, src0); 1843428d7b3dSmrg brw_set_sampler_message(p, insn, 1844428d7b3dSmrg binding_table_index, 
1845428d7b3dSmrg sampler, 1846428d7b3dSmrg msg_type, 1847428d7b3dSmrg response_length, 1848428d7b3dSmrg msg_length, 1849428d7b3dSmrg header_present, 1850428d7b3dSmrg simd_mode); 1851428d7b3dSmrg } 1852428d7b3dSmrg} 1853428d7b3dSmrg 1854428d7b3dSmrg/* All these variables are pretty confusing - we might be better off 1855428d7b3dSmrg * using bitmasks and macros for this, in the old style. Or perhaps 1856428d7b3dSmrg * just having the caller instantiate the fields in dword3 itself. 1857428d7b3dSmrg */ 1858428d7b3dSmrgvoid brw_urb_WRITE(struct brw_compile *p, 1859428d7b3dSmrg struct brw_reg dest, 1860428d7b3dSmrg unsigned msg_reg_nr, 1861428d7b3dSmrg struct brw_reg src0, 1862428d7b3dSmrg bool allocate, 1863428d7b3dSmrg bool used, 1864428d7b3dSmrg unsigned msg_length, 1865428d7b3dSmrg unsigned response_length, 1866428d7b3dSmrg bool eot, 1867428d7b3dSmrg bool writes_complete, 1868428d7b3dSmrg unsigned offset, 1869428d7b3dSmrg unsigned swizzle) 1870428d7b3dSmrg{ 1871428d7b3dSmrg struct brw_instruction *insn; 1872428d7b3dSmrg 1873428d7b3dSmrg gen6_resolve_implied_move(p, &src0, msg_reg_nr); 1874428d7b3dSmrg 1875428d7b3dSmrg if (p->gen >= 070) { 1876428d7b3dSmrg /* Enable Channel Masks in the URB_WRITE_HWORD message header */ 1877428d7b3dSmrg brw_push_insn_state(p); 1878428d7b3dSmrg brw_set_access_mode(p, BRW_ALIGN_1); 1879428d7b3dSmrg brw_OR(p, __retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5)), 1880428d7b3dSmrg __retype_ud(brw_vec1_grf(0, 5)), 1881428d7b3dSmrg brw_imm_ud(0xff00)); 1882428d7b3dSmrg brw_pop_insn_state(p); 1883428d7b3dSmrg } 1884428d7b3dSmrg 1885428d7b3dSmrg insn = brw_next_insn(p, BRW_OPCODE_SEND); 1886428d7b3dSmrg 1887428d7b3dSmrg assert(msg_length < BRW_MAX_MRF); 1888428d7b3dSmrg 1889428d7b3dSmrg brw_set_dest(p, insn, dest); 1890428d7b3dSmrg brw_set_src0(p, insn, src0); 1891428d7b3dSmrg brw_set_src1(p, insn, brw_imm_d(0)); 1892428d7b3dSmrg 1893428d7b3dSmrg if (p->gen <= 060) 1894428d7b3dSmrg insn->header.destreg__conditionalmod = 
msg_reg_nr; 1895428d7b3dSmrg 1896428d7b3dSmrg brw_set_urb_message(p, 1897428d7b3dSmrg insn, 1898428d7b3dSmrg allocate, 1899428d7b3dSmrg used, 1900428d7b3dSmrg msg_length, 1901428d7b3dSmrg response_length, 1902428d7b3dSmrg eot, 1903428d7b3dSmrg writes_complete, 1904428d7b3dSmrg offset, 1905428d7b3dSmrg swizzle); 1906428d7b3dSmrg} 1907428d7b3dSmrg 1908428d7b3dSmrgstatic int 1909428d7b3dSmrgbrw_find_next_block_end(struct brw_compile *p, int start) 1910428d7b3dSmrg{ 1911428d7b3dSmrg int ip; 1912428d7b3dSmrg 1913428d7b3dSmrg for (ip = start + 1; ip < p->nr_insn; ip++) { 1914428d7b3dSmrg struct brw_instruction *insn = &p->store[ip]; 1915428d7b3dSmrg 1916428d7b3dSmrg switch (insn->header.opcode) { 1917428d7b3dSmrg case BRW_OPCODE_ENDIF: 1918428d7b3dSmrg case BRW_OPCODE_ELSE: 1919428d7b3dSmrg case BRW_OPCODE_WHILE: 1920428d7b3dSmrg return ip; 1921428d7b3dSmrg } 1922428d7b3dSmrg } 1923428d7b3dSmrg assert(!"not reached"); 1924428d7b3dSmrg return start + 1; 1925428d7b3dSmrg} 1926428d7b3dSmrg 1927428d7b3dSmrg/* There is no DO instruction on gen6, so to find the end of the loop 1928428d7b3dSmrg * we have to see if the loop is jumping back before our start 1929428d7b3dSmrg * instruction. 1930428d7b3dSmrg */ 1931428d7b3dSmrgstatic int 1932428d7b3dSmrgbrw_find_loop_end(struct brw_compile *p, int start) 1933428d7b3dSmrg{ 1934428d7b3dSmrg int ip; 1935428d7b3dSmrg int br = 2; 1936428d7b3dSmrg 1937428d7b3dSmrg for (ip = start + 1; ip < p->nr_insn; ip++) { 1938428d7b3dSmrg struct brw_instruction *insn = &p->store[ip]; 1939428d7b3dSmrg 1940428d7b3dSmrg if (insn->header.opcode == BRW_OPCODE_WHILE) { 1941428d7b3dSmrg int jip = p->gen <= 070 ? 
insn->bits1.branch_gen6.jump_count 1942428d7b3dSmrg : insn->bits3.break_cont.jip; 1943428d7b3dSmrg if (ip + jip / br <= start) 1944428d7b3dSmrg return ip; 1945428d7b3dSmrg } 1946428d7b3dSmrg } 1947428d7b3dSmrg assert(!"not reached"); 1948428d7b3dSmrg return start + 1; 1949428d7b3dSmrg} 1950428d7b3dSmrg 1951428d7b3dSmrg/* After program generation, go back and update the UIP and JIP of 1952428d7b3dSmrg * BREAK and CONT instructions to their correct locations. 1953428d7b3dSmrg */ 1954428d7b3dSmrgvoid 1955428d7b3dSmrgbrw_set_uip_jip(struct brw_compile *p) 1956428d7b3dSmrg{ 1957428d7b3dSmrg int ip; 1958428d7b3dSmrg int br = 2; 1959428d7b3dSmrg 1960428d7b3dSmrg if (p->gen <= 060) 1961428d7b3dSmrg return; 1962428d7b3dSmrg 1963428d7b3dSmrg for (ip = 0; ip < p->nr_insn; ip++) { 1964428d7b3dSmrg struct brw_instruction *insn = &p->store[ip]; 1965428d7b3dSmrg 1966428d7b3dSmrg switch (insn->header.opcode) { 1967428d7b3dSmrg case BRW_OPCODE_BREAK: 1968428d7b3dSmrg insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip); 1969428d7b3dSmrg /* Gen7 UIP points to WHILE; Gen6 points just after it */ 1970428d7b3dSmrg insn->bits3.break_cont.uip = 1971428d7b3dSmrg br * (brw_find_loop_end(p, ip) - ip + (p->gen <= 070 ? 
1 : 0)); 1972428d7b3dSmrg break; 1973428d7b3dSmrg case BRW_OPCODE_CONTINUE: 1974428d7b3dSmrg insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip); 1975428d7b3dSmrg insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip); 1976428d7b3dSmrg 1977428d7b3dSmrg assert(insn->bits3.break_cont.uip != 0); 1978428d7b3dSmrg assert(insn->bits3.break_cont.jip != 0); 1979428d7b3dSmrg break; 1980428d7b3dSmrg } 1981428d7b3dSmrg } 1982428d7b3dSmrg} 1983428d7b3dSmrg 1984428d7b3dSmrgvoid brw_ff_sync(struct brw_compile *p, 1985428d7b3dSmrg struct brw_reg dest, 1986428d7b3dSmrg unsigned msg_reg_nr, 1987428d7b3dSmrg struct brw_reg src0, 1988428d7b3dSmrg bool allocate, 1989428d7b3dSmrg unsigned response_length, 1990428d7b3dSmrg bool eot) 1991428d7b3dSmrg{ 1992428d7b3dSmrg struct brw_instruction *insn; 1993428d7b3dSmrg 1994428d7b3dSmrg gen6_resolve_implied_move(p, &src0, msg_reg_nr); 1995428d7b3dSmrg 1996428d7b3dSmrg insn = brw_next_insn(p, BRW_OPCODE_SEND); 1997428d7b3dSmrg brw_set_dest(p, insn, dest); 1998428d7b3dSmrg brw_set_src0(p, insn, src0); 1999428d7b3dSmrg brw_set_src1(p, insn, brw_imm_d(0)); 2000428d7b3dSmrg 2001428d7b3dSmrg if (p->gen < 060) 2002428d7b3dSmrg insn->header.destreg__conditionalmod = msg_reg_nr; 2003428d7b3dSmrg 2004428d7b3dSmrg brw_set_ff_sync_message(p, 2005428d7b3dSmrg insn, 2006428d7b3dSmrg allocate, 2007428d7b3dSmrg response_length, 2008428d7b3dSmrg eot); 2009428d7b3dSmrg} 2010