1/* 2 Copyright (C) Intel Corp. 2006. All Rights Reserved. 3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to 4 develop this 3D driver. 5 6 Permission is hereby granted, free of charge, to any person obtaining 7 a copy of this software and associated documentation files (the 8 "Software"), to deal in the Software without restriction, including 9 without limitation the rights to use, copy, modify, merge, publish, 10 distribute, sublicense, and/or sell copies of the Software, and to 11 permit persons to whom the Software is furnished to do so, subject to 12 the following conditions: 13 14 The above copyright notice and this permission notice (including the 15 next paragraph) shall be included in all copies or substantial 16 portions of the Software. 17 18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
25 26 **********************************************************************/ 27/* 28 * Authors: 29 * Keith Whitwell <keith@tungstengraphics.com> 30 */ 31 32#include "brw_eu.h" 33 34#include <string.h> 35#include <stdlib.h> 36 37#define ARRAY_SIZE(A) (sizeof(A)/sizeof(A[0])) 38 39/*********************************************************************** 40 * Internal helper for constructing instructions 41 */ 42 43static void guess_execution_size(struct brw_compile *p, 44 struct brw_instruction *insn, 45 struct brw_reg reg) 46{ 47 if (reg.width == BRW_WIDTH_8 && p->compressed) 48 insn->header.execution_size = BRW_EXECUTE_16; 49 else 50 insn->header.execution_size = reg.width; 51} 52 53 54/** 55 * Prior to Sandybridge, the SEND instruction accepted non-MRF source 56 * registers, implicitly moving the operand to a message register. 57 * 58 * On Sandybridge, this is no longer the case. This function performs the 59 * explicit move; it should be called before emitting a SEND instruction. 60 */ 61void 62gen6_resolve_implied_move(struct brw_compile *p, 63 struct brw_reg *src, 64 unsigned msg_reg_nr) 65{ 66 if (p->gen < 060) 67 return; 68 69 if (src->file == BRW_MESSAGE_REGISTER_FILE) 70 return; 71 72 if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) { 73 brw_push_insn_state(p); 74 brw_set_mask_control(p, BRW_MASK_DISABLE); 75 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 76 brw_MOV(p, __retype_ud(brw_message_reg(msg_reg_nr)), __retype_ud(*src)); 77 brw_pop_insn_state(p); 78 } 79 *src = brw_message_reg(msg_reg_nr); 80} 81 82static void 83gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg) 84{ 85 /* From the BSpec / ISA Reference / send - [DevIVB+]: 86 * "The send with EOT should use register space R112-R127 for <src>. This is 87 * to enable loading of a new thread into the same slot while the message 88 * with EOT for current thread is pending dispatch." 
89 * 90 * Since we're pretending to have 16 MRFs anyway, we may as well use the 91 * registers required for messages with EOT. 92 */ 93 if (p->gen >= 070 && reg->file == BRW_MESSAGE_REGISTER_FILE) { 94 reg->file = BRW_GENERAL_REGISTER_FILE; 95 reg->nr += 111; 96 } 97} 98 99void 100brw_set_dest(struct brw_compile *p, struct brw_instruction *insn, 101 struct brw_reg dest) 102{ 103 if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE && 104 dest.file != BRW_MESSAGE_REGISTER_FILE) 105 assert(dest.nr < 128); 106 107 gen7_convert_mrf_to_grf(p, &dest); 108 109 insn->bits1.da1.dest_reg_file = dest.file; 110 insn->bits1.da1.dest_reg_type = dest.type; 111 insn->bits1.da1.dest_address_mode = dest.address_mode; 112 113 if (dest.address_mode == BRW_ADDRESS_DIRECT) { 114 insn->bits1.da1.dest_reg_nr = dest.nr; 115 116 if (insn->header.access_mode == BRW_ALIGN_1) { 117 insn->bits1.da1.dest_subreg_nr = dest.subnr; 118 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) 119 dest.hstride = BRW_HORIZONTAL_STRIDE_1; 120 insn->bits1.da1.dest_horiz_stride = dest.hstride; 121 } else { 122 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16; 123 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask; 124 /* even ignored in da16, still need to set as '01' */ 125 insn->bits1.da16.dest_horiz_stride = 1; 126 } 127 } else { 128 insn->bits1.ia1.dest_subreg_nr = dest.subnr; 129 130 /* These are different sizes in align1 vs align16: 131 */ 132 if (insn->header.access_mode == BRW_ALIGN_1) { 133 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset; 134 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) 135 dest.hstride = BRW_HORIZONTAL_STRIDE_1; 136 insn->bits1.ia1.dest_horiz_stride = dest.hstride; 137 } 138 else { 139 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset; 140 /* even ignored in da16, still need to set as '01' */ 141 insn->bits1.ia16.dest_horiz_stride = 1; 142 } 143 } 144 145 guess_execution_size(p, insn, dest); 146} 147 148static const int reg_type_size[8] = { 149 
[0] = 4, 150 [1] = 4, 151 [2] = 2, 152 [3] = 2, 153 [4] = 1, 154 [5] = 1, 155 [7] = 4 156}; 157 158static void 159validate_reg(struct brw_instruction *insn, struct brw_reg reg) 160{ 161 int hstride_for_reg[] = {0, 1, 2, 4}; 162 int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256}; 163 int width_for_reg[] = {1, 2, 4, 8, 16}; 164 int execsize_for_reg[] = {1, 2, 4, 8, 16}; 165 int width, hstride, vstride, execsize; 166 167 if (reg.file == BRW_IMMEDIATE_VALUE) { 168 /* 3.3.6: Region Parameters. Restriction: Immediate vectors 169 * mean the destination has to be 128-bit aligned and the 170 * destination horiz stride has to be a word. 171 */ 172 if (reg.type == BRW_REGISTER_TYPE_V) { 173 assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] * 174 reg_type_size[insn->bits1.da1.dest_reg_type] == 2); 175 } 176 177 return; 178 } 179 180 if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE && 181 reg.file == BRW_ARF_NULL) 182 return; 183 184 assert(reg.hstride >= 0 && reg.hstride < ARRAY_SIZE(hstride_for_reg)); 185 assert(reg.vstride >= 0 && reg.vstride < ARRAY_SIZE(vstride_for_reg)); 186 assert(reg.width >= 0 && reg.width < ARRAY_SIZE(width_for_reg)); 187 assert(insn->header.execution_size >= 0 && insn->header.execution_size < ARRAY_SIZE(execsize_for_reg)); 188 189 hstride = hstride_for_reg[reg.hstride]; 190 191 if (reg.vstride == 0xf) { 192 vstride = -1; 193 } else { 194 vstride = vstride_for_reg[reg.vstride]; 195 } 196 197 width = width_for_reg[reg.width]; 198 199 execsize = execsize_for_reg[insn->header.execution_size]; 200 201 /* Restrictions from 3.3.10: Register Region Restrictions. */ 202 /* 3. */ 203 assert(execsize >= width); 204 205 /* 4. */ 206 if (execsize == width && hstride != 0) { 207 assert(vstride == -1 || vstride == width * hstride); 208 } 209 210 /* 5. */ 211 if (execsize == width && hstride == 0) { 212 /* no restriction on vstride. */ 213 } 214 215 /* 6. */ 216 if (width == 1) { 217 assert(hstride == 0); 218 } 219 220 /* 7. 
*/ 221 if (execsize == 1 && width == 1) { 222 assert(hstride == 0); 223 assert(vstride == 0); 224 } 225 226 /* 8. */ 227 if (vstride == 0 && hstride == 0) { 228 assert(width == 1); 229 } 230 231 /* 10. Check destination issues. */ 232} 233 234void 235brw_set_src0(struct brw_compile *p, struct brw_instruction *insn, 236 struct brw_reg reg) 237{ 238 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE) 239 assert(reg.nr < 128); 240 241 gen7_convert_mrf_to_grf(p, ®); 242 243 validate_reg(insn, reg); 244 245 insn->bits1.da1.src0_reg_file = reg.file; 246 insn->bits1.da1.src0_reg_type = reg.type; 247 insn->bits2.da1.src0_abs = reg.abs; 248 insn->bits2.da1.src0_negate = reg.negate; 249 insn->bits2.da1.src0_address_mode = reg.address_mode; 250 251 if (reg.file == BRW_IMMEDIATE_VALUE) { 252 insn->bits3.ud = reg.dw1.ud; 253 254 /* Required to set some fields in src1 as well: 255 */ 256 insn->bits1.da1.src1_reg_file = 0; /* arf */ 257 insn->bits1.da1.src1_reg_type = reg.type; 258 } else { 259 if (reg.address_mode == BRW_ADDRESS_DIRECT) { 260 if (insn->header.access_mode == BRW_ALIGN_1) { 261 insn->bits2.da1.src0_subreg_nr = reg.subnr; 262 insn->bits2.da1.src0_reg_nr = reg.nr; 263 } else { 264 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16; 265 insn->bits2.da16.src0_reg_nr = reg.nr; 266 } 267 } else { 268 insn->bits2.ia1.src0_subreg_nr = reg.subnr; 269 270 if (insn->header.access_mode == BRW_ALIGN_1) { 271 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset; 272 } else { 273 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset; 274 } 275 } 276 277 if (insn->header.access_mode == BRW_ALIGN_1) { 278 if (reg.width == BRW_WIDTH_1 && 279 insn->header.execution_size == BRW_EXECUTE_1) { 280 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0; 281 insn->bits2.da1.src0_width = BRW_WIDTH_1; 282 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0; 283 } else { 284 insn->bits2.da1.src0_horiz_stride = reg.hstride; 285 insn->bits2.da1.src0_width = 
reg.width; 286 insn->bits2.da1.src0_vert_stride = reg.vstride; 287 } 288 } else { 289 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); 290 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); 291 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); 292 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); 293 294 /* This is an oddity of the fact we're using the same 295 * descriptions for registers in align_16 as align_1: 296 */ 297 if (reg.vstride == BRW_VERTICAL_STRIDE_8) 298 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4; 299 else 300 insn->bits2.da16.src0_vert_stride = reg.vstride; 301 } 302 } 303} 304 305void brw_set_src1(struct brw_compile *p, 306 struct brw_instruction *insn, 307 struct brw_reg reg) 308{ 309 assert(reg.file != BRW_MESSAGE_REGISTER_FILE); 310 assert(reg.nr < 128); 311 312 gen7_convert_mrf_to_grf(p, ®); 313 314 validate_reg(insn, reg); 315 316 insn->bits1.da1.src1_reg_file = reg.file; 317 insn->bits1.da1.src1_reg_type = reg.type; 318 insn->bits3.da1.src1_abs = reg.abs; 319 insn->bits3.da1.src1_negate = reg.negate; 320 321 /* Only src1 can be immediate in two-argument instructions. 
*/ 322 assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE); 323 324 if (reg.file == BRW_IMMEDIATE_VALUE) { 325 insn->bits3.ud = reg.dw1.ud; 326 } else { 327 /* This is a hardware restriction, which may or may not be lifted 328 * in the future: 329 */ 330 assert (reg.address_mode == BRW_ADDRESS_DIRECT); 331 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */ 332 333 if (insn->header.access_mode == BRW_ALIGN_1) { 334 insn->bits3.da1.src1_subreg_nr = reg.subnr; 335 insn->bits3.da1.src1_reg_nr = reg.nr; 336 } else { 337 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16; 338 insn->bits3.da16.src1_reg_nr = reg.nr; 339 } 340 341 if (insn->header.access_mode == BRW_ALIGN_1) { 342 if (reg.width == BRW_WIDTH_1 && 343 insn->header.execution_size == BRW_EXECUTE_1) { 344 insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0; 345 insn->bits3.da1.src1_width = BRW_WIDTH_1; 346 insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0; 347 } else { 348 insn->bits3.da1.src1_horiz_stride = reg.hstride; 349 insn->bits3.da1.src1_width = reg.width; 350 insn->bits3.da1.src1_vert_stride = reg.vstride; 351 } 352 } else { 353 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); 354 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); 355 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); 356 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); 357 358 /* This is an oddity of the fact we're using the same 359 * descriptions for registers in align_16 as align_1: 360 */ 361 if (reg.vstride == BRW_VERTICAL_STRIDE_8) 362 insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4; 363 else 364 insn->bits3.da16.src1_vert_stride = reg.vstride; 365 } 366 } 367} 368 369/** 370 * Set the Message Descriptor and Extended Message Descriptor fields 371 * for SEND messages. 
372 * 373 * \note This zeroes out the Function Control bits, so it must be called 374 * \b before filling out any message-specific data. Callers can 375 * choose not to fill in irrelevant bits; they will be zero. 376 */ 377static void 378brw_set_message_descriptor(struct brw_compile *p, 379 struct brw_instruction *inst, 380 enum brw_message_target sfid, 381 unsigned msg_length, 382 unsigned response_length, 383 bool header_present, 384 bool end_of_thread) 385{ 386 brw_set_src1(p, inst, brw_imm_d(0)); 387 388 if (p->gen >= 050) { 389 inst->bits3.generic_gen5.header_present = header_present; 390 inst->bits3.generic_gen5.response_length = response_length; 391 inst->bits3.generic_gen5.msg_length = msg_length; 392 inst->bits3.generic_gen5.end_of_thread = end_of_thread; 393 394 if (p->gen >= 060) { 395 /* On Gen6+ Message target/SFID goes in bits 27:24 of the header */ 396 inst->header.destreg__conditionalmod = sfid; 397 } else { 398 /* Set Extended Message Descriptor (ex_desc) */ 399 inst->bits2.send_gen5.sfid = sfid; 400 inst->bits2.send_gen5.end_of_thread = end_of_thread; 401 } 402 } else { 403 inst->bits3.generic.response_length = response_length; 404 inst->bits3.generic.msg_length = msg_length; 405 inst->bits3.generic.msg_target = sfid; 406 inst->bits3.generic.end_of_thread = end_of_thread; 407 } 408} 409 410 411static void brw_set_math_message(struct brw_compile *p, 412 struct brw_instruction *insn, 413 unsigned function, 414 unsigned integer_type, 415 bool low_precision, 416 bool saturate, 417 unsigned dataType) 418{ 419 unsigned msg_length; 420 unsigned response_length; 421 422 /* Infer message length from the function */ 423 switch (function) { 424 case BRW_MATH_FUNCTION_POW: 425 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT: 426 case BRW_MATH_FUNCTION_INT_DIV_REMAINDER: 427 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER: 428 msg_length = 2; 429 break; 430 default: 431 msg_length = 1; 432 break; 433 } 434 435 /* Infer response length from the function */ 436 
switch (function) { 437 case BRW_MATH_FUNCTION_SINCOS: 438 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER: 439 response_length = 2; 440 break; 441 default: 442 response_length = 1; 443 break; 444 } 445 446 brw_set_message_descriptor(p, insn, BRW_SFID_MATH, 447 msg_length, response_length, 448 false, false); 449 if (p->gen == 050) { 450 insn->bits3.math_gen5.function = function; 451 insn->bits3.math_gen5.int_type = integer_type; 452 insn->bits3.math_gen5.precision = low_precision; 453 insn->bits3.math_gen5.saturate = saturate; 454 insn->bits3.math_gen5.data_type = dataType; 455 insn->bits3.math_gen5.snapshot = 0; 456 } else { 457 insn->bits3.math.function = function; 458 insn->bits3.math.int_type = integer_type; 459 insn->bits3.math.precision = low_precision; 460 insn->bits3.math.saturate = saturate; 461 insn->bits3.math.data_type = dataType; 462 } 463} 464 465static void brw_set_ff_sync_message(struct brw_compile *p, 466 struct brw_instruction *insn, 467 bool allocate, 468 unsigned response_length, 469 bool end_of_thread) 470{ 471 brw_set_message_descriptor(p, insn, BRW_SFID_URB, 472 1, response_length, 473 true, end_of_thread); 474 insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */ 475 insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */ 476 insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */ 477 insn->bits3.urb_gen5.allocate = allocate; 478 insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */ 479 insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */ 480} 481 482static void brw_set_urb_message(struct brw_compile *p, 483 struct brw_instruction *insn, 484 bool allocate, 485 bool used, 486 unsigned msg_length, 487 unsigned response_length, 488 bool end_of_thread, 489 bool complete, 490 unsigned offset, 491 unsigned swizzle_control) 492{ 493 brw_set_message_descriptor(p, insn, BRW_SFID_URB, 494 msg_length, response_length, true, end_of_thread); 495 if (p->gen >= 070) { 496 insn->bits3.urb_gen7.opcode = 0; /* URB_WRITE_HWORD */ 497 
insn->bits3.urb_gen7.offset = offset; 498 assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE); 499 insn->bits3.urb_gen7.swizzle_control = swizzle_control; 500 /* per_slot_offset = 0 makes it ignore offsets in message header */ 501 insn->bits3.urb_gen7.per_slot_offset = 0; 502 insn->bits3.urb_gen7.complete = complete; 503 } else if (p->gen >= 050) { 504 insn->bits3.urb_gen5.opcode = 0; /* URB_WRITE */ 505 insn->bits3.urb_gen5.offset = offset; 506 insn->bits3.urb_gen5.swizzle_control = swizzle_control; 507 insn->bits3.urb_gen5.allocate = allocate; 508 insn->bits3.urb_gen5.used = used; /* ? */ 509 insn->bits3.urb_gen5.complete = complete; 510 } else { 511 insn->bits3.urb.opcode = 0; /* ? */ 512 insn->bits3.urb.offset = offset; 513 insn->bits3.urb.swizzle_control = swizzle_control; 514 insn->bits3.urb.allocate = allocate; 515 insn->bits3.urb.used = used; /* ? */ 516 insn->bits3.urb.complete = complete; 517 } 518} 519 520void 521brw_set_dp_write_message(struct brw_compile *p, 522 struct brw_instruction *insn, 523 unsigned binding_table_index, 524 unsigned msg_control, 525 unsigned msg_type, 526 unsigned msg_length, 527 bool header_present, 528 bool last_render_target, 529 unsigned response_length, 530 bool end_of_thread, 531 bool send_commit_msg) 532{ 533 unsigned sfid; 534 535 if (p->gen >= 070) { 536 /* Use the Render Cache for RT writes; otherwise use the Data Cache */ 537 if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE) 538 sfid = GEN6_SFID_DATAPORT_RENDER_CACHE; 539 else 540 sfid = GEN7_SFID_DATAPORT_DATA_CACHE; 541 } else if (p->gen >= 060) { 542 /* Use the render cache for all write messages. 
*/ 543 sfid = GEN6_SFID_DATAPORT_RENDER_CACHE; 544 } else { 545 sfid = BRW_SFID_DATAPORT_WRITE; 546 } 547 548 brw_set_message_descriptor(p, insn, sfid, 549 msg_length, response_length, 550 header_present, end_of_thread); 551 552 if (p->gen >= 070) { 553 insn->bits3.gen7_dp.binding_table_index = binding_table_index; 554 insn->bits3.gen7_dp.msg_control = msg_control; 555 insn->bits3.gen7_dp.last_render_target = last_render_target; 556 insn->bits3.gen7_dp.msg_type = msg_type; 557 } else if (p->gen >= 060) { 558 insn->bits3.gen6_dp.binding_table_index = binding_table_index; 559 insn->bits3.gen6_dp.msg_control = msg_control; 560 insn->bits3.gen6_dp.last_render_target = last_render_target; 561 insn->bits3.gen6_dp.msg_type = msg_type; 562 insn->bits3.gen6_dp.send_commit_msg = send_commit_msg; 563 } else if (p->gen >= 050) { 564 insn->bits3.dp_write_gen5.binding_table_index = binding_table_index; 565 insn->bits3.dp_write_gen5.msg_control = msg_control; 566 insn->bits3.dp_write_gen5.last_render_target = last_render_target; 567 insn->bits3.dp_write_gen5.msg_type = msg_type; 568 insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg; 569 } else { 570 insn->bits3.dp_write.binding_table_index = binding_table_index; 571 insn->bits3.dp_write.msg_control = msg_control; 572 insn->bits3.dp_write.last_render_target = last_render_target; 573 insn->bits3.dp_write.msg_type = msg_type; 574 insn->bits3.dp_write.send_commit_msg = send_commit_msg; 575 } 576} 577 578void 579brw_set_dp_read_message(struct brw_compile *p, 580 struct brw_instruction *insn, 581 unsigned binding_table_index, 582 unsigned msg_control, 583 unsigned msg_type, 584 unsigned target_cache, 585 unsigned msg_length, 586 unsigned response_length) 587{ 588 unsigned sfid; 589 590 if (p->gen >= 070) { 591 sfid = GEN7_SFID_DATAPORT_DATA_CACHE; 592 } else if (p->gen >= 060) { 593 if (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE) 594 sfid = GEN6_SFID_DATAPORT_RENDER_CACHE; 595 else 596 sfid = 
GEN6_SFID_DATAPORT_SAMPLER_CACHE; 597 } else { 598 sfid = BRW_SFID_DATAPORT_READ; 599 } 600 601 brw_set_message_descriptor(p, insn, sfid, 602 msg_length, response_length, 603 true, false); 604 605 if (p->gen >= 070) { 606 insn->bits3.gen7_dp.binding_table_index = binding_table_index; 607 insn->bits3.gen7_dp.msg_control = msg_control; 608 insn->bits3.gen7_dp.last_render_target = 0; 609 insn->bits3.gen7_dp.msg_type = msg_type; 610 } else if (p->gen >= 060) { 611 insn->bits3.gen6_dp.binding_table_index = binding_table_index; 612 insn->bits3.gen6_dp.msg_control = msg_control; 613 insn->bits3.gen6_dp.last_render_target = 0; 614 insn->bits3.gen6_dp.msg_type = msg_type; 615 insn->bits3.gen6_dp.send_commit_msg = 0; 616 } else if (p->gen >= 050) { 617 insn->bits3.dp_read_gen5.binding_table_index = binding_table_index; 618 insn->bits3.dp_read_gen5.msg_control = msg_control; 619 insn->bits3.dp_read_gen5.msg_type = msg_type; 620 insn->bits3.dp_read_gen5.target_cache = target_cache; 621 } else if (p->gen >= 045) { 622 insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/ 623 insn->bits3.dp_read_g4x.msg_control = msg_control; /*8:10*/ 624 insn->bits3.dp_read_g4x.msg_type = msg_type; /*11:13*/ 625 insn->bits3.dp_read_g4x.target_cache = target_cache; /*14:15*/ 626 } else { 627 insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/ 628 insn->bits3.dp_read.msg_control = msg_control; /*8:11*/ 629 insn->bits3.dp_read.msg_type = msg_type; /*12:13*/ 630 insn->bits3.dp_read.target_cache = target_cache; /*14:15*/ 631 } 632} 633 634static void brw_set_sampler_message(struct brw_compile *p, 635 struct brw_instruction *insn, 636 unsigned binding_table_index, 637 unsigned sampler, 638 unsigned msg_type, 639 unsigned response_length, 640 unsigned msg_length, 641 bool header_present, 642 unsigned simd_mode) 643{ 644 brw_set_message_descriptor(p, insn, BRW_SFID_SAMPLER, 645 msg_length, response_length, 646 header_present, false); 647 648 if (p->gen >= 070) { 
649 insn->bits3.sampler_gen7.binding_table_index = binding_table_index; 650 insn->bits3.sampler_gen7.sampler = sampler; 651 insn->bits3.sampler_gen7.msg_type = msg_type; 652 insn->bits3.sampler_gen7.simd_mode = simd_mode; 653 } else if (p->gen >= 050) { 654 insn->bits3.sampler_gen5.binding_table_index = binding_table_index; 655 insn->bits3.sampler_gen5.sampler = sampler; 656 insn->bits3.sampler_gen5.msg_type = msg_type; 657 insn->bits3.sampler_gen5.simd_mode = simd_mode; 658 } else if (p->gen >= 045) { 659 insn->bits3.sampler_g4x.binding_table_index = binding_table_index; 660 insn->bits3.sampler_g4x.sampler = sampler; 661 insn->bits3.sampler_g4x.msg_type = msg_type; 662 } else { 663 insn->bits3.sampler.binding_table_index = binding_table_index; 664 insn->bits3.sampler.sampler = sampler; 665 insn->bits3.sampler.msg_type = msg_type; 666 insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32; 667 } 668} 669 670 671void brw_NOP(struct brw_compile *p) 672{ 673 struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_NOP); 674 brw_set_dest(p, insn, __retype_ud(brw_vec4_grf(0,0))); 675 brw_set_src0(p, insn, __retype_ud(brw_vec4_grf(0,0))); 676 brw_set_src1(p, insn, brw_imm_ud(0x0)); 677} 678 679/*********************************************************************** 680 * Comparisons, if/else/endif 681 */ 682 683static void 684push_if_stack(struct brw_compile *p, struct brw_instruction *inst) 685{ 686 p->if_stack[p->if_stack_depth] = inst; 687 688 p->if_stack_depth++; 689 if (p->if_stack_array_size <= p->if_stack_depth) { 690 p->if_stack_array_size *= 2; 691 p->if_stack = realloc(p->if_stack, sizeof(struct brw_instruction *)*p->if_stack_array_size); 692 } 693} 694 695/* EU takes the value from the flag register and pushes it onto some 696 * sort of a stack (presumably merging with any flag value already on 697 * the stack). Within an if block, the flags at the top of the stack 698 * control execution on each channel of the unit, eg. 
on each of the 699 * 16 pixel values in our wm programs. 700 * 701 * When the matching 'else' instruction is reached (presumably by 702 * countdown of the instruction count patched in by our ELSE/ENDIF 703 * functions), the relevent flags are inverted. 704 * 705 * When the matching 'endif' instruction is reached, the flags are 706 * popped off. If the stack is now empty, normal execution resumes. 707 */ 708struct brw_instruction * 709brw_IF(struct brw_compile *p, unsigned execute_size) 710{ 711 struct brw_instruction *insn; 712 713 insn = brw_next_insn(p, BRW_OPCODE_IF); 714 715 /* Override the defaults for this instruction: */ 716 if (p->gen < 060) { 717 brw_set_dest(p, insn, brw_ip_reg()); 718 brw_set_src0(p, insn, brw_ip_reg()); 719 brw_set_src1(p, insn, brw_imm_d(0x0)); 720 } else if (p->gen < 070) { 721 brw_set_dest(p, insn, brw_imm_w(0)); 722 insn->bits1.branch_gen6.jump_count = 0; 723 brw_set_src0(p, insn, __retype_d(brw_null_reg())); 724 brw_set_src1(p, insn, __retype_d(brw_null_reg())); 725 } else { 726 brw_set_dest(p, insn, __retype_d(brw_null_reg())); 727 brw_set_src0(p, insn, __retype_d(brw_null_reg())); 728 brw_set_src1(p, insn, brw_imm_ud(0)); 729 insn->bits3.break_cont.jip = 0; 730 insn->bits3.break_cont.uip = 0; 731 } 732 733 insn->header.execution_size = execute_size; 734 insn->header.compression_control = BRW_COMPRESSION_NONE; 735 insn->header.predicate_control = BRW_PREDICATE_NORMAL; 736 insn->header.mask_control = BRW_MASK_ENABLE; 737 if (!p->single_program_flow) 738 insn->header.thread_control = BRW_THREAD_SWITCH; 739 740 p->current->header.predicate_control = BRW_PREDICATE_NONE; 741 742 push_if_stack(p, insn); 743 return insn; 744} 745 746/* This function is only used for gen6-style IF instructions with an 747 * embedded comparison (conditional modifier). It is not used on gen7. 
748 */ 749struct brw_instruction * 750gen6_IF(struct brw_compile *p, uint32_t conditional, 751 struct brw_reg src0, struct brw_reg src1) 752{ 753 struct brw_instruction *insn; 754 755 insn = brw_next_insn(p, BRW_OPCODE_IF); 756 757 brw_set_dest(p, insn, brw_imm_w(0)); 758 if (p->compressed) { 759 insn->header.execution_size = BRW_EXECUTE_16; 760 } else { 761 insn->header.execution_size = BRW_EXECUTE_8; 762 } 763 insn->bits1.branch_gen6.jump_count = 0; 764 brw_set_src0(p, insn, src0); 765 brw_set_src1(p, insn, src1); 766 767 assert(insn->header.compression_control == BRW_COMPRESSION_NONE); 768 assert(insn->header.predicate_control == BRW_PREDICATE_NONE); 769 insn->header.destreg__conditionalmod = conditional; 770 771 if (!p->single_program_flow) 772 insn->header.thread_control = BRW_THREAD_SWITCH; 773 774 push_if_stack(p, insn); 775 return insn; 776} 777 778/** 779 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs. 780 */ 781static void 782convert_IF_ELSE_to_ADD(struct brw_compile *p, 783 struct brw_instruction *if_inst, 784 struct brw_instruction *else_inst) 785{ 786 /* The next instruction (where the ENDIF would be, if it existed) */ 787 struct brw_instruction *next_inst = &p->store[p->nr_insn]; 788 789 assert(p->single_program_flow); 790 assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF); 791 assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE); 792 assert(if_inst->header.execution_size == BRW_EXECUTE_1); 793 794 /* Convert IF to an ADD instruction that moves the instruction pointer 795 * to the first instruction of the ELSE block. If there is no ELSE 796 * block, point to where ENDIF would be. Reverse the predicate. 797 * 798 * There's no need to execute an ENDIF since we don't need to do any 799 * stack operations, and if we're currently executing, we just want to 800 * continue normally. 
801 */ 802 if_inst->header.opcode = BRW_OPCODE_ADD; 803 if_inst->header.predicate_inverse = 1; 804 805 if (else_inst != NULL) { 806 /* Convert ELSE to an ADD instruction that points where the ENDIF 807 * would be. 808 */ 809 else_inst->header.opcode = BRW_OPCODE_ADD; 810 811 if_inst->bits3.ud = (else_inst - if_inst + 1) * 16; 812 else_inst->bits3.ud = (next_inst - else_inst) * 16; 813 } else { 814 if_inst->bits3.ud = (next_inst - if_inst) * 16; 815 } 816} 817 818/** 819 * Patch IF and ELSE instructions with appropriate jump targets. 820 */ 821static void 822patch_IF_ELSE(struct brw_compile *p, 823 struct brw_instruction *if_inst, 824 struct brw_instruction *else_inst, 825 struct brw_instruction *endif_inst) 826{ 827 unsigned br = 1; 828 829 assert(!p->single_program_flow); 830 assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF); 831 assert(endif_inst != NULL); 832 assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE); 833 834 /* Jump count is for 64bit data chunk each, so one 128bit instruction 835 * requires 2 chunks. 836 */ 837 if (p->gen >= 050) 838 br = 2; 839 840 assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF); 841 endif_inst->header.execution_size = if_inst->header.execution_size; 842 843 if (else_inst == NULL) { 844 /* Patch IF -> ENDIF */ 845 if (p->gen < 060) { 846 /* Turn it into an IFF, which means no mask stack operations for 847 * all-false and jumping past the ENDIF. 848 */ 849 if_inst->header.opcode = BRW_OPCODE_IFF; 850 if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1); 851 if_inst->bits3.if_else.pop_count = 0; 852 if_inst->bits3.if_else.pad0 = 0; 853 } else if (p->gen < 070) { 854 /* As of gen6, there is no IFF and IF must point to the ENDIF. 
*/ 855 if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst); 856 } else { 857 if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst); 858 if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst); 859 } 860 } else { 861 else_inst->header.execution_size = if_inst->header.execution_size; 862 863 /* Patch IF -> ELSE */ 864 if (p->gen < 060) { 865 if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst); 866 if_inst->bits3.if_else.pop_count = 0; 867 if_inst->bits3.if_else.pad0 = 0; 868 } else if (p->gen <= 070) { 869 if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1); 870 } 871 872 /* Patch ELSE -> ENDIF */ 873 if (p->gen < 060) { 874 /* BRW_OPCODE_ELSE pre-gen6 should point just past the 875 * matching ENDIF. 876 */ 877 else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1); 878 else_inst->bits3.if_else.pop_count = 1; 879 else_inst->bits3.if_else.pad0 = 0; 880 } else if (p->gen < 070) { 881 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. 
			 */
			else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst);
		} else {
			/* The IF instruction's JIP should point just past the ELSE */
			if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1);
			/* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
			if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
			else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst);
		}
	}
}

/**
 * Emit an ELSE instruction and hand it to push_if_stack().
 *
 * The operand encoding depends on the generation: pre-gen6 uses the IP
 * register, gen6 carries the jump in bits1.branch_gen6.jump_count and
 * gen7+ in bits3.break_cont.jip/uip.  All jump fields are written as 0
 * here; brw_ENDIF() later pops this instruction from p->if_stack and
 * passes it on for patching.
 */
void
brw_ELSE(struct brw_compile *p)
{
	struct brw_instruction *insn;

	insn = brw_next_insn(p, BRW_OPCODE_ELSE);

	if (p->gen < 060) {
		brw_set_dest(p, insn, brw_ip_reg());
		brw_set_src0(p, insn, brw_ip_reg());
		brw_set_src1(p, insn, brw_imm_d(0x0));
	} else if (p->gen < 070) {
		brw_set_dest(p, insn, brw_imm_w(0));
		insn->bits1.branch_gen6.jump_count = 0;
		brw_set_src0(p, insn, __retype_d(brw_null_reg()));
		brw_set_src1(p, insn, __retype_d(brw_null_reg()));
	} else {
		brw_set_dest(p, insn, __retype_d(brw_null_reg()));
		brw_set_src0(p, insn, __retype_d(brw_null_reg()));
		brw_set_src1(p, insn, brw_imm_ud(0));
		insn->bits3.break_cont.jip = 0;
		insn->bits3.break_cont.uip = 0;
	}

	insn->header.compression_control = BRW_COMPRESSION_NONE;
	insn->header.mask_control = BRW_MASK_ENABLE;
	/* Thread switch is only requested when channels may diverge. */
	if (!p->single_program_flow)
		insn->header.thread_control = BRW_THREAD_SWITCH;

	push_if_stack(p, insn);
}

/**
 * Close the innermost open IF (and optional ELSE) block.
 *
 * Pops the ELSE (when one is on top) and then the IF from p->if_stack.
 * In single-program-flow mode no ENDIF is emitted and the pair is handed
 * to convert_IF_ELSE_to_ADD(); otherwise an ENDIF instruction is emitted
 * and patch_IF_ELSE() fixes up the jump targets of all three.
 *
 * NOTE(review): if_stack_depth is decremented without an underflow check,
 * so each call presumably has to be paired with an earlier IF -- confirm
 * against callers.
 */
void
brw_ENDIF(struct brw_compile *p)
{
	struct brw_instruction *insn;
	struct brw_instruction *else_inst = NULL;
	struct brw_instruction *if_inst = NULL;

	/* Pop the IF and (optional) ELSE instructions from the stack */
	p->if_stack_depth--;
	if (p->if_stack[p->if_stack_depth]->header.opcode == BRW_OPCODE_ELSE) {
		else_inst = p->if_stack[p->if_stack_depth];
		p->if_stack_depth--;
	}
	if_inst = p->if_stack[p->if_stack_depth];

	if (p->single_program_flow) {
		/* ENDIF is useless; don't bother emitting it. */
		convert_IF_ELSE_to_ADD(p, if_inst, else_inst);
		return;
	}

	insn = brw_next_insn(p, BRW_OPCODE_ENDIF);

	if (p->gen < 060) {
		brw_set_dest(p, insn, __retype_ud(brw_vec4_grf(0,0)));
		brw_set_src0(p, insn, __retype_ud(brw_vec4_grf(0,0)));
		brw_set_src1(p, insn, brw_imm_d(0x0));
	} else if (p->gen < 070) {
		brw_set_dest(p, insn, brw_imm_w(0));
		brw_set_src0(p, insn, __retype_d(brw_null_reg()));
		brw_set_src1(p, insn, __retype_d(brw_null_reg()));
	} else {
		brw_set_dest(p, insn, __retype_d(brw_null_reg()));
		brw_set_src0(p, insn, __retype_d(brw_null_reg()));
		brw_set_src1(p, insn, brw_imm_ud(0));
	}

	insn->header.compression_control = BRW_COMPRESSION_NONE;
	insn->header.mask_control = BRW_MASK_ENABLE;
	insn->header.thread_control = BRW_THREAD_SWITCH;

	/* Also pop item off the stack in the endif instruction: */
	if (p->gen < 060) {
		insn->bits3.if_else.jump_count = 0;
		insn->bits3.if_else.pop_count = 1;
		insn->bits3.if_else.pad0 = 0;
	} else if (p->gen < 070) {
		insn->bits1.branch_gen6.jump_count = 2;
	} else {
		insn->bits3.break_cont.jip = 2;
	}
	patch_IF_ELSE(p, if_inst, else_inst, insn);
}

/**
 * Emit a BREAK instruction and return it.
 *
 * On gen6+ no jump target is filled in here and pop_count is unused;
 * pre-gen6 records pop_count in bits3.if_else.pop_count.
 */
struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count)
{
	struct brw_instruction *insn;

	insn = brw_next_insn(p, BRW_OPCODE_BREAK);
	if (p->gen >= 060) {
		brw_set_dest(p, insn, __retype_d(brw_null_reg()));
		brw_set_src0(p, insn, __retype_d(brw_null_reg()));
		brw_set_src1(p, insn, brw_imm_d(0x0));
	} else {
		brw_set_dest(p, insn, brw_ip_reg());
		brw_set_src0(p, insn, brw_ip_reg());
		brw_set_src1(p, insn, brw_imm_d(0x0));
		insn->bits3.if_else.pad0 = 0;
		insn->bits3.if_else.pop_count = pop_count;
	}
	insn->header.compression_control = BRW_COMPRESSION_NONE;
	insn->header.execution_size = BRW_EXECUTE_8;

	return insn;
}

/**
 * Emit a CONTINUE instruction for gen6+ and return it.
 *
 * do_insn is not used by this function.
 *
 * NOTE(review): dest and src0 are first set to the null register and
 * then immediately set again to the IP register, so the second pair is
 * what ends up encoded (unless brw_set_dest()/brw_set_src0() have
 * cumulative side effects) -- confirm which pair is intended.
 */
struct brw_instruction *gen6_CONT(struct brw_compile *p,
				  struct brw_instruction *do_insn)
{
	struct brw_instruction *insn;

	insn = brw_next_insn(p, BRW_OPCODE_CONTINUE);
	brw_set_dest(p, insn, __retype_d(brw_null_reg()));
	brw_set_src0(p, insn, __retype_d(brw_null_reg()));
	brw_set_dest(p, insn, brw_ip_reg());
	brw_set_src0(p, insn, brw_ip_reg());
	brw_set_src1(p, insn, brw_imm_d(0x0));

	insn->header.compression_control = BRW_COMPRESSION_NONE;
	insn->header.execution_size = BRW_EXECUTE_8;
	return insn;
}

/**
 * Emit a CONTINUE instruction using the IP-register encoding and record
 * pop_count in bits3.if_else.pop_count.  Returns the new instruction.
 */
struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count)
{
	struct brw_instruction *insn;
	insn = brw_next_insn(p, BRW_OPCODE_CONTINUE);
	brw_set_dest(p, insn, brw_ip_reg());
	brw_set_src0(p, insn, brw_ip_reg());
	brw_set_src1(p, insn, brw_imm_d(0x0));
	insn->header.compression_control = BRW_COMPRESSION_NONE;
	insn->header.execution_size = BRW_EXECUTE_8;
	/* insn->header.mask_control = BRW_MASK_DISABLE; */
	insn->bits3.if_else.pad0 = 0;
	insn->bits3.if_else.pop_count = pop_count;
	return insn;
}

/* DO/WHILE loop:
 *
 * The DO/WHILE is just an unterminated loop -- break or continue are
 * used for control within the loop.  We have a few ways they can be
 * done.
 *
 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
 * jip and no DO instruction.
 *
 * For non-uniform control flow pre-gen6, there's a DO instruction to
 * push the mask, and a WHILE to jump back, and BREAK to get out and
 * pop the mask.
 *
 * For gen6, there's no more mask stack, so no need for DO.  WHILE
 * just points back to the first instruction of the loop.
 */

/* Open a loop.  On gen6+ and in single-program-flow mode nothing is
 * emitted and the address of the next instruction slot is returned as the
 * loop head; otherwise a DO instruction with the requested execution size
 * is emitted and returned.
 */
struct brw_instruction *brw_DO(struct brw_compile *p, unsigned execute_size)
{
	if (p->gen >= 060 || p->single_program_flow) {
		return &p->store[p->nr_insn];
	} else {
		struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_DO);

		/* Override the defaults for this instruction:
		 */
		brw_set_dest(p, insn, brw_null_reg());
		brw_set_src0(p, insn, brw_null_reg());
		brw_set_src1(p, insn, brw_null_reg());

		insn->header.compression_control = BRW_COMPRESSION_NONE;
		insn->header.execution_size = execute_size;
		insn->header.predicate_control = BRW_PREDICATE_NONE;
		/* insn->header.mask_control = BRW_MASK_ENABLE; */
		/* insn->header.mask_control = BRW_MASK_DISABLE; */

		return insn;
	}
}

/* Close a loop opened with brw_DO(), jumping back to do_insn.
 *
 * The backward distance is stored in a generation-specific field:
 * bits3.break_cont.jip on gen7+, bits1.branch_gen6.jump_count on gen6,
 * and bits3.if_else.jump_count before that.  Pre-gen6 single-program-flow
 * emits a plain ADD on the IP register instead of a WHILE.
 *
 * Side effect: predication is switched off in the current default
 * instruction state (p->current) before returning.
 */
struct brw_instruction *brw_WHILE(struct brw_compile *p,
				  struct brw_instruction *do_insn)
{
	struct brw_instruction *insn;
	/* Jump-count multiplier: 2 from gen5 on, 1 before. */
	unsigned br = 1;

	if (p->gen >= 050)
		br = 2;

	if (p->gen >= 070) {
		insn = brw_next_insn(p, BRW_OPCODE_WHILE);

		brw_set_dest(p, insn, __retype_d(brw_null_reg()));
		brw_set_src0(p, insn, __retype_d(brw_null_reg()));
		brw_set_src1(p, insn, brw_imm_ud(0));
		insn->bits3.break_cont.jip = br * (do_insn - insn);

		insn->header.execution_size = BRW_EXECUTE_8;
	} else if (p->gen >= 060) {
		insn = brw_next_insn(p, BRW_OPCODE_WHILE);

		brw_set_dest(p, insn, brw_imm_w(0));
		insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
		brw_set_src0(p, insn, __retype_d(brw_null_reg()));
		brw_set_src1(p, insn, __retype_d(brw_null_reg()));

		insn->header.execution_size = BRW_EXECUTE_8;
	} else {
		if (p->single_program_flow) {
			insn = brw_next_insn(p, BRW_OPCODE_ADD);

			brw_set_dest(p, insn, brw_ip_reg());
			brw_set_src0(p, insn, brw_ip_reg());
			/* 16 per instruction; presumably the instruction
			 * size in bytes -- TODO confirm.
			 */
			brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16));
			insn->header.execution_size = BRW_EXECUTE_1;
		} else {
			insn = brw_next_insn(p, BRW_OPCODE_WHILE);

			assert(do_insn->header.opcode == BRW_OPCODE_DO);

			brw_set_dest(p, insn, brw_ip_reg());
			brw_set_src0(p, insn, brw_ip_reg());
			brw_set_src1(p, insn, brw_imm_d(0));

			insn->header.execution_size = do_insn->header.execution_size;
			/* +1: target the instruction after the DO. */
			insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
			insn->bits3.if_else.pop_count = 0;
			insn->bits3.if_else.pad0 = 0;
		}
	}
	insn->header.compression_control = BRW_COMPRESSION_NONE;
	p->current->header.predicate_control = BRW_PREDICATE_NONE;

	return insn;
}

/* FORWARD JUMPS:
 *
 * Patch a previously emitted JMPI (with an immediate src1) so that it
 * lands on the next instruction to be emitted.  The distance excludes the
 * JMPI itself (hence the -1) and is scaled by 2 from gen5 on.
 */
void brw_land_fwd_jump(struct brw_compile *p,
		       struct brw_instruction *jmp_insn)
{
	struct brw_instruction *landing = &p->store[p->nr_insn];
	unsigned jmpi = 1;

	if (p->gen >= 050)
		jmpi = 2;

	assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
	assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);

	jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
}



/* To integrate with the above, it makes sense that the comparison
 * instruction should populate the flag register.  It might be simpler
 * just to use the flag reg for most WM tasks?
 *
 * Emits a CMP with the given conditional modifier.  When dest is
 * architecture register 0, the default instruction state is switched to
 * normal predication so that following instructions use the result.
 */
void brw_CMP(struct brw_compile *p,
	     struct brw_reg dest,
	     unsigned conditional,
	     struct brw_reg src0,
	     struct brw_reg src1)
{
	struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_CMP);

	insn->header.destreg__conditionalmod = conditional;
	brw_set_dest(p, insn, dest);
	brw_set_src0(p, insn, src0);
	brw_set_src1(p, insn, src1);

	/* Make it so that future instructions will use the computed flag
	 * value until brw_set_predicate_control_flag_value() is called
	 * again.
	 */
	if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
	    dest.nr == 0) {
		p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
		p->flag_value = 0xff;
	}
}

/* Issue 'wait' instruction for n1, host could program MMIO
   to wake up thread. */
void brw_WAIT(struct brw_compile *p)
{
	struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_WAIT);
	struct brw_reg src = brw_notification_1_reg();

	brw_set_dest(p, insn, src);
	brw_set_src0(p, insn, src);
	brw_set_src1(p, insn, brw_null_reg());
	insn->header.execution_size = 0; /* must */
	insn->header.predicate_control = 0;
	insn->header.compression_control = 0;
}

/***********************************************************************
 * Helpers for the various SEND message types:
 */

/** Extended math function, float[8].
 *
 * On gen6+ this emits a native MATH instruction; msg_reg_nr, data_type
 * and precision are unused there.  Before gen6 it emits a SEND carrying
 * a math message, with msg_reg_nr stored in the instruction header.
 */
void brw_math(struct brw_compile *p,
	      struct brw_reg dest,
	      unsigned function,
	      unsigned saturate,
	      unsigned msg_reg_nr,
	      struct brw_reg src,
	      unsigned data_type,
	      unsigned precision)
{
	if (p->gen >= 060) {
		struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_MATH);

		assert(dest.file == BRW_GENERAL_REGISTER_FILE);
		assert(src.file == BRW_GENERAL_REGISTER_FILE);

		assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
		assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);

		/* Source modifiers are ignored for extended math instructions. */
		assert(!src.negate);
		assert(!src.abs);

		/* Everything except the integer divides must take floats. */
		if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
		    function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
			assert(src.type == BRW_REGISTER_TYPE_F);
		}

		/* Math is the same ISA format as other opcodes, except that CondModifier
		 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
		 */
		insn->header.destreg__conditionalmod = function;
		insn->header.saturate = saturate;

		brw_set_dest(p, insn, dest);
		brw_set_src0(p, insn, src);
		brw_set_src1(p, insn, brw_null_reg());
	} else {
		struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);
		/* Example code doesn't set predicate_control for send
		 * instructions.
		 */
		insn->header.predicate_control = 0;
		insn->header.destreg__conditionalmod = msg_reg_nr;

		brw_set_dest(p, insn, dest);
		brw_set_src0(p, insn, src);
		brw_set_math_message(p, insn, function,
				     src.type == BRW_REGISTER_TYPE_D,
				     precision,
				     saturate,
				     data_type);
	}
}

/** Extended math function, float[8].
 *
 * Two-source variant; always emits a native MATH instruction.
 * NOTE(review): there is no generation check here although brw_math()
 * only uses the MATH opcode on gen6+ -- presumably callers guarantee
 * that; confirm.
 */
void brw_math2(struct brw_compile *p,
	       struct brw_reg dest,
	       unsigned function,
	       struct brw_reg src0,
	       struct brw_reg src1)
{
	struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_MATH);

	assert(dest.file == BRW_GENERAL_REGISTER_FILE);
	assert(src0.file == BRW_GENERAL_REGISTER_FILE);
	assert(src1.file == BRW_GENERAL_REGISTER_FILE);

	assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
	assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
	assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);

	if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
	    function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
		assert(src0.type == BRW_REGISTER_TYPE_F);
		assert(src1.type == BRW_REGISTER_TYPE_F);
	}

	/* Source modifiers are ignored for extended math instructions. */
	assert(!src0.negate);
	assert(!src0.abs);
	assert(!src1.negate);
	assert(!src1.abs);

	/* Math is the same ISA format as other opcodes, except that CondModifier
	 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
	 */
	insn->header.destreg__conditionalmod = function;

	brw_set_dest(p, insn, dest);
	brw_set_src0(p, insn, src0);
	brw_set_src1(p, insn, src1);
}

/**
 * Extended math function, float[16].
 * Use 2 send instructions.
 *
 * That applies before gen6 only: the second SEND is flagged as the
 * second compressed half, uses msg_reg_nr + 1 and writes the register
 * following dest.  On gen6+ a single MATH instruction is emitted and
 * msg_reg_nr and precision are unused.
 */
void brw_math_16(struct brw_compile *p,
		 struct brw_reg dest,
		 unsigned function,
		 unsigned saturate,
		 unsigned msg_reg_nr,
		 struct brw_reg src,
		 unsigned precision)
{
	struct brw_instruction *insn;

	if (p->gen >= 060) {
		insn = brw_next_insn(p, BRW_OPCODE_MATH);

		/* Math is the same ISA format as other opcodes, except that CondModifier
		 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
		 */
		insn->header.destreg__conditionalmod = function;
		insn->header.saturate = saturate;

		/* Source modifiers are ignored for extended math instructions. */
		assert(!src.negate);
		assert(!src.abs);

		brw_set_dest(p, insn, dest);
		brw_set_src0(p, insn, src);
		brw_set_src1(p, insn, brw_null_reg());
		return;
	}

	/* First instruction:
	 */
	brw_push_insn_state(p);
	brw_set_predicate_control_flag_value(p, 0xff);
	brw_set_compression_control(p, BRW_COMPRESSION_NONE);

	insn = brw_next_insn(p, BRW_OPCODE_SEND);
	insn->header.destreg__conditionalmod = msg_reg_nr;

	brw_set_dest(p, insn, dest);
	brw_set_src0(p, insn, src);
	brw_set_math_message(p, insn, function,
			     BRW_MATH_INTEGER_UNSIGNED,
			     precision,
			     saturate,
			     BRW_MATH_DATA_VECTOR);

	/* Second instruction:
	 */
	insn = brw_next_insn(p, BRW_OPCODE_SEND);
	insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
	insn->header.destreg__conditionalmod = msg_reg_nr+1;

	brw_set_dest(p, insn, __offset(dest,1));
	brw_set_src0(p, insn, src);
	brw_set_math_message(p, insn, function,
			     BRW_MATH_INTEGER_UNSIGNED,
			     precision,
			     saturate,
			     BRW_MATH_DATA_VECTOR);

	brw_pop_insn_state(p);
}

/**
 * Write a block of OWORDs (half a GRF each) to the scratch buffer,
 * using a constant offset per channel.
 *
 * The offset must be aligned to oword size (16 bytes).  Used for
 * register spilling.
 *
 * num_regs == 1 writes 2 owords (message length 2); any other value
 * writes 4 owords (message length 3).  From gen6 on the byte offset is
 * converted to owords before it is placed in the header.
 */
void brw_oword_block_write_scratch(struct brw_compile *p,
				   struct brw_reg mrf,
				   int num_regs,
				   unsigned offset)
{
	uint32_t msg_control, msg_type;
	int mlen;

	if (p->gen >= 060)
		offset /= 16;

	mrf = __retype_ud(mrf);

	if (num_regs == 1) {
		msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
		mlen = 2;
	} else {
		msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
		mlen = 3;
	}

	/* Set up the message header.  This is g0, with g0.2 filled with
	 * the offset.  We don't want to leave our offset around in g0 or
	 * it'll screw up texture samples, so set it up inside the message
	 * reg.
	 */
	{
		brw_push_insn_state(p);
		brw_set_mask_control(p, BRW_MASK_DISABLE);
		brw_set_compression_control(p, BRW_COMPRESSION_NONE);

		brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0)));

		/* set message header global offset field (reg 0, element 2) */
		brw_MOV(p,
			__retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, mrf.nr, 2)),
			brw_imm_ud(offset));

		brw_pop_insn_state(p);
	}

	{
		struct brw_reg dest;
		struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);
		int send_commit_msg;
		struct brw_reg src_header = __retype_uw(brw_vec8_grf(0, 0));

		/* The SEND itself is never compressed; a compressed default
		 * state only widens the (pre-gen6) destination to 16 channels.
		 */
		if (insn->header.compression_control != BRW_COMPRESSION_NONE) {
			insn->header.compression_control = BRW_COMPRESSION_NONE;
			src_header = vec16(src_header);
		}
		assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
		insn->header.destreg__conditionalmod = mrf.nr;

		/* Until gen6, writes followed by reads from the same location
		 * are not guaranteed to be ordered unless write_commit is set.
		 * If set, then a no-op write is issued to the destination
		 * register to set a dependency, and a read from the destination
		 * can be used to ensure the ordering.
		 *
		 * For gen6, only writes between different threads need ordering
		 * protection.  Our use of DP writes is all about register
		 * spilling within a thread.
		 */
		if (p->gen >= 060) {
			dest = __retype_uw(vec16(brw_null_reg()));
			send_commit_msg = 0;
		} else {
			dest = src_header;
			send_commit_msg = 1;
		}

		brw_set_dest(p, insn, dest);
		if (p->gen >= 060) {
			brw_set_src0(p, insn, mrf);
		} else {
			brw_set_src0(p, insn, brw_null_reg());
		}

		if (p->gen >= 060)
			msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
		else
			msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;

		brw_set_dp_write_message(p,
					 insn,
					 255, /* binding table index (255=stateless) */
					 msg_control,
					 msg_type,
					 mlen,
					 true, /* header_present */
					 0, /* pixel scoreboard */
					 send_commit_msg, /* response_length */
					 0, /* eot */
					 send_commit_msg);
	}
}


/**
 * Read a block of owords (half a GRF each) from the scratch buffer
 * using a constant index per channel.
 *
 * Offset must be aligned to oword size (16 bytes).  Used for register
 * spilling.
1469 */ 1470void 1471brw_oword_block_read_scratch(struct brw_compile *p, 1472 struct brw_reg dest, 1473 struct brw_reg mrf, 1474 int num_regs, 1475 unsigned offset) 1476{ 1477 uint32_t msg_control; 1478 int rlen; 1479 1480 if (p->gen >= 060) 1481 offset /= 16; 1482 1483 mrf = __retype_ud(mrf); 1484 dest = __retype_uw(dest); 1485 1486 if (num_regs == 1) { 1487 msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS; 1488 rlen = 1; 1489 } else { 1490 msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS; 1491 rlen = 2; 1492 } 1493 1494 { 1495 brw_push_insn_state(p); 1496 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1497 brw_set_mask_control(p, BRW_MASK_DISABLE); 1498 1499 brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0))); 1500 1501 /* set message header global offset field (reg 0, element 2) */ 1502 brw_MOV(p, 1503 __retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, mrf.nr, 2)), 1504 brw_imm_ud(offset)); 1505 1506 brw_pop_insn_state(p); 1507 } 1508 1509 { 1510 struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND); 1511 1512 assert(insn->header.predicate_control == 0); 1513 insn->header.compression_control = BRW_COMPRESSION_NONE; 1514 insn->header.destreg__conditionalmod = mrf.nr; 1515 1516 brw_set_dest(p, insn, dest); /* UW? */ 1517 if (p->gen >= 060) { 1518 brw_set_src0(p, insn, mrf); 1519 } else { 1520 brw_set_src0(p, insn, brw_null_reg()); 1521 } 1522 1523 brw_set_dp_read_message(p, 1524 insn, 1525 255, /* binding table index (255=stateless) */ 1526 msg_control, 1527 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ 1528 BRW_DATAPORT_READ_TARGET_RENDER_CACHE, 1529 1, /* msg_length */ 1530 rlen); 1531 } 1532} 1533 1534/** 1535 * Read a float[4] vector from the data port Data Cache (const buffer). 1536 * Location (in buffer) should be a multiple of 16. 1537 * Used for fetching shader constants. 
1538 */ 1539void brw_oword_block_read(struct brw_compile *p, 1540 struct brw_reg dest, 1541 struct brw_reg mrf, 1542 uint32_t offset, 1543 uint32_t bind_table_index) 1544{ 1545 struct brw_instruction *insn; 1546 1547 /* On newer hardware, offset is in units of owords. */ 1548 if (p->gen >= 060) 1549 offset /= 16; 1550 1551 mrf = __retype_ud(mrf); 1552 1553 brw_push_insn_state(p); 1554 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 1555 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1556 brw_set_mask_control(p, BRW_MASK_DISABLE); 1557 1558 brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0))); 1559 1560 /* set message header global offset field (reg 0, element 2) */ 1561 brw_MOV(p, 1562 __retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, mrf.nr, 2)), 1563 brw_imm_ud(offset)); 1564 1565 insn = brw_next_insn(p, BRW_OPCODE_SEND); 1566 insn->header.destreg__conditionalmod = mrf.nr; 1567 1568 /* cast dest to a uword[8] vector */ 1569 dest = __retype_uw(vec8(dest)); 1570 1571 brw_set_dest(p, insn, dest); 1572 if (p->gen >= 060) { 1573 brw_set_src0(p, insn, mrf); 1574 } else { 1575 brw_set_src0(p, insn, brw_null_reg()); 1576 } 1577 1578 brw_set_dp_read_message(p, 1579 insn, 1580 bind_table_index, 1581 BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW, 1582 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, 1583 BRW_DATAPORT_READ_TARGET_DATA_CACHE, 1584 1, /* msg_length */ 1585 1); /* response_length (1 reg, 2 owords!) */ 1586 1587 brw_pop_insn_state(p); 1588} 1589 1590/** 1591 * Read a set of dwords from the data port Data Cache (const buffer). 1592 * 1593 * Location (in buffer) appears as UD offsets in the register after 1594 * the provided mrf header reg. 
1595 */ 1596void brw_dword_scattered_read(struct brw_compile *p, 1597 struct brw_reg dest, 1598 struct brw_reg mrf, 1599 uint32_t bind_table_index) 1600{ 1601 struct brw_instruction *insn; 1602 1603 mrf = __retype_ud(mrf); 1604 1605 brw_push_insn_state(p); 1606 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 1607 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1608 brw_set_mask_control(p, BRW_MASK_DISABLE); 1609 brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0))); 1610 brw_pop_insn_state(p); 1611 1612 insn = brw_next_insn(p, BRW_OPCODE_SEND); 1613 insn->header.destreg__conditionalmod = mrf.nr; 1614 1615 /* cast dest to a uword[8] vector */ 1616 dest = __retype_uw(vec8(dest)); 1617 1618 brw_set_dest(p, insn, dest); 1619 brw_set_src0(p, insn, brw_null_reg()); 1620 1621 brw_set_dp_read_message(p, 1622 insn, 1623 bind_table_index, 1624 BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS, 1625 BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ, 1626 BRW_DATAPORT_READ_TARGET_DATA_CACHE, 1627 2, /* msg_length */ 1628 1); /* response_length */ 1629} 1630 1631/** 1632 * Read float[4] constant(s) from VS constant buffer. 1633 * For relative addressing, two float[4] constants will be read into 'dest'. 1634 * Otherwise, one float[4] constant will be read into the lower half of 'dest'. 
1635 */ 1636void brw_dp_READ_4_vs(struct brw_compile *p, 1637 struct brw_reg dest, 1638 unsigned location, 1639 unsigned bind_table_index) 1640{ 1641 struct brw_instruction *insn; 1642 unsigned msg_reg_nr = 1; 1643 1644 if (p->gen >= 060) 1645 location /= 16; 1646 1647 /* Setup MRF[1] with location/offset into const buffer */ 1648 brw_push_insn_state(p); 1649 brw_set_access_mode(p, BRW_ALIGN_1); 1650 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1651 brw_set_mask_control(p, BRW_MASK_DISABLE); 1652 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 1653 brw_MOV(p, __retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 2)), 1654 brw_imm_ud(location)); 1655 brw_pop_insn_state(p); 1656 1657 insn = brw_next_insn(p, BRW_OPCODE_SEND); 1658 1659 insn->header.predicate_control = BRW_PREDICATE_NONE; 1660 insn->header.compression_control = BRW_COMPRESSION_NONE; 1661 insn->header.destreg__conditionalmod = msg_reg_nr; 1662 insn->header.mask_control = BRW_MASK_DISABLE; 1663 1664 brw_set_dest(p, insn, dest); 1665 if (p->gen >= 060) { 1666 brw_set_src0(p, insn, brw_message_reg(msg_reg_nr)); 1667 } else { 1668 brw_set_src0(p, insn, brw_null_reg()); 1669 } 1670 1671 brw_set_dp_read_message(p, 1672 insn, 1673 bind_table_index, 1674 0, 1675 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ 1676 BRW_DATAPORT_READ_TARGET_DATA_CACHE, 1677 1, /* msg_length */ 1678 1); /* response_length (1 Oword) */ 1679} 1680 1681/** 1682 * Read a float[4] constant per vertex from VS constant buffer, with 1683 * relative addressing. 
1684 */ 1685void brw_dp_READ_4_vs_relative(struct brw_compile *p, 1686 struct brw_reg dest, 1687 struct brw_reg addr_reg, 1688 unsigned offset, 1689 unsigned bind_table_index) 1690{ 1691 struct brw_reg src = brw_vec8_grf(0, 0); 1692 struct brw_instruction *insn; 1693 int msg_type; 1694 1695 /* Setup MRF[1] with offset into const buffer */ 1696 brw_push_insn_state(p); 1697 brw_set_access_mode(p, BRW_ALIGN_1); 1698 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1699 brw_set_mask_control(p, BRW_MASK_DISABLE); 1700 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 1701 1702 /* M1.0 is block offset 0, M1.4 is block offset 1, all other 1703 * fields ignored. 1704 */ 1705 brw_ADD(p, __retype_d(brw_message_reg(1)), 1706 addr_reg, brw_imm_d(offset)); 1707 brw_pop_insn_state(p); 1708 1709 gen6_resolve_implied_move(p, &src, 0); 1710 1711 insn = brw_next_insn(p, BRW_OPCODE_SEND); 1712 insn->header.predicate_control = BRW_PREDICATE_NONE; 1713 insn->header.compression_control = BRW_COMPRESSION_NONE; 1714 insn->header.destreg__conditionalmod = 0; 1715 insn->header.mask_control = BRW_MASK_DISABLE; 1716 1717 brw_set_dest(p, insn, dest); 1718 brw_set_src0(p, insn, src); 1719 1720 if (p->gen >= 060) 1721 msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; 1722 else if (p->gen >= 045) 1723 msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; 1724 else 1725 msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; 1726 1727 brw_set_dp_read_message(p, 1728 insn, 1729 bind_table_index, 1730 BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD, 1731 msg_type, 1732 BRW_DATAPORT_READ_TARGET_DATA_CACHE, 1733 2, /* msg_length */ 1734 1); /* response_length */ 1735} 1736 1737void brw_fb_WRITE(struct brw_compile *p, 1738 int dispatch_width, 1739 unsigned msg_reg_nr, 1740 struct brw_reg src0, 1741 unsigned msg_control, 1742 unsigned binding_table_index, 1743 unsigned msg_length, 1744 unsigned response_length, 1745 bool eot, 1746 bool header_present) 1747{ 1748 struct brw_instruction 
*insn; 1749 unsigned msg_type; 1750 struct brw_reg dest; 1751 1752 if (dispatch_width == 16) 1753 dest = __retype_uw(vec16(brw_null_reg())); 1754 else 1755 dest = __retype_uw(vec8(brw_null_reg())); 1756 1757 if (p->gen >= 060 && binding_table_index == 0) { 1758 insn = brw_next_insn(p, BRW_OPCODE_SENDC); 1759 } else { 1760 insn = brw_next_insn(p, BRW_OPCODE_SEND); 1761 } 1762 /* The execution mask is ignored for render target writes. */ 1763 insn->header.predicate_control = 0; 1764 insn->header.compression_control = BRW_COMPRESSION_NONE; 1765 1766 if (p->gen >= 060) { 1767 /* headerless version, just submit color payload */ 1768 src0 = brw_message_reg(msg_reg_nr); 1769 1770 msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE; 1771 } else { 1772 insn->header.destreg__conditionalmod = msg_reg_nr; 1773 1774 msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE; 1775 } 1776 1777 brw_set_dest(p, insn, dest); 1778 brw_set_src0(p, insn, src0); 1779 brw_set_dp_write_message(p, 1780 insn, 1781 binding_table_index, 1782 msg_control, 1783 msg_type, 1784 msg_length, 1785 header_present, 1786 eot, 1787 response_length, 1788 eot, 1789 0 /* send_commit_msg */); 1790} 1791 1792/** 1793 * Texture sample instruction. 1794 * Note: the msg_type plus msg_length values determine exactly what kind 1795 * of sampling operation is performed. See volume 4, page 161 of docs. 
1796 */ 1797void brw_SAMPLE(struct brw_compile *p, 1798 struct brw_reg dest, 1799 unsigned msg_reg_nr, 1800 struct brw_reg src0, 1801 unsigned binding_table_index, 1802 unsigned sampler, 1803 unsigned writemask, 1804 unsigned msg_type, 1805 unsigned response_length, 1806 unsigned msg_length, 1807 bool header_present, 1808 unsigned simd_mode) 1809{ 1810 assert(writemask); 1811 1812 if (p->gen < 050 || writemask != WRITEMASK_XYZW) { 1813 struct brw_reg m1 = brw_message_reg(msg_reg_nr); 1814 1815 writemask = ~writemask & WRITEMASK_XYZW; 1816 1817 brw_push_insn_state(p); 1818 1819 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1820 brw_set_mask_control(p, BRW_MASK_DISABLE); 1821 1822 brw_MOV(p, __retype_ud(m1), __retype_ud(brw_vec8_grf(0,0))); 1823 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(writemask << 12)); 1824 1825 brw_pop_insn_state(p); 1826 1827 src0 = __retype_uw(brw_null_reg()); 1828 } 1829 1830 { 1831 struct brw_instruction *insn; 1832 1833 gen6_resolve_implied_move(p, &src0, msg_reg_nr); 1834 1835 insn = brw_next_insn(p, BRW_OPCODE_SEND); 1836 insn->header.predicate_control = 0; /* XXX */ 1837 insn->header.compression_control = BRW_COMPRESSION_NONE; 1838 if (p->gen < 060) 1839 insn->header.destreg__conditionalmod = msg_reg_nr; 1840 1841 brw_set_dest(p, insn, dest); 1842 brw_set_src0(p, insn, src0); 1843 brw_set_sampler_message(p, insn, 1844 binding_table_index, 1845 sampler, 1846 msg_type, 1847 response_length, 1848 msg_length, 1849 header_present, 1850 simd_mode); 1851 } 1852} 1853 1854/* All these variables are pretty confusing - we might be better off 1855 * using bitmasks and macros for this, in the old style. Or perhaps 1856 * just having the caller instantiate the fields in dword3 itself. 
1857 */ 1858void brw_urb_WRITE(struct brw_compile *p, 1859 struct brw_reg dest, 1860 unsigned msg_reg_nr, 1861 struct brw_reg src0, 1862 bool allocate, 1863 bool used, 1864 unsigned msg_length, 1865 unsigned response_length, 1866 bool eot, 1867 bool writes_complete, 1868 unsigned offset, 1869 unsigned swizzle) 1870{ 1871 struct brw_instruction *insn; 1872 1873 gen6_resolve_implied_move(p, &src0, msg_reg_nr); 1874 1875 if (p->gen >= 070) { 1876 /* Enable Channel Masks in the URB_WRITE_HWORD message header */ 1877 brw_push_insn_state(p); 1878 brw_set_access_mode(p, BRW_ALIGN_1); 1879 brw_OR(p, __retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5)), 1880 __retype_ud(brw_vec1_grf(0, 5)), 1881 brw_imm_ud(0xff00)); 1882 brw_pop_insn_state(p); 1883 } 1884 1885 insn = brw_next_insn(p, BRW_OPCODE_SEND); 1886 1887 assert(msg_length < BRW_MAX_MRF); 1888 1889 brw_set_dest(p, insn, dest); 1890 brw_set_src0(p, insn, src0); 1891 brw_set_src1(p, insn, brw_imm_d(0)); 1892 1893 if (p->gen <= 060) 1894 insn->header.destreg__conditionalmod = msg_reg_nr; 1895 1896 brw_set_urb_message(p, 1897 insn, 1898 allocate, 1899 used, 1900 msg_length, 1901 response_length, 1902 eot, 1903 writes_complete, 1904 offset, 1905 swizzle); 1906} 1907 1908static int 1909brw_find_next_block_end(struct brw_compile *p, int start) 1910{ 1911 int ip; 1912 1913 for (ip = start + 1; ip < p->nr_insn; ip++) { 1914 struct brw_instruction *insn = &p->store[ip]; 1915 1916 switch (insn->header.opcode) { 1917 case BRW_OPCODE_ENDIF: 1918 case BRW_OPCODE_ELSE: 1919 case BRW_OPCODE_WHILE: 1920 return ip; 1921 } 1922 } 1923 assert(!"not reached"); 1924 return start + 1; 1925} 1926 1927/* There is no DO instruction on gen6, so to find the end of the loop 1928 * we have to see if the loop is jumping back before our start 1929 * instruction. 
1930 */ 1931static int 1932brw_find_loop_end(struct brw_compile *p, int start) 1933{ 1934 int ip; 1935 int br = 2; 1936 1937 for (ip = start + 1; ip < p->nr_insn; ip++) { 1938 struct brw_instruction *insn = &p->store[ip]; 1939 1940 if (insn->header.opcode == BRW_OPCODE_WHILE) { 1941 int jip = p->gen <= 070 ? insn->bits1.branch_gen6.jump_count 1942 : insn->bits3.break_cont.jip; 1943 if (ip + jip / br <= start) 1944 return ip; 1945 } 1946 } 1947 assert(!"not reached"); 1948 return start + 1; 1949} 1950 1951/* After program generation, go back and update the UIP and JIP of 1952 * BREAK and CONT instructions to their correct locations. 1953 */ 1954void 1955brw_set_uip_jip(struct brw_compile *p) 1956{ 1957 int ip; 1958 int br = 2; 1959 1960 if (p->gen <= 060) 1961 return; 1962 1963 for (ip = 0; ip < p->nr_insn; ip++) { 1964 struct brw_instruction *insn = &p->store[ip]; 1965 1966 switch (insn->header.opcode) { 1967 case BRW_OPCODE_BREAK: 1968 insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip); 1969 /* Gen7 UIP points to WHILE; Gen6 points just after it */ 1970 insn->bits3.break_cont.uip = 1971 br * (brw_find_loop_end(p, ip) - ip + (p->gen <= 070 ? 
1 : 0)); 1972 break; 1973 case BRW_OPCODE_CONTINUE: 1974 insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip); 1975 insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip); 1976 1977 assert(insn->bits3.break_cont.uip != 0); 1978 assert(insn->bits3.break_cont.jip != 0); 1979 break; 1980 } 1981 } 1982} 1983 1984void brw_ff_sync(struct brw_compile *p, 1985 struct brw_reg dest, 1986 unsigned msg_reg_nr, 1987 struct brw_reg src0, 1988 bool allocate, 1989 unsigned response_length, 1990 bool eot) 1991{ 1992 struct brw_instruction *insn; 1993 1994 gen6_resolve_implied_move(p, &src0, msg_reg_nr); 1995 1996 insn = brw_next_insn(p, BRW_OPCODE_SEND); 1997 brw_set_dest(p, insn, dest); 1998 brw_set_src0(p, insn, src0); 1999 brw_set_src1(p, insn, brw_imm_d(0)); 2000 2001 if (p->gen < 060) 2002 insn->header.destreg__conditionalmod = msg_reg_nr; 2003 2004 brw_set_ff_sync_message(p, 2005 insn, 2006 allocate, 2007 response_length, 2008 eot); 2009} 2010