/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 26 **********************************************************************/ 27 /* 28 * Authors: 29 * Keith Whitwell <keithw@vmware.com> 30 */ 31 32 33#ifndef BRW_EU_H 34#define BRW_EU_H 35 36#include <stdbool.h> 37#include <stdio.h> 38#include "brw_inst.h" 39#include "brw_compiler.h" 40#include "brw_eu_defines.h" 41#include "brw_reg.h" 42#include "brw_disasm_info.h" 43 44#ifdef __cplusplus 45extern "C" { 46#endif 47 48#define BRW_EU_MAX_INSN_STACK 5 49 50struct brw_insn_state { 51 /* One of BRW_EXECUTE_* */ 52 unsigned exec_size:3; 53 54 /* Group in units of channels */ 55 unsigned group:5; 56 57 /* Compression control on gfx4-5 */ 58 bool compressed:1; 59 60 /* One of BRW_MASK_* */ 61 unsigned mask_control:1; 62 63 /* Scheduling info for Gfx12+ */ 64 struct tgl_swsb swsb; 65 66 bool saturate:1; 67 68 /* One of BRW_ALIGN_* */ 69 unsigned access_mode:1; 70 71 /* One of BRW_PREDICATE_* */ 72 enum brw_predicate predicate:4; 73 74 bool pred_inv:1; 75 76 /* Flag subreg. Bottom bit is subreg, top bit is reg */ 77 unsigned flag_subreg:2; 78 79 bool acc_wr_control:1; 80}; 81 82 83/* A helper for accessing the last instruction emitted. This makes it easy 84 * to set various bits on an instruction without having to create temporary 85 * variable and assign the emitted instruction to those. 86 */ 87#define brw_last_inst (&p->store[p->nr_insn - 1]) 88 89struct brw_codegen { 90 brw_inst *store; 91 int store_size; 92 unsigned nr_insn; 93 unsigned int next_insn_offset; 94 95 void *mem_ctx; 96 97 /* Allow clients to push/pop instruction state: 98 */ 99 struct brw_insn_state stack[BRW_EU_MAX_INSN_STACK]; 100 struct brw_insn_state *current; 101 102 /** Whether or not the user wants automatic exec sizes 103 * 104 * If true, codegen will try to automatically infer the exec size of an 105 * instruction from the width of the destination register. If false, it 106 * will take whatever is set by brw_set_default_exec_size verbatim. 
107 * 108 * This is set to true by default in brw_init_codegen. 109 */ 110 bool automatic_exec_sizes; 111 112 bool single_program_flow; 113 const struct intel_device_info *devinfo; 114 115 /* Control flow stacks: 116 * - if_stack contains IF and ELSE instructions which must be patched 117 * (and popped) once the matching ENDIF instruction is encountered. 118 * 119 * Just store the instruction pointer(an index). 120 */ 121 int *if_stack; 122 int if_stack_depth; 123 int if_stack_array_size; 124 125 /** 126 * loop_stack contains the instruction pointers of the starts of loops which 127 * must be patched (and popped) once the matching WHILE instruction is 128 * encountered. 129 */ 130 int *loop_stack; 131 /** 132 * pre-gfx6, the BREAK and CONT instructions had to tell how many IF/ENDIF 133 * blocks they were popping out of, to fix up the mask stack. This tracks 134 * the IF/ENDIF nesting in each current nested loop level. 135 */ 136 int *if_depth_in_loop; 137 int loop_stack_depth; 138 int loop_stack_array_size; 139 140 struct brw_shader_reloc *relocs; 141 int num_relocs; 142 int reloc_array_size; 143}; 144 145struct brw_label { 146 int offset; 147 int number; 148 struct brw_label *next; 149}; 150 151void brw_pop_insn_state( struct brw_codegen *p ); 152void brw_push_insn_state( struct brw_codegen *p ); 153unsigned brw_get_default_exec_size(struct brw_codegen *p); 154unsigned brw_get_default_group(struct brw_codegen *p); 155unsigned brw_get_default_access_mode(struct brw_codegen *p); 156struct tgl_swsb brw_get_default_swsb(struct brw_codegen *p); 157void brw_set_default_exec_size(struct brw_codegen *p, unsigned value); 158void brw_set_default_mask_control( struct brw_codegen *p, unsigned value ); 159void brw_set_default_saturate( struct brw_codegen *p, bool enable ); 160void brw_set_default_access_mode( struct brw_codegen *p, unsigned access_mode ); 161void brw_inst_set_compression(const struct intel_device_info *devinfo, 162 brw_inst *inst, bool on); 163void 
brw_set_default_compression(struct brw_codegen *p, bool on); 164void brw_inst_set_group(const struct intel_device_info *devinfo, 165 brw_inst *inst, unsigned group); 166void brw_set_default_group(struct brw_codegen *p, unsigned group); 167void brw_set_default_compression_control(struct brw_codegen *p, enum brw_compression c); 168void brw_set_default_predicate_control(struct brw_codegen *p, enum brw_predicate pc); 169void brw_set_default_predicate_inverse(struct brw_codegen *p, bool predicate_inverse); 170void brw_set_default_flag_reg(struct brw_codegen *p, int reg, int subreg); 171void brw_set_default_acc_write_control(struct brw_codegen *p, unsigned value); 172void brw_set_default_swsb(struct brw_codegen *p, struct tgl_swsb value); 173 174void brw_init_codegen(const struct intel_device_info *, struct brw_codegen *p, 175 void *mem_ctx); 176bool brw_has_jip(const struct intel_device_info *devinfo, enum opcode opcode); 177bool brw_has_uip(const struct intel_device_info *devinfo, enum opcode opcode); 178const struct brw_label *brw_find_label(const struct brw_label *root, int offset); 179void brw_create_label(struct brw_label **labels, int offset, void *mem_ctx); 180int brw_disassemble_inst(FILE *file, const struct intel_device_info *devinfo, 181 const struct brw_inst *inst, bool is_compacted, 182 int offset, const struct brw_label *root_label); 183const struct 184brw_label *brw_label_assembly(const struct intel_device_info *devinfo, 185 const void *assembly, int start, int end, 186 void *mem_ctx); 187void brw_disassemble_with_labels(const struct intel_device_info *devinfo, 188 const void *assembly, int start, int end, FILE *out); 189void brw_disassemble(const struct intel_device_info *devinfo, 190 const void *assembly, int start, int end, 191 const struct brw_label *root_label, FILE *out); 192const struct brw_shader_reloc *brw_get_shader_relocs(struct brw_codegen *p, 193 unsigned *num_relocs); 194const unsigned *brw_get_program( struct brw_codegen *p, unsigned *sz ); 
195 196bool brw_try_override_assembly(struct brw_codegen *p, int start_offset, 197 const char *identifier); 198 199void brw_realign(struct brw_codegen *p, unsigned align); 200int brw_append_data(struct brw_codegen *p, void *data, 201 unsigned size, unsigned align); 202brw_inst *brw_next_insn(struct brw_codegen *p, unsigned opcode); 203void brw_add_reloc(struct brw_codegen *p, uint32_t id, 204 enum brw_shader_reloc_type type, 205 uint32_t offset, uint32_t delta); 206void brw_set_dest(struct brw_codegen *p, brw_inst *insn, struct brw_reg dest); 207void brw_set_src0(struct brw_codegen *p, brw_inst *insn, struct brw_reg reg); 208 209void gfx6_resolve_implied_move(struct brw_codegen *p, 210 struct brw_reg *src, 211 unsigned msg_reg_nr); 212 213/* Helpers for regular instructions: 214 */ 215#define ALU1(OP) \ 216brw_inst *brw_##OP(struct brw_codegen *p, \ 217 struct brw_reg dest, \ 218 struct brw_reg src0); 219 220#define ALU2(OP) \ 221brw_inst *brw_##OP(struct brw_codegen *p, \ 222 struct brw_reg dest, \ 223 struct brw_reg src0, \ 224 struct brw_reg src1); 225 226#define ALU3(OP) \ 227brw_inst *brw_##OP(struct brw_codegen *p, \ 228 struct brw_reg dest, \ 229 struct brw_reg src0, \ 230 struct brw_reg src1, \ 231 struct brw_reg src2); 232 233ALU1(MOV) 234ALU2(SEL) 235ALU1(NOT) 236ALU2(AND) 237ALU2(OR) 238ALU2(XOR) 239ALU2(SHR) 240ALU2(SHL) 241ALU1(DIM) 242ALU2(ASR) 243ALU2(ROL) 244ALU2(ROR) 245ALU3(CSEL) 246ALU1(F32TO16) 247ALU1(F16TO32) 248ALU2(ADD) 249ALU3(ADD3) 250ALU2(AVG) 251ALU2(MUL) 252ALU1(FRC) 253ALU1(RNDD) 254ALU1(RNDE) 255ALU1(RNDU) 256ALU1(RNDZ) 257ALU2(MAC) 258ALU2(MACH) 259ALU1(LZD) 260ALU2(DP4) 261ALU2(DPH) 262ALU2(DP3) 263ALU2(DP2) 264ALU3(DP4A) 265ALU2(LINE) 266ALU2(PLN) 267ALU3(MAD) 268ALU3(LRP) 269ALU1(BFREV) 270ALU3(BFE) 271ALU2(BFI1) 272ALU3(BFI2) 273ALU1(FBH) 274ALU1(FBL) 275ALU1(CBIT) 276ALU2(ADDC) 277ALU2(SUBB) 278 279#undef ALU1 280#undef ALU2 281#undef ALU3 282 283 284/* Helpers for SEND instruction: 285 */ 286 287/** 288 * Construct a message 
descriptor immediate with the specified common 289 * descriptor controls. 290 */ 291static inline uint32_t 292brw_message_desc(const struct intel_device_info *devinfo, 293 unsigned msg_length, 294 unsigned response_length, 295 bool header_present) 296{ 297 if (devinfo->ver >= 5) { 298 return (SET_BITS(msg_length, 28, 25) | 299 SET_BITS(response_length, 24, 20) | 300 SET_BITS(header_present, 19, 19)); 301 } else { 302 return (SET_BITS(msg_length, 23, 20) | 303 SET_BITS(response_length, 19, 16)); 304 } 305} 306 307static inline unsigned 308brw_message_desc_mlen(const struct intel_device_info *devinfo, uint32_t desc) 309{ 310 if (devinfo->ver >= 5) 311 return GET_BITS(desc, 28, 25); 312 else 313 return GET_BITS(desc, 23, 20); 314} 315 316static inline unsigned 317brw_message_desc_rlen(const struct intel_device_info *devinfo, uint32_t desc) 318{ 319 if (devinfo->ver >= 5) 320 return GET_BITS(desc, 24, 20); 321 else 322 return GET_BITS(desc, 19, 16); 323} 324 325static inline bool 326brw_message_desc_header_present(ASSERTED 327 const struct intel_device_info *devinfo, 328 uint32_t desc) 329{ 330 assert(devinfo->ver >= 5); 331 return GET_BITS(desc, 19, 19); 332} 333 334static inline unsigned 335brw_message_ex_desc(UNUSED const struct intel_device_info *devinfo, 336 unsigned ex_msg_length) 337{ 338 return SET_BITS(ex_msg_length, 9, 6); 339} 340 341static inline unsigned 342brw_message_ex_desc_ex_mlen(UNUSED const struct intel_device_info *devinfo, 343 uint32_t ex_desc) 344{ 345 return GET_BITS(ex_desc, 9, 6); 346} 347 348static inline uint32_t 349brw_urb_desc(const struct intel_device_info *devinfo, 350 unsigned msg_type, 351 bool per_slot_offset_present, 352 bool channel_mask_present, 353 unsigned global_offset) 354{ 355 if (devinfo->ver >= 8) { 356 return (SET_BITS(per_slot_offset_present, 17, 17) | 357 SET_BITS(channel_mask_present, 15, 15) | 358 SET_BITS(global_offset, 14, 4) | 359 SET_BITS(msg_type, 3, 0)); 360 } else if (devinfo->ver >= 7) { 361 
assert(!channel_mask_present); 362 return (SET_BITS(per_slot_offset_present, 16, 16) | 363 SET_BITS(global_offset, 13, 3) | 364 SET_BITS(msg_type, 3, 0)); 365 } else { 366 unreachable("unhandled URB write generation"); 367 } 368} 369 370static inline uint32_t 371brw_urb_desc_msg_type(ASSERTED const struct intel_device_info *devinfo, 372 uint32_t desc) 373{ 374 assert(devinfo->ver >= 7); 375 return GET_BITS(desc, 3, 0); 376} 377 378static inline uint32_t 379brw_urb_fence_desc(const struct intel_device_info *devinfo) 380{ 381 assert(devinfo->has_lsc); 382 return brw_urb_desc(devinfo, GFX125_URB_OPCODE_FENCE, false, false, 0); 383} 384 385/** 386 * Construct a message descriptor immediate with the specified sampler 387 * function controls. 388 */ 389static inline uint32_t 390brw_sampler_desc(const struct intel_device_info *devinfo, 391 unsigned binding_table_index, 392 unsigned sampler, 393 unsigned msg_type, 394 unsigned simd_mode, 395 unsigned return_format) 396{ 397 const unsigned desc = (SET_BITS(binding_table_index, 7, 0) | 398 SET_BITS(sampler, 11, 8)); 399 if (devinfo->ver >= 7) 400 return (desc | SET_BITS(msg_type, 16, 12) | 401 SET_BITS(simd_mode, 18, 17)); 402 else if (devinfo->ver >= 5) 403 return (desc | SET_BITS(msg_type, 15, 12) | 404 SET_BITS(simd_mode, 17, 16)); 405 else if (devinfo->is_g4x) 406 return desc | SET_BITS(msg_type, 15, 12); 407 else 408 return (desc | SET_BITS(return_format, 13, 12) | 409 SET_BITS(msg_type, 15, 14)); 410} 411 412static inline unsigned 413brw_sampler_desc_binding_table_index(UNUSED 414 const struct intel_device_info *devinfo, 415 uint32_t desc) 416{ 417 return GET_BITS(desc, 7, 0); 418} 419 420static inline unsigned 421brw_sampler_desc_sampler(UNUSED const struct intel_device_info *devinfo, 422 uint32_t desc) 423{ 424 return GET_BITS(desc, 11, 8); 425} 426 427static inline unsigned 428brw_sampler_desc_msg_type(const struct intel_device_info *devinfo, uint32_t desc) 429{ 430 if (devinfo->ver >= 7) 431 return GET_BITS(desc, 
16, 12); 432 else if (devinfo->ver >= 5 || devinfo->is_g4x) 433 return GET_BITS(desc, 15, 12); 434 else 435 return GET_BITS(desc, 15, 14); 436} 437 438static inline unsigned 439brw_sampler_desc_simd_mode(const struct intel_device_info *devinfo, 440 uint32_t desc) 441{ 442 assert(devinfo->ver >= 5); 443 if (devinfo->ver >= 7) 444 return GET_BITS(desc, 18, 17); 445 else 446 return GET_BITS(desc, 17, 16); 447} 448 449static inline unsigned 450brw_sampler_desc_return_format(ASSERTED const struct intel_device_info *devinfo, 451 uint32_t desc) 452{ 453 assert(devinfo->ver == 4 && !devinfo->is_g4x); 454 return GET_BITS(desc, 13, 12); 455} 456 457/** 458 * Construct a message descriptor for the dataport 459 */ 460static inline uint32_t 461brw_dp_desc(const struct intel_device_info *devinfo, 462 unsigned binding_table_index, 463 unsigned msg_type, 464 unsigned msg_control) 465{ 466 /* Prior to gfx6, things are too inconsistent; use the dp_read/write_desc 467 * helpers instead. 468 */ 469 assert(devinfo->ver >= 6); 470 const unsigned desc = SET_BITS(binding_table_index, 7, 0); 471 if (devinfo->ver >= 8) { 472 return (desc | SET_BITS(msg_control, 13, 8) | 473 SET_BITS(msg_type, 18, 14)); 474 } else if (devinfo->ver >= 7) { 475 return (desc | SET_BITS(msg_control, 13, 8) | 476 SET_BITS(msg_type, 17, 14)); 477 } else { 478 return (desc | SET_BITS(msg_control, 12, 8) | 479 SET_BITS(msg_type, 16, 13)); 480 } 481} 482 483static inline unsigned 484brw_dp_desc_binding_table_index(UNUSED const struct intel_device_info *devinfo, 485 uint32_t desc) 486{ 487 return GET_BITS(desc, 7, 0); 488} 489 490static inline unsigned 491brw_dp_desc_msg_type(const struct intel_device_info *devinfo, uint32_t desc) 492{ 493 assert(devinfo->ver >= 6); 494 if (devinfo->ver >= 8) 495 return GET_BITS(desc, 18, 14); 496 else if (devinfo->ver >= 7) 497 return GET_BITS(desc, 17, 14); 498 else 499 return GET_BITS(desc, 16, 13); 500} 501 502static inline unsigned 503brw_dp_desc_msg_control(const struct 
intel_device_info *devinfo, uint32_t desc) 504{ 505 assert(devinfo->ver >= 6); 506 if (devinfo->ver >= 7) 507 return GET_BITS(desc, 13, 8); 508 else 509 return GET_BITS(desc, 12, 8); 510} 511 512/** 513 * Construct a message descriptor immediate with the specified dataport read 514 * function controls. 515 */ 516static inline uint32_t 517brw_dp_read_desc(const struct intel_device_info *devinfo, 518 unsigned binding_table_index, 519 unsigned msg_control, 520 unsigned msg_type, 521 unsigned target_cache) 522{ 523 if (devinfo->ver >= 6) 524 return brw_dp_desc(devinfo, binding_table_index, msg_type, msg_control); 525 else if (devinfo->ver >= 5 || devinfo->is_g4x) 526 return (SET_BITS(binding_table_index, 7, 0) | 527 SET_BITS(msg_control, 10, 8) | 528 SET_BITS(msg_type, 13, 11) | 529 SET_BITS(target_cache, 15, 14)); 530 else 531 return (SET_BITS(binding_table_index, 7, 0) | 532 SET_BITS(msg_control, 11, 8) | 533 SET_BITS(msg_type, 13, 12) | 534 SET_BITS(target_cache, 15, 14)); 535} 536 537static inline unsigned 538brw_dp_read_desc_msg_type(const struct intel_device_info *devinfo, 539 uint32_t desc) 540{ 541 if (devinfo->ver >= 6) 542 return brw_dp_desc_msg_type(devinfo, desc); 543 else if (devinfo->ver >= 5 || devinfo->is_g4x) 544 return GET_BITS(desc, 13, 11); 545 else 546 return GET_BITS(desc, 13, 12); 547} 548 549static inline unsigned 550brw_dp_read_desc_msg_control(const struct intel_device_info *devinfo, 551 uint32_t desc) 552{ 553 if (devinfo->ver >= 6) 554 return brw_dp_desc_msg_control(devinfo, desc); 555 else if (devinfo->ver >= 5 || devinfo->is_g4x) 556 return GET_BITS(desc, 10, 8); 557 else 558 return GET_BITS(desc, 11, 8); 559} 560 561/** 562 * Construct a message descriptor immediate with the specified dataport write 563 * function controls. 
564 */ 565static inline uint32_t 566brw_dp_write_desc(const struct intel_device_info *devinfo, 567 unsigned binding_table_index, 568 unsigned msg_control, 569 unsigned msg_type, 570 unsigned send_commit_msg) 571{ 572 assert(devinfo->ver <= 6 || !send_commit_msg); 573 if (devinfo->ver >= 6) { 574 return brw_dp_desc(devinfo, binding_table_index, msg_type, msg_control) | 575 SET_BITS(send_commit_msg, 17, 17); 576 } else { 577 return (SET_BITS(binding_table_index, 7, 0) | 578 SET_BITS(msg_control, 11, 8) | 579 SET_BITS(msg_type, 14, 12) | 580 SET_BITS(send_commit_msg, 15, 15)); 581 } 582} 583 584static inline unsigned 585brw_dp_write_desc_msg_type(const struct intel_device_info *devinfo, 586 uint32_t desc) 587{ 588 if (devinfo->ver >= 6) 589 return brw_dp_desc_msg_type(devinfo, desc); 590 else 591 return GET_BITS(desc, 14, 12); 592} 593 594static inline unsigned 595brw_dp_write_desc_msg_control(const struct intel_device_info *devinfo, 596 uint32_t desc) 597{ 598 if (devinfo->ver >= 6) 599 return brw_dp_desc_msg_control(devinfo, desc); 600 else 601 return GET_BITS(desc, 11, 8); 602} 603 604static inline bool 605brw_dp_write_desc_write_commit(const struct intel_device_info *devinfo, 606 uint32_t desc) 607{ 608 assert(devinfo->ver <= 6); 609 if (devinfo->ver >= 6) 610 return GET_BITS(desc, 17, 17); 611 else 612 return GET_BITS(desc, 15, 15); 613} 614 615/** 616 * Construct a message descriptor immediate with the specified dataport 617 * surface function controls. 
618 */ 619static inline uint32_t 620brw_dp_surface_desc(const struct intel_device_info *devinfo, 621 unsigned msg_type, 622 unsigned msg_control) 623{ 624 assert(devinfo->ver >= 7); 625 /* We'll OR in the binding table index later */ 626 return brw_dp_desc(devinfo, 0, msg_type, msg_control); 627} 628 629static inline uint32_t 630brw_dp_untyped_atomic_desc(const struct intel_device_info *devinfo, 631 unsigned exec_size, /**< 0 for SIMD4x2 */ 632 unsigned atomic_op, 633 bool response_expected) 634{ 635 assert(exec_size <= 8 || exec_size == 16); 636 637 unsigned msg_type; 638 if (devinfo->verx10 >= 75) { 639 if (exec_size > 0) { 640 msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP; 641 } else { 642 msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2; 643 } 644 } else { 645 msg_type = GFX7_DATAPORT_DC_UNTYPED_ATOMIC_OP; 646 } 647 648 const unsigned msg_control = 649 SET_BITS(atomic_op, 3, 0) | 650 SET_BITS(0 < exec_size && exec_size <= 8, 4, 4) | 651 SET_BITS(response_expected, 5, 5); 652 653 return brw_dp_surface_desc(devinfo, msg_type, msg_control); 654} 655 656static inline uint32_t 657brw_dp_untyped_atomic_float_desc(const struct intel_device_info *devinfo, 658 unsigned exec_size, 659 unsigned atomic_op, 660 bool response_expected) 661{ 662 assert(exec_size <= 8 || exec_size == 16); 663 assert(devinfo->ver >= 9); 664 665 assert(exec_size > 0); 666 const unsigned msg_type = GFX9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP; 667 668 const unsigned msg_control = 669 SET_BITS(atomic_op, 1, 0) | 670 SET_BITS(exec_size <= 8, 4, 4) | 671 SET_BITS(response_expected, 5, 5); 672 673 return brw_dp_surface_desc(devinfo, msg_type, msg_control); 674} 675 676static inline unsigned 677brw_mdc_cmask(unsigned num_channels) 678{ 679 /* See also MDC_CMASK in the SKL PRM Vol 2d. 
*/ 680 return 0xf & (0xf << num_channels); 681} 682 683static inline unsigned 684lsc_cmask(unsigned num_channels) 685{ 686 assert(num_channels > 0 && num_channels <= 4); 687 return BITSET_MASK(num_channels); 688} 689 690static inline uint32_t 691brw_dp_untyped_surface_rw_desc(const struct intel_device_info *devinfo, 692 unsigned exec_size, /**< 0 for SIMD4x2 */ 693 unsigned num_channels, 694 bool write) 695{ 696 assert(exec_size <= 8 || exec_size == 16); 697 698 unsigned msg_type; 699 if (write) { 700 if (devinfo->verx10 >= 75) { 701 msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE; 702 } else { 703 msg_type = GFX7_DATAPORT_DC_UNTYPED_SURFACE_WRITE; 704 } 705 } else { 706 /* Read */ 707 if (devinfo->verx10 >= 75) { 708 msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ; 709 } else { 710 msg_type = GFX7_DATAPORT_DC_UNTYPED_SURFACE_READ; 711 } 712 } 713 714 /* SIMD4x2 is only valid for read messages on IVB; use SIMD8 instead */ 715 if (write && devinfo->verx10 == 70 && exec_size == 0) 716 exec_size = 8; 717 718 /* See also MDC_SM3 in the SKL PRM Vol 2d. */ 719 const unsigned simd_mode = exec_size == 0 ? 0 : /* SIMD4x2 */ 720 exec_size <= 8 ? 
2 : 1; 721 722 const unsigned msg_control = 723 SET_BITS(brw_mdc_cmask(num_channels), 3, 0) | 724 SET_BITS(simd_mode, 5, 4); 725 726 return brw_dp_surface_desc(devinfo, msg_type, msg_control); 727} 728 729static inline unsigned 730brw_mdc_ds(unsigned bit_size) 731{ 732 switch (bit_size) { 733 case 8: 734 return GFX7_BYTE_SCATTERED_DATA_ELEMENT_BYTE; 735 case 16: 736 return GFX7_BYTE_SCATTERED_DATA_ELEMENT_WORD; 737 case 32: 738 return GFX7_BYTE_SCATTERED_DATA_ELEMENT_DWORD; 739 default: 740 unreachable("Unsupported bit_size for byte scattered messages"); 741 } 742} 743 744static inline uint32_t 745brw_dp_byte_scattered_rw_desc(const struct intel_device_info *devinfo, 746 unsigned exec_size, 747 unsigned bit_size, 748 bool write) 749{ 750 assert(exec_size <= 8 || exec_size == 16); 751 752 assert(devinfo->verx10 >= 75); 753 const unsigned msg_type = 754 write ? HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_WRITE : 755 HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_READ; 756 757 assert(exec_size > 0); 758 const unsigned msg_control = 759 SET_BITS(exec_size == 16, 0, 0) | 760 SET_BITS(brw_mdc_ds(bit_size), 3, 2); 761 762 return brw_dp_surface_desc(devinfo, msg_type, msg_control); 763} 764 765static inline uint32_t 766brw_dp_dword_scattered_rw_desc(const struct intel_device_info *devinfo, 767 unsigned exec_size, 768 bool write) 769{ 770 assert(exec_size == 8 || exec_size == 16); 771 772 unsigned msg_type; 773 if (write) { 774 if (devinfo->ver >= 6) { 775 msg_type = GFX6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE; 776 } else { 777 msg_type = BRW_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE; 778 } 779 } else { 780 if (devinfo->ver >= 7) { 781 msg_type = GFX7_DATAPORT_DC_DWORD_SCATTERED_READ; 782 } else if (devinfo->ver > 4 || devinfo->is_g4x) { 783 msg_type = G45_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ; 784 } else { 785 msg_type = BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ; 786 } 787 } 788 789 const unsigned msg_control = 790 SET_BITS(1, 1, 1) | /* Legacy SIMD Mode */ 791 
SET_BITS(exec_size == 16, 0, 0); 792 793 return brw_dp_surface_desc(devinfo, msg_type, msg_control); 794} 795 796static inline uint32_t 797brw_dp_oword_block_rw_desc(const struct intel_device_info *devinfo, 798 bool align_16B, 799 unsigned num_dwords, 800 bool write) 801{ 802 /* Writes can only have addresses aligned by OWORDs (16 Bytes). */ 803 assert(!write || align_16B); 804 805 const unsigned msg_type = 806 write ? GFX7_DATAPORT_DC_OWORD_BLOCK_WRITE : 807 align_16B ? GFX7_DATAPORT_DC_OWORD_BLOCK_READ : 808 GFX7_DATAPORT_DC_UNALIGNED_OWORD_BLOCK_READ; 809 810 const unsigned msg_control = 811 SET_BITS(BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_dwords), 2, 0); 812 813 return brw_dp_surface_desc(devinfo, msg_type, msg_control); 814} 815 816static inline uint32_t 817brw_dp_a64_untyped_surface_rw_desc(const struct intel_device_info *devinfo, 818 unsigned exec_size, /**< 0 for SIMD4x2 */ 819 unsigned num_channels, 820 bool write) 821{ 822 assert(exec_size <= 8 || exec_size == 16); 823 assert(devinfo->ver >= 8); 824 825 unsigned msg_type = 826 write ? GFX8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_WRITE : 827 GFX8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_READ; 828 829 /* See also MDC_SM3 in the SKL PRM Vol 2d. */ 830 const unsigned simd_mode = exec_size == 0 ? 0 : /* SIMD4x2 */ 831 exec_size <= 8 ? 2 : 1; 832 833 const unsigned msg_control = 834 SET_BITS(brw_mdc_cmask(num_channels), 3, 0) | 835 SET_BITS(simd_mode, 5, 4); 836 837 return brw_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT, 838 msg_type, msg_control); 839} 840 841static inline uint32_t 842brw_dp_a64_oword_block_rw_desc(const struct intel_device_info *devinfo, 843 bool align_16B, 844 unsigned num_dwords, 845 bool write) 846{ 847 /* Writes can only have addresses aligned by OWORDs (16 Bytes). */ 848 assert(!write || align_16B); 849 850 unsigned msg_type = 851 write ? 
GFX9_DATAPORT_DC_PORT1_A64_OWORD_BLOCK_WRITE : 852 GFX9_DATAPORT_DC_PORT1_A64_OWORD_BLOCK_READ; 853 854 unsigned msg_control = 855 SET_BITS(!align_16B, 4, 3) | 856 SET_BITS(BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_dwords), 2, 0); 857 858 return brw_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT, 859 msg_type, msg_control); 860} 861 862/** 863 * Calculate the data size (see MDC_A64_DS in the "Structures" volume of the 864 * Skylake PRM). 865 */ 866static inline uint32_t 867brw_mdc_a64_ds(unsigned elems) 868{ 869 switch (elems) { 870 case 1: return 0; 871 case 2: return 1; 872 case 4: return 2; 873 case 8: return 3; 874 default: 875 unreachable("Unsupported elmeent count for A64 scattered message"); 876 } 877} 878 879static inline uint32_t 880brw_dp_a64_byte_scattered_rw_desc(const struct intel_device_info *devinfo, 881 unsigned exec_size, /**< 0 for SIMD4x2 */ 882 unsigned bit_size, 883 bool write) 884{ 885 assert(exec_size <= 8 || exec_size == 16); 886 assert(devinfo->ver >= 8); 887 888 unsigned msg_type = 889 write ? GFX8_DATAPORT_DC_PORT1_A64_SCATTERED_WRITE : 890 GFX9_DATAPORT_DC_PORT1_A64_SCATTERED_READ; 891 892 const unsigned msg_control = 893 SET_BITS(GFX8_A64_SCATTERED_SUBTYPE_BYTE, 1, 0) | 894 SET_BITS(brw_mdc_a64_ds(bit_size / 8), 3, 2) | 895 SET_BITS(exec_size == 16, 4, 4); 896 897 return brw_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT, 898 msg_type, msg_control); 899} 900 901static inline uint32_t 902brw_dp_a64_untyped_atomic_desc(const struct intel_device_info *devinfo, 903 ASSERTED unsigned exec_size, /**< 0 for SIMD4x2 */ 904 unsigned bit_size, 905 unsigned atomic_op, 906 bool response_expected) 907{ 908 assert(exec_size == 8); 909 assert(devinfo->ver >= 8); 910 assert(bit_size == 16 || bit_size == 32 || bit_size == 64); 911 assert(devinfo->ver >= 12 || bit_size >= 32); 912 913 const unsigned msg_type = bit_size == 16 ? 
914 GFX12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_INT_OP : 915 GFX8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP; 916 917 const unsigned msg_control = 918 SET_BITS(atomic_op, 3, 0) | 919 SET_BITS(bit_size == 64, 4, 4) | 920 SET_BITS(response_expected, 5, 5); 921 922 return brw_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT, 923 msg_type, msg_control); 924} 925 926static inline uint32_t 927brw_dp_a64_untyped_atomic_float_desc(const struct intel_device_info *devinfo, 928 ASSERTED unsigned exec_size, 929 unsigned bit_size, 930 unsigned atomic_op, 931 bool response_expected) 932{ 933 assert(exec_size == 8); 934 assert(devinfo->ver >= 9); 935 assert(bit_size == 16 || bit_size == 32); 936 assert(devinfo->ver >= 12 || bit_size == 32); 937 938 assert(exec_size > 0); 939 const unsigned msg_type = bit_size == 32 ? 940 GFX9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP : 941 GFX12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_FLOAT_OP; 942 943 const unsigned msg_control = 944 SET_BITS(atomic_op, 1, 0) | 945 SET_BITS(response_expected, 5, 5); 946 947 return brw_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT, 948 msg_type, msg_control); 949} 950 951static inline uint32_t 952brw_dp_typed_atomic_desc(const struct intel_device_info *devinfo, 953 unsigned exec_size, 954 unsigned exec_group, 955 unsigned atomic_op, 956 bool response_expected) 957{ 958 assert(exec_size > 0 || exec_group == 0); 959 assert(exec_group % 8 == 0); 960 961 unsigned msg_type; 962 if (devinfo->verx10 >= 75) { 963 if (exec_size == 0) { 964 msg_type = HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2; 965 } else { 966 msg_type = HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP; 967 } 968 } else { 969 /* SIMD4x2 typed surface R/W messages only exist on HSW+ */ 970 assert(exec_size > 0); 971 msg_type = GFX7_DATAPORT_RC_TYPED_ATOMIC_OP; 972 } 973 974 const bool high_sample_mask = (exec_group / 8) % 2 == 1; 975 976 const unsigned msg_control = 977 SET_BITS(atomic_op, 3, 0) | 978 SET_BITS(high_sample_mask, 4, 4) | 979 
      SET_BITS(response_expected, 5, 5);

   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
}

/* Build a dataport typed surface read/write message descriptor.
 * exec_size == 0 selects the SIMD4x2 form (HSW+ only); otherwise exec_size
 * is the SIMD width and exec_group the starting channel of the quarter
 * being executed.
 */
static inline uint32_t
brw_dp_typed_surface_rw_desc(const struct intel_device_info *devinfo,
                             unsigned exec_size,
                             unsigned exec_group,
                             unsigned num_channels,
                             bool write)
{
   /* The SIMD4x2 form (exec_size == 0) has no channel grouping. */
   assert(exec_size > 0 || exec_group == 0);
   assert(exec_group % 8 == 0);

   /* Typed surface reads and writes don't support SIMD16 */
   assert(exec_size <= 8);

   unsigned msg_type;
   if (write) {
      if (devinfo->verx10 >= 75) {
         msg_type = HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE;
      } else {
         msg_type = GFX7_DATAPORT_RC_TYPED_SURFACE_WRITE;
      }
   } else {
      if (devinfo->verx10 >= 75) {
         msg_type = HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ;
      } else {
         msg_type = GFX7_DATAPORT_RC_TYPED_SURFACE_READ;
      }
   }

   /* See also MDC_SG3 in the SKL PRM Vol 2d. */
   unsigned msg_control;
   if (devinfo->verx10 >= 75) {
      const unsigned slot_group = exec_size == 0 ? 0 : /* SIMD4x2 */
                                  1 + ((exec_group / 8) % 2);

      msg_control =
         SET_BITS(brw_mdc_cmask(num_channels), 3, 0) |
         SET_BITS(slot_group, 5, 4);
   } else {
      /* SIMD4x2 typed surface R/W messages only exist on HSW+ */
      assert(exec_size > 0);
      const unsigned slot_group = ((exec_group / 8) % 2);

      msg_control =
         SET_BITS(brw_mdc_cmask(num_channels), 3, 0) |
         SET_BITS(slot_group, 5, 5);
   }

   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
}

/* Pack the common framebuffer (render target) message descriptor fields.
 * The msg_control/msg_type bit positions moved between gen6 and gen7.
 */
static inline uint32_t
brw_fb_desc(const struct intel_device_info *devinfo,
            unsigned binding_table_index,
            unsigned msg_type,
            unsigned msg_control)
{
   /* Prior to gen6, things are too inconsistent; use the fb_(read|write)_desc
    * helpers instead.
    */
   assert(devinfo->ver >= 6);
   const unsigned desc = SET_BITS(binding_table_index, 7, 0);
   if (devinfo->ver >= 7) {
      return (desc | SET_BITS(msg_control, 13, 8) |
              SET_BITS(msg_type, 17, 14));
   } else {
      return (desc | SET_BITS(msg_control, 12, 8) |
              SET_BITS(msg_type, 16, 13));
   }
}

/* Extract the binding table index from an FB message descriptor. */
static inline unsigned
brw_fb_desc_binding_table_index(UNUSED const struct intel_device_info *devinfo,
                                uint32_t desc)
{
   return GET_BITS(desc, 7, 0);
}

/* Extract msg_control from an FB message descriptor (gen6+ layouts). */
static inline uint32_t
brw_fb_desc_msg_control(const struct intel_device_info *devinfo, uint32_t desc)
{
   assert(devinfo->ver >= 6);
   if (devinfo->ver >= 7)
      return GET_BITS(desc, 13, 8);
   else
      return GET_BITS(desc, 12, 8);
}

/* Extract msg_type from an FB message descriptor (gen6+ layouts). */
static inline unsigned
brw_fb_desc_msg_type(const struct intel_device_info *devinfo, uint32_t desc)
{
   assert(devinfo->ver >= 6);
   if (devinfo->ver >= 7)
      return GET_BITS(desc, 17, 14);
   else
      return GET_BITS(desc, 16, 13);
}

/* Build a render target read message descriptor (SKL+ only). */
static inline uint32_t
brw_fb_read_desc(const struct intel_device_info *devinfo,
                 unsigned binding_table_index,
                 unsigned msg_control,
                 unsigned exec_size,
                 bool per_sample)
{
   assert(devinfo->ver >= 9);
   assert(exec_size == 8 || exec_size == 16);

   return brw_fb_desc(devinfo, binding_table_index,
                      GFX9_DATAPORT_RC_RENDER_TARGET_READ, msg_control) |
          SET_BITS(per_sample, 13, 13) |
          SET_BITS(exec_size == 8, 8, 8) /* Render Target Message Subtype */;
}

/* Build a render target write message descriptor.  Handles both the gen6+
 * layout (via brw_fb_desc) and the older gen4/5 layout.
 */
static inline uint32_t
brw_fb_write_desc(const struct intel_device_info *devinfo,
                  unsigned binding_table_index,
                  unsigned msg_control,
                  bool last_render_target,
                  bool coarse_write)
{
   const unsigned msg_type =
      devinfo->ver >= 6 ?
      GFX6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE :
      BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;

   /* Coarse pixel shading rate write is a gen10+ feature. */
   assert(devinfo->ver >= 10 || !coarse_write);

   if (devinfo->ver >= 6) {
      return brw_fb_desc(devinfo, binding_table_index, msg_type, msg_control) |
             SET_BITS(last_render_target, 12, 12) |
             SET_BITS(coarse_write, 18, 18);
   } else {
      /* NOTE(review): msg_control (bits 11:8) and last_render_target (bit 11)
       * overlap in this layout, so last_render_target is effectively part of
       * the control field on gen4/5 -- confirm against the PRM.
       */
      return (SET_BITS(binding_table_index, 7, 0) |
              SET_BITS(msg_control, 11, 8) |
              SET_BITS(last_render_target, 11, 11) |
              SET_BITS(msg_type, 14, 12));
   }
}

/* Extract msg_type from an FB write descriptor, any gen. */
static inline unsigned
brw_fb_write_desc_msg_type(const struct intel_device_info *devinfo,
                           uint32_t desc)
{
   if (devinfo->ver >= 6)
      return brw_fb_desc_msg_type(devinfo, desc);
   else
      return GET_BITS(desc, 14, 12);
}

/* Extract msg_control from an FB write descriptor, any gen. */
static inline unsigned
brw_fb_write_desc_msg_control(const struct intel_device_info *devinfo,
                              uint32_t desc)
{
   if (devinfo->ver >= 6)
      return brw_fb_desc_msg_control(devinfo, desc);
   else
      return GET_BITS(desc, 11, 8);
}

/* Extract the "last render target" bit from an FB write descriptor. */
static inline bool
brw_fb_write_desc_last_render_target(const struct intel_device_info *devinfo,
                                     uint32_t desc)
{
   if (devinfo->ver >= 6)
      return GET_BITS(desc, 12, 12);
   else
      return GET_BITS(desc, 11, 11);
}

/* Extract the write-commit bit; only meaningful through gen6 (the first
 * branch handles exactly ver == 6 given the assertion).
 */
static inline bool
brw_fb_write_desc_write_commit(const struct intel_device_info *devinfo,
                               uint32_t desc)
{
   assert(devinfo->ver <= 6);
   if (devinfo->ver >= 6)
      return GET_BITS(desc, 17, 17);
   else
      return GET_BITS(desc, 15, 15);
}

/* Extract the coarse-write bit from a gen10+ FB write descriptor. */
static inline bool
brw_fb_write_desc_coarse_write(const struct intel_device_info *devinfo,
                               uint32_t desc)
{
   assert(devinfo->ver >= 10);
   return GET_BITS(desc, 18, 18);
}

/* LSC opcodes that encode a channel mask instead of a vector size. */
static inline bool
lsc_opcode_has_cmask(enum lsc_opcode opcode)
{
   return opcode == LSC_OP_LOAD_CMASK || opcode == LSC_OP_STORE_CMASK;
}

/* LSC opcodes that support the transpose bit. */
static inline bool
lsc_opcode_has_transpose(enum lsc_opcode opcode)
{
   return opcode == LSC_OP_LOAD || opcode == LSC_OP_STORE;
}

/* Size in bytes of one data element for an LSC data size encoding.  The
 * *U32/BF32 variants occupy a full dword per element.
 */
static inline uint32_t
lsc_data_size_bytes(enum lsc_data_size data_size)
{
   switch (data_size) {
   case LSC_DATA_SIZE_D8:
      return 1;
   case LSC_DATA_SIZE_D16:
      return 2;
   case LSC_DATA_SIZE_D32:
   case LSC_DATA_SIZE_D8U32:
   case LSC_DATA_SIZE_D16U32:
   case LSC_DATA_SIZE_D16BF32:
      return 4;
   case LSC_DATA_SIZE_D64:
      return 8;
   default:
      unreachable("Unsupported data payload size.");
   }
}

/* Size in bytes of one address for an LSC address size encoding. */
static inline uint32_t
lsc_addr_size_bytes(enum lsc_addr_size addr_size)
{
   switch (addr_size) {
   case LSC_ADDR_SIZE_A16: return 2;
   case LSC_ADDR_SIZE_A32: return 4;
   case LSC_ADDR_SIZE_A64: return 8;
   default:
      unreachable("Unsupported address size.");
   }
}

/* Decode an LSC vector size encoding into an element count. */
static inline uint32_t
lsc_vector_length(enum lsc_vect_size vect_size)
{
   switch (vect_size) {
   case LSC_VECT_SIZE_V1: return 1;
   case LSC_VECT_SIZE_V2: return 2;
   case LSC_VECT_SIZE_V3: return 3;
   case LSC_VECT_SIZE_V4: return 4;
   case LSC_VECT_SIZE_V8: return 8;
   case LSC_VECT_SIZE_V16: return 16;
   case LSC_VECT_SIZE_V32: return 32;
   case LSC_VECT_SIZE_V64: return 64;
   default:
      unreachable("Unsupported size of vector");
   }
}

/* Encode an element count as an LSC vector size (inverse of
 * lsc_vector_length).
 */
static inline enum lsc_vect_size
lsc_vect_size(unsigned vect_size)
{
   switch(vect_size) {
   case 1:  return LSC_VECT_SIZE_V1;
   case 2:  return LSC_VECT_SIZE_V2;
   case 3:  return LSC_VECT_SIZE_V3;
   case 4:  return LSC_VECT_SIZE_V4;
   case 8:  return LSC_VECT_SIZE_V8;
   case 16: return LSC_VECT_SIZE_V16;
   case 32: return LSC_VECT_SIZE_V32;
   case 64: return LSC_VECT_SIZE_V64;
   default:
      unreachable("Unsupported vector size for dataport");
   }
}

/* Build an LSC (load/store cache) message descriptor.  The response and
 * source payload lengths are derived from the data/address sizes and the
 * SIMD width, rounded up to whole registers.
 */
static inline uint32_t
lsc_msg_desc(UNUSED const struct intel_device_info *devinfo,
             enum lsc_opcode opcode, unsigned simd_size,
             enum lsc_addr_surface_type addr_type,
             enum lsc_addr_size addr_sz, unsigned num_coordinates,
             enum lsc_data_size data_sz, unsigned num_channels,
             bool transpose, unsigned cache_ctrl, bool has_dest)
{
   assert(devinfo->has_lsc);

   /* Registers of returned data, if any. */
   unsigned dest_length = !has_dest ? 0 :
      DIV_ROUND_UP(lsc_data_size_bytes(data_sz) * num_channels * simd_size,
                   REG_SIZE);

   /* Registers of address payload. */
   unsigned src0_length =
      DIV_ROUND_UP(lsc_addr_size_bytes(addr_sz) * num_coordinates * simd_size,
                   REG_SIZE);

   assert(!transpose || lsc_opcode_has_transpose(opcode));

   unsigned msg_desc =
      SET_BITS(opcode, 5, 0) |
      SET_BITS(addr_sz, 8, 7) |
      SET_BITS(data_sz, 11, 9) |
      SET_BITS(transpose, 15, 15) |
      SET_BITS(cache_ctrl, 19, 17) |
      SET_BITS(dest_length, 24, 20) |
      SET_BITS(src0_length, 28, 25) |
      SET_BITS(addr_type, 30, 29);

   /* Bits 14:12 (15:12 for cmask ops) encode either a channel mask or a
    * vector size, depending on the opcode.
    */
   if (lsc_opcode_has_cmask(opcode))
      msg_desc |= SET_BITS(lsc_cmask(num_channels), 15, 12);
   else
      msg_desc |= SET_BITS(lsc_vect_size(num_channels), 14, 12);

   return msg_desc;
}

/* Extract the opcode from an LSC message descriptor. */
static inline enum lsc_opcode
lsc_msg_desc_opcode(UNUSED const struct intel_device_info *devinfo,
                    uint32_t desc)
{
   assert(devinfo->has_lsc);
   return (enum lsc_opcode) GET_BITS(desc, 5, 0);
}

/* Extract the address size from an LSC message descriptor. */
static inline enum lsc_addr_size
lsc_msg_desc_addr_size(UNUSED const struct intel_device_info *devinfo,
                       uint32_t desc)
{
   assert(devinfo->has_lsc);
   return (enum lsc_addr_size) GET_BITS(desc, 8, 7);
}

/* Extract the data size from an LSC message descriptor. */
static inline enum lsc_data_size
lsc_msg_desc_data_size(UNUSED const struct intel_device_info *devinfo,
                       uint32_t desc)
{
   assert(devinfo->has_lsc);
   return (enum lsc_data_size) GET_BITS(desc, 11, 9);
}

/* Extract the vector size; only valid for non-cmask opcodes. */
static inline enum lsc_vect_size
lsc_msg_desc_vect_size(UNUSED const struct intel_device_info *devinfo,
                       uint32_t desc)
{
   assert(devinfo->has_lsc);
   assert(!lsc_opcode_has_cmask(lsc_msg_desc_opcode(devinfo, desc)));
   return (enum lsc_vect_size) GET_BITS(desc, 14, 12);
}

/* Extract the channel mask; only valid for cmask opcodes. */
static inline enum lsc_cmask
lsc_msg_desc_cmask(UNUSED const struct intel_device_info *devinfo,
                   uint32_t desc)
{
   assert(devinfo->has_lsc);
   assert(lsc_opcode_has_cmask(lsc_msg_desc_opcode(devinfo, desc)));
   return (enum lsc_cmask) GET_BITS(desc, 15, 12);
}

/* Extract the transpose bit from an LSC message descriptor. */
static inline bool
lsc_msg_desc_transpose(UNUSED const struct intel_device_info *devinfo,
                       uint32_t desc)
{
   assert(devinfo->has_lsc);
   return GET_BITS(desc, 15, 15);
}

/* Extract the cache-control field from an LSC message descriptor. */
static inline unsigned
lsc_msg_desc_cache_ctrl(UNUSED const struct intel_device_info *devinfo,
                        uint32_t desc)
{
   assert(devinfo->has_lsc);
   return GET_BITS(desc, 19, 17);
}

/* Extract the response (destination) length in registers. */
static inline unsigned
lsc_msg_desc_dest_len(const struct intel_device_info *devinfo,
                      uint32_t desc)
{
   assert(devinfo->has_lsc);
   return GET_BITS(desc, 24, 20);
}

/* Extract the source-0 (address payload) length in registers. */
static inline unsigned
lsc_msg_desc_src0_len(const struct intel_device_info *devinfo,
                      uint32_t desc)
{
   assert(devinfo->has_lsc);
   return GET_BITS(desc, 28, 25);
}

/* Extract the address surface type from an LSC message descriptor. */
static inline enum lsc_addr_surface_type
lsc_msg_desc_addr_type(UNUSED const struct intel_device_info *devinfo,
                       uint32_t desc)
{
   assert(devinfo->has_lsc);
   return (enum lsc_addr_surface_type) GET_BITS(desc, 30, 29);
}

/* Build an LSC fence message descriptor. */
static inline uint32_t
lsc_fence_msg_desc(UNUSED const struct intel_device_info *devinfo,
                   enum lsc_fence_scope scope,
                   enum lsc_flush_type flush_type,
                   bool route_to_lsc)
{
   assert(devinfo->has_lsc);
   return SET_BITS(LSC_OP_FENCE, 5, 0) |
          SET_BITS(LSC_ADDR_SIZE_A32, 8, 7) |
          SET_BITS(scope, 11, 9) |
          SET_BITS(flush_type, 14, 12) |
          SET_BITS(route_to_lsc, 18, 18) |
          SET_BITS(LSC_ADDR_SURFTYPE_FLAT, 30, 29);
}

/* Extract the fence scope from an LSC fence descriptor. */
static inline enum lsc_fence_scope
lsc_fence_msg_desc_scope(UNUSED const struct intel_device_info *devinfo,
                         uint32_t desc)
{
   assert(devinfo->has_lsc);
   return (enum lsc_fence_scope) GET_BITS(desc, 11, 9);
}

/* Extract the flush type from an LSC fence descriptor. */
static inline enum lsc_flush_type
lsc_fence_msg_desc_flush_type(UNUSED const struct intel_device_info *devinfo,
                              uint32_t desc)
{
   assert(devinfo->has_lsc);
   return (enum lsc_flush_type) GET_BITS(desc, 14, 12);
}

/* Extract the backup-fence routing bit from an LSC fence descriptor. */
static inline enum lsc_backup_fence_routing
lsc_fence_msg_desc_backup_routing(UNUSED const struct intel_device_info *devinfo,
                                  uint32_t desc)
{
   assert(devinfo->has_lsc);
   return (enum lsc_backup_fence_routing) GET_BITS(desc, 18, 18);
}

/* Build an LSC extended descriptor for binding-table-index addressing. */
static inline uint32_t
lsc_bti_ex_desc(const struct intel_device_info *devinfo, unsigned bti)
{
   assert(devinfo->has_lsc);
   return SET_BITS(bti, 31, 24) |
          SET_BITS(0, 23, 12); /* base offset */
}

/* Extract the base offset from a BTI extended descriptor. */
static inline unsigned
lsc_bti_ex_desc_base_offset(const struct intel_device_info *devinfo,
                            uint32_t ex_desc)
{
   assert(devinfo->has_lsc);
   return GET_BITS(ex_desc, 23, 12);
}

/* Extract the binding table index from a BTI extended descriptor. */
static inline unsigned
lsc_bti_ex_desc_index(const struct intel_device_info *devinfo,
                      uint32_t ex_desc)
{
   assert(devinfo->has_lsc);
   return GET_BITS(ex_desc, 31, 24);
}

/* Extract the base offset from a flat-address extended descriptor. */
static inline unsigned
lsc_flat_ex_desc_base_offset(const struct intel_device_info *devinfo,
                             uint32_t ex_desc)
{
   assert(devinfo->has_lsc);
   return GET_BITS(ex_desc, 31, 12);
}

/* Build an LSC extended descriptor for bindless surface state addressing. */
static inline uint32_t
lsc_bss_ex_desc(const struct intel_device_info *devinfo,
                unsigned surface_state_index)
{
   assert(devinfo->has_lsc);
   return SET_BITS(surface_state_index, 31, 6);
}

/* Extract the surface state index from a BSS extended descriptor. */
static inline unsigned
lsc_bss_ex_desc_index(const struct intel_device_info *devinfo,
                      uint32_t ex_desc)
{
   assert(devinfo->has_lsc);
   return GET_BITS(ex_desc, 31, 6);
}

/* Encode a SIMD width (8 or 16) as the one-bit SIMD mode field. */
static inline uint32_t
brw_mdc_sm2(unsigned exec_size)
{
   assert(exec_size == 8 || exec_size == 16);
   return exec_size > 8;
}

/* Decode the one-bit SIMD mode field back to a SIMD width. */
static inline uint32_t
brw_mdc_sm2_exec_size(uint32_t sm2)
{
   assert(sm2 <= 1);
   return 8 << sm2;
}

/* Build a bindless thread dispatch (BTD) spawn message descriptor. */
static inline uint32_t
brw_btd_spawn_desc(ASSERTED const struct intel_device_info *devinfo,
                   unsigned exec_size, unsigned msg_type)
{
   assert(devinfo->has_ray_tracing);

   return SET_BITS(0, 19, 19) | /* No header */
          SET_BITS(msg_type, 17, 14) |
          SET_BITS(brw_mdc_sm2(exec_size), 8, 8);
}

/* Extract the message type from a BTD spawn descriptor. */
static inline uint32_t
brw_btd_spawn_msg_type(UNUSED const struct intel_device_info *devinfo,
                       uint32_t desc)
{
   return GET_BITS(desc, 17, 14);
}

/* Extract the SIMD width from a BTD spawn descriptor. */
static inline uint32_t
brw_btd_spawn_exec_size(UNUSED const struct intel_device_info *devinfo,
                        uint32_t desc)
{
   return brw_mdc_sm2_exec_size(GET_BITS(desc, 8, 8));
}

/* Build a ray-tracing trace-ray message descriptor. */
static inline uint32_t
brw_rt_trace_ray_desc(ASSERTED const struct intel_device_info *devinfo,
                      unsigned exec_size)
{
   assert(devinfo->has_ray_tracing);

   return SET_BITS(0, 19, 19) | /* No header */
          SET_BITS(0, 17, 14) | /* Message type */
          SET_BITS(brw_mdc_sm2(exec_size), 8, 8);
}

/* Extract the SIMD width from a trace-ray descriptor. */
static inline uint32_t
brw_rt_trace_ray_desc_exec_size(UNUSED const struct intel_device_info *devinfo,
                                uint32_t desc)
{
   return brw_mdc_sm2_exec_size(GET_BITS(desc, 8, 8));
}

/**
 * Construct a message descriptor immediate with the specified pixel
 * interpolator function controls.
 */
static inline uint32_t
brw_pixel_interp_desc(UNUSED const struct intel_device_info *devinfo,
                      unsigned msg_type,
                      bool noperspective,
                      bool coarse_pixel_rate,
                      unsigned simd_mode,
                      unsigned slot_group)
{
   /* Coarse pixel shading rate is a gen10+ feature. */
   assert(devinfo->ver >= 10 || !coarse_pixel_rate);
   return (SET_BITS(slot_group, 11, 11) |
           SET_BITS(msg_type, 13, 12) |
           SET_BITS(!!noperspective, 14, 14) |
           SET_BITS(coarse_pixel_rate, 15, 15) |
           SET_BITS(simd_mode, 16, 16));
}

void brw_urb_WRITE(struct brw_codegen *p,
                   struct brw_reg dest,
                   unsigned msg_reg_nr,
                   struct brw_reg src0,
                   enum brw_urb_write_flags flags,
                   unsigned msg_length,
                   unsigned response_length,
                   unsigned offset,
                   unsigned swizzle);

/**
 * Send message to shared unit \p sfid with a possibly indirect descriptor \p
 * desc. If \p desc is not an immediate it will be transparently loaded to an
 * address register using an OR instruction.
 */
void
brw_send_indirect_message(struct brw_codegen *p,
                          unsigned sfid,
                          struct brw_reg dst,
                          struct brw_reg payload,
                          struct brw_reg desc,
                          unsigned desc_imm,
                          bool eot);

void
brw_send_indirect_split_message(struct brw_codegen *p,
                                unsigned sfid,
                                struct brw_reg dst,
                                struct brw_reg payload0,
                                struct brw_reg payload1,
                                struct brw_reg desc,
                                unsigned desc_imm,
                                struct brw_reg ex_desc,
                                unsigned ex_desc_imm,
                                bool eot);

void brw_ff_sync(struct brw_codegen *p,
                 struct brw_reg dest,
                 unsigned msg_reg_nr,
                 struct brw_reg src0,
                 bool allocate,
                 unsigned response_length,
                 bool eot);

void brw_svb_write(struct brw_codegen *p,
                   struct brw_reg dest,
                   unsigned msg_reg_nr,
                   struct brw_reg src0,
                   unsigned binding_table_index,
                   bool send_commit_msg);

brw_inst *brw_fb_WRITE(struct brw_codegen *p,
                       struct brw_reg payload,
                       struct brw_reg implied_header,
                       unsigned msg_control,
                       unsigned binding_table_index,
                       unsigned msg_length,
                       unsigned response_length,
                       bool eot,
                       bool last_render_target,
                       bool header_present);

brw_inst *gfx9_fb_READ(struct brw_codegen *p,
                       struct brw_reg dst,
                       struct brw_reg payload,
                       unsigned binding_table_index,
                       unsigned msg_length,
                       unsigned response_length,
                       bool per_sample);

void brw_SAMPLE(struct brw_codegen *p,
                struct brw_reg dest,
                unsigned msg_reg_nr,
                struct brw_reg src0,
                unsigned binding_table_index,
                unsigned sampler,
                unsigned msg_type,
                unsigned response_length,
                unsigned msg_length,
                unsigned header_present,
                unsigned simd_mode,
                unsigned return_format);

void brw_adjust_sampler_state_pointer(struct brw_codegen *p,
                                      struct brw_reg header,
                                      struct brw_reg sampler_index);

void gfx4_math(struct brw_codegen *p,
               struct brw_reg dest,
               unsigned function,
               unsigned msg_reg_nr,
               struct brw_reg src,
               unsigned precision );

void gfx6_math(struct brw_codegen *p,
               struct brw_reg dest,
               unsigned function,
               struct brw_reg src0,
               struct brw_reg src1);

void brw_oword_block_read(struct brw_codegen *p,
                          struct brw_reg dest,
                          struct brw_reg mrf,
                          uint32_t offset,
                          uint32_t bind_table_index);

unsigned brw_scratch_surface_idx(const struct brw_codegen *p);

void brw_oword_block_read_scratch(struct brw_codegen *p,
                                  struct brw_reg dest,
                                  struct brw_reg mrf,
                                  int num_regs,
                                  unsigned offset);

void brw_oword_block_write_scratch(struct brw_codegen *p,
                                   struct brw_reg mrf,
                                   int num_regs,
                                   unsigned offset);

void gfx7_block_read_scratch(struct brw_codegen *p,
                             struct brw_reg dest,
                             int num_regs,
                             unsigned offset);

void brw_shader_time_add(struct brw_codegen *p,
                         struct brw_reg payload,
                         uint32_t surf_index);

/**
 * Return the generation-specific jump distance scaling factor.
 *
 * Given the number of instructions to jump, we need to scale by
 * some number to obtain the actual jump distance to program in an
 * instruction.
 */
static inline unsigned
brw_jump_scale(const struct intel_device_info *devinfo)
{
   /* Broadwell measures jump targets in bytes. */
   if (devinfo->ver >= 8)
      return 16;

   /* Ironlake and later measure jump targets in 64-bit data chunks (in order
    * to support compaction), so each 128-bit instruction requires 2 chunks.
    */
   if (devinfo->ver >= 5)
      return 2;

   /* Gfx4 simply uses the number of 128-bit instructions. */
   return 1;
}

void brw_barrier(struct brw_codegen *p, struct brw_reg src);

/* If/else/endif.  Works by manipulating the execution flags on each
 * channel.
 */
brw_inst *brw_IF(struct brw_codegen *p, unsigned execute_size);
brw_inst *gfx6_IF(struct brw_codegen *p, enum brw_conditional_mod conditional,
                  struct brw_reg src0, struct brw_reg src1);

void brw_ELSE(struct brw_codegen *p);
void brw_ENDIF(struct brw_codegen *p);

/* DO/WHILE loops:
 */
brw_inst *brw_DO(struct brw_codegen *p, unsigned execute_size);

brw_inst *brw_WHILE(struct brw_codegen *p);

brw_inst *brw_BREAK(struct brw_codegen *p);
brw_inst *brw_CONT(struct brw_codegen *p);
brw_inst *brw_HALT(struct brw_codegen *p);

/* Forward jumps:
 */
void brw_land_fwd_jump(struct brw_codegen *p, int jmp_insn_idx);

brw_inst *brw_JMPI(struct brw_codegen *p, struct brw_reg index,
                   unsigned predicate_control);

void brw_NOP(struct brw_codegen *p);

void brw_WAIT(struct brw_codegen *p);

void brw_SYNC(struct brw_codegen *p, enum tgl_sync_function func);

/* Special case: there is never a destination, execution size will be
 * taken from src0:
 */
void brw_CMP(struct brw_codegen *p,
             struct brw_reg dest,
             unsigned conditional,
             struct brw_reg src0,
             struct brw_reg src1);

void brw_CMPN(struct brw_codegen *p,
              struct brw_reg dest,
              unsigned conditional,
              struct brw_reg src0,
              struct brw_reg src1);

void
brw_untyped_atomic(struct brw_codegen *p,
                   struct brw_reg dst,
                   struct brw_reg payload,
                   struct brw_reg surface,
                   unsigned atomic_op,
                   unsigned msg_length,
                   bool response_expected,
                   bool header_present);

void
brw_untyped_surface_read(struct brw_codegen *p,
                         struct brw_reg dst,
                         struct brw_reg payload,
                         struct brw_reg surface,
                         unsigned msg_length,
                         unsigned num_channels);

void
brw_untyped_surface_write(struct brw_codegen *p,
                          struct brw_reg payload,
                          struct brw_reg surface,
                          unsigned msg_length,
                          unsigned num_channels,
                          bool header_present);

void
brw_memory_fence(struct brw_codegen *p,
                 struct brw_reg dst,
                 struct brw_reg src,
                 enum opcode send_op,
                 enum brw_message_target sfid,
                 bool commit_enable,
                 unsigned bti);

void
brw_pixel_interpolator_query(struct brw_codegen *p,
                             struct brw_reg dest,
                             struct brw_reg mrf,
                             bool noperspective,
                             bool coarse_pixel_rate,
                             unsigned mode,
                             struct brw_reg data,
                             unsigned msg_length,
                             unsigned response_length);

void
brw_find_live_channel(struct brw_codegen *p,
                      struct brw_reg dst,
                      struct brw_reg mask);

void
brw_broadcast(struct brw_codegen *p,
              struct brw_reg dst,
              struct brw_reg src,
              struct brw_reg idx);

void
brw_float_controls_mode(struct brw_codegen *p,
                        unsigned mode, unsigned mask);

void
brw_update_reloc_imm(const struct intel_device_info *devinfo,
                     brw_inst *inst,
                     uint32_t value);

void
brw_MOV_reloc_imm(struct brw_codegen *p,
                  struct brw_reg dst,
                  enum brw_reg_type src_type,
                  uint32_t id);

/***********************************************************************
 * brw_eu_util.c:
 */

void brw_copy_indirect_to_indirect(struct brw_codegen *p,
                                   struct brw_indirect dst_ptr,
                                   struct brw_indirect src_ptr,
                                   unsigned count);

void brw_copy_from_indirect(struct brw_codegen *p,
                            struct brw_reg dst,
                            struct brw_indirect ptr,
                            unsigned count);

void brw_copy4(struct brw_codegen *p,
               struct brw_reg dst,
               struct brw_reg src,
               unsigned count);

void brw_copy8(struct brw_codegen *p,
               struct brw_reg dst,
               struct brw_reg src,
               unsigned count);

void brw_math_invert( struct brw_codegen *p,
                      struct brw_reg dst,
                      struct brw_reg src);

void brw_set_src1(struct brw_codegen *p, brw_inst *insn, struct brw_reg reg);

void brw_set_desc_ex(struct brw_codegen *p, brw_inst *insn,
                     unsigned desc, unsigned ex_desc);

/* Set a SEND message descriptor with no extended descriptor. */
static inline void
brw_set_desc(struct brw_codegen *p, brw_inst *insn, unsigned desc)
{
   brw_set_desc_ex(p, insn, desc, 0);
}

void brw_set_uip_jip(struct brw_codegen *p, int start_offset);

enum brw_conditional_mod brw_negate_cmod(enum brw_conditional_mod cmod);
enum brw_conditional_mod brw_swap_cmod(enum brw_conditional_mod cmod);

/* brw_eu_compact.c */
void brw_compact_instructions(struct brw_codegen *p, int start_offset,
                              struct disasm_info *disasm);
void brw_uncompact_instruction(const struct intel_device_info *devinfo,
                               brw_inst *dst, brw_compact_inst *src);
bool brw_try_compact_instruction(const struct intel_device_info *devinfo,
                                 brw_compact_inst *dst, const brw_inst *src);

void brw_debug_compact_uncompact(const struct intel_device_info *devinfo,
                                 brw_inst *orig, brw_inst *uncompacted);

/* brw_eu_validate.c */
bool brw_validate_instruction(const struct intel_device_info *devinfo,
                              const brw_inst *inst, int offset,
                              struct disasm_info *disasm);
bool brw_validate_instructions(const struct intel_device_info *devinfo,
                               const void *assembly, int start_offset, int end_offset,
                               struct disasm_info *disasm);

/* Return the offset of the instruction after the one at \p offset:
 * compacted instructions are 8 bytes, full-size ones 16.
 */
static inline int
next_offset(const struct intel_device_info *devinfo, void *store, int offset)
{
   brw_inst *insn = (brw_inst *)((char *)store + offset);

   if (brw_inst_cmpt_control(devinfo, insn))
      return offset + 8;
   else
      return offset + 16;
}

struct opcode_desc {
   unsigned ir;       /* IR-level opcode (enum opcode) */
   unsigned hw;       /* hardware encoding of the opcode */
   const char *name;
   int nsrc;          /* number of source operands */
   int ndst;          /* number of destination operands */
   int gfx_vers;      /* NOTE(review): presumably a mask/range of supported
                       * gens -- encoding not visible here, confirm in
                       * brw_eu.c */
};

const struct opcode_desc *
brw_opcode_desc(const struct intel_device_info *devinfo, enum opcode opcode);

const struct opcode_desc *
brw_opcode_desc_from_hw(const struct intel_device_info *devinfo, unsigned hw);

/* Map an IR opcode to its hardware encoding for this gen. */
static inline unsigned
brw_opcode_encode(const struct intel_device_info *devinfo, enum opcode opcode)
{
   return brw_opcode_desc(devinfo, opcode)->hw;
}

/* Map a hardware opcode encoding back to the IR opcode; unknown encodings
 * decode as BRW_OPCODE_ILLEGAL.
 */
static inline enum opcode
brw_opcode_decode(const struct intel_device_info *devinfo, unsigned hw)
{
   const struct opcode_desc *desc = brw_opcode_desc_from_hw(devinfo, hw);
   return desc ? (enum opcode)desc->ir : BRW_OPCODE_ILLEGAL;
}

/* Store the gen-specific hardware encoding of \p opcode into \p inst. */
static inline void
brw_inst_set_opcode(const struct intel_device_info *devinfo,
                    brw_inst *inst, enum opcode opcode)
{
   brw_inst_set_hw_opcode(devinfo, inst, brw_opcode_encode(devinfo, opcode));
}

/* Read the IR opcode of \p inst, decoding the gen-specific encoding. */
static inline enum opcode
brw_inst_opcode(const struct intel_device_info *devinfo, const brw_inst *inst)
{
   return brw_opcode_decode(devinfo, brw_inst_hw_opcode(devinfo, inst));
}

/* Whether \p opcode is a three-source instruction on this gen. */
static inline bool
is_3src(const struct intel_device_info *devinfo, enum opcode opcode)
{
   const struct opcode_desc *desc = brw_opcode_desc(devinfo, opcode);
   return desc && desc->nsrc == 3;
}

/** Maximum SEND message length */
#define BRW_MAX_MSG_LENGTH 15

/** First MRF register used by spills */
#define FIRST_SPILL_MRF(gen) ((gen) == 6 ? 21 : 13)

/** First MRF register used by pull loads */
#define FIRST_PULL_LOAD_MRF(gen) ((gen) == 6 ? 16 : 13)

#ifdef __cplusplus
}
#endif

#endif /* BRW_EU_H */