/* v3d_compiler.h revision ed98bd31 */
1/* 2 * Copyright © 2016 Broadcom 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#ifndef V3D_COMPILER_H 25#define V3D_COMPILER_H 26 27#include <assert.h> 28#include <stdio.h> 29#include <stdlib.h> 30#include <stdbool.h> 31#include <stdint.h> 32#include <string.h> 33 34#include "util/macros.h" 35#include "common/v3d_debug.h" 36#include "common/v3d_device_info.h" 37#include "common/v3d_limits.h" 38#include "compiler/nir/nir.h" 39#include "util/list.h" 40#include "util/u_math.h" 41 42#include "qpu/qpu_instr.h" 43#include "pipe/p_state.h" 44 45struct nir_builder; 46 47struct v3d_fs_inputs { 48 /** 49 * Array of the meanings of the VPM inputs this shader needs. 50 * 51 * It doesn't include those that aren't part of the VPM, like 52 * point/line coordinates. 
53 */ 54 struct v3d_varying_slot *input_slots; 55 uint32_t num_inputs; 56}; 57 58enum qfile { 59 /** An unused source or destination register. */ 60 QFILE_NULL, 61 62 /** A physical register, such as the W coordinate payload. */ 63 QFILE_REG, 64 /** One of the regsiters for fixed function interactions. */ 65 QFILE_MAGIC, 66 67 /** 68 * A virtual register, that will be allocated to actual accumulator 69 * or physical registers later. 70 */ 71 QFILE_TEMP, 72 73 /** 74 * VPM reads use this with an index value to say what part of the VPM 75 * is being read. 76 */ 77 QFILE_VPM, 78 79 /** 80 * Stores an immediate value in the index field that will be used 81 * directly by qpu_load_imm(). 82 */ 83 QFILE_LOAD_IMM, 84 85 /** 86 * Stores an immediate value in the index field that can be turned 87 * into a small immediate field by qpu_encode_small_immediate(). 88 */ 89 QFILE_SMALL_IMM, 90}; 91 92/** 93 * A reference to a QPU register or a virtual temp register. 94 */ 95struct qreg { 96 enum qfile file; 97 uint32_t index; 98}; 99 100static inline struct qreg vir_reg(enum qfile file, uint32_t index) 101{ 102 return (struct qreg){file, index}; 103} 104 105static inline struct qreg vir_magic_reg(uint32_t index) 106{ 107 return (struct qreg){QFILE_MAGIC, index}; 108} 109 110static inline struct qreg vir_nop_reg(void) 111{ 112 return (struct qreg){QFILE_NULL, 0}; 113} 114 115/** 116 * A reference to an actual register at the QPU level, for register 117 * allocation. 118 */ 119struct qpu_reg { 120 bool magic; 121 bool smimm; 122 int index; 123}; 124 125struct qinst { 126 /** Entry in qblock->instructions */ 127 struct list_head link; 128 129 /** 130 * The instruction being wrapped. Its condition codes, pack flags, 131 * signals, etc. will all be used, with just the register references 132 * being replaced by the contents of qinst->dst and qinst->src[]. 
133 */ 134 struct v3d_qpu_instr qpu; 135 136 /* Pre-register-allocation references to src/dst registers */ 137 struct qreg dst; 138 struct qreg src[3]; 139 bool is_last_thrsw; 140 141 /* If the instruction reads a uniform (other than through src[i].file 142 * == QFILE_UNIF), that uniform's index in c->uniform_contents. ~0 143 * otherwise. 144 */ 145 int uniform; 146}; 147 148enum quniform_contents { 149 /** 150 * Indicates that a constant 32-bit value is copied from the program's 151 * uniform contents. 152 */ 153 QUNIFORM_CONSTANT, 154 /** 155 * Indicates that the program's uniform contents are used as an index 156 * into the GL uniform storage. 157 */ 158 QUNIFORM_UNIFORM, 159 160 /** @{ 161 * Scaling factors from clip coordinates to relative to the viewport 162 * center. 163 * 164 * This is used by the coordinate and vertex shaders to produce the 165 * 32-bit entry consisting of 2 16-bit fields with 12.4 signed fixed 166 * point offsets from the viewport ccenter. 167 */ 168 QUNIFORM_VIEWPORT_X_SCALE, 169 QUNIFORM_VIEWPORT_Y_SCALE, 170 /** @} */ 171 172 QUNIFORM_VIEWPORT_Z_OFFSET, 173 QUNIFORM_VIEWPORT_Z_SCALE, 174 175 QUNIFORM_USER_CLIP_PLANE, 176 177 /** 178 * A reference to a V3D 3.x texture config parameter 0 uniform. 179 * 180 * This is a uniform implicitly loaded with a QPU_W_TMU* write, which 181 * defines texture type, miplevels, and such. It will be found as a 182 * parameter to the first QOP_TEX_[STRB] instruction in a sequence. 
183 */ 184 QUNIFORM_TEXTURE_CONFIG_P0_0, 185 QUNIFORM_TEXTURE_CONFIG_P0_1, 186 QUNIFORM_TEXTURE_CONFIG_P0_2, 187 QUNIFORM_TEXTURE_CONFIG_P0_3, 188 QUNIFORM_TEXTURE_CONFIG_P0_4, 189 QUNIFORM_TEXTURE_CONFIG_P0_5, 190 QUNIFORM_TEXTURE_CONFIG_P0_6, 191 QUNIFORM_TEXTURE_CONFIG_P0_7, 192 QUNIFORM_TEXTURE_CONFIG_P0_8, 193 QUNIFORM_TEXTURE_CONFIG_P0_9, 194 QUNIFORM_TEXTURE_CONFIG_P0_10, 195 QUNIFORM_TEXTURE_CONFIG_P0_11, 196 QUNIFORM_TEXTURE_CONFIG_P0_12, 197 QUNIFORM_TEXTURE_CONFIG_P0_13, 198 QUNIFORM_TEXTURE_CONFIG_P0_14, 199 QUNIFORM_TEXTURE_CONFIG_P0_15, 200 QUNIFORM_TEXTURE_CONFIG_P0_16, 201 QUNIFORM_TEXTURE_CONFIG_P0_17, 202 QUNIFORM_TEXTURE_CONFIG_P0_18, 203 QUNIFORM_TEXTURE_CONFIG_P0_19, 204 QUNIFORM_TEXTURE_CONFIG_P0_20, 205 QUNIFORM_TEXTURE_CONFIG_P0_21, 206 QUNIFORM_TEXTURE_CONFIG_P0_22, 207 QUNIFORM_TEXTURE_CONFIG_P0_23, 208 QUNIFORM_TEXTURE_CONFIG_P0_24, 209 QUNIFORM_TEXTURE_CONFIG_P0_25, 210 QUNIFORM_TEXTURE_CONFIG_P0_26, 211 QUNIFORM_TEXTURE_CONFIG_P0_27, 212 QUNIFORM_TEXTURE_CONFIG_P0_28, 213 QUNIFORM_TEXTURE_CONFIG_P0_29, 214 QUNIFORM_TEXTURE_CONFIG_P0_30, 215 QUNIFORM_TEXTURE_CONFIG_P0_31, 216 QUNIFORM_TEXTURE_CONFIG_P0_32, 217 218 /** 219 * A reference to a V3D 3.x texture config parameter 1 uniform. 220 * 221 * This is a uniform implicitly loaded with a QPU_W_TMU* write, which 222 * has the pointer to the indirect texture state. Our data[] field 223 * will have a packed p1 value, but the address field will be just 224 * which texture unit's texture should be referenced. 225 */ 226 QUNIFORM_TEXTURE_CONFIG_P1, 227 228 /* A V3D 4.x texture config parameter. The high 8 bits will be 229 * which texture or sampler is being sampled, and the driver must 230 * replace the address field with the appropriate address. 
231 */ 232 QUNIFORM_TMU_CONFIG_P0, 233 QUNIFORM_TMU_CONFIG_P1, 234 235 QUNIFORM_IMAGE_TMU_CONFIG_P0, 236 237 QUNIFORM_TEXTURE_FIRST_LEVEL, 238 239 QUNIFORM_TEXTURE_WIDTH, 240 QUNIFORM_TEXTURE_HEIGHT, 241 QUNIFORM_TEXTURE_DEPTH, 242 QUNIFORM_TEXTURE_ARRAY_SIZE, 243 QUNIFORM_TEXTURE_LEVELS, 244 245 QUNIFORM_UBO_ADDR, 246 247 QUNIFORM_TEXRECT_SCALE_X, 248 QUNIFORM_TEXRECT_SCALE_Y, 249 250 /* Returns the base offset of the SSBO given by the data value. */ 251 QUNIFORM_SSBO_OFFSET, 252 253 /* Returns the size of the SSBO given by the data value. */ 254 QUNIFORM_GET_BUFFER_SIZE, 255 256 /* Sizes (in pixels) of a shader image given by the data value. */ 257 QUNIFORM_IMAGE_WIDTH, 258 QUNIFORM_IMAGE_HEIGHT, 259 QUNIFORM_IMAGE_DEPTH, 260 QUNIFORM_IMAGE_ARRAY_SIZE, 261 262 QUNIFORM_ALPHA_REF, 263 264 /* Number of workgroups passed to glDispatchCompute in the dimension 265 * selected by the data value. 266 */ 267 QUNIFORM_NUM_WORK_GROUPS, 268 269 /** 270 * Returns the the offset of the scratch buffer for register spilling. 271 */ 272 QUNIFORM_SPILL_OFFSET, 273 QUNIFORM_SPILL_SIZE_PER_THREAD, 274 275 /** 276 * Returns the offset of the shared memory for compute shaders. 277 * 278 * This will be accessed using TMU general memory operations, so the 279 * L2T cache will effectively be the shared memory area. 
280 */ 281 QUNIFORM_SHARED_OFFSET, 282}; 283 284static inline uint32_t v3d_unit_data_create(uint32_t unit, uint32_t value) 285{ 286 assert(value < (1 << 24)); 287 return unit << 24 | value; 288} 289 290static inline uint32_t v3d_unit_data_get_unit(uint32_t data) 291{ 292 return data >> 24; 293} 294 295static inline uint32_t v3d_unit_data_get_offset(uint32_t data) 296{ 297 return data & 0xffffff; 298} 299 300struct v3d_varying_slot { 301 uint8_t slot_and_component; 302}; 303 304static inline struct v3d_varying_slot 305v3d_slot_from_slot_and_component(uint8_t slot, uint8_t component) 306{ 307 assert(slot < 255 / 4); 308 return (struct v3d_varying_slot){ (slot << 2) + component }; 309} 310 311static inline uint8_t v3d_slot_get_slot(struct v3d_varying_slot slot) 312{ 313 return slot.slot_and_component >> 2; 314} 315 316static inline uint8_t v3d_slot_get_component(struct v3d_varying_slot slot) 317{ 318 return slot.slot_and_component & 3; 319} 320 321struct v3d_key { 322 void *shader_state; 323 struct { 324 uint8_t swizzle[4]; 325 uint8_t return_size; 326 uint8_t return_channels; 327 bool clamp_s:1; 328 bool clamp_t:1; 329 bool clamp_r:1; 330 } tex[V3D_MAX_TEXTURE_SAMPLERS]; 331 uint8_t ucp_enables; 332}; 333 334struct v3d_fs_key { 335 struct v3d_key base; 336 bool depth_enabled; 337 bool is_points; 338 bool is_lines; 339 bool alpha_test; 340 bool point_coord_upper_left; 341 bool light_twoside; 342 bool msaa; 343 bool sample_coverage; 344 bool sample_alpha_to_coverage; 345 bool sample_alpha_to_one; 346 bool clamp_color; 347 bool shade_model_flat; 348 /* Mask of which color render targets are present. */ 349 uint8_t cbufs; 350 uint8_t swap_color_rb; 351 /* Mask of which render targets need to be written as 32-bit floats */ 352 uint8_t f32_color_rb; 353 /* Masks of which render targets need to be written as ints/uints. 354 * Used by gallium to work around lost information in TGSI. 
355 */ 356 uint8_t int_color_rb; 357 uint8_t uint_color_rb; 358 uint8_t alpha_test_func; 359 uint8_t logicop_func; 360 uint32_t point_sprite_mask; 361 362 struct pipe_rt_blend_state blend; 363}; 364 365struct v3d_vs_key { 366 struct v3d_key base; 367 368 struct v3d_varying_slot fs_inputs[V3D_MAX_FS_INPUTS]; 369 uint8_t num_fs_inputs; 370 371 bool is_coord; 372 bool per_vertex_point_size; 373 bool clamp_color; 374}; 375 376/** A basic block of VIR intructions. */ 377struct qblock { 378 struct list_head link; 379 380 struct list_head instructions; 381 382 struct set *predecessors; 383 struct qblock *successors[2]; 384 385 int index; 386 387 /* Instruction IPs for the first and last instruction of the block. 388 * Set by qpu_schedule.c. 389 */ 390 uint32_t start_qpu_ip; 391 uint32_t end_qpu_ip; 392 393 /* Instruction IP for the branch instruction of the block. Set by 394 * qpu_schedule.c. 395 */ 396 uint32_t branch_qpu_ip; 397 398 /** Offset within the uniform stream at the start of the block. */ 399 uint32_t start_uniform; 400 /** Offset within the uniform stream of the branch instruction */ 401 uint32_t branch_uniform; 402 403 /** @{ used by v3d_vir_live_variables.c */ 404 BITSET_WORD *def; 405 BITSET_WORD *defin; 406 BITSET_WORD *defout; 407 BITSET_WORD *use; 408 BITSET_WORD *live_in; 409 BITSET_WORD *live_out; 410 int start_ip, end_ip; 411 /** @} */ 412}; 413 414/** Which util/list.h add mode we should use when inserting an instruction. */ 415enum vir_cursor_mode { 416 vir_cursor_add, 417 vir_cursor_addtail, 418}; 419 420/** 421 * Tracking structure for where new instructions should be inserted. Create 422 * with one of the vir_after_inst()-style helper functions. 423 * 424 * This does not protect against removal of the block or instruction, so we 425 * have an assert in instruction removal to try to catch it. 
426 */ 427struct vir_cursor { 428 enum vir_cursor_mode mode; 429 struct list_head *link; 430}; 431 432static inline struct vir_cursor 433vir_before_inst(struct qinst *inst) 434{ 435 return (struct vir_cursor){ vir_cursor_addtail, &inst->link }; 436} 437 438static inline struct vir_cursor 439vir_after_inst(struct qinst *inst) 440{ 441 return (struct vir_cursor){ vir_cursor_add, &inst->link }; 442} 443 444static inline struct vir_cursor 445vir_before_block(struct qblock *block) 446{ 447 return (struct vir_cursor){ vir_cursor_add, &block->instructions }; 448} 449 450static inline struct vir_cursor 451vir_after_block(struct qblock *block) 452{ 453 return (struct vir_cursor){ vir_cursor_addtail, &block->instructions }; 454} 455 456/** 457 * Compiler state saved across compiler invocations, for any expensive global 458 * setup. 459 */ 460struct v3d_compiler { 461 const struct v3d_device_info *devinfo; 462 struct ra_regs *regs; 463 unsigned int reg_class_any[3]; 464 unsigned int reg_class_r5[3]; 465 unsigned int reg_class_phys[3]; 466 unsigned int reg_class_phys_or_acc[3]; 467}; 468 469struct v3d_compile { 470 const struct v3d_device_info *devinfo; 471 nir_shader *s; 472 nir_function_impl *impl; 473 struct exec_list *cf_node_list; 474 const struct v3d_compiler *compiler; 475 476 void (*debug_output)(const char *msg, 477 void *debug_output_data); 478 void *debug_output_data; 479 480 /** 481 * Mapping from nir_register * or nir_ssa_def * to array of struct 482 * qreg for the values. 483 */ 484 struct hash_table *def_ht; 485 486 /* For each temp, the instruction generating its value. */ 487 struct qinst **defs; 488 uint32_t defs_array_size; 489 490 /** 491 * Inputs to the shader, arranged by TGSI declaration order. 492 * 493 * Not all fragment shader QFILE_VARY reads are present in this array. 
494 */ 495 struct qreg *inputs; 496 struct qreg *outputs; 497 bool msaa_per_sample_output; 498 struct qreg color_reads[V3D_MAX_SAMPLES]; 499 struct qreg sample_colors[V3D_MAX_SAMPLES]; 500 uint32_t inputs_array_size; 501 uint32_t outputs_array_size; 502 uint32_t uniforms_array_size; 503 504 /* Booleans for whether the corresponding QFILE_VARY[i] is 505 * flat-shaded. This includes gl_FragColor flat-shading, which is 506 * customized based on the shademodel_flat shader key. 507 */ 508 uint32_t flat_shade_flags[BITSET_WORDS(V3D_MAX_FS_INPUTS)]; 509 510 uint32_t noperspective_flags[BITSET_WORDS(V3D_MAX_FS_INPUTS)]; 511 512 uint32_t centroid_flags[BITSET_WORDS(V3D_MAX_FS_INPUTS)]; 513 514 bool uses_center_w; 515 bool writes_z; 516 517 /* State for whether we're executing on each channel currently. 0 if 518 * yes, otherwise a block number + 1 that the channel jumped to. 519 */ 520 struct qreg execute; 521 bool in_control_flow; 522 523 struct qreg line_x, point_x, point_y; 524 525 /** 526 * Instance ID, which comes in before the vertex attribute payload if 527 * the shader record requests it. 528 */ 529 struct qreg iid; 530 531 /** 532 * Vertex ID, which comes in before the vertex attribute payload 533 * (after Instance ID) if the shader record requests it. 534 */ 535 struct qreg vid; 536 537 /* Fragment shader payload regs. */ 538 struct qreg payload_w, payload_w_centroid, payload_z; 539 540 struct qreg cs_payload[2]; 541 struct qreg cs_shared_offset; 542 int local_invocation_index_bits; 543 544 uint8_t vattr_sizes[V3D_MAX_VS_INPUTS / 4]; 545 uint32_t vpm_output_size; 546 547 /* Size in bytes of registers that have been spilled. This is how much 548 * space needs to be available in the spill BO per thread per QPU. 549 */ 550 uint32_t spill_size; 551 /* Shader-db stats */ 552 uint32_t spills, fills, loops; 553 /** 554 * Register spilling's per-thread base address, shared between each 555 * spill/fill's addressing calculations. 
556 */ 557 struct qreg spill_base; 558 /* Bit vector of which temps may be spilled */ 559 BITSET_WORD *spillable; 560 561 /** 562 * Array of the VARYING_SLOT_* of all FS QFILE_VARY reads. 563 * 564 * This includes those that aren't part of the VPM varyings, like 565 * point/line coordinates. 566 */ 567 struct v3d_varying_slot input_slots[V3D_MAX_FS_INPUTS]; 568 569 /** 570 * An entry per outputs[] in the VS indicating what the VARYING_SLOT_* 571 * of the output is. Used to emit from the VS in the order that the 572 * FS needs. 573 */ 574 struct v3d_varying_slot *output_slots; 575 576 struct pipe_shader_state *shader_state; 577 struct v3d_key *key; 578 struct v3d_fs_key *fs_key; 579 struct v3d_vs_key *vs_key; 580 581 /* Live ranges of temps. */ 582 int *temp_start, *temp_end; 583 bool live_intervals_valid; 584 585 uint32_t *uniform_data; 586 enum quniform_contents *uniform_contents; 587 uint32_t uniform_array_size; 588 uint32_t num_uniforms; 589 uint32_t output_position_index; 590 nir_variable *output_color_var[4]; 591 uint32_t output_sample_mask_index; 592 593 struct qreg undef; 594 uint32_t num_temps; 595 596 struct vir_cursor cursor; 597 struct list_head blocks; 598 int next_block_index; 599 struct qblock *cur_block; 600 struct qblock *loop_cont_block; 601 struct qblock *loop_break_block; 602 603 uint64_t *qpu_insts; 604 uint32_t qpu_inst_count; 605 uint32_t qpu_inst_size; 606 607 /* For the FS, the number of varying inputs not counting the 608 * point/line varyings payload 609 */ 610 uint32_t num_inputs; 611 612 uint32_t program_id; 613 uint32_t variant_id; 614 615 /* Set to compile program in in 1x, 2x, or 4x threaded mode, where 616 * SIG_THREAD_SWITCH is used to hide texturing latency at the cost of 617 * limiting ourselves to the part of the physical reg space. 618 * 619 * On V3D 3.x, 2x or 4x divide the physical reg space by 2x or 4x. On 620 * V3D 4.x, all shaders are 2x threaded, and 4x only divides the 621 * physical reg space in half. 
622 */ 623 uint8_t threads; 624 struct qinst *last_thrsw; 625 bool last_thrsw_at_top_level; 626 627 bool failed; 628}; 629 630struct v3d_uniform_list { 631 enum quniform_contents *contents; 632 uint32_t *data; 633 uint32_t count; 634}; 635 636struct v3d_prog_data { 637 struct v3d_uniform_list uniforms; 638 639 uint32_t spill_size; 640 641 uint8_t threads; 642 643 /* For threads > 1, whether the program should be dispatched in the 644 * after-final-THRSW state. 645 */ 646 bool single_seg; 647}; 648 649struct v3d_vs_prog_data { 650 struct v3d_prog_data base; 651 652 bool uses_iid, uses_vid; 653 654 /* Number of components read from each vertex attribute. */ 655 uint8_t vattr_sizes[V3D_MAX_VS_INPUTS / 4]; 656 657 /* Total number of components read, for the shader state record. */ 658 uint32_t vpm_input_size; 659 660 /* Total number of components written, for the shader state record. */ 661 uint32_t vpm_output_size; 662 663 /* Set if there should be separate VPM segments for input and output. 664 * If unset, vpm_input_size will be 0. 665 */ 666 bool separate_segments; 667 668 /* Value to be programmed in VCM_CACHE_SIZE. */ 669 uint8_t vcm_cache_size; 670}; 671 672struct v3d_fs_prog_data { 673 struct v3d_prog_data base; 674 675 struct v3d_varying_slot input_slots[V3D_MAX_FS_INPUTS]; 676 677 /* Array of flat shade flags. 678 * 679 * Each entry is only 24 bits (high 8 bits 0), to match the hardware 680 * packet layout. 681 */ 682 uint32_t flat_shade_flags[((V3D_MAX_FS_INPUTS - 1) / 24) + 1]; 683 684 uint32_t noperspective_flags[((V3D_MAX_FS_INPUTS - 1) / 24) + 1]; 685 686 uint32_t centroid_flags[((V3D_MAX_FS_INPUTS - 1) / 24) + 1]; 687 688 uint8_t num_inputs; 689 bool writes_z; 690 bool disable_ez; 691 bool uses_center_w; 692}; 693 694struct v3d_compute_prog_data { 695 struct v3d_prog_data base; 696 /* Size in bytes of the workgroup's shared space. 
*/ 697 uint32_t shared_size; 698}; 699 700static inline bool 701vir_has_uniform(struct qinst *inst) 702{ 703 return inst->uniform != ~0; 704} 705 706/* Special nir_load_input intrinsic index for loading the current TLB 707 * destination color. 708 */ 709#define V3D_NIR_TLB_COLOR_READ_INPUT 2000000000 710 711#define V3D_NIR_MS_MASK_OUTPUT 2000000000 712 713extern const nir_shader_compiler_options v3d_nir_options; 714 715const struct v3d_compiler *v3d_compiler_init(const struct v3d_device_info *devinfo); 716void v3d_compiler_free(const struct v3d_compiler *compiler); 717void v3d_optimize_nir(struct nir_shader *s); 718 719uint64_t *v3d_compile(const struct v3d_compiler *compiler, 720 struct v3d_key *key, 721 struct v3d_prog_data **prog_data, 722 nir_shader *s, 723 void (*debug_output)(const char *msg, 724 void *debug_output_data), 725 void *debug_output_data, 726 int program_id, int variant_id, 727 uint32_t *final_assembly_size); 728 729void v3d_nir_to_vir(struct v3d_compile *c); 730 731void vir_compile_destroy(struct v3d_compile *c); 732const char *vir_get_stage_name(struct v3d_compile *c); 733struct qblock *vir_new_block(struct v3d_compile *c); 734void vir_set_emit_block(struct v3d_compile *c, struct qblock *block); 735void vir_link_blocks(struct qblock *predecessor, struct qblock *successor); 736struct qblock *vir_entry_block(struct v3d_compile *c); 737struct qblock *vir_exit_block(struct v3d_compile *c); 738struct qinst *vir_add_inst(enum v3d_qpu_add_op op, struct qreg dst, 739 struct qreg src0, struct qreg src1); 740struct qinst *vir_mul_inst(enum v3d_qpu_mul_op op, struct qreg dst, 741 struct qreg src0, struct qreg src1); 742struct qinst *vir_branch_inst(struct v3d_compile *c, 743 enum v3d_qpu_branch_cond cond); 744void vir_remove_instruction(struct v3d_compile *c, struct qinst *qinst); 745uint32_t vir_get_uniform_index(struct v3d_compile *c, 746 enum quniform_contents contents, 747 uint32_t data); 748struct qreg vir_uniform(struct v3d_compile *c, 749 enum 
quniform_contents contents, 750 uint32_t data); 751void vir_schedule_instructions(struct v3d_compile *c); 752void v3d_setup_spill_base(struct v3d_compile *c); 753struct v3d_qpu_instr v3d_qpu_nop(void); 754 755struct qreg vir_emit_def(struct v3d_compile *c, struct qinst *inst); 756struct qinst *vir_emit_nondef(struct v3d_compile *c, struct qinst *inst); 757void vir_set_cond(struct qinst *inst, enum v3d_qpu_cond cond); 758void vir_set_pf(struct qinst *inst, enum v3d_qpu_pf pf); 759void vir_set_uf(struct qinst *inst, enum v3d_qpu_uf uf); 760void vir_set_unpack(struct qinst *inst, int src, 761 enum v3d_qpu_input_unpack unpack); 762 763struct qreg vir_get_temp(struct v3d_compile *c); 764void vir_emit_last_thrsw(struct v3d_compile *c); 765void vir_calculate_live_intervals(struct v3d_compile *c); 766int vir_get_nsrc(struct qinst *inst); 767bool vir_has_side_effects(struct v3d_compile *c, struct qinst *inst); 768bool vir_get_add_op(struct qinst *inst, enum v3d_qpu_add_op *op); 769bool vir_get_mul_op(struct qinst *inst, enum v3d_qpu_mul_op *op); 770bool vir_is_raw_mov(struct qinst *inst); 771bool vir_is_tex(struct qinst *inst); 772bool vir_is_add(struct qinst *inst); 773bool vir_is_mul(struct qinst *inst); 774bool vir_writes_r3(const struct v3d_device_info *devinfo, struct qinst *inst); 775bool vir_writes_r4(const struct v3d_device_info *devinfo, struct qinst *inst); 776struct qreg vir_follow_movs(struct v3d_compile *c, struct qreg reg); 777uint8_t vir_channels_written(struct qinst *inst); 778struct qreg ntq_get_src(struct v3d_compile *c, nir_src src, int i); 779void ntq_store_dest(struct v3d_compile *c, nir_dest *dest, int chan, 780 struct qreg result); 781void vir_emit_thrsw(struct v3d_compile *c); 782 783void vir_dump(struct v3d_compile *c); 784void vir_dump_inst(struct v3d_compile *c, struct qinst *inst); 785void vir_dump_uniform(enum quniform_contents contents, uint32_t data); 786 787void vir_validate(struct v3d_compile *c); 788 789void vir_optimize(struct v3d_compile 
*c); 790bool vir_opt_algebraic(struct v3d_compile *c); 791bool vir_opt_constant_folding(struct v3d_compile *c); 792bool vir_opt_copy_propagate(struct v3d_compile *c); 793bool vir_opt_dead_code(struct v3d_compile *c); 794bool vir_opt_peephole_sf(struct v3d_compile *c); 795bool vir_opt_redundant_flags(struct v3d_compile *c); 796bool vir_opt_small_immediates(struct v3d_compile *c); 797bool vir_opt_vpm(struct v3d_compile *c); 798void v3d_nir_lower_blend(nir_shader *s, struct v3d_compile *c); 799void v3d_nir_lower_io(nir_shader *s, struct v3d_compile *c); 800void v3d_nir_lower_scratch(nir_shader *s); 801void v3d_nir_lower_txf_ms(nir_shader *s, struct v3d_compile *c); 802void v3d_nir_lower_image_load_store(nir_shader *s); 803void vir_lower_uniforms(struct v3d_compile *c); 804 805void v3d33_vir_vpm_read_setup(struct v3d_compile *c, int num_components); 806void v3d33_vir_vpm_write_setup(struct v3d_compile *c); 807void v3d33_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr); 808void v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr); 809void v3d40_vir_emit_image_load_store(struct v3d_compile *c, 810 nir_intrinsic_instr *instr); 811 812void v3d_vir_to_qpu(struct v3d_compile *c, struct qpu_reg *temp_registers); 813uint32_t v3d_qpu_schedule_instructions(struct v3d_compile *c); 814void qpu_validate(struct v3d_compile *c); 815struct qpu_reg *v3d_register_allocate(struct v3d_compile *c, bool *spilled); 816bool vir_init_reg_sets(struct v3d_compiler *compiler); 817 818bool v3d_gl_format_is_return_32(GLenum format); 819 820static inline bool 821quniform_contents_is_texture_p0(enum quniform_contents contents) 822{ 823 return (contents >= QUNIFORM_TEXTURE_CONFIG_P0_0 && 824 contents < (QUNIFORM_TEXTURE_CONFIG_P0_0 + 825 V3D_MAX_TEXTURE_SAMPLERS)); 826} 827 828static inline bool 829vir_in_nonuniform_control_flow(struct v3d_compile *c) 830{ 831 return c->execute.file != QFILE_NULL; 832} 833 834static inline struct qreg 835vir_uniform_ui(struct v3d_compile *c, 
uint32_t ui) 836{ 837 return vir_uniform(c, QUNIFORM_CONSTANT, ui); 838} 839 840static inline struct qreg 841vir_uniform_f(struct v3d_compile *c, float f) 842{ 843 return vir_uniform(c, QUNIFORM_CONSTANT, fui(f)); 844} 845 846#define VIR_ALU0(name, vir_inst, op) \ 847static inline struct qreg \ 848vir_##name(struct v3d_compile *c) \ 849{ \ 850 return vir_emit_def(c, vir_inst(op, c->undef, \ 851 c->undef, c->undef)); \ 852} \ 853static inline struct qinst * \ 854vir_##name##_dest(struct v3d_compile *c, struct qreg dest) \ 855{ \ 856 return vir_emit_nondef(c, vir_inst(op, dest, \ 857 c->undef, c->undef)); \ 858} 859 860#define VIR_ALU1(name, vir_inst, op) \ 861static inline struct qreg \ 862vir_##name(struct v3d_compile *c, struct qreg a) \ 863{ \ 864 return vir_emit_def(c, vir_inst(op, c->undef, \ 865 a, c->undef)); \ 866} \ 867static inline struct qinst * \ 868vir_##name##_dest(struct v3d_compile *c, struct qreg dest, \ 869 struct qreg a) \ 870{ \ 871 return vir_emit_nondef(c, vir_inst(op, dest, a, \ 872 c->undef)); \ 873} 874 875#define VIR_ALU2(name, vir_inst, op) \ 876static inline struct qreg \ 877vir_##name(struct v3d_compile *c, struct qreg a, struct qreg b) \ 878{ \ 879 return vir_emit_def(c, vir_inst(op, c->undef, a, b)); \ 880} \ 881static inline struct qinst * \ 882vir_##name##_dest(struct v3d_compile *c, struct qreg dest, \ 883 struct qreg a, struct qreg b) \ 884{ \ 885 return vir_emit_nondef(c, vir_inst(op, dest, a, b)); \ 886} 887 888#define VIR_NODST_0(name, vir_inst, op) \ 889static inline struct qinst * \ 890vir_##name(struct v3d_compile *c) \ 891{ \ 892 return vir_emit_nondef(c, vir_inst(op, c->undef, \ 893 c->undef, c->undef)); \ 894} 895 896#define VIR_NODST_1(name, vir_inst, op) \ 897static inline struct qinst * \ 898vir_##name(struct v3d_compile *c, struct qreg a) \ 899{ \ 900 return vir_emit_nondef(c, vir_inst(op, c->undef, \ 901 a, c->undef)); \ 902} 903 904#define VIR_NODST_2(name, vir_inst, op) \ 905static inline struct qinst * \ 
906vir_##name(struct v3d_compile *c, struct qreg a, struct qreg b) \ 907{ \ 908 return vir_emit_nondef(c, vir_inst(op, c->undef, \ 909 a, b)); \ 910} 911 912#define VIR_SFU(name) \ 913static inline struct qreg \ 914vir_##name(struct v3d_compile *c, struct qreg a) \ 915{ \ 916 if (c->devinfo->ver >= 41) { \ 917 return vir_emit_def(c, vir_add_inst(V3D_QPU_A_##name, \ 918 c->undef, \ 919 a, c->undef)); \ 920 } else { \ 921 vir_FMOV_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_##name), a); \ 922 return vir_FMOV(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_R4)); \ 923 } \ 924} \ 925static inline struct qinst * \ 926vir_##name##_dest(struct v3d_compile *c, struct qreg dest, \ 927 struct qreg a) \ 928{ \ 929 if (c->devinfo->ver >= 41) { \ 930 return vir_emit_nondef(c, vir_add_inst(V3D_QPU_A_##name, \ 931 dest, \ 932 a, c->undef)); \ 933 } else { \ 934 vir_FMOV_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_##name), a); \ 935 return vir_FMOV_dest(c, dest, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_R4)); \ 936 } \ 937} 938 939#define VIR_A_ALU2(name) VIR_ALU2(name, vir_add_inst, V3D_QPU_A_##name) 940#define VIR_M_ALU2(name) VIR_ALU2(name, vir_mul_inst, V3D_QPU_M_##name) 941#define VIR_A_ALU1(name) VIR_ALU1(name, vir_add_inst, V3D_QPU_A_##name) 942#define VIR_M_ALU1(name) VIR_ALU1(name, vir_mul_inst, V3D_QPU_M_##name) 943#define VIR_A_ALU0(name) VIR_ALU0(name, vir_add_inst, V3D_QPU_A_##name) 944#define VIR_M_ALU0(name) VIR_ALU0(name, vir_mul_inst, V3D_QPU_M_##name) 945#define VIR_A_NODST_2(name) VIR_NODST_2(name, vir_add_inst, V3D_QPU_A_##name) 946#define VIR_M_NODST_2(name) VIR_NODST_2(name, vir_mul_inst, V3D_QPU_M_##name) 947#define VIR_A_NODST_1(name) VIR_NODST_1(name, vir_add_inst, V3D_QPU_A_##name) 948#define VIR_M_NODST_1(name) VIR_NODST_1(name, vir_mul_inst, V3D_QPU_M_##name) 949#define VIR_A_NODST_0(name) VIR_NODST_0(name, vir_add_inst, V3D_QPU_A_##name) 950 951VIR_A_ALU2(FADD) 952VIR_A_ALU2(VFPACK) 953VIR_A_ALU2(FSUB) 954VIR_A_ALU2(FMIN) 955VIR_A_ALU2(FMAX) 956 957VIR_A_ALU2(ADD) 
958VIR_A_ALU2(SUB) 959VIR_A_ALU2(SHL) 960VIR_A_ALU2(SHR) 961VIR_A_ALU2(ASR) 962VIR_A_ALU2(ROR) 963VIR_A_ALU2(MIN) 964VIR_A_ALU2(MAX) 965VIR_A_ALU2(UMIN) 966VIR_A_ALU2(UMAX) 967VIR_A_ALU2(AND) 968VIR_A_ALU2(OR) 969VIR_A_ALU2(XOR) 970VIR_A_ALU2(VADD) 971VIR_A_ALU2(VSUB) 972VIR_A_NODST_2(STVPMV) 973VIR_A_ALU1(NOT) 974VIR_A_ALU1(NEG) 975VIR_A_ALU1(FLAPUSH) 976VIR_A_ALU1(FLBPUSH) 977VIR_A_ALU1(FLPOP) 978VIR_A_ALU1(SETMSF) 979VIR_A_ALU1(SETREVF) 980VIR_A_ALU0(TIDX) 981VIR_A_ALU0(EIDX) 982VIR_A_ALU1(LDVPMV_IN) 983VIR_A_ALU1(LDVPMV_OUT) 984VIR_A_ALU0(TMUWT) 985 986VIR_A_ALU0(FXCD) 987VIR_A_ALU0(XCD) 988VIR_A_ALU0(FYCD) 989VIR_A_ALU0(YCD) 990VIR_A_ALU0(MSF) 991VIR_A_ALU0(REVF) 992VIR_A_ALU0(BARRIERID) 993VIR_A_NODST_1(VPMSETUP) 994VIR_A_NODST_0(VPMWT) 995VIR_A_ALU2(FCMP) 996VIR_A_ALU2(VFMAX) 997 998VIR_A_ALU1(FROUND) 999VIR_A_ALU1(FTOIN) 1000VIR_A_ALU1(FTRUNC) 1001VIR_A_ALU1(FTOIZ) 1002VIR_A_ALU1(FFLOOR) 1003VIR_A_ALU1(FTOUZ) 1004VIR_A_ALU1(FCEIL) 1005VIR_A_ALU1(FTOC) 1006 1007VIR_A_ALU1(FDX) 1008VIR_A_ALU1(FDY) 1009 1010VIR_A_ALU1(ITOF) 1011VIR_A_ALU1(CLZ) 1012VIR_A_ALU1(UTOF) 1013 1014VIR_M_ALU2(UMUL24) 1015VIR_M_ALU2(FMUL) 1016VIR_M_ALU2(SMUL24) 1017VIR_M_NODST_2(MULTOP) 1018 1019VIR_M_ALU1(MOV) 1020VIR_M_ALU1(FMOV) 1021 1022VIR_SFU(RECIP) 1023VIR_SFU(RSQRT) 1024VIR_SFU(EXP) 1025VIR_SFU(LOG) 1026VIR_SFU(SIN) 1027VIR_SFU(RSQRT2) 1028 1029static inline struct qinst * 1030vir_MOV_cond(struct v3d_compile *c, enum v3d_qpu_cond cond, 1031 struct qreg dest, struct qreg src) 1032{ 1033 struct qinst *mov = vir_MOV_dest(c, dest, src); 1034 vir_set_cond(mov, cond); 1035 return mov; 1036} 1037 1038static inline struct qreg 1039vir_SEL(struct v3d_compile *c, enum v3d_qpu_cond cond, 1040 struct qreg src0, struct qreg src1) 1041{ 1042 struct qreg t = vir_get_temp(c); 1043 vir_MOV_dest(c, t, src1); 1044 vir_MOV_cond(c, cond, t, src0); 1045 return t; 1046} 1047 1048static inline struct qinst * 1049vir_NOP(struct v3d_compile *c) 1050{ 1051 return vir_emit_nondef(c, 
vir_add_inst(V3D_QPU_A_NOP, 1052 c->undef, c->undef, c->undef)); 1053} 1054 1055static inline struct qreg 1056vir_LDTMU(struct v3d_compile *c) 1057{ 1058 if (c->devinfo->ver >= 41) { 1059 struct qinst *ldtmu = vir_add_inst(V3D_QPU_A_NOP, c->undef, 1060 c->undef, c->undef); 1061 ldtmu->qpu.sig.ldtmu = true; 1062 1063 return vir_emit_def(c, ldtmu); 1064 } else { 1065 vir_NOP(c)->qpu.sig.ldtmu = true; 1066 return vir_MOV(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_R4)); 1067 } 1068} 1069 1070static inline struct qreg 1071vir_UMUL(struct v3d_compile *c, struct qreg src0, struct qreg src1) 1072{ 1073 vir_MULTOP(c, src0, src1); 1074 return vir_UMUL24(c, src0, src1); 1075} 1076 1077/* 1078static inline struct qreg 1079vir_LOAD_IMM(struct v3d_compile *c, uint32_t val) 1080{ 1081 return vir_emit_def(c, vir_inst(QOP_LOAD_IMM, c->undef, 1082 vir_reg(QFILE_LOAD_IMM, val), c->undef)); 1083} 1084 1085static inline struct qreg 1086vir_LOAD_IMM_U2(struct v3d_compile *c, uint32_t val) 1087{ 1088 return vir_emit_def(c, vir_inst(QOP_LOAD_IMM_U2, c->undef, 1089 vir_reg(QFILE_LOAD_IMM, val), 1090 c->undef)); 1091} 1092static inline struct qreg 1093vir_LOAD_IMM_I2(struct v3d_compile *c, uint32_t val) 1094{ 1095 return vir_emit_def(c, vir_inst(QOP_LOAD_IMM_I2, c->undef, 1096 vir_reg(QFILE_LOAD_IMM, val), 1097 c->undef)); 1098} 1099*/ 1100 1101static inline struct qinst * 1102vir_BRANCH(struct v3d_compile *c, enum v3d_qpu_branch_cond cond) 1103{ 1104 /* The actual uniform_data value will be set at scheduling time */ 1105 return vir_emit_nondef(c, vir_branch_inst(c, cond)); 1106} 1107 1108#define vir_for_each_block(block, c) \ 1109 list_for_each_entry(struct qblock, block, &c->blocks, link) 1110 1111#define vir_for_each_block_rev(block, c) \ 1112 list_for_each_entry_rev(struct qblock, block, &c->blocks, link) 1113 1114/* Loop over the non-NULL members of the successors array. 
*/ 1115#define vir_for_each_successor(succ, block) \ 1116 for (struct qblock *succ = block->successors[0]; \ 1117 succ != NULL; \ 1118 succ = (succ == block->successors[1] ? NULL : \ 1119 block->successors[1])) 1120 1121#define vir_for_each_inst(inst, block) \ 1122 list_for_each_entry(struct qinst, inst, &block->instructions, link) 1123 1124#define vir_for_each_inst_rev(inst, block) \ 1125 list_for_each_entry_rev(struct qinst, inst, &block->instructions, link) 1126 1127#define vir_for_each_inst_safe(inst, block) \ 1128 list_for_each_entry_safe(struct qinst, inst, &block->instructions, link) 1129 1130#define vir_for_each_inst_inorder(inst, c) \ 1131 vir_for_each_block(_block, c) \ 1132 vir_for_each_inst(inst, _block) 1133 1134#define vir_for_each_inst_inorder_safe(inst, c) \ 1135 vir_for_each_block(_block, c) \ 1136 vir_for_each_inst_safe(inst, _block) 1137 1138#endif /* V3D_COMPILER_H */ 1139