/* v3d_compiler.h revision 01e04c3f */
1/* 2 * Copyright © 2016 Broadcom 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#ifndef V3D_COMPILER_H 25#define V3D_COMPILER_H 26 27#include <assert.h> 28#include <stdio.h> 29#include <stdlib.h> 30#include <stdbool.h> 31#include <stdint.h> 32#include <string.h> 33 34#include "util/macros.h" 35#include "common/v3d_debug.h" 36#include "common/v3d_device_info.h" 37#include "compiler/nir/nir.h" 38#include "util/list.h" 39#include "util/u_math.h" 40 41#include "qpu/qpu_instr.h" 42#include "pipe/p_state.h" 43 44#define V3D_MAX_TEXTURE_SAMPLERS 32 45#define V3D_MAX_SAMPLES 4 46#define V3D_MAX_FS_INPUTS 64 47#define V3D_MAX_VS_INPUTS 64 48 49struct nir_builder; 50 51struct v3d_fs_inputs { 52 /** 53 * Array of the meanings of the VPM inputs this shader needs. 54 * 55 * It doesn't include those that aren't part of the VPM, like 56 * point/line coordinates. 
57 */ 58 struct v3d_varying_slot *input_slots; 59 uint32_t num_inputs; 60}; 61 62enum qfile { 63 /** An unused source or destination register. */ 64 QFILE_NULL, 65 66 /** A physical register, such as the W coordinate payload. */ 67 QFILE_REG, 68 /** One of the regsiters for fixed function interactions. */ 69 QFILE_MAGIC, 70 71 /** 72 * A virtual register, that will be allocated to actual accumulator 73 * or physical registers later. 74 */ 75 QFILE_TEMP, 76 QFILE_UNIF, 77 QFILE_TLB, 78 QFILE_TLBU, 79 80 /** 81 * VPM reads use this with an index value to say what part of the VPM 82 * is being read. 83 */ 84 QFILE_VPM, 85 86 /** 87 * Stores an immediate value in the index field that will be used 88 * directly by qpu_load_imm(). 89 */ 90 QFILE_LOAD_IMM, 91 92 /** 93 * Stores an immediate value in the index field that can be turned 94 * into a small immediate field by qpu_encode_small_immediate(). 95 */ 96 QFILE_SMALL_IMM, 97}; 98 99/** 100 * A reference to a QPU register or a virtual temp register. 101 */ 102struct qreg { 103 enum qfile file; 104 uint32_t index; 105}; 106 107static inline struct qreg vir_reg(enum qfile file, uint32_t index) 108{ 109 return (struct qreg){file, index}; 110} 111 112/** 113 * A reference to an actual register at the QPU level, for register 114 * allocation. 115 */ 116struct qpu_reg { 117 bool magic; 118 bool smimm; 119 int index; 120}; 121 122struct qinst { 123 /** Entry in qblock->instructions */ 124 struct list_head link; 125 126 /** 127 * The instruction being wrapped. Its condition codes, pack flags, 128 * signals, etc. will all be used, with just the register references 129 * being replaced by the contents of qinst->dst and qinst->src[]. 
130 */ 131 struct v3d_qpu_instr qpu; 132 133 /* Pre-register-allocation references to src/dst registers */ 134 struct qreg dst; 135 struct qreg src[3]; 136 bool cond_is_exec_mask; 137 bool has_implicit_uniform; 138 bool is_last_thrsw; 139 140 /* After vir_to_qpu.c: If instr reads a uniform, which uniform from 141 * the uncompiled stream it is. 142 */ 143 int uniform; 144}; 145 146enum quniform_contents { 147 /** 148 * Indicates that a constant 32-bit value is copied from the program's 149 * uniform contents. 150 */ 151 QUNIFORM_CONSTANT, 152 /** 153 * Indicates that the program's uniform contents are used as an index 154 * into the GL uniform storage. 155 */ 156 QUNIFORM_UNIFORM, 157 158 /** @{ 159 * Scaling factors from clip coordinates to relative to the viewport 160 * center. 161 * 162 * This is used by the coordinate and vertex shaders to produce the 163 * 32-bit entry consisting of 2 16-bit fields with 12.4 signed fixed 164 * point offsets from the viewport ccenter. 165 */ 166 QUNIFORM_VIEWPORT_X_SCALE, 167 QUNIFORM_VIEWPORT_Y_SCALE, 168 /** @} */ 169 170 QUNIFORM_VIEWPORT_Z_OFFSET, 171 QUNIFORM_VIEWPORT_Z_SCALE, 172 173 QUNIFORM_USER_CLIP_PLANE, 174 175 /** 176 * A reference to a V3D 3.x texture config parameter 0 uniform. 177 * 178 * This is a uniform implicitly loaded with a QPU_W_TMU* write, which 179 * defines texture type, miplevels, and such. It will be found as a 180 * parameter to the first QOP_TEX_[STRB] instruction in a sequence. 
181 */ 182 QUNIFORM_TEXTURE_CONFIG_P0_0, 183 QUNIFORM_TEXTURE_CONFIG_P0_1, 184 QUNIFORM_TEXTURE_CONFIG_P0_2, 185 QUNIFORM_TEXTURE_CONFIG_P0_3, 186 QUNIFORM_TEXTURE_CONFIG_P0_4, 187 QUNIFORM_TEXTURE_CONFIG_P0_5, 188 QUNIFORM_TEXTURE_CONFIG_P0_6, 189 QUNIFORM_TEXTURE_CONFIG_P0_7, 190 QUNIFORM_TEXTURE_CONFIG_P0_8, 191 QUNIFORM_TEXTURE_CONFIG_P0_9, 192 QUNIFORM_TEXTURE_CONFIG_P0_10, 193 QUNIFORM_TEXTURE_CONFIG_P0_11, 194 QUNIFORM_TEXTURE_CONFIG_P0_12, 195 QUNIFORM_TEXTURE_CONFIG_P0_13, 196 QUNIFORM_TEXTURE_CONFIG_P0_14, 197 QUNIFORM_TEXTURE_CONFIG_P0_15, 198 QUNIFORM_TEXTURE_CONFIG_P0_16, 199 QUNIFORM_TEXTURE_CONFIG_P0_17, 200 QUNIFORM_TEXTURE_CONFIG_P0_18, 201 QUNIFORM_TEXTURE_CONFIG_P0_19, 202 QUNIFORM_TEXTURE_CONFIG_P0_20, 203 QUNIFORM_TEXTURE_CONFIG_P0_21, 204 QUNIFORM_TEXTURE_CONFIG_P0_22, 205 QUNIFORM_TEXTURE_CONFIG_P0_23, 206 QUNIFORM_TEXTURE_CONFIG_P0_24, 207 QUNIFORM_TEXTURE_CONFIG_P0_25, 208 QUNIFORM_TEXTURE_CONFIG_P0_26, 209 QUNIFORM_TEXTURE_CONFIG_P0_27, 210 QUNIFORM_TEXTURE_CONFIG_P0_28, 211 QUNIFORM_TEXTURE_CONFIG_P0_29, 212 QUNIFORM_TEXTURE_CONFIG_P0_30, 213 QUNIFORM_TEXTURE_CONFIG_P0_31, 214 QUNIFORM_TEXTURE_CONFIG_P0_32, 215 216 /** 217 * A reference to a V3D 3.x texture config parameter 1 uniform. 218 * 219 * This is a uniform implicitly loaded with a QPU_W_TMU* write, which 220 * has the pointer to the indirect texture state. Our data[] field 221 * will have a packed p1 value, but the address field will be just 222 * which texture unit's texture should be referenced. 223 */ 224 QUNIFORM_TEXTURE_CONFIG_P1, 225 226 /* A a V3D 4.x texture config parameter. The high 8 bits will be 227 * which texture or sampler is being sampled, and the driver must 228 * replace the address field with the appropriate address. 
229 */ 230 QUNIFORM_TMU_CONFIG_P0, 231 QUNIFORM_TMU_CONFIG_P1, 232 233 QUNIFORM_TEXTURE_FIRST_LEVEL, 234 235 QUNIFORM_TEXTURE_WIDTH, 236 QUNIFORM_TEXTURE_HEIGHT, 237 QUNIFORM_TEXTURE_DEPTH, 238 QUNIFORM_TEXTURE_ARRAY_SIZE, 239 QUNIFORM_TEXTURE_LEVELS, 240 241 QUNIFORM_UBO_ADDR, 242 243 QUNIFORM_TEXRECT_SCALE_X, 244 QUNIFORM_TEXRECT_SCALE_Y, 245 246 QUNIFORM_TEXTURE_BORDER_COLOR, 247 248 QUNIFORM_ALPHA_REF, 249 QUNIFORM_SAMPLE_MASK, 250 251 /** 252 * Returns the the offset of the scratch buffer for register spilling. 253 */ 254 QUNIFORM_SPILL_OFFSET, 255 QUNIFORM_SPILL_SIZE_PER_THREAD, 256}; 257 258struct v3d_varying_slot { 259 uint8_t slot_and_component; 260}; 261 262static inline struct v3d_varying_slot 263v3d_slot_from_slot_and_component(uint8_t slot, uint8_t component) 264{ 265 assert(slot < 255 / 4); 266 return (struct v3d_varying_slot){ (slot << 2) + component }; 267} 268 269static inline uint8_t v3d_slot_get_slot(struct v3d_varying_slot slot) 270{ 271 return slot.slot_and_component >> 2; 272} 273 274static inline uint8_t v3d_slot_get_component(struct v3d_varying_slot slot) 275{ 276 return slot.slot_and_component & 3; 277} 278 279struct v3d_ubo_range { 280 /** 281 * offset in bytes from the start of the ubo where this range is 282 * uploaded. 283 * 284 * Only set once used is set. 285 */ 286 uint32_t dst_offset; 287 288 /** 289 * offset in bytes from the start of the gallium uniforms where the 290 * data comes from. 
291 */ 292 uint32_t src_offset; 293 294 /** size in bytes of this ubo range */ 295 uint32_t size; 296}; 297 298struct v3d_key { 299 void *shader_state; 300 struct { 301 uint8_t swizzle[4]; 302 uint8_t return_size; 303 uint8_t return_channels; 304 unsigned compare_mode:1; 305 unsigned compare_func:3; 306 bool clamp_s:1; 307 bool clamp_t:1; 308 bool clamp_r:1; 309 } tex[V3D_MAX_TEXTURE_SAMPLERS]; 310 uint8_t ucp_enables; 311}; 312 313struct v3d_fs_key { 314 struct v3d_key base; 315 bool depth_enabled; 316 bool is_points; 317 bool is_lines; 318 bool alpha_test; 319 bool point_coord_upper_left; 320 bool light_twoside; 321 bool msaa; 322 bool sample_coverage; 323 bool sample_alpha_to_coverage; 324 bool sample_alpha_to_one; 325 bool clamp_color; 326 bool shade_model_flat; 327 uint8_t nr_cbufs; 328 uint8_t swap_color_rb; 329 /* Mask of which render targets need to be written as 32-bit floats */ 330 uint8_t f32_color_rb; 331 /* Masks of which render targets need to be written as ints/uints. 332 * Used by gallium to work around lost information in TGSI. 333 */ 334 uint8_t int_color_rb; 335 uint8_t uint_color_rb; 336 uint8_t alpha_test_func; 337 uint8_t logicop_func; 338 uint32_t point_sprite_mask; 339 340 struct pipe_rt_blend_state blend; 341}; 342 343struct v3d_vs_key { 344 struct v3d_key base; 345 346 struct v3d_varying_slot fs_inputs[V3D_MAX_FS_INPUTS]; 347 uint8_t num_fs_inputs; 348 349 bool is_coord; 350 bool per_vertex_point_size; 351 bool clamp_color; 352}; 353 354/** A basic block of VIR intructions. */ 355struct qblock { 356 struct list_head link; 357 358 struct list_head instructions; 359 360 struct set *predecessors; 361 struct qblock *successors[2]; 362 363 int index; 364 365 /* Instruction IPs for the first and last instruction of the block. 366 * Set by qpu_schedule.c. 367 */ 368 uint32_t start_qpu_ip; 369 uint32_t end_qpu_ip; 370 371 /* Instruction IP for the branch instruction of the block. Set by 372 * qpu_schedule.c. 
373 */ 374 uint32_t branch_qpu_ip; 375 376 /** Offset within the uniform stream at the start of the block. */ 377 uint32_t start_uniform; 378 /** Offset within the uniform stream of the branch instruction */ 379 uint32_t branch_uniform; 380 381 /** @{ used by v3d_vir_live_variables.c */ 382 BITSET_WORD *def; 383 BITSET_WORD *use; 384 BITSET_WORD *live_in; 385 BITSET_WORD *live_out; 386 int start_ip, end_ip; 387 /** @} */ 388}; 389 390/** Which util/list.h add mode we should use when inserting an instruction. */ 391enum vir_cursor_mode { 392 vir_cursor_add, 393 vir_cursor_addtail, 394}; 395 396/** 397 * Tracking structure for where new instructions should be inserted. Create 398 * with one of the vir_after_inst()-style helper functions. 399 * 400 * This does not protect against removal of the block or instruction, so we 401 * have an assert in instruction removal to try to catch it. 402 */ 403struct vir_cursor { 404 enum vir_cursor_mode mode; 405 struct list_head *link; 406}; 407 408static inline struct vir_cursor 409vir_before_inst(struct qinst *inst) 410{ 411 return (struct vir_cursor){ vir_cursor_addtail, &inst->link }; 412} 413 414static inline struct vir_cursor 415vir_after_inst(struct qinst *inst) 416{ 417 return (struct vir_cursor){ vir_cursor_add, &inst->link }; 418} 419 420static inline struct vir_cursor 421vir_before_block(struct qblock *block) 422{ 423 return (struct vir_cursor){ vir_cursor_add, &block->instructions }; 424} 425 426static inline struct vir_cursor 427vir_after_block(struct qblock *block) 428{ 429 return (struct vir_cursor){ vir_cursor_addtail, &block->instructions }; 430} 431 432/** 433 * Compiler state saved across compiler invocations, for any expensive global 434 * setup. 
435 */ 436struct v3d_compiler { 437 const struct v3d_device_info *devinfo; 438 struct ra_regs *regs; 439 unsigned int reg_class_phys[3]; 440 unsigned int reg_class_phys_or_acc[3]; 441}; 442 443struct v3d_compile { 444 const struct v3d_device_info *devinfo; 445 nir_shader *s; 446 nir_function_impl *impl; 447 struct exec_list *cf_node_list; 448 const struct v3d_compiler *compiler; 449 450 /** 451 * Mapping from nir_register * or nir_ssa_def * to array of struct 452 * qreg for the values. 453 */ 454 struct hash_table *def_ht; 455 456 /* For each temp, the instruction generating its value. */ 457 struct qinst **defs; 458 uint32_t defs_array_size; 459 460 /** 461 * Inputs to the shader, arranged by TGSI declaration order. 462 * 463 * Not all fragment shader QFILE_VARY reads are present in this array. 464 */ 465 struct qreg *inputs; 466 struct qreg *outputs; 467 bool msaa_per_sample_output; 468 struct qreg color_reads[V3D_MAX_SAMPLES]; 469 struct qreg sample_colors[V3D_MAX_SAMPLES]; 470 uint32_t inputs_array_size; 471 uint32_t outputs_array_size; 472 uint32_t uniforms_array_size; 473 474 /* Booleans for whether the corresponding QFILE_VARY[i] is 475 * flat-shaded. This includes gl_FragColor flat-shading, which is 476 * customized based on the shademodel_flat shader key. 477 */ 478 uint32_t flat_shade_flags[BITSET_WORDS(V3D_MAX_FS_INPUTS)]; 479 480 uint32_t noperspective_flags[BITSET_WORDS(V3D_MAX_FS_INPUTS)]; 481 482 uint32_t centroid_flags[BITSET_WORDS(V3D_MAX_FS_INPUTS)]; 483 484 bool uses_center_w; 485 486 struct v3d_ubo_range *ubo_ranges; 487 bool *ubo_range_used; 488 uint32_t ubo_ranges_array_size; 489 /** Number of uniform areas tracked in ubo_ranges. */ 490 uint32_t num_ubo_ranges; 491 uint32_t next_ubo_dst_offset; 492 493 /* State for whether we're executing on each channel currently. 0 if 494 * yes, otherwise a block number + 1 that the channel jumped to. 
495 */ 496 struct qreg execute; 497 498 struct qreg line_x, point_x, point_y; 499 500 /** 501 * Instance ID, which comes in before the vertex attribute payload if 502 * the shader record requests it. 503 */ 504 struct qreg iid; 505 506 /** 507 * Vertex ID, which comes in before the vertex attribute payload 508 * (after Instance ID) if the shader record requests it. 509 */ 510 struct qreg vid; 511 512 /* Fragment shader payload regs. */ 513 struct qreg payload_w, payload_w_centroid, payload_z; 514 515 uint8_t vattr_sizes[V3D_MAX_VS_INPUTS]; 516 uint32_t num_vpm_writes; 517 518 /* Size in bytes of registers that have been spilled. This is how much 519 * space needs to be available in the spill BO per thread per QPU. 520 */ 521 uint32_t spill_size; 522 /* Shader-db stats for register spilling. */ 523 uint32_t spills, fills; 524 /** 525 * Register spilling's per-thread base address, shared between each 526 * spill/fill's addressing calculations. 527 */ 528 struct qreg spill_base; 529 /* Bit vector of which temps may be spilled */ 530 BITSET_WORD *spillable; 531 532 /** 533 * Array of the VARYING_SLOT_* of all FS QFILE_VARY reads. 534 * 535 * This includes those that aren't part of the VPM varyings, like 536 * point/line coordinates. 537 */ 538 struct v3d_varying_slot input_slots[V3D_MAX_FS_INPUTS]; 539 540 /** 541 * An entry per outputs[] in the VS indicating what the VARYING_SLOT_* 542 * of the output is. Used to emit from the VS in the order that the 543 * FS needs. 544 */ 545 struct v3d_varying_slot *output_slots; 546 547 struct pipe_shader_state *shader_state; 548 struct v3d_key *key; 549 struct v3d_fs_key *fs_key; 550 struct v3d_vs_key *vs_key; 551 552 /* Live ranges of temps. 
*/ 553 int *temp_start, *temp_end; 554 bool live_intervals_valid; 555 556 uint32_t *uniform_data; 557 enum quniform_contents *uniform_contents; 558 uint32_t uniform_array_size; 559 uint32_t num_uniforms; 560 uint32_t num_outputs; 561 uint32_t output_position_index; 562 nir_variable *output_color_var[4]; 563 uint32_t output_point_size_index; 564 uint32_t output_sample_mask_index; 565 566 struct qreg undef; 567 uint32_t num_temps; 568 569 struct vir_cursor cursor; 570 struct list_head blocks; 571 int next_block_index; 572 struct qblock *cur_block; 573 struct qblock *loop_cont_block; 574 struct qblock *loop_break_block; 575 576 uint64_t *qpu_insts; 577 uint32_t qpu_inst_count; 578 uint32_t qpu_inst_size; 579 580 /* For the FS, the number of varying inputs not counting the 581 * point/line varyings payload 582 */ 583 uint32_t num_inputs; 584 585 /** 586 * Number of inputs from num_inputs remaining to be queued to the read 587 * FIFO in the VS/CS. 588 */ 589 uint32_t num_inputs_remaining; 590 591 /* Number of inputs currently in the read FIFO for the VS/CS */ 592 uint32_t num_inputs_in_fifo; 593 594 /** Next offset in the VPM to read from in the VS/CS */ 595 uint32_t vpm_read_offset; 596 597 uint32_t program_id; 598 uint32_t variant_id; 599 600 /* Set to compile program in in 1x, 2x, or 4x threaded mode, where 601 * SIG_THREAD_SWITCH is used to hide texturing latency at the cost of 602 * limiting ourselves to the part of the physical reg space. 603 * 604 * On V3D 3.x, 2x or 4x divide the physical reg space by 2x or 4x. On 605 * V3D 4.x, all shaders are 2x threaded, and 4x only divides the 606 * physical reg space in half. 
607 */ 608 uint8_t threads; 609 struct qinst *last_thrsw; 610 bool last_thrsw_at_top_level; 611 612 bool failed; 613}; 614 615struct v3d_uniform_list { 616 enum quniform_contents *contents; 617 uint32_t *data; 618 uint32_t count; 619}; 620 621struct v3d_prog_data { 622 struct v3d_uniform_list uniforms; 623 624 struct v3d_ubo_range *ubo_ranges; 625 uint32_t num_ubo_ranges; 626 uint32_t ubo_size; 627 uint32_t spill_size; 628 629 uint8_t num_inputs; 630 uint8_t threads; 631 632 /* For threads > 1, whether the program should be dispatched in the 633 * after-final-THRSW state. 634 */ 635 bool single_seg; 636}; 637 638struct v3d_vs_prog_data { 639 struct v3d_prog_data base; 640 641 bool uses_iid, uses_vid; 642 643 /* Number of components read from each vertex attribute. */ 644 uint8_t vattr_sizes[32]; 645 646 /* Total number of components read, for the shader state record. */ 647 uint32_t vpm_input_size; 648 649 /* Total number of components written, for the shader state record. */ 650 uint32_t vpm_output_size; 651 652 /* Value to be programmed in VCM_CACHE_SIZE. */ 653 uint8_t vcm_cache_size; 654}; 655 656struct v3d_fs_prog_data { 657 struct v3d_prog_data base; 658 659 struct v3d_varying_slot input_slots[V3D_MAX_FS_INPUTS]; 660 661 /* Array of flat shade flags. 662 * 663 * Each entry is only 24 bits (high 8 bits 0), to match the hardware 664 * packet layout. 665 */ 666 uint32_t flat_shade_flags[((V3D_MAX_FS_INPUTS - 1) / 24) + 1]; 667 668 uint32_t noperspective_flags[((V3D_MAX_FS_INPUTS - 1) / 24) + 1]; 669 670 uint32_t centroid_flags[((V3D_MAX_FS_INPUTS - 1) / 24) + 1]; 671 672 bool writes_z; 673 bool discard; 674 bool uses_center_w; 675}; 676 677/* Special nir_load_input intrinsic index for loading the current TLB 678 * destination color. 
679 */ 680#define V3D_NIR_TLB_COLOR_READ_INPUT 2000000000 681 682#define V3D_NIR_MS_MASK_OUTPUT 2000000000 683 684extern const nir_shader_compiler_options v3d_nir_options; 685 686const struct v3d_compiler *v3d_compiler_init(const struct v3d_device_info *devinfo); 687void v3d_compiler_free(const struct v3d_compiler *compiler); 688void v3d_optimize_nir(struct nir_shader *s); 689 690uint64_t *v3d_compile_vs(const struct v3d_compiler *compiler, 691 struct v3d_vs_key *key, 692 struct v3d_vs_prog_data *prog_data, 693 nir_shader *s, 694 int program_id, int variant_id, 695 uint32_t *final_assembly_size); 696 697uint64_t *v3d_compile_fs(const struct v3d_compiler *compiler, 698 struct v3d_fs_key *key, 699 struct v3d_fs_prog_data *prog_data, 700 nir_shader *s, 701 int program_id, int variant_id, 702 uint32_t *final_assembly_size); 703 704void v3d_nir_to_vir(struct v3d_compile *c); 705 706void vir_compile_destroy(struct v3d_compile *c); 707const char *vir_get_stage_name(struct v3d_compile *c); 708struct qblock *vir_new_block(struct v3d_compile *c); 709void vir_set_emit_block(struct v3d_compile *c, struct qblock *block); 710void vir_link_blocks(struct qblock *predecessor, struct qblock *successor); 711struct qblock *vir_entry_block(struct v3d_compile *c); 712struct qblock *vir_exit_block(struct v3d_compile *c); 713struct qinst *vir_add_inst(enum v3d_qpu_add_op op, struct qreg dst, 714 struct qreg src0, struct qreg src1); 715struct qinst *vir_mul_inst(enum v3d_qpu_mul_op op, struct qreg dst, 716 struct qreg src0, struct qreg src1); 717struct qinst *vir_branch_inst(enum v3d_qpu_branch_cond cond, struct qreg src0); 718void vir_remove_instruction(struct v3d_compile *c, struct qinst *qinst); 719struct qreg vir_uniform(struct v3d_compile *c, 720 enum quniform_contents contents, 721 uint32_t data); 722void vir_schedule_instructions(struct v3d_compile *c); 723struct v3d_qpu_instr v3d_qpu_nop(void); 724 725struct qreg vir_emit_def(struct v3d_compile *c, struct qinst *inst); 726struct 
qinst *vir_emit_nondef(struct v3d_compile *c, struct qinst *inst); 727void vir_set_cond(struct qinst *inst, enum v3d_qpu_cond cond); 728void vir_set_pf(struct qinst *inst, enum v3d_qpu_pf pf); 729void vir_set_unpack(struct qinst *inst, int src, 730 enum v3d_qpu_input_unpack unpack); 731 732struct qreg vir_get_temp(struct v3d_compile *c); 733void vir_emit_last_thrsw(struct v3d_compile *c); 734void vir_calculate_live_intervals(struct v3d_compile *c); 735bool vir_has_implicit_uniform(struct qinst *inst); 736int vir_get_implicit_uniform_src(struct qinst *inst); 737int vir_get_non_sideband_nsrc(struct qinst *inst); 738int vir_get_nsrc(struct qinst *inst); 739bool vir_has_side_effects(struct v3d_compile *c, struct qinst *inst); 740bool vir_get_add_op(struct qinst *inst, enum v3d_qpu_add_op *op); 741bool vir_get_mul_op(struct qinst *inst, enum v3d_qpu_mul_op *op); 742bool vir_is_raw_mov(struct qinst *inst); 743bool vir_is_tex(struct qinst *inst); 744bool vir_is_add(struct qinst *inst); 745bool vir_is_mul(struct qinst *inst); 746bool vir_is_float_input(struct qinst *inst); 747bool vir_depends_on_flags(struct qinst *inst); 748bool vir_writes_r3(const struct v3d_device_info *devinfo, struct qinst *inst); 749bool vir_writes_r4(const struct v3d_device_info *devinfo, struct qinst *inst); 750struct qreg vir_follow_movs(struct v3d_compile *c, struct qreg reg); 751uint8_t vir_channels_written(struct qinst *inst); 752struct qreg ntq_get_src(struct v3d_compile *c, nir_src src, int i); 753void ntq_store_dest(struct v3d_compile *c, nir_dest *dest, int chan, 754 struct qreg result); 755void vir_emit_thrsw(struct v3d_compile *c); 756 757void vir_dump(struct v3d_compile *c); 758void vir_dump_inst(struct v3d_compile *c, struct qinst *inst); 759 760void vir_validate(struct v3d_compile *c); 761 762void vir_optimize(struct v3d_compile *c); 763bool vir_opt_algebraic(struct v3d_compile *c); 764bool vir_opt_constant_folding(struct v3d_compile *c); 765bool vir_opt_copy_propagate(struct 
v3d_compile *c); 766bool vir_opt_dead_code(struct v3d_compile *c); 767bool vir_opt_peephole_sf(struct v3d_compile *c); 768bool vir_opt_small_immediates(struct v3d_compile *c); 769bool vir_opt_vpm(struct v3d_compile *c); 770void v3d_nir_lower_blend(nir_shader *s, struct v3d_compile *c); 771void v3d_nir_lower_io(nir_shader *s, struct v3d_compile *c); 772void v3d_nir_lower_txf_ms(nir_shader *s, struct v3d_compile *c); 773void vir_lower_uniforms(struct v3d_compile *c); 774 775void v3d33_vir_vpm_read_setup(struct v3d_compile *c, int num_components); 776void v3d33_vir_vpm_write_setup(struct v3d_compile *c); 777void v3d33_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr); 778void v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr); 779 780void v3d_vir_to_qpu(struct v3d_compile *c, struct qpu_reg *temp_registers); 781uint32_t v3d_qpu_schedule_instructions(struct v3d_compile *c); 782void qpu_validate(struct v3d_compile *c); 783struct qpu_reg *v3d_register_allocate(struct v3d_compile *c, bool *spilled); 784bool vir_init_reg_sets(struct v3d_compiler *compiler); 785 786void vir_PF(struct v3d_compile *c, struct qreg src, enum v3d_qpu_pf pf); 787 788static inline bool 789quniform_contents_is_texture_p0(enum quniform_contents contents) 790{ 791 return (contents >= QUNIFORM_TEXTURE_CONFIG_P0_0 && 792 contents < (QUNIFORM_TEXTURE_CONFIG_P0_0 + 793 V3D_MAX_TEXTURE_SAMPLERS)); 794} 795 796static inline struct qreg 797vir_uniform_ui(struct v3d_compile *c, uint32_t ui) 798{ 799 return vir_uniform(c, QUNIFORM_CONSTANT, ui); 800} 801 802static inline struct qreg 803vir_uniform_f(struct v3d_compile *c, float f) 804{ 805 return vir_uniform(c, QUNIFORM_CONSTANT, fui(f)); 806} 807 808#define VIR_ALU0(name, vir_inst, op) \ 809static inline struct qreg \ 810vir_##name(struct v3d_compile *c) \ 811{ \ 812 return vir_emit_def(c, vir_inst(op, c->undef, \ 813 c->undef, c->undef)); \ 814} \ 815static inline struct qinst * \ 816vir_##name##_dest(struct v3d_compile *c, struct qreg 
dest) \ 817{ \ 818 return vir_emit_nondef(c, vir_inst(op, dest, \ 819 c->undef, c->undef)); \ 820} 821 822#define VIR_ALU1(name, vir_inst, op) \ 823static inline struct qreg \ 824vir_##name(struct v3d_compile *c, struct qreg a) \ 825{ \ 826 return vir_emit_def(c, vir_inst(op, c->undef, \ 827 a, c->undef)); \ 828} \ 829static inline struct qinst * \ 830vir_##name##_dest(struct v3d_compile *c, struct qreg dest, \ 831 struct qreg a) \ 832{ \ 833 return vir_emit_nondef(c, vir_inst(op, dest, a, \ 834 c->undef)); \ 835} 836 837#define VIR_ALU2(name, vir_inst, op) \ 838static inline struct qreg \ 839vir_##name(struct v3d_compile *c, struct qreg a, struct qreg b) \ 840{ \ 841 return vir_emit_def(c, vir_inst(op, c->undef, a, b)); \ 842} \ 843static inline struct qinst * \ 844vir_##name##_dest(struct v3d_compile *c, struct qreg dest, \ 845 struct qreg a, struct qreg b) \ 846{ \ 847 return vir_emit_nondef(c, vir_inst(op, dest, a, b)); \ 848} 849 850#define VIR_NODST_0(name, vir_inst, op) \ 851static inline struct qinst * \ 852vir_##name(struct v3d_compile *c) \ 853{ \ 854 return vir_emit_nondef(c, vir_inst(op, c->undef, \ 855 c->undef, c->undef)); \ 856} 857 858#define VIR_NODST_1(name, vir_inst, op) \ 859static inline struct qinst * \ 860vir_##name(struct v3d_compile *c, struct qreg a) \ 861{ \ 862 return vir_emit_nondef(c, vir_inst(op, c->undef, \ 863 a, c->undef)); \ 864} 865 866#define VIR_NODST_2(name, vir_inst, op) \ 867static inline struct qinst * \ 868vir_##name(struct v3d_compile *c, struct qreg a, struct qreg b) \ 869{ \ 870 return vir_emit_nondef(c, vir_inst(op, c->undef, \ 871 a, b)); \ 872} 873 874#define VIR_SFU(name) \ 875static inline struct qreg \ 876vir_##name(struct v3d_compile *c, struct qreg a) \ 877{ \ 878 if (c->devinfo->ver >= 41) { \ 879 return vir_emit_def(c, vir_add_inst(V3D_QPU_A_##name, \ 880 c->undef, \ 881 a, c->undef)); \ 882 } else { \ 883 vir_FMOV_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_##name), a); \ 884 return vir_FMOV(c, 
vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_R4)); \ 885 } \ 886} \ 887static inline struct qinst * \ 888vir_##name##_dest(struct v3d_compile *c, struct qreg dest, \ 889 struct qreg a) \ 890{ \ 891 if (c->devinfo->ver >= 41) { \ 892 return vir_emit_nondef(c, vir_add_inst(V3D_QPU_A_##name, \ 893 dest, \ 894 a, c->undef)); \ 895 } else { \ 896 vir_FMOV_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_##name), a); \ 897 return vir_FMOV_dest(c, dest, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_R4)); \ 898 } \ 899} 900 901#define VIR_A_ALU2(name) VIR_ALU2(name, vir_add_inst, V3D_QPU_A_##name) 902#define VIR_M_ALU2(name) VIR_ALU2(name, vir_mul_inst, V3D_QPU_M_##name) 903#define VIR_A_ALU1(name) VIR_ALU1(name, vir_add_inst, V3D_QPU_A_##name) 904#define VIR_M_ALU1(name) VIR_ALU1(name, vir_mul_inst, V3D_QPU_M_##name) 905#define VIR_A_ALU0(name) VIR_ALU0(name, vir_add_inst, V3D_QPU_A_##name) 906#define VIR_M_ALU0(name) VIR_ALU0(name, vir_mul_inst, V3D_QPU_M_##name) 907#define VIR_A_NODST_2(name) VIR_NODST_2(name, vir_add_inst, V3D_QPU_A_##name) 908#define VIR_M_NODST_2(name) VIR_NODST_2(name, vir_mul_inst, V3D_QPU_M_##name) 909#define VIR_A_NODST_1(name) VIR_NODST_1(name, vir_add_inst, V3D_QPU_A_##name) 910#define VIR_M_NODST_1(name) VIR_NODST_1(name, vir_mul_inst, V3D_QPU_M_##name) 911#define VIR_A_NODST_0(name) VIR_NODST_0(name, vir_add_inst, V3D_QPU_A_##name) 912 913VIR_A_ALU2(FADD) 914VIR_A_ALU2(VFPACK) 915VIR_A_ALU2(FSUB) 916VIR_A_ALU2(FMIN) 917VIR_A_ALU2(FMAX) 918 919VIR_A_ALU2(ADD) 920VIR_A_ALU2(SUB) 921VIR_A_ALU2(SHL) 922VIR_A_ALU2(SHR) 923VIR_A_ALU2(ASR) 924VIR_A_ALU2(ROR) 925VIR_A_ALU2(MIN) 926VIR_A_ALU2(MAX) 927VIR_A_ALU2(UMIN) 928VIR_A_ALU2(UMAX) 929VIR_A_ALU2(AND) 930VIR_A_ALU2(OR) 931VIR_A_ALU2(XOR) 932VIR_A_ALU2(VADD) 933VIR_A_ALU2(VSUB) 934VIR_A_NODST_2(STVPMV) 935VIR_A_ALU1(NOT) 936VIR_A_ALU1(NEG) 937VIR_A_ALU1(FLAPUSH) 938VIR_A_ALU1(FLBPUSH) 939VIR_A_ALU1(FLPOP) 940VIR_A_ALU1(SETMSF) 941VIR_A_ALU1(SETREVF) 942VIR_A_ALU0(TIDX) 943VIR_A_ALU0(EIDX) 944VIR_A_ALU1(LDVPMV_IN) 
945VIR_A_ALU1(LDVPMV_OUT) 946VIR_A_ALU0(TMUWT) 947 948VIR_A_ALU0(FXCD) 949VIR_A_ALU0(XCD) 950VIR_A_ALU0(FYCD) 951VIR_A_ALU0(YCD) 952VIR_A_ALU0(MSF) 953VIR_A_ALU0(REVF) 954VIR_A_NODST_1(VPMSETUP) 955VIR_A_NODST_0(VPMWT) 956VIR_A_ALU2(FCMP) 957VIR_A_ALU2(VFMAX) 958 959VIR_A_ALU1(FROUND) 960VIR_A_ALU1(FTOIN) 961VIR_A_ALU1(FTRUNC) 962VIR_A_ALU1(FTOIZ) 963VIR_A_ALU1(FFLOOR) 964VIR_A_ALU1(FTOUZ) 965VIR_A_ALU1(FCEIL) 966VIR_A_ALU1(FTOC) 967 968VIR_A_ALU1(FDX) 969VIR_A_ALU1(FDY) 970 971VIR_A_ALU1(ITOF) 972VIR_A_ALU1(CLZ) 973VIR_A_ALU1(UTOF) 974 975VIR_M_ALU2(UMUL24) 976VIR_M_ALU2(FMUL) 977VIR_M_ALU2(SMUL24) 978VIR_M_NODST_2(MULTOP) 979 980VIR_M_ALU1(MOV) 981VIR_M_ALU1(FMOV) 982 983VIR_SFU(RECIP) 984VIR_SFU(RSQRT) 985VIR_SFU(EXP) 986VIR_SFU(LOG) 987VIR_SFU(SIN) 988VIR_SFU(RSQRT2) 989 990static inline struct qinst * 991vir_MOV_cond(struct v3d_compile *c, enum v3d_qpu_cond cond, 992 struct qreg dest, struct qreg src) 993{ 994 struct qinst *mov = vir_MOV_dest(c, dest, src); 995 vir_set_cond(mov, cond); 996 return mov; 997} 998 999static inline struct qreg 1000vir_SEL(struct v3d_compile *c, enum v3d_qpu_cond cond, 1001 struct qreg src0, struct qreg src1) 1002{ 1003 struct qreg t = vir_get_temp(c); 1004 vir_MOV_dest(c, t, src1); 1005 vir_MOV_cond(c, cond, t, src0); 1006 return t; 1007} 1008 1009static inline struct qinst * 1010vir_NOP(struct v3d_compile *c) 1011{ 1012 return vir_emit_nondef(c, vir_add_inst(V3D_QPU_A_NOP, 1013 c->undef, c->undef, c->undef)); 1014} 1015 1016static inline struct qreg 1017vir_LDTMU(struct v3d_compile *c) 1018{ 1019 if (c->devinfo->ver >= 41) { 1020 struct qinst *ldtmu = vir_add_inst(V3D_QPU_A_NOP, c->undef, 1021 c->undef, c->undef); 1022 ldtmu->qpu.sig.ldtmu = true; 1023 1024 return vir_emit_def(c, ldtmu); 1025 } else { 1026 vir_NOP(c)->qpu.sig.ldtmu = true; 1027 return vir_MOV(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_R4)); 1028 } 1029} 1030 1031static inline struct qreg 1032vir_UMUL(struct v3d_compile *c, struct qreg src0, struct qreg src1) 1033{ 1034 
vir_MULTOP(c, src0, src1); 1035 return vir_UMUL24(c, src0, src1); 1036} 1037 1038/* 1039static inline struct qreg 1040vir_LOAD_IMM(struct v3d_compile *c, uint32_t val) 1041{ 1042 return vir_emit_def(c, vir_inst(QOP_LOAD_IMM, c->undef, 1043 vir_reg(QFILE_LOAD_IMM, val), c->undef)); 1044} 1045 1046static inline struct qreg 1047vir_LOAD_IMM_U2(struct v3d_compile *c, uint32_t val) 1048{ 1049 return vir_emit_def(c, vir_inst(QOP_LOAD_IMM_U2, c->undef, 1050 vir_reg(QFILE_LOAD_IMM, val), 1051 c->undef)); 1052} 1053static inline struct qreg 1054vir_LOAD_IMM_I2(struct v3d_compile *c, uint32_t val) 1055{ 1056 return vir_emit_def(c, vir_inst(QOP_LOAD_IMM_I2, c->undef, 1057 vir_reg(QFILE_LOAD_IMM, val), 1058 c->undef)); 1059} 1060*/ 1061 1062static inline struct qinst * 1063vir_BRANCH(struct v3d_compile *c, enum v3d_qpu_cond cond) 1064{ 1065 /* The actual uniform_data value will be set at scheduling time */ 1066 return vir_emit_nondef(c, vir_branch_inst(cond, vir_uniform_ui(c, 0))); 1067} 1068 1069#define vir_for_each_block(block, c) \ 1070 list_for_each_entry(struct qblock, block, &c->blocks, link) 1071 1072#define vir_for_each_block_rev(block, c) \ 1073 list_for_each_entry_rev(struct qblock, block, &c->blocks, link) 1074 1075/* Loop over the non-NULL members of the successors array. */ 1076#define vir_for_each_successor(succ, block) \ 1077 for (struct qblock *succ = block->successors[0]; \ 1078 succ != NULL; \ 1079 succ = (succ == block->successors[1] ? 
NULL : \ 1080 block->successors[1])) 1081 1082#define vir_for_each_inst(inst, block) \ 1083 list_for_each_entry(struct qinst, inst, &block->instructions, link) 1084 1085#define vir_for_each_inst_rev(inst, block) \ 1086 list_for_each_entry_rev(struct qinst, inst, &block->instructions, link) 1087 1088#define vir_for_each_inst_safe(inst, block) \ 1089 list_for_each_entry_safe(struct qinst, inst, &block->instructions, link) 1090 1091#define vir_for_each_inst_inorder(inst, c) \ 1092 vir_for_each_block(_block, c) \ 1093 vir_for_each_inst(inst, _block) 1094 1095#endif /* V3D_COMPILER_H */ 1096