1848b8605Smrg/* 2848b8605Smrg * Copyright © 2014 Broadcom 3848b8605Smrg * 4848b8605Smrg * Permission is hereby granted, free of charge, to any person obtaining a 5848b8605Smrg * copy of this software and associated documentation files (the "Software"), 6848b8605Smrg * to deal in the Software without restriction, including without limitation 7848b8605Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8848b8605Smrg * and/or sell copies of the Software, and to permit persons to whom the 9848b8605Smrg * Software is furnished to do so, subject to the following conditions: 10848b8605Smrg * 11848b8605Smrg * The above copyright notice and this permission notice (including the next 12848b8605Smrg * paragraph) shall be included in all copies or substantial portions of the 13848b8605Smrg * Software. 14848b8605Smrg * 15848b8605Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16848b8605Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17848b8605Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18848b8605Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19848b8605Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20848b8605Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21848b8605Smrg * IN THE SOFTWARE. 22848b8605Smrg */ 23848b8605Smrg 24848b8605Smrg#ifndef VC4_QIR_H 25848b8605Smrg#define VC4_QIR_H 26848b8605Smrg 27b8e80941Smrg#include <assert.h> 28b8e80941Smrg#include <stdio.h> 29b8e80941Smrg#include <stdlib.h> 30848b8605Smrg#include <stdbool.h> 31848b8605Smrg#include <stdint.h> 32b8e80941Smrg#include <string.h> 33848b8605Smrg 34b8e80941Smrg#include "util/macros.h" 35b8e80941Smrg#include "compiler/nir/nir.h" 36b8e80941Smrg#include "util/list.h" 37b8e80941Smrg#include "util/u_math.h" 38b8e80941Smrg 39b8e80941Smrg#include "vc4_screen.h" 40b8e80941Smrg#include "vc4_qpu_defines.h" 41b8e80941Smrg#include "vc4_qpu.h" 42b8e80941Smrg#include "kernel/vc4_packet.h" 43b8e80941Smrg#include "pipe/p_state.h" 44b8e80941Smrg 45b8e80941Smrgstruct nir_builder; 46848b8605Smrg 47848b8605Smrgenum qfile { 48848b8605Smrg QFILE_NULL, 49848b8605Smrg QFILE_TEMP, 50848b8605Smrg QFILE_VARY, 51848b8605Smrg QFILE_UNIF, 52b8e80941Smrg QFILE_VPM, 53b8e80941Smrg QFILE_TLB_COLOR_WRITE, 54b8e80941Smrg QFILE_TLB_COLOR_WRITE_MS, 55b8e80941Smrg QFILE_TLB_Z_WRITE, 56b8e80941Smrg QFILE_TLB_STENCIL_SETUP, 57b8e80941Smrg 58b8e80941Smrg /* If tex_s is written on its own without preceding t/r/b setup, it's 59b8e80941Smrg * a direct memory access using the input value, without the sideband 60b8e80941Smrg * uniform load. We represent these in QIR as a separate write 61b8e80941Smrg * destination so we can tell if the sideband uniform is present. 62b8e80941Smrg */ 63b8e80941Smrg QFILE_TEX_S_DIRECT, 64b8e80941Smrg 65b8e80941Smrg QFILE_TEX_S, 66b8e80941Smrg QFILE_TEX_T, 67b8e80941Smrg QFILE_TEX_R, 68b8e80941Smrg QFILE_TEX_B, 69b8e80941Smrg 70b8e80941Smrg /* Payload registers that aren't in the physical register file, so we 71b8e80941Smrg * can just use the corresponding qpu_reg at qpu_emit time. 72b8e80941Smrg */ 73b8e80941Smrg QFILE_FRAG_X, 74b8e80941Smrg QFILE_FRAG_Y, 75b8e80941Smrg QFILE_FRAG_REV_FLAG, 76b8e80941Smrg QFILE_QPU_ELEMENT, 77b8e80941Smrg 78b8e80941Smrg /** 79b8e80941Smrg * Stores an immediate value in the index field that will be used 80b8e80941Smrg * directly by qpu_load_imm(). 81b8e80941Smrg */ 82b8e80941Smrg QFILE_LOAD_IMM, 83b8e80941Smrg 84b8e80941Smrg /** 85b8e80941Smrg * Stores an immediate value in the index field that can be turned 86b8e80941Smrg * into a small immediate field by qpu_encode_small_immediate(). 87b8e80941Smrg */ 88b8e80941Smrg QFILE_SMALL_IMM, 89848b8605Smrg}; 90848b8605Smrg 91848b8605Smrgstruct qreg { 92848b8605Smrg enum qfile file; 93848b8605Smrg uint32_t index; 94b8e80941Smrg int pack; 95848b8605Smrg}; 96848b8605Smrg 97b8e80941Smrgstatic inline struct qreg qir_reg(enum qfile file, uint32_t index) 98b8e80941Smrg{ 99b8e80941Smrg return (struct qreg){file, index}; 100b8e80941Smrg} 101b8e80941Smrg 102848b8605Smrgenum qop { 103848b8605Smrg QOP_UNDEF, 104848b8605Smrg QOP_MOV, 105b8e80941Smrg QOP_FMOV, 106b8e80941Smrg QOP_MMOV, 107848b8605Smrg QOP_FADD, 108848b8605Smrg QOP_FSUB, 109848b8605Smrg QOP_FMUL, 110b8e80941Smrg QOP_V8MULD, 111b8e80941Smrg QOP_V8MIN, 112b8e80941Smrg QOP_V8MAX, 113b8e80941Smrg QOP_V8ADDS, 114b8e80941Smrg QOP_V8SUBS, 115b8e80941Smrg QOP_MUL24, 116848b8605Smrg QOP_FMIN, 117848b8605Smrg QOP_FMAX, 118848b8605Smrg QOP_FMINABS, 119848b8605Smrg QOP_FMAXABS, 120b8e80941Smrg QOP_ADD, 121b8e80941Smrg QOP_SUB, 122b8e80941Smrg QOP_SHL, 123b8e80941Smrg QOP_SHR, 124b8e80941Smrg QOP_ASR, 125b8e80941Smrg QOP_MIN, 126b8e80941Smrg QOP_MIN_NOIMM, 127b8e80941Smrg QOP_MAX, 128b8e80941Smrg QOP_AND, 129b8e80941Smrg QOP_OR, 130b8e80941Smrg QOP_XOR, 131b8e80941Smrg QOP_NOT, 132848b8605Smrg 133848b8605Smrg QOP_FTOI, 134848b8605Smrg QOP_ITOF, 135848b8605Smrg QOP_RCP, 136848b8605Smrg QOP_RSQ, 137848b8605Smrg QOP_EXP2, 138848b8605Smrg QOP_LOG2, 139848b8605Smrg QOP_VW_SETUP, 140848b8605Smrg QOP_VR_SETUP, 141848b8605Smrg QOP_TLB_COLOR_READ, 142b8e80941Smrg QOP_MS_MASK, 143848b8605Smrg QOP_VARY_ADD_C, 144848b8605Smrg 145848b8605Smrg QOP_FRAG_Z, 146b8e80941Smrg QOP_FRAG_W, 147b8e80941Smrg 148848b8605Smrg /** 149848b8605Smrg * Signal of texture read being necessary and then reading r4 into 150848b8605Smrg * the destination 151848b8605Smrg */ 152848b8605Smrg QOP_TEX_RESULT, 153b8e80941Smrg 154b8e80941Smrg /** 155b8e80941Smrg * Insert the signal for switching threads in a threaded fragment 156b8e80941Smrg * shader. No value can be live in an accumulator across a thrsw. 157b8e80941Smrg * 158b8e80941Smrg * At the QPU level, this will have several delay slots before the 159b8e80941Smrg * switch happens. Those slots are the responsibility of the 160b8e80941Smrg * scheduler. 161b8e80941Smrg */ 162b8e80941Smrg QOP_THRSW, 163b8e80941Smrg 164b8e80941Smrg /* 32-bit immediate loaded to each SIMD channel */ 165b8e80941Smrg QOP_LOAD_IMM, 166b8e80941Smrg 167b8e80941Smrg /* 32-bit immediate divided into 16 2-bit unsigned int values and 168b8e80941Smrg * loaded to each corresponding SIMD channel. 169b8e80941Smrg */ 170b8e80941Smrg QOP_LOAD_IMM_U2, 171b8e80941Smrg /* 32-bit immediate divided into 16 2-bit signed int values and 172b8e80941Smrg * loaded to each corresponding SIMD channel. 173b8e80941Smrg */ 174b8e80941Smrg QOP_LOAD_IMM_I2, 175b8e80941Smrg 176b8e80941Smrg QOP_ROT_MUL, 177b8e80941Smrg 178b8e80941Smrg /* Jumps to block->successor[0] if the qinst->cond (as a 179b8e80941Smrg * QPU_COND_BRANCH_*) passes, or block->successor[1] if not. Note 180b8e80941Smrg * that block->successor[1] may be unset if the condition is ALWAYS. 181b8e80941Smrg */ 182b8e80941Smrg QOP_BRANCH, 183b8e80941Smrg 184b8e80941Smrg /* Emits an ADD from src[0] to src[1], where src[0] must be a 185b8e80941Smrg * QOP_LOAD_IMM result and src[1] is a QUNIFORM_UNIFORMS_ADDRESS, 186b8e80941Smrg * required by the kernel as part of its branch validation. 187b8e80941Smrg */ 188b8e80941Smrg QOP_UNIFORMS_RESET, 189848b8605Smrg}; 190848b8605Smrg 191b8e80941Smrgstruct queued_qpu_inst { 192b8e80941Smrg struct list_head link; 193b8e80941Smrg uint64_t inst; 194848b8605Smrg}; 195848b8605Smrg 196848b8605Smrgstruct qinst { 197b8e80941Smrg struct list_head link; 198848b8605Smrg 199848b8605Smrg enum qop op; 200848b8605Smrg struct qreg dst; 201b8e80941Smrg struct qreg src[3]; 202b8e80941Smrg bool sf; 203b8e80941Smrg bool cond_is_exec_mask; 204b8e80941Smrg uint8_t cond; 205848b8605Smrg}; 206848b8605Smrg 207848b8605Smrgenum qstage { 208848b8605Smrg /** 209848b8605Smrg * Coordinate shader, runs during binning, before the VS, and just 210848b8605Smrg * outputs position. 211848b8605Smrg */ 212848b8605Smrg QSTAGE_COORD, 213848b8605Smrg QSTAGE_VERT, 214848b8605Smrg QSTAGE_FRAG, 215848b8605Smrg}; 216848b8605Smrg 217848b8605Smrgenum quniform_contents { 218848b8605Smrg /** 219848b8605Smrg * Indicates that a constant 32-bit value is copied from the program's 220848b8605Smrg * uniform contents. 221848b8605Smrg */ 222848b8605Smrg QUNIFORM_CONSTANT, 223848b8605Smrg /** 224848b8605Smrg * Indicates that the program's uniform contents are used as an index 225848b8605Smrg * into the GL uniform storage. 226848b8605Smrg */ 227848b8605Smrg QUNIFORM_UNIFORM, 228848b8605Smrg 229848b8605Smrg /** @{ 230848b8605Smrg * Scaling factors from clip coordinates to relative to the viewport 231848b8605Smrg * center. 232848b8605Smrg * 233848b8605Smrg * This is used by the coordinate and vertex shaders to produce the 234848b8605Smrg * 32-bit entry consisting of 2 16-bit fields with 12.4 signed fixed 235848b8605Smrg * point offsets from the viewport ccenter. 236848b8605Smrg */ 237848b8605Smrg QUNIFORM_VIEWPORT_X_SCALE, 238848b8605Smrg QUNIFORM_VIEWPORT_Y_SCALE, 239848b8605Smrg /** @} */ 240848b8605Smrg 241848b8605Smrg QUNIFORM_VIEWPORT_Z_OFFSET, 242848b8605Smrg QUNIFORM_VIEWPORT_Z_SCALE, 243848b8605Smrg 244b8e80941Smrg QUNIFORM_USER_CLIP_PLANE, 245b8e80941Smrg 246848b8605Smrg /** 247848b8605Smrg * A reference to a texture config parameter 0 uniform. 248848b8605Smrg * 249848b8605Smrg * This is a uniform implicitly loaded with a QPU_W_TMU* write, which 250848b8605Smrg * defines texture type, miplevels, and such. It will be found as a 251848b8605Smrg * parameter to the first QOP_TEX_[STRB] instruction in a sequence. 252848b8605Smrg */ 253848b8605Smrg QUNIFORM_TEXTURE_CONFIG_P0, 254848b8605Smrg 255848b8605Smrg /** 256848b8605Smrg * A reference to a texture config parameter 1 uniform. 257848b8605Smrg * 258848b8605Smrg * This is a uniform implicitly loaded with a QPU_W_TMU* write, which 259848b8605Smrg * defines texture width, height, filters, and wrap modes. It will be 260848b8605Smrg * found as a parameter to the second QOP_TEX_[STRB] instruction in a 261848b8605Smrg * sequence. 262848b8605Smrg */ 263848b8605Smrg QUNIFORM_TEXTURE_CONFIG_P1, 264848b8605Smrg 265b8e80941Smrg /** A reference to a texture config parameter 2 cubemap stride uniform */ 266b8e80941Smrg QUNIFORM_TEXTURE_CONFIG_P2, 267b8e80941Smrg 268b8e80941Smrg QUNIFORM_TEXTURE_FIRST_LEVEL, 269b8e80941Smrg 270b8e80941Smrg QUNIFORM_TEXTURE_MSAA_ADDR, 271b8e80941Smrg 272b8e80941Smrg QUNIFORM_UBO0_ADDR, 273b8e80941Smrg QUNIFORM_UBO1_ADDR, 274b8e80941Smrg 275848b8605Smrg QUNIFORM_TEXRECT_SCALE_X, 276848b8605Smrg QUNIFORM_TEXRECT_SCALE_Y, 277848b8605Smrg 278b8e80941Smrg QUNIFORM_TEXTURE_BORDER_COLOR, 279b8e80941Smrg 280b8e80941Smrg QUNIFORM_BLEND_CONST_COLOR_X, 281b8e80941Smrg QUNIFORM_BLEND_CONST_COLOR_Y, 282b8e80941Smrg QUNIFORM_BLEND_CONST_COLOR_Z, 283b8e80941Smrg QUNIFORM_BLEND_CONST_COLOR_W, 284b8e80941Smrg QUNIFORM_BLEND_CONST_COLOR_RGBA, 285b8e80941Smrg QUNIFORM_BLEND_CONST_COLOR_AAAA, 286b8e80941Smrg 287b8e80941Smrg QUNIFORM_STENCIL, 288b8e80941Smrg 289b8e80941Smrg QUNIFORM_ALPHA_REF, 290b8e80941Smrg QUNIFORM_SAMPLE_MASK, 291b8e80941Smrg 292b8e80941Smrg /* Placeholder uniform that will be updated by the kernel when used by 293b8e80941Smrg * an instruction writing to QPU_W_UNIFORMS_ADDRESS. 294b8e80941Smrg */ 295b8e80941Smrg QUNIFORM_UNIFORMS_ADDRESS, 296b8e80941Smrg}; 297b8e80941Smrg 298b8e80941Smrgstruct vc4_varying_slot { 299b8e80941Smrg uint8_t slot; 300b8e80941Smrg uint8_t swizzle; 301b8e80941Smrg}; 302b8e80941Smrg 303b8e80941Smrgstruct vc4_key { 304b8e80941Smrg struct vc4_uncompiled_shader *shader_state; 305b8e80941Smrg struct { 306b8e80941Smrg enum pipe_format format; 307b8e80941Smrg uint8_t swizzle[4]; 308b8e80941Smrg union { 309b8e80941Smrg struct { 310b8e80941Smrg unsigned compare_mode:1; 311b8e80941Smrg unsigned compare_func:3; 312b8e80941Smrg unsigned wrap_s:3; 313b8e80941Smrg unsigned wrap_t:3; 314b8e80941Smrg bool force_first_level:1; 315b8e80941Smrg }; 316b8e80941Smrg struct { 317b8e80941Smrg uint16_t msaa_width, msaa_height; 318b8e80941Smrg }; 319b8e80941Smrg }; 320b8e80941Smrg } tex[VC4_MAX_TEXTURE_SAMPLERS]; 321b8e80941Smrg uint8_t ucp_enables; 322848b8605Smrg}; 323848b8605Smrg 324b8e80941Smrgstruct vc4_fs_key { 325b8e80941Smrg struct vc4_key base; 326b8e80941Smrg enum pipe_format color_format; 327b8e80941Smrg bool depth_enabled; 328b8e80941Smrg bool stencil_enabled; 329b8e80941Smrg bool stencil_twoside; 330b8e80941Smrg bool stencil_full_writemasks; 331b8e80941Smrg bool is_points; 332b8e80941Smrg bool is_lines; 333b8e80941Smrg bool point_coord_upper_left; 334b8e80941Smrg bool light_twoside; 335b8e80941Smrg bool msaa; 336b8e80941Smrg bool sample_coverage; 337b8e80941Smrg bool sample_alpha_to_coverage; 338b8e80941Smrg bool sample_alpha_to_one; 339b8e80941Smrg uint8_t alpha_test_func; 340b8e80941Smrg uint8_t logicop_func; 341b8e80941Smrg uint32_t point_sprite_mask; 342b8e80941Smrg uint32_t ubo_1_size; 343b8e80941Smrg 344b8e80941Smrg struct pipe_rt_blend_state blend; 345b8e80941Smrg}; 346b8e80941Smrg 347b8e80941Smrgstruct vc4_vs_key { 348b8e80941Smrg struct vc4_key base; 349b8e80941Smrg 350b8e80941Smrg const struct vc4_fs_inputs *fs_inputs; 351b8e80941Smrg enum pipe_format attr_formats[8]; 352b8e80941Smrg bool is_coord; 353b8e80941Smrg bool per_vertex_point_size; 354b8e80941Smrg bool clamp_color; 355b8e80941Smrg}; 356b8e80941Smrg 357b8e80941Smrg/** A basic block of QIR intructions. */ 358b8e80941Smrgstruct qblock { 359b8e80941Smrg struct list_head link; 360b8e80941Smrg 361b8e80941Smrg struct list_head instructions; 362b8e80941Smrg struct list_head qpu_inst_list; 363b8e80941Smrg 364b8e80941Smrg struct set *predecessors; 365b8e80941Smrg struct qblock *successors[2]; 366b8e80941Smrg 367b8e80941Smrg int index; 368b8e80941Smrg 369b8e80941Smrg /* Instruction IPs for the first and last instruction of the block. 370b8e80941Smrg * Set by vc4_qpu_schedule.c. 371b8e80941Smrg */ 372b8e80941Smrg uint32_t start_qpu_ip; 373b8e80941Smrg uint32_t end_qpu_ip; 374b8e80941Smrg 375b8e80941Smrg /* Instruction IP for the branch instruction of the block. Set by 376b8e80941Smrg * vc4_qpu_schedule.c. 377b8e80941Smrg */ 378b8e80941Smrg uint32_t branch_qpu_ip; 379b8e80941Smrg 380b8e80941Smrg /** @{ used by vc4_qir_live_variables.c */ 381b8e80941Smrg BITSET_WORD *def; 382b8e80941Smrg BITSET_WORD *use; 383b8e80941Smrg BITSET_WORD *live_in; 384b8e80941Smrg BITSET_WORD *live_out; 385b8e80941Smrg int start_ip, end_ip; 386b8e80941Smrg /** @} */ 387b8e80941Smrg}; 388b8e80941Smrg 389b8e80941Smrgstruct vc4_compile { 390b8e80941Smrg struct vc4_context *vc4; 391b8e80941Smrg nir_shader *s; 392b8e80941Smrg nir_function_impl *impl; 393b8e80941Smrg struct exec_list *cf_node_list; 394b8e80941Smrg 395b8e80941Smrg /** 396b8e80941Smrg * Mapping from nir_register * or nir_ssa_def * to array of struct 397b8e80941Smrg * qreg for the values. 398b8e80941Smrg */ 399b8e80941Smrg struct hash_table *def_ht; 400b8e80941Smrg 401b8e80941Smrg /* For each temp, the instruction generating its value. */ 402b8e80941Smrg struct qinst **defs; 403b8e80941Smrg uint32_t defs_array_size; 404b8e80941Smrg 405b8e80941Smrg /** 406b8e80941Smrg * Inputs to the shader, arranged by TGSI declaration order. 407b8e80941Smrg * 408b8e80941Smrg * Not all fragment shader QFILE_VARY reads are present in this array. 409b8e80941Smrg */ 410b8e80941Smrg struct qreg *inputs; 411b8e80941Smrg struct qreg *outputs; 412b8e80941Smrg bool msaa_per_sample_output; 413b8e80941Smrg struct qreg color_reads[VC4_MAX_SAMPLES]; 414b8e80941Smrg struct qreg sample_colors[VC4_MAX_SAMPLES]; 415b8e80941Smrg uint32_t inputs_array_size; 416b8e80941Smrg uint32_t outputs_array_size; 417b8e80941Smrg uint32_t uniforms_array_size; 418b8e80941Smrg 419b8e80941Smrg /* State for whether we're executing on each channel currently. 0 if 420b8e80941Smrg * yes, otherwise a block number + 1 that the channel jumped to. 421b8e80941Smrg */ 422b8e80941Smrg struct qreg execute; 423b8e80941Smrg 424b8e80941Smrg struct qreg line_x, point_x, point_y; 425b8e80941Smrg /** boolean (~0 -> true) if the fragment has been discarded. */ 426b8e80941Smrg struct qreg discard; 427b8e80941Smrg struct qreg payload_FRAG_Z; 428b8e80941Smrg struct qreg payload_FRAG_W; 429b8e80941Smrg 430b8e80941Smrg uint8_t vattr_sizes[8]; 431b8e80941Smrg 432b8e80941Smrg /** 433b8e80941Smrg * Array of the VARYING_SLOT_* of all FS QFILE_VARY reads. 434b8e80941Smrg * 435b8e80941Smrg * This includes those that aren't part of the VPM varyings, like 436b8e80941Smrg * point/line coordinates. 437b8e80941Smrg */ 438b8e80941Smrg struct vc4_varying_slot *input_slots; 439b8e80941Smrg uint32_t num_input_slots; 440b8e80941Smrg uint32_t input_slots_array_size; 441b8e80941Smrg 442b8e80941Smrg /** 443b8e80941Smrg * An entry per outputs[] in the VS indicating what the VARYING_SLOT_* 444b8e80941Smrg * of the output is. Used to emit from the VS in the order that the 445b8e80941Smrg * FS needs. 446b8e80941Smrg */ 447b8e80941Smrg struct vc4_varying_slot *output_slots; 448b8e80941Smrg 449b8e80941Smrg struct pipe_shader_state *shader_state; 450b8e80941Smrg struct vc4_key *key; 451b8e80941Smrg struct vc4_fs_key *fs_key; 452b8e80941Smrg struct vc4_vs_key *vs_key; 453b8e80941Smrg 454b8e80941Smrg /* Live ranges of temps. */ 455b8e80941Smrg int *temp_start, *temp_end; 456b8e80941Smrg 457b8e80941Smrg uint32_t *uniform_data; 458b8e80941Smrg enum quniform_contents *uniform_contents; 459b8e80941Smrg uint32_t uniform_array_size; 460b8e80941Smrg uint32_t num_uniforms; 461b8e80941Smrg uint32_t num_outputs; 462b8e80941Smrg uint32_t num_texture_samples; 463b8e80941Smrg uint32_t output_position_index; 464b8e80941Smrg uint32_t output_color_index; 465b8e80941Smrg uint32_t output_point_size_index; 466b8e80941Smrg uint32_t output_sample_mask_index; 467b8e80941Smrg 468848b8605Smrg struct qreg undef; 469848b8605Smrg enum qstage stage; 470848b8605Smrg uint32_t num_temps; 471848b8605Smrg 472b8e80941Smrg struct list_head blocks; 473b8e80941Smrg int next_block_index; 474b8e80941Smrg struct qblock *cur_block; 475b8e80941Smrg struct qblock *loop_cont_block; 476b8e80941Smrg struct qblock *loop_break_block; 477b8e80941Smrg struct qblock *last_top_block; 478b8e80941Smrg 479b8e80941Smrg struct list_head qpu_inst_list; 480b8e80941Smrg 481b8e80941Smrg /* Pre-QPU-scheduled instruction containing the last THRSW */ 482b8e80941Smrg uint64_t *last_thrsw; 483b8e80941Smrg 484848b8605Smrg uint64_t *qpu_insts; 485848b8605Smrg uint32_t qpu_inst_count; 486848b8605Smrg uint32_t qpu_inst_size; 487848b8605Smrg uint32_t num_inputs; 488b8e80941Smrg 489b8e80941Smrg /** 490b8e80941Smrg * Number of inputs from num_inputs remaining to be queued to the read 491b8e80941Smrg * FIFO in the VS/CS. 492b8e80941Smrg */ 493b8e80941Smrg uint32_t num_inputs_remaining; 494b8e80941Smrg 495b8e80941Smrg /* Number of inputs currently in the read FIFO for the VS/CS */ 496b8e80941Smrg uint32_t num_inputs_in_fifo; 497b8e80941Smrg 498b8e80941Smrg /** Next offset in the VPM to read from in the VS/CS */ 499b8e80941Smrg uint32_t vpm_read_offset; 500b8e80941Smrg 501b8e80941Smrg uint32_t program_id; 502b8e80941Smrg uint32_t variant_id; 503b8e80941Smrg 504b8e80941Smrg /* Set to compile program in threaded FS mode, where SIG_THREAD_SWITCH 505b8e80941Smrg * is used to hide texturing latency at the cost of limiting ourselves 506b8e80941Smrg * to the bottom half of physical reg space. 507b8e80941Smrg */ 508b8e80941Smrg bool fs_threaded; 509b8e80941Smrg 510b8e80941Smrg bool last_thrsw_at_top_level; 511b8e80941Smrg 512b8e80941Smrg bool failed; 513848b8605Smrg}; 514848b8605Smrg 515b8e80941Smrg/* Special nir_load_input intrinsic index for loading the current TLB 516b8e80941Smrg * destination color. 517b8e80941Smrg */ 518b8e80941Smrg#define VC4_NIR_TLB_COLOR_READ_INPUT 2000000000 519b8e80941Smrg 520b8e80941Smrg#define VC4_NIR_MS_MASK_OUTPUT 2000000000 521b8e80941Smrg 522b8e80941Smrgstruct vc4_compile *qir_compile_init(void); 523b8e80941Smrgvoid qir_compile_destroy(struct vc4_compile *c); 524b8e80941Smrgstruct qblock *qir_new_block(struct vc4_compile *c); 525b8e80941Smrgvoid qir_set_emit_block(struct vc4_compile *c, struct qblock *block); 526b8e80941Smrgvoid qir_link_blocks(struct qblock *predecessor, struct qblock *successor); 527b8e80941Smrgstruct qblock *qir_entry_block(struct vc4_compile *c); 528b8e80941Smrgstruct qblock *qir_exit_block(struct vc4_compile *c); 529848b8605Smrgstruct qinst *qir_inst(enum qop op, struct qreg dst, 530848b8605Smrg struct qreg src0, struct qreg src1); 531b8e80941Smrgvoid qir_remove_instruction(struct vc4_compile *c, struct qinst *qinst); 532b8e80941Smrgstruct qreg qir_uniform(struct vc4_compile *c, 533b8e80941Smrg enum quniform_contents contents, 534b8e80941Smrg uint32_t data); 535b8e80941Smrgvoid qir_schedule_instructions(struct vc4_compile *c); 536b8e80941Smrgvoid qir_reorder_uniforms(struct vc4_compile *c); 537b8e80941Smrgvoid qir_emit_uniform_stream_resets(struct vc4_compile *c); 538b8e80941Smrg 539b8e80941Smrgstruct qreg qir_emit_def(struct vc4_compile *c, struct qinst *inst); 540b8e80941Smrgstruct qinst *qir_emit_nondef(struct vc4_compile *c, struct qinst *inst); 541b8e80941Smrg 542b8e80941Smrgstruct qreg qir_get_temp(struct vc4_compile *c); 543b8e80941Smrgvoid qir_calculate_live_intervals(struct vc4_compile *c); 544b8e80941Smrgint qir_get_nsrc(struct qinst *inst); 545b8e80941Smrgint qir_get_non_sideband_nsrc(struct qinst *inst); 546b8e80941Smrgint qir_get_tex_uniform_src(struct qinst *inst); 547848b8605Smrgbool qir_reg_equals(struct qreg a, struct qreg b); 548b8e80941Smrgbool qir_has_side_effects(struct vc4_compile *c, struct qinst *inst); 549b8e80941Smrgbool qir_has_side_effect_reads(struct vc4_compile *c, struct qinst *inst); 550b8e80941Smrgbool qir_has_uniform_read(struct qinst *inst); 551b8e80941Smrgbool qir_is_mul(struct qinst *inst); 552b8e80941Smrgbool qir_is_raw_mov(struct qinst *inst); 553b8e80941Smrgbool qir_is_tex(struct qinst *inst); 554b8e80941Smrgbool qir_has_implicit_tex_uniform(struct qinst *inst); 555b8e80941Smrgbool qir_is_float_input(struct qinst *inst); 556b8e80941Smrgbool qir_depends_on_flags(struct qinst *inst); 557b8e80941Smrgbool qir_writes_r4(struct qinst *inst); 558b8e80941Smrgstruct qreg qir_follow_movs(struct vc4_compile *c, struct qreg reg); 559b8e80941Smrguint8_t qir_channels_written(struct qinst *inst); 560b8e80941Smrg 561b8e80941Smrgvoid qir_dump(struct vc4_compile *c); 562b8e80941Smrgvoid qir_dump_inst(struct vc4_compile *c, struct qinst *inst); 563b8e80941Smrgchar *qir_describe_uniform(enum quniform_contents contents, uint32_t data, 564b8e80941Smrg const uint32_t *uniforms); 565848b8605Smrgconst char *qir_get_stage_name(enum qstage stage); 566848b8605Smrg 567b8e80941Smrgvoid qir_validate(struct vc4_compile *c); 568b8e80941Smrg 569b8e80941Smrgvoid qir_optimize(struct vc4_compile *c); 570b8e80941Smrgbool qir_opt_algebraic(struct vc4_compile *c); 571b8e80941Smrgbool qir_opt_coalesce_ff_writes(struct vc4_compile *c); 572b8e80941Smrgbool qir_opt_constant_folding(struct vc4_compile *c); 573b8e80941Smrgbool qir_opt_copy_propagation(struct vc4_compile *c); 574b8e80941Smrgbool qir_opt_dead_code(struct vc4_compile *c); 575b8e80941Smrgbool qir_opt_peephole_sf(struct vc4_compile *c); 576b8e80941Smrgbool qir_opt_small_immediates(struct vc4_compile *c); 577b8e80941Smrgbool qir_opt_vpm(struct vc4_compile *c); 578b8e80941Smrgvoid vc4_nir_lower_blend(nir_shader *s, struct vc4_compile *c); 579b8e80941Smrgvoid vc4_nir_lower_io(nir_shader *s, struct vc4_compile *c); 580b8e80941Smrgnir_ssa_def *vc4_nir_get_swizzled_channel(struct nir_builder *b, 581b8e80941Smrg nir_ssa_def **srcs, int swiz); 582b8e80941Smrgvoid vc4_nir_lower_txf_ms(nir_shader *s, struct vc4_compile *c); 583b8e80941Smrgvoid qir_lower_uniforms(struct vc4_compile *c); 584b8e80941Smrg 585b8e80941Smrguint32_t qpu_schedule_instructions(struct vc4_compile *c); 586b8e80941Smrg 587b8e80941Smrgvoid qir_SF(struct vc4_compile *c, struct qreg src); 588b8e80941Smrg 589b8e80941Smrgstatic inline struct qreg 590b8e80941Smrgqir_uniform_ui(struct vc4_compile *c, uint32_t ui) 591b8e80941Smrg{ 592b8e80941Smrg return qir_uniform(c, QUNIFORM_CONSTANT, ui); 593b8e80941Smrg} 594b8e80941Smrg 595b8e80941Smrgstatic inline struct qreg 596b8e80941Smrgqir_uniform_f(struct vc4_compile *c, float f) 597b8e80941Smrg{ 598b8e80941Smrg return qir_uniform(c, QUNIFORM_CONSTANT, fui(f)); 599b8e80941Smrg} 600848b8605Smrg 601848b8605Smrg#define QIR_ALU0(name) \ 602848b8605Smrgstatic inline struct qreg \ 603b8e80941Smrgqir_##name(struct vc4_compile *c) \ 604b8e80941Smrg{ \ 605b8e80941Smrg return qir_emit_def(c, qir_inst(QOP_##name, c->undef, \ 606b8e80941Smrg c->undef, c->undef)); \ 607b8e80941Smrg} \ 608b8e80941Smrgstatic inline struct qinst * \ 609b8e80941Smrgqir_##name##_dest(struct vc4_compile *c, struct qreg dest) \ 610848b8605Smrg{ \ 611b8e80941Smrg return qir_emit_nondef(c, qir_inst(QOP_##name, dest, \ 612b8e80941Smrg c->undef, c->undef)); \ 613848b8605Smrg} 614848b8605Smrg 615848b8605Smrg#define QIR_ALU1(name) \ 616848b8605Smrgstatic inline struct qreg \ 617b8e80941Smrgqir_##name(struct vc4_compile *c, struct qreg a) \ 618848b8605Smrg{ \ 619b8e80941Smrg return qir_emit_def(c, qir_inst(QOP_##name, c->undef, \ 620b8e80941Smrg a, c->undef)); \ 621b8e80941Smrg} \ 622b8e80941Smrgstatic inline struct qinst * \ 623b8e80941Smrgqir_##name##_dest(struct vc4_compile *c, struct qreg dest, \ 624b8e80941Smrg struct qreg a) \ 625b8e80941Smrg{ \ 626b8e80941Smrg return qir_emit_nondef(c, qir_inst(QOP_##name, dest, a, \ 627b8e80941Smrg c->undef)); \ 628848b8605Smrg} 629848b8605Smrg 630848b8605Smrg#define QIR_ALU2(name) \ 631848b8605Smrgstatic inline struct qreg \ 632b8e80941Smrgqir_##name(struct vc4_compile *c, struct qreg a, struct qreg b) \ 633b8e80941Smrg{ \ 634b8e80941Smrg return qir_emit_def(c, qir_inst(QOP_##name, c->undef, a, b)); \ 635b8e80941Smrg} \ 636b8e80941Smrgstatic inline struct qinst * \ 637b8e80941Smrgqir_##name##_dest(struct vc4_compile *c, struct qreg dest, \ 638b8e80941Smrg struct qreg a, struct qreg b) \ 639848b8605Smrg{ \ 640b8e80941Smrg return qir_emit_nondef(c, qir_inst(QOP_##name, dest, a, b)); \ 641848b8605Smrg} 642848b8605Smrg 643848b8605Smrg#define QIR_NODST_1(name) \ 644b8e80941Smrgstatic inline struct qinst * \ 645b8e80941Smrgqir_##name(struct vc4_compile *c, struct qreg a) \ 646848b8605Smrg{ \ 647b8e80941Smrg return qir_emit_nondef(c, qir_inst(QOP_##name, c->undef, \ 648b8e80941Smrg a, c->undef)); \ 649848b8605Smrg} 650848b8605Smrg 651848b8605Smrg#define QIR_NODST_2(name) \ 652b8e80941Smrgstatic inline struct qinst * \ 653b8e80941Smrgqir_##name(struct vc4_compile *c, struct qreg a, struct qreg b) \ 654848b8605Smrg{ \ 655b8e80941Smrg return qir_emit_nondef(c, qir_inst(QOP_##name, c->undef, \ 656b8e80941Smrg a, b)); \ 657b8e80941Smrg} 658b8e80941Smrg 659b8e80941Smrg#define QIR_PAYLOAD(name) \ 660b8e80941Smrgstatic inline struct qreg \ 661b8e80941Smrgqir_##name(struct vc4_compile *c) \ 662b8e80941Smrg{ \ 663b8e80941Smrg struct qreg *payload = &c->payload_##name; \ 664b8e80941Smrg if (payload->file != QFILE_NULL) \ 665b8e80941Smrg return *payload; \ 666b8e80941Smrg *payload = qir_get_temp(c); \ 667b8e80941Smrg struct qinst *inst = qir_inst(QOP_##name, *payload, \ 668b8e80941Smrg c->undef, c->undef); \ 669b8e80941Smrg struct qblock *entry = qir_entry_block(c); \ 670b8e80941Smrg list_add(&inst->link, &entry->instructions); \ 671b8e80941Smrg c->defs[payload->index] = inst; \ 672b8e80941Smrg return *payload; \ 673848b8605Smrg} 674848b8605Smrg 675848b8605SmrgQIR_ALU1(MOV) 676b8e80941SmrgQIR_ALU1(FMOV) 677b8e80941SmrgQIR_ALU1(MMOV) 678848b8605SmrgQIR_ALU2(FADD) 679848b8605SmrgQIR_ALU2(FSUB) 680848b8605SmrgQIR_ALU2(FMUL) 681b8e80941SmrgQIR_ALU2(V8MULD) 682b8e80941SmrgQIR_ALU2(V8MIN) 683b8e80941SmrgQIR_ALU2(V8MAX) 684b8e80941SmrgQIR_ALU2(V8ADDS) 685b8e80941SmrgQIR_ALU2(V8SUBS) 686b8e80941SmrgQIR_ALU2(MUL24) 687848b8605SmrgQIR_ALU2(FMIN) 688848b8605SmrgQIR_ALU2(FMAX) 689848b8605SmrgQIR_ALU2(FMINABS) 690848b8605SmrgQIR_ALU2(FMAXABS) 691848b8605SmrgQIR_ALU1(FTOI) 692848b8605SmrgQIR_ALU1(ITOF) 693b8e80941Smrg 694b8e80941SmrgQIR_ALU2(ADD) 695b8e80941SmrgQIR_ALU2(SUB) 696b8e80941SmrgQIR_ALU2(SHL) 697b8e80941SmrgQIR_ALU2(SHR) 698b8e80941SmrgQIR_ALU2(ASR) 699b8e80941SmrgQIR_ALU2(MIN) 700b8e80941SmrgQIR_ALU2(MIN_NOIMM) 701b8e80941SmrgQIR_ALU2(MAX) 702b8e80941SmrgQIR_ALU2(AND) 703b8e80941SmrgQIR_ALU2(OR) 704b8e80941SmrgQIR_ALU2(XOR) 705b8e80941SmrgQIR_ALU1(NOT) 706b8e80941Smrg 707848b8605SmrgQIR_ALU1(RCP) 708848b8605SmrgQIR_ALU1(RSQ) 709848b8605SmrgQIR_ALU1(EXP2) 710848b8605SmrgQIR_ALU1(LOG2) 711848b8605SmrgQIR_ALU1(VARY_ADD_C) 712b8e80941SmrgQIR_PAYLOAD(FRAG_Z) 713b8e80941SmrgQIR_PAYLOAD(FRAG_W) 714b8e80941SmrgQIR_ALU0(TEX_RESULT) 715b8e80941SmrgQIR_ALU0(TLB_COLOR_READ) 716b8e80941SmrgQIR_NODST_1(MS_MASK) 717848b8605Smrg 718848b8605Smrgstatic inline struct qreg 719b8e80941Smrgqir_SEL(struct vc4_compile *c, uint8_t cond, struct qreg src0, struct qreg src1) 720848b8605Smrg{ 721848b8605Smrg struct qreg t = qir_get_temp(c); 722b8e80941Smrg qir_MOV_dest(c, t, src1); 723b8e80941Smrg qir_MOV_dest(c, t, src0)->cond = cond; 724848b8605Smrg return t; 725848b8605Smrg} 726848b8605Smrg 727848b8605Smrgstatic inline struct qreg 728b8e80941Smrgqir_UNPACK_8_F(struct vc4_compile *c, struct qreg src, int i) 729848b8605Smrg{ 730b8e80941Smrg struct qreg t = qir_FMOV(c, src); 731b8e80941Smrg c->defs[t.index]->src[0].pack = QPU_UNPACK_8A + i; 732b8e80941Smrg return t; 733b8e80941Smrg} 734b8e80941Smrg 735b8e80941Smrgstatic inline struct qreg 736b8e80941Smrgqir_UNPACK_8_I(struct vc4_compile *c, struct qreg src, int i) 737b8e80941Smrg{ 738b8e80941Smrg struct qreg t = qir_MOV(c, src); 739b8e80941Smrg c->defs[t.index]->src[0].pack = QPU_UNPACK_8A + i; 740b8e80941Smrg return t; 741b8e80941Smrg} 742b8e80941Smrg 743b8e80941Smrgstatic inline struct qreg 744b8e80941Smrgqir_UNPACK_16_F(struct vc4_compile *c, struct qreg src, int i) 745b8e80941Smrg{ 746b8e80941Smrg struct qreg t = qir_FMOV(c, src); 747b8e80941Smrg c->defs[t.index]->src[0].pack = QPU_UNPACK_16A + i; 748b8e80941Smrg return t; 749b8e80941Smrg} 750b8e80941Smrg 751b8e80941Smrgstatic inline struct qreg 752b8e80941Smrgqir_UNPACK_16_I(struct vc4_compile *c, struct qreg src, int i) 753b8e80941Smrg{ 754b8e80941Smrg struct qreg t = qir_MOV(c, src); 755b8e80941Smrg c->defs[t.index]->src[0].pack = QPU_UNPACK_16A + i; 756848b8605Smrg return t; 757848b8605Smrg} 758848b8605Smrg 759b8e80941Smrgstatic inline void 760b8e80941Smrgqir_PACK_8_F(struct vc4_compile *c, struct qreg dest, struct qreg val, int chan) 761b8e80941Smrg{ 762b8e80941Smrg assert(!dest.pack); 763b8e80941Smrg dest.pack = QPU_PACK_MUL_8A + chan; 764b8e80941Smrg qir_emit_nondef(c, qir_inst(QOP_MMOV, dest, val, c->undef)); 765b8e80941Smrg} 766b8e80941Smrg 767b8e80941Smrgstatic inline struct qreg 768b8e80941Smrgqir_PACK_8888_F(struct vc4_compile *c, struct qreg val) 769b8e80941Smrg{ 770b8e80941Smrg struct qreg dest = qir_MMOV(c, val); 771b8e80941Smrg c->defs[dest.index]->dst.pack = QPU_PACK_MUL_8888; 772b8e80941Smrg return dest; 773b8e80941Smrg} 774b8e80941Smrg 775b8e80941Smrgstatic inline struct qreg 776b8e80941Smrgqir_POW(struct vc4_compile *c, struct qreg x, struct qreg y) 777b8e80941Smrg{ 778b8e80941Smrg return qir_EXP2(c, qir_FMUL(c, 779b8e80941Smrg y, 780b8e80941Smrg qir_LOG2(c, x))); 781b8e80941Smrg} 782b8e80941Smrg 783b8e80941Smrgstatic inline void 784b8e80941Smrgqir_VPM_WRITE(struct vc4_compile *c, struct qreg val) 785b8e80941Smrg{ 786b8e80941Smrg qir_MOV_dest(c, qir_reg(QFILE_VPM, 0), val); 787b8e80941Smrg} 788b8e80941Smrg 789b8e80941Smrgstatic inline struct qreg 790b8e80941Smrgqir_LOAD_IMM(struct vc4_compile *c, uint32_t val) 791b8e80941Smrg{ 792b8e80941Smrg return qir_emit_def(c, qir_inst(QOP_LOAD_IMM, c->undef, 793b8e80941Smrg qir_reg(QFILE_LOAD_IMM, val), c->undef)); 794b8e80941Smrg} 795b8e80941Smrg 796b8e80941Smrgstatic inline struct qreg 797b8e80941Smrgqir_LOAD_IMM_U2(struct vc4_compile *c, uint32_t val) 798b8e80941Smrg{ 799b8e80941Smrg return qir_emit_def(c, qir_inst(QOP_LOAD_IMM_U2, c->undef, 800b8e80941Smrg qir_reg(QFILE_LOAD_IMM, val), 801b8e80941Smrg c->undef)); 802b8e80941Smrg} 803b8e80941Smrg 804b8e80941Smrgstatic inline struct qreg 805b8e80941Smrgqir_LOAD_IMM_I2(struct vc4_compile *c, uint32_t val) 806b8e80941Smrg{ 807b8e80941Smrg return qir_emit_def(c, qir_inst(QOP_LOAD_IMM_I2, c->undef, 808b8e80941Smrg qir_reg(QFILE_LOAD_IMM, val), 809b8e80941Smrg c->undef)); 810b8e80941Smrg} 811b8e80941Smrg 812b8e80941Smrg/** Shifts the multiply output to the right by rot channels */ 813b8e80941Smrgstatic inline struct qreg 814b8e80941Smrgqir_ROT_MUL(struct vc4_compile *c, struct qreg val, uint32_t rot) 815b8e80941Smrg{ 816b8e80941Smrg return qir_emit_def(c, qir_inst(QOP_ROT_MUL, c->undef, 817b8e80941Smrg val, 818b8e80941Smrg qir_reg(QFILE_LOAD_IMM, 819b8e80941Smrg QPU_SMALL_IMM_MUL_ROT + rot))); 820b8e80941Smrg} 821b8e80941Smrg 822b8e80941Smrgstatic inline struct qinst * 823b8e80941Smrgqir_MOV_cond(struct vc4_compile *c, uint8_t cond, 824b8e80941Smrg struct qreg dest, struct qreg src) 825b8e80941Smrg{ 826b8e80941Smrg struct qinst *mov = qir_MOV_dest(c, dest, src); 827b8e80941Smrg mov->cond = cond; 828b8e80941Smrg return mov; 829b8e80941Smrg} 830b8e80941Smrg 831b8e80941Smrgstatic inline struct qinst * 832b8e80941Smrgqir_BRANCH(struct vc4_compile *c, uint8_t cond) 833b8e80941Smrg{ 834b8e80941Smrg struct qinst *inst = qir_inst(QOP_BRANCH, c->undef, c->undef, c->undef); 835b8e80941Smrg inst->cond = cond; 836b8e80941Smrg qir_emit_nondef(c, inst); 837b8e80941Smrg return inst; 838b8e80941Smrg} 839b8e80941Smrg 840b8e80941Smrg#define qir_for_each_block(block, c) \ 841b8e80941Smrg list_for_each_entry(struct qblock, block, &c->blocks, link) 842b8e80941Smrg 843b8e80941Smrg#define qir_for_each_block_rev(block, c) \ 844b8e80941Smrg list_for_each_entry_rev(struct qblock, block, &c->blocks, link) 845b8e80941Smrg 846b8e80941Smrg/* Loop over the non-NULL members of the successors array. */ 847b8e80941Smrg#define qir_for_each_successor(succ, block) \ 848b8e80941Smrg for (struct qblock *succ = block->successors[0]; \ 849b8e80941Smrg succ != NULL; \ 850b8e80941Smrg succ = (succ == block->successors[1] ? NULL : \ 851b8e80941Smrg block->successors[1])) 852b8e80941Smrg 853b8e80941Smrg#define qir_for_each_inst(inst, block) \ 854b8e80941Smrg list_for_each_entry(struct qinst, inst, &block->instructions, link) 855b8e80941Smrg 856b8e80941Smrg#define qir_for_each_inst_rev(inst, block) \ 857b8e80941Smrg list_for_each_entry_rev(struct qinst, inst, &block->instructions, link) 858b8e80941Smrg 859b8e80941Smrg#define qir_for_each_inst_safe(inst, block) \ 860b8e80941Smrg list_for_each_entry_safe(struct qinst, inst, &block->instructions, link) 861b8e80941Smrg 862b8e80941Smrg#define qir_for_each_inst_inorder(inst, c) \ 863b8e80941Smrg qir_for_each_block(_block, c) \ 864b8e80941Smrg qir_for_each_inst_safe(inst, _block) 865b8e80941Smrg 866848b8605Smrg#endif /* VC4_QIR_H */ 867