/*
 * Copyright © 2016 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef V3D_COMPILER_H
#define V3D_COMPILER_H

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <stdint.h>
#include <string.h>

#include "util/macros.h"
#include "common/v3d_debug.h"
#include "common/v3d_device_info.h"
#include "common/v3d_limits.h"
#include "compiler/nir/nir.h"
#include "util/list.h"
#include "util/u_math.h"

#include "qpu/qpu_instr.h"
#include "pipe/p_state.h"

/**
 * Maximum number of outstanding TMU operations we can queue for execution.
 *
 * This is mostly limited by the size of the TMU fifos. The Input and Config
 * fifos can stall, but we prefer that to injecting TMU flushes manually
 * in the driver, so we can ignore these, but we can't overflow the Output fifo,
 * which has 16 / threads per-thread entries, meaning that the maximum number
 * of outstanding LDTMUs we can ever have is 8, for a 2-way threaded shader.
 * This means that at most we can have 8 outstanding TMU loads, if each load
 * is just one component.
 *
 * NOTE: we could actually have a larger value here because TMU stores don't
 * consume any entries in the Output fifo (so we could have any number of
 * outstanding stores) and the driver keeps track of used Output fifo entries
 * and will flush if we ever need more than 8, but since loads are much more
 * common than stores, it is probably not worth it.
617ec681f3Smrg */ 627ec681f3Smrg#define MAX_TMU_QUEUE_SIZE 8 637ec681f3Smrg 647ec681f3Smrg/** 657ec681f3Smrg * Maximum offset distance in bytes between two consecutive constant UBO loads 667ec681f3Smrg * for the same UBO where we would favor updating the unifa address by emitting 677ec681f3Smrg * dummy ldunifa instructions to avoid writing the unifa register. 687ec681f3Smrg */ 697ec681f3Smrg#define MAX_UNIFA_SKIP_DISTANCE 16 707ec681f3Smrg 7101e04c3fSmrgstruct nir_builder; 7201e04c3fSmrg 7301e04c3fSmrgstruct v3d_fs_inputs { 7401e04c3fSmrg /** 7501e04c3fSmrg * Array of the meanings of the VPM inputs this shader needs. 7601e04c3fSmrg * 7701e04c3fSmrg * It doesn't include those that aren't part of the VPM, like 7801e04c3fSmrg * point/line coordinates. 7901e04c3fSmrg */ 8001e04c3fSmrg struct v3d_varying_slot *input_slots; 8101e04c3fSmrg uint32_t num_inputs; 8201e04c3fSmrg}; 8301e04c3fSmrg 8401e04c3fSmrgenum qfile { 8501e04c3fSmrg /** An unused source or destination register. */ 8601e04c3fSmrg QFILE_NULL, 8701e04c3fSmrg 8801e04c3fSmrg /** A physical register, such as the W coordinate payload. */ 8901e04c3fSmrg QFILE_REG, 9001e04c3fSmrg /** One of the regsiters for fixed function interactions. */ 9101e04c3fSmrg QFILE_MAGIC, 9201e04c3fSmrg 9301e04c3fSmrg /** 9401e04c3fSmrg * A virtual register, that will be allocated to actual accumulator 9501e04c3fSmrg * or physical registers later. 9601e04c3fSmrg */ 9701e04c3fSmrg QFILE_TEMP, 9801e04c3fSmrg 9901e04c3fSmrg /** 10001e04c3fSmrg * VPM reads use this with an index value to say what part of the VPM 10101e04c3fSmrg * is being read. 10201e04c3fSmrg */ 10301e04c3fSmrg QFILE_VPM, 10401e04c3fSmrg 10501e04c3fSmrg /** 10601e04c3fSmrg * Stores an immediate value in the index field that will be used 10701e04c3fSmrg * directly by qpu_load_imm(). 
10801e04c3fSmrg */ 10901e04c3fSmrg QFILE_LOAD_IMM, 11001e04c3fSmrg 11101e04c3fSmrg /** 11201e04c3fSmrg * Stores an immediate value in the index field that can be turned 11301e04c3fSmrg * into a small immediate field by qpu_encode_small_immediate(). 11401e04c3fSmrg */ 11501e04c3fSmrg QFILE_SMALL_IMM, 11601e04c3fSmrg}; 11701e04c3fSmrg 11801e04c3fSmrg/** 11901e04c3fSmrg * A reference to a QPU register or a virtual temp register. 12001e04c3fSmrg */ 12101e04c3fSmrgstruct qreg { 12201e04c3fSmrg enum qfile file; 12301e04c3fSmrg uint32_t index; 12401e04c3fSmrg}; 12501e04c3fSmrg 12601e04c3fSmrgstatic inline struct qreg vir_reg(enum qfile file, uint32_t index) 12701e04c3fSmrg{ 12801e04c3fSmrg return (struct qreg){file, index}; 12901e04c3fSmrg} 13001e04c3fSmrg 131ed98bd31Smayastatic inline struct qreg vir_magic_reg(uint32_t index) 132ed98bd31Smaya{ 133ed98bd31Smaya return (struct qreg){QFILE_MAGIC, index}; 134ed98bd31Smaya} 135ed98bd31Smaya 136ed98bd31Smayastatic inline struct qreg vir_nop_reg(void) 137ed98bd31Smaya{ 138ed98bd31Smaya return (struct qreg){QFILE_NULL, 0}; 139ed98bd31Smaya} 140ed98bd31Smaya 14101e04c3fSmrg/** 14201e04c3fSmrg * A reference to an actual register at the QPU level, for register 14301e04c3fSmrg * allocation. 14401e04c3fSmrg */ 14501e04c3fSmrgstruct qpu_reg { 14601e04c3fSmrg bool magic; 14701e04c3fSmrg bool smimm; 14801e04c3fSmrg int index; 14901e04c3fSmrg}; 15001e04c3fSmrg 15101e04c3fSmrgstruct qinst { 15201e04c3fSmrg /** Entry in qblock->instructions */ 15301e04c3fSmrg struct list_head link; 15401e04c3fSmrg 15501e04c3fSmrg /** 15601e04c3fSmrg * The instruction being wrapped. Its condition codes, pack flags, 15701e04c3fSmrg * signals, etc. will all be used, with just the register references 15801e04c3fSmrg * being replaced by the contents of qinst->dst and qinst->src[]. 
15901e04c3fSmrg */ 16001e04c3fSmrg struct v3d_qpu_instr qpu; 16101e04c3fSmrg 16201e04c3fSmrg /* Pre-register-allocation references to src/dst registers */ 16301e04c3fSmrg struct qreg dst; 16401e04c3fSmrg struct qreg src[3]; 16501e04c3fSmrg bool is_last_thrsw; 16601e04c3fSmrg 167ed98bd31Smaya /* If the instruction reads a uniform (other than through src[i].file 168ed98bd31Smaya * == QFILE_UNIF), that uniform's index in c->uniform_contents. ~0 169ed98bd31Smaya * otherwise. 17001e04c3fSmrg */ 17101e04c3fSmrg int uniform; 17201e04c3fSmrg}; 17301e04c3fSmrg 17401e04c3fSmrgenum quniform_contents { 17501e04c3fSmrg /** 17601e04c3fSmrg * Indicates that a constant 32-bit value is copied from the program's 17701e04c3fSmrg * uniform contents. 17801e04c3fSmrg */ 17901e04c3fSmrg QUNIFORM_CONSTANT, 18001e04c3fSmrg /** 18101e04c3fSmrg * Indicates that the program's uniform contents are used as an index 18201e04c3fSmrg * into the GL uniform storage. 18301e04c3fSmrg */ 18401e04c3fSmrg QUNIFORM_UNIFORM, 18501e04c3fSmrg 18601e04c3fSmrg /** @{ 18701e04c3fSmrg * Scaling factors from clip coordinates to relative to the viewport 18801e04c3fSmrg * center. 18901e04c3fSmrg * 19001e04c3fSmrg * This is used by the coordinate and vertex shaders to produce the 19101e04c3fSmrg * 32-bit entry consisting of 2 16-bit fields with 12.4 signed fixed 19201e04c3fSmrg * point offsets from the viewport ccenter. 19301e04c3fSmrg */ 19401e04c3fSmrg QUNIFORM_VIEWPORT_X_SCALE, 19501e04c3fSmrg QUNIFORM_VIEWPORT_Y_SCALE, 19601e04c3fSmrg /** @} */ 19701e04c3fSmrg 19801e04c3fSmrg QUNIFORM_VIEWPORT_Z_OFFSET, 19901e04c3fSmrg QUNIFORM_VIEWPORT_Z_SCALE, 20001e04c3fSmrg 20101e04c3fSmrg QUNIFORM_USER_CLIP_PLANE, 20201e04c3fSmrg 20301e04c3fSmrg /** 20401e04c3fSmrg * A reference to a V3D 3.x texture config parameter 0 uniform. 20501e04c3fSmrg * 20601e04c3fSmrg * This is a uniform implicitly loaded with a QPU_W_TMU* write, which 20701e04c3fSmrg * defines texture type, miplevels, and such. 
It will be found as a 20801e04c3fSmrg * parameter to the first QOP_TEX_[STRB] instruction in a sequence. 20901e04c3fSmrg */ 21001e04c3fSmrg QUNIFORM_TEXTURE_CONFIG_P0_0, 21101e04c3fSmrg QUNIFORM_TEXTURE_CONFIG_P0_1, 21201e04c3fSmrg QUNIFORM_TEXTURE_CONFIG_P0_2, 21301e04c3fSmrg QUNIFORM_TEXTURE_CONFIG_P0_3, 21401e04c3fSmrg QUNIFORM_TEXTURE_CONFIG_P0_4, 21501e04c3fSmrg QUNIFORM_TEXTURE_CONFIG_P0_5, 21601e04c3fSmrg QUNIFORM_TEXTURE_CONFIG_P0_6, 21701e04c3fSmrg QUNIFORM_TEXTURE_CONFIG_P0_7, 21801e04c3fSmrg QUNIFORM_TEXTURE_CONFIG_P0_8, 21901e04c3fSmrg QUNIFORM_TEXTURE_CONFIG_P0_9, 22001e04c3fSmrg QUNIFORM_TEXTURE_CONFIG_P0_10, 22101e04c3fSmrg QUNIFORM_TEXTURE_CONFIG_P0_11, 22201e04c3fSmrg QUNIFORM_TEXTURE_CONFIG_P0_12, 22301e04c3fSmrg QUNIFORM_TEXTURE_CONFIG_P0_13, 22401e04c3fSmrg QUNIFORM_TEXTURE_CONFIG_P0_14, 22501e04c3fSmrg QUNIFORM_TEXTURE_CONFIG_P0_15, 22601e04c3fSmrg QUNIFORM_TEXTURE_CONFIG_P0_16, 22701e04c3fSmrg QUNIFORM_TEXTURE_CONFIG_P0_17, 22801e04c3fSmrg QUNIFORM_TEXTURE_CONFIG_P0_18, 22901e04c3fSmrg QUNIFORM_TEXTURE_CONFIG_P0_19, 23001e04c3fSmrg QUNIFORM_TEXTURE_CONFIG_P0_20, 23101e04c3fSmrg QUNIFORM_TEXTURE_CONFIG_P0_21, 23201e04c3fSmrg QUNIFORM_TEXTURE_CONFIG_P0_22, 23301e04c3fSmrg QUNIFORM_TEXTURE_CONFIG_P0_23, 23401e04c3fSmrg QUNIFORM_TEXTURE_CONFIG_P0_24, 23501e04c3fSmrg QUNIFORM_TEXTURE_CONFIG_P0_25, 23601e04c3fSmrg QUNIFORM_TEXTURE_CONFIG_P0_26, 23701e04c3fSmrg QUNIFORM_TEXTURE_CONFIG_P0_27, 23801e04c3fSmrg QUNIFORM_TEXTURE_CONFIG_P0_28, 23901e04c3fSmrg QUNIFORM_TEXTURE_CONFIG_P0_29, 24001e04c3fSmrg QUNIFORM_TEXTURE_CONFIG_P0_30, 24101e04c3fSmrg QUNIFORM_TEXTURE_CONFIG_P0_31, 24201e04c3fSmrg QUNIFORM_TEXTURE_CONFIG_P0_32, 24301e04c3fSmrg 24401e04c3fSmrg /** 24501e04c3fSmrg * A reference to a V3D 3.x texture config parameter 1 uniform. 24601e04c3fSmrg * 24701e04c3fSmrg * This is a uniform implicitly loaded with a QPU_W_TMU* write, which 24801e04c3fSmrg * has the pointer to the indirect texture state. 
Our data[] field 24901e04c3fSmrg * will have a packed p1 value, but the address field will be just 25001e04c3fSmrg * which texture unit's texture should be referenced. 25101e04c3fSmrg */ 25201e04c3fSmrg QUNIFORM_TEXTURE_CONFIG_P1, 25301e04c3fSmrg 254ed98bd31Smaya /* A V3D 4.x texture config parameter. The high 8 bits will be 25501e04c3fSmrg * which texture or sampler is being sampled, and the driver must 25601e04c3fSmrg * replace the address field with the appropriate address. 25701e04c3fSmrg */ 25801e04c3fSmrg QUNIFORM_TMU_CONFIG_P0, 25901e04c3fSmrg QUNIFORM_TMU_CONFIG_P1, 26001e04c3fSmrg 261ed98bd31Smaya QUNIFORM_IMAGE_TMU_CONFIG_P0, 262ed98bd31Smaya 26301e04c3fSmrg QUNIFORM_TEXTURE_FIRST_LEVEL, 26401e04c3fSmrg 26501e04c3fSmrg QUNIFORM_TEXTURE_WIDTH, 26601e04c3fSmrg QUNIFORM_TEXTURE_HEIGHT, 26701e04c3fSmrg QUNIFORM_TEXTURE_DEPTH, 26801e04c3fSmrg QUNIFORM_TEXTURE_ARRAY_SIZE, 26901e04c3fSmrg QUNIFORM_TEXTURE_LEVELS, 2707ec681f3Smrg QUNIFORM_TEXTURE_SAMPLES, 27101e04c3fSmrg 27201e04c3fSmrg QUNIFORM_UBO_ADDR, 27301e04c3fSmrg 27401e04c3fSmrg QUNIFORM_TEXRECT_SCALE_X, 27501e04c3fSmrg QUNIFORM_TEXRECT_SCALE_Y, 27601e04c3fSmrg 277ed98bd31Smaya /* Returns the base offset of the SSBO given by the data value. */ 278ed98bd31Smaya QUNIFORM_SSBO_OFFSET, 279ed98bd31Smaya 2807ec681f3Smrg /* Returns the size of the SSBO or UBO given by the data value. */ 2817ec681f3Smrg QUNIFORM_GET_SSBO_SIZE, 2827ec681f3Smrg QUNIFORM_GET_UBO_SIZE, 283ed98bd31Smaya 284ed98bd31Smaya /* Sizes (in pixels) of a shader image given by the data value. */ 285ed98bd31Smaya QUNIFORM_IMAGE_WIDTH, 286ed98bd31Smaya QUNIFORM_IMAGE_HEIGHT, 287ed98bd31Smaya QUNIFORM_IMAGE_DEPTH, 288ed98bd31Smaya QUNIFORM_IMAGE_ARRAY_SIZE, 28901e04c3fSmrg 2907ec681f3Smrg QUNIFORM_LINE_WIDTH, 2917ec681f3Smrg 2927ec681f3Smrg /* The line width sent to hardware. This includes the expanded width 2937ec681f3Smrg * when anti-aliasing is enabled. 
2947ec681f3Smrg */ 2957ec681f3Smrg QUNIFORM_AA_LINE_WIDTH, 296ed98bd31Smaya 297ed98bd31Smaya /* Number of workgroups passed to glDispatchCompute in the dimension 298ed98bd31Smaya * selected by the data value. 299ed98bd31Smaya */ 300ed98bd31Smaya QUNIFORM_NUM_WORK_GROUPS, 30101e04c3fSmrg 3027ec681f3Smrg /* Base workgroup offset passed to vkCmdDispatchBase in the dimension 3037ec681f3Smrg * selected by the data value. 3047ec681f3Smrg */ 3057ec681f3Smrg QUNIFORM_WORK_GROUP_BASE, 3067ec681f3Smrg 30701e04c3fSmrg /** 30801e04c3fSmrg * Returns the the offset of the scratch buffer for register spilling. 30901e04c3fSmrg */ 31001e04c3fSmrg QUNIFORM_SPILL_OFFSET, 31101e04c3fSmrg QUNIFORM_SPILL_SIZE_PER_THREAD, 312ed98bd31Smaya 313ed98bd31Smaya /** 314ed98bd31Smaya * Returns the offset of the shared memory for compute shaders. 315ed98bd31Smaya * 316ed98bd31Smaya * This will be accessed using TMU general memory operations, so the 317ed98bd31Smaya * L2T cache will effectively be the shared memory area. 318ed98bd31Smaya */ 319ed98bd31Smaya QUNIFORM_SHARED_OFFSET, 3207ec681f3Smrg 3217ec681f3Smrg /** 3227ec681f3Smrg * Returns the number of layers in the framebuffer. 3237ec681f3Smrg * 3247ec681f3Smrg * This is used to cap gl_Layer in geometry shaders to avoid 3257ec681f3Smrg * out-of-bounds accesses into the tile state during binning. 3267ec681f3Smrg */ 3277ec681f3Smrg QUNIFORM_FB_LAYERS, 3287ec681f3Smrg 3297ec681f3Smrg /** 3307ec681f3Smrg * Current value of gl_ViewIndex for Multiview rendering. 
3317ec681f3Smrg */ 3327ec681f3Smrg QUNIFORM_VIEW_INDEX, 33301e04c3fSmrg}; 33401e04c3fSmrg 335ed98bd31Smayastatic inline uint32_t v3d_unit_data_create(uint32_t unit, uint32_t value) 336ed98bd31Smaya{ 337ed98bd31Smaya assert(value < (1 << 24)); 338ed98bd31Smaya return unit << 24 | value; 339ed98bd31Smaya} 340ed98bd31Smaya 341ed98bd31Smayastatic inline uint32_t v3d_unit_data_get_unit(uint32_t data) 342ed98bd31Smaya{ 343ed98bd31Smaya return data >> 24; 344ed98bd31Smaya} 345ed98bd31Smaya 346ed98bd31Smayastatic inline uint32_t v3d_unit_data_get_offset(uint32_t data) 347ed98bd31Smaya{ 348ed98bd31Smaya return data & 0xffffff; 349ed98bd31Smaya} 350ed98bd31Smaya 35101e04c3fSmrgstruct v3d_varying_slot { 35201e04c3fSmrg uint8_t slot_and_component; 35301e04c3fSmrg}; 35401e04c3fSmrg 35501e04c3fSmrgstatic inline struct v3d_varying_slot 35601e04c3fSmrgv3d_slot_from_slot_and_component(uint8_t slot, uint8_t component) 35701e04c3fSmrg{ 35801e04c3fSmrg assert(slot < 255 / 4); 35901e04c3fSmrg return (struct v3d_varying_slot){ (slot << 2) + component }; 36001e04c3fSmrg} 36101e04c3fSmrg 36201e04c3fSmrgstatic inline uint8_t v3d_slot_get_slot(struct v3d_varying_slot slot) 36301e04c3fSmrg{ 36401e04c3fSmrg return slot.slot_and_component >> 2; 36501e04c3fSmrg} 36601e04c3fSmrg 36701e04c3fSmrgstatic inline uint8_t v3d_slot_get_component(struct v3d_varying_slot slot) 36801e04c3fSmrg{ 36901e04c3fSmrg return slot.slot_and_component & 3; 37001e04c3fSmrg} 37101e04c3fSmrg 3727ec681f3Smrgenum v3d_execution_environment { 3737ec681f3Smrg V3D_ENVIRONMENT_OPENGL = 0, 3747ec681f3Smrg V3D_ENVIRONMENT_VULKAN, 3757ec681f3Smrg}; 3767ec681f3Smrg 37701e04c3fSmrgstruct v3d_key { 37801e04c3fSmrg void *shader_state; 37901e04c3fSmrg struct { 38001e04c3fSmrg uint8_t swizzle[4]; 3817ec681f3Smrg } tex[V3D_MAX_TEXTURE_SAMPLERS]; 3827ec681f3Smrg struct { 38301e04c3fSmrg uint8_t return_size; 38401e04c3fSmrg uint8_t return_channels; 3857ec681f3Smrg } sampler[V3D_MAX_TEXTURE_SAMPLERS]; 3867ec681f3Smrg 3877ec681f3Smrg 
uint8_t num_tex_used; 3887ec681f3Smrg uint8_t num_samplers_used; 38901e04c3fSmrg uint8_t ucp_enables; 3907ec681f3Smrg bool is_last_geometry_stage; 3917ec681f3Smrg bool robust_buffer_access; 3927ec681f3Smrg 3937ec681f3Smrg enum v3d_execution_environment environment; 39401e04c3fSmrg}; 39501e04c3fSmrg 39601e04c3fSmrgstruct v3d_fs_key { 39701e04c3fSmrg struct v3d_key base; 39801e04c3fSmrg bool is_points; 39901e04c3fSmrg bool is_lines; 4007ec681f3Smrg bool line_smoothing; 40101e04c3fSmrg bool point_coord_upper_left; 40201e04c3fSmrg bool msaa; 40301e04c3fSmrg bool sample_coverage; 40401e04c3fSmrg bool sample_alpha_to_coverage; 40501e04c3fSmrg bool sample_alpha_to_one; 406ed98bd31Smaya /* Mask of which color render targets are present. */ 407ed98bd31Smaya uint8_t cbufs; 40801e04c3fSmrg uint8_t swap_color_rb; 40901e04c3fSmrg /* Mask of which render targets need to be written as 32-bit floats */ 41001e04c3fSmrg uint8_t f32_color_rb; 41101e04c3fSmrg /* Masks of which render targets need to be written as ints/uints. 41201e04c3fSmrg * Used by gallium to work around lost information in TGSI. 41301e04c3fSmrg */ 41401e04c3fSmrg uint8_t int_color_rb; 41501e04c3fSmrg uint8_t uint_color_rb; 4167ec681f3Smrg 4177ec681f3Smrg /* Color format information per render target. Only set when logic 4187ec681f3Smrg * operations are enabled. 4197ec681f3Smrg */ 4207ec681f3Smrg struct { 4217ec681f3Smrg enum pipe_format format; 4227ec681f3Smrg const uint8_t *swizzle; 4237ec681f3Smrg } color_fmt[V3D_MAX_DRAW_BUFFERS]; 4247ec681f3Smrg 42501e04c3fSmrg uint8_t logicop_func; 42601e04c3fSmrg uint32_t point_sprite_mask; 42701e04c3fSmrg 42801e04c3fSmrg struct pipe_rt_blend_state blend; 4297ec681f3Smrg 4307ec681f3Smrg /* If the fragment shader reads gl_PrimitiveID then we have 2 scenarios: 4317ec681f3Smrg * 4327ec681f3Smrg * - If there is a geometry shader, then gl_PrimitiveID must be written 4337ec681f3Smrg * by it and the fragment shader loads it as a regular explicit input 4347ec681f3Smrg * varying. 
This is the only valid use case in GLES 3.1. 4357ec681f3Smrg * 4367ec681f3Smrg * - If there is not a geometry shader (allowed since GLES 3.2 and 4377ec681f3Smrg * Vulkan 1.0), then gl_PrimitiveID must be implicitly written by 4387ec681f3Smrg * hardware and is considered an implicit input varying in the 4397ec681f3Smrg * fragment shader. 4407ec681f3Smrg */ 4417ec681f3Smrg bool has_gs; 4427ec681f3Smrg}; 4437ec681f3Smrg 4447ec681f3Smrgstruct v3d_gs_key { 4457ec681f3Smrg struct v3d_key base; 4467ec681f3Smrg 4477ec681f3Smrg struct v3d_varying_slot used_outputs[V3D_MAX_FS_INPUTS]; 4487ec681f3Smrg uint8_t num_used_outputs; 4497ec681f3Smrg 4507ec681f3Smrg bool is_coord; 4517ec681f3Smrg bool per_vertex_point_size; 45201e04c3fSmrg}; 45301e04c3fSmrg 45401e04c3fSmrgstruct v3d_vs_key { 45501e04c3fSmrg struct v3d_key base; 45601e04c3fSmrg 4577ec681f3Smrg struct v3d_varying_slot used_outputs[V3D_MAX_ANY_STAGE_INPUTS]; 4587ec681f3Smrg uint8_t num_used_outputs; 4597ec681f3Smrg 4607ec681f3Smrg /* A bit-mask indicating if we need to swap the R/B channels for 4617ec681f3Smrg * vertex attributes. Since the hardware doesn't provide any 4627ec681f3Smrg * means to swizzle vertex attributes we need to do it in the shader. 4637ec681f3Smrg */ 4647ec681f3Smrg uint32_t va_swap_rb_mask; 46501e04c3fSmrg 46601e04c3fSmrg bool is_coord; 46701e04c3fSmrg bool per_vertex_point_size; 46801e04c3fSmrg bool clamp_color; 46901e04c3fSmrg}; 47001e04c3fSmrg 47101e04c3fSmrg/** A basic block of VIR intructions. */ 47201e04c3fSmrgstruct qblock { 47301e04c3fSmrg struct list_head link; 47401e04c3fSmrg 47501e04c3fSmrg struct list_head instructions; 47601e04c3fSmrg 47701e04c3fSmrg struct set *predecessors; 47801e04c3fSmrg struct qblock *successors[2]; 47901e04c3fSmrg 48001e04c3fSmrg int index; 48101e04c3fSmrg 48201e04c3fSmrg /* Instruction IPs for the first and last instruction of the block. 48301e04c3fSmrg * Set by qpu_schedule.c. 
48401e04c3fSmrg */ 48501e04c3fSmrg uint32_t start_qpu_ip; 48601e04c3fSmrg uint32_t end_qpu_ip; 48701e04c3fSmrg 48801e04c3fSmrg /* Instruction IP for the branch instruction of the block. Set by 48901e04c3fSmrg * qpu_schedule.c. 49001e04c3fSmrg */ 49101e04c3fSmrg uint32_t branch_qpu_ip; 49201e04c3fSmrg 49301e04c3fSmrg /** Offset within the uniform stream at the start of the block. */ 49401e04c3fSmrg uint32_t start_uniform; 49501e04c3fSmrg /** Offset within the uniform stream of the branch instruction */ 49601e04c3fSmrg uint32_t branch_uniform; 49701e04c3fSmrg 4987ec681f3Smrg /** 4997ec681f3Smrg * Has the terminating branch of this block already been emitted 5007ec681f3Smrg * by a break or continue? 5017ec681f3Smrg */ 5027ec681f3Smrg bool branch_emitted; 5037ec681f3Smrg 50401e04c3fSmrg /** @{ used by v3d_vir_live_variables.c */ 50501e04c3fSmrg BITSET_WORD *def; 506ed98bd31Smaya BITSET_WORD *defin; 507ed98bd31Smaya BITSET_WORD *defout; 50801e04c3fSmrg BITSET_WORD *use; 50901e04c3fSmrg BITSET_WORD *live_in; 51001e04c3fSmrg BITSET_WORD *live_out; 51101e04c3fSmrg int start_ip, end_ip; 51201e04c3fSmrg /** @} */ 51301e04c3fSmrg}; 51401e04c3fSmrg 51501e04c3fSmrg/** Which util/list.h add mode we should use when inserting an instruction. */ 51601e04c3fSmrgenum vir_cursor_mode { 51701e04c3fSmrg vir_cursor_add, 51801e04c3fSmrg vir_cursor_addtail, 51901e04c3fSmrg}; 52001e04c3fSmrg 52101e04c3fSmrg/** 52201e04c3fSmrg * Tracking structure for where new instructions should be inserted. Create 52301e04c3fSmrg * with one of the vir_after_inst()-style helper functions. 52401e04c3fSmrg * 52501e04c3fSmrg * This does not protect against removal of the block or instruction, so we 52601e04c3fSmrg * have an assert in instruction removal to try to catch it. 
52701e04c3fSmrg */ 52801e04c3fSmrgstruct vir_cursor { 52901e04c3fSmrg enum vir_cursor_mode mode; 53001e04c3fSmrg struct list_head *link; 53101e04c3fSmrg}; 53201e04c3fSmrg 53301e04c3fSmrgstatic inline struct vir_cursor 53401e04c3fSmrgvir_before_inst(struct qinst *inst) 53501e04c3fSmrg{ 53601e04c3fSmrg return (struct vir_cursor){ vir_cursor_addtail, &inst->link }; 53701e04c3fSmrg} 53801e04c3fSmrg 53901e04c3fSmrgstatic inline struct vir_cursor 54001e04c3fSmrgvir_after_inst(struct qinst *inst) 54101e04c3fSmrg{ 54201e04c3fSmrg return (struct vir_cursor){ vir_cursor_add, &inst->link }; 54301e04c3fSmrg} 54401e04c3fSmrg 54501e04c3fSmrgstatic inline struct vir_cursor 54601e04c3fSmrgvir_before_block(struct qblock *block) 54701e04c3fSmrg{ 54801e04c3fSmrg return (struct vir_cursor){ vir_cursor_add, &block->instructions }; 54901e04c3fSmrg} 55001e04c3fSmrg 55101e04c3fSmrgstatic inline struct vir_cursor 55201e04c3fSmrgvir_after_block(struct qblock *block) 55301e04c3fSmrg{ 55401e04c3fSmrg return (struct vir_cursor){ vir_cursor_addtail, &block->instructions }; 55501e04c3fSmrg} 55601e04c3fSmrg 5577ec681f3Smrgenum v3d_compilation_result { 5587ec681f3Smrg V3D_COMPILATION_SUCCEEDED, 5597ec681f3Smrg V3D_COMPILATION_FAILED_REGISTER_ALLOCATION, 5607ec681f3Smrg V3D_COMPILATION_FAILED, 5617ec681f3Smrg}; 5627ec681f3Smrg 56301e04c3fSmrg/** 56401e04c3fSmrg * Compiler state saved across compiler invocations, for any expensive global 56501e04c3fSmrg * setup. 
56601e04c3fSmrg */ 56701e04c3fSmrgstruct v3d_compiler { 56801e04c3fSmrg const struct v3d_device_info *devinfo; 56901e04c3fSmrg struct ra_regs *regs; 5707ec681f3Smrg struct ra_class *reg_class_any[3]; 5717ec681f3Smrg struct ra_class *reg_class_r5[3]; 5727ec681f3Smrg struct ra_class *reg_class_phys[3]; 5737ec681f3Smrg struct ra_class *reg_class_phys_or_acc[3]; 5747ec681f3Smrg}; 5757ec681f3Smrg 5767ec681f3Smrg/** 5777ec681f3Smrg * This holds partially interpolated inputs as provided by hardware 5787ec681f3Smrg * (The Vp = A*(x - x0) + B*(y - y0) term), as well as the C coefficient 5797ec681f3Smrg * required to compute the final interpolated value. 5807ec681f3Smrg */ 5817ec681f3Smrgstruct v3d_interp_input { 5827ec681f3Smrg struct qreg vp; 5837ec681f3Smrg struct qreg C; 5847ec681f3Smrg unsigned mode; /* interpolation mode */ 58501e04c3fSmrg}; 58601e04c3fSmrg 58701e04c3fSmrgstruct v3d_compile { 58801e04c3fSmrg const struct v3d_device_info *devinfo; 58901e04c3fSmrg nir_shader *s; 59001e04c3fSmrg nir_function_impl *impl; 59101e04c3fSmrg struct exec_list *cf_node_list; 59201e04c3fSmrg const struct v3d_compiler *compiler; 59301e04c3fSmrg 594ed98bd31Smaya void (*debug_output)(const char *msg, 595ed98bd31Smaya void *debug_output_data); 596ed98bd31Smaya void *debug_output_data; 597ed98bd31Smaya 59801e04c3fSmrg /** 59901e04c3fSmrg * Mapping from nir_register * or nir_ssa_def * to array of struct 60001e04c3fSmrg * qreg for the values. 60101e04c3fSmrg */ 60201e04c3fSmrg struct hash_table *def_ht; 60301e04c3fSmrg 60401e04c3fSmrg /* For each temp, the instruction generating its value. */ 60501e04c3fSmrg struct qinst **defs; 60601e04c3fSmrg uint32_t defs_array_size; 60701e04c3fSmrg 6087ec681f3Smrg /* TMU pipelining tracking */ 6097ec681f3Smrg struct { 6107ec681f3Smrg /* NIR registers that have been updated with a TMU operation 6117ec681f3Smrg * that has not been flushed yet. 
6127ec681f3Smrg */ 6137ec681f3Smrg struct set *outstanding_regs; 6147ec681f3Smrg 6157ec681f3Smrg uint32_t output_fifo_size; 6167ec681f3Smrg 6177ec681f3Smrg struct { 6187ec681f3Smrg nir_dest *dest; 6197ec681f3Smrg uint8_t num_components; 6207ec681f3Smrg uint8_t component_mask; 6217ec681f3Smrg } flush[MAX_TMU_QUEUE_SIZE]; 6227ec681f3Smrg uint32_t flush_count; 6237ec681f3Smrg } tmu; 6247ec681f3Smrg 62501e04c3fSmrg /** 62601e04c3fSmrg * Inputs to the shader, arranged by TGSI declaration order. 62701e04c3fSmrg * 62801e04c3fSmrg * Not all fragment shader QFILE_VARY reads are present in this array. 62901e04c3fSmrg */ 63001e04c3fSmrg struct qreg *inputs; 6317ec681f3Smrg /** 6327ec681f3Smrg * Partially interpolated inputs to the shader. 6337ec681f3Smrg */ 6347ec681f3Smrg struct v3d_interp_input *interp; 63501e04c3fSmrg struct qreg *outputs; 63601e04c3fSmrg bool msaa_per_sample_output; 6377ec681f3Smrg struct qreg color_reads[V3D_MAX_DRAW_BUFFERS * V3D_MAX_SAMPLES * 4]; 6387ec681f3Smrg struct qreg sample_colors[V3D_MAX_DRAW_BUFFERS * V3D_MAX_SAMPLES * 4]; 63901e04c3fSmrg uint32_t inputs_array_size; 64001e04c3fSmrg uint32_t outputs_array_size; 64101e04c3fSmrg uint32_t uniforms_array_size; 64201e04c3fSmrg 64301e04c3fSmrg /* Booleans for whether the corresponding QFILE_VARY[i] is 64401e04c3fSmrg * flat-shaded. This includes gl_FragColor flat-shading, which is 64501e04c3fSmrg * customized based on the shademodel_flat shader key. 
64601e04c3fSmrg */ 64701e04c3fSmrg uint32_t flat_shade_flags[BITSET_WORDS(V3D_MAX_FS_INPUTS)]; 64801e04c3fSmrg 64901e04c3fSmrg uint32_t noperspective_flags[BITSET_WORDS(V3D_MAX_FS_INPUTS)]; 65001e04c3fSmrg 65101e04c3fSmrg uint32_t centroid_flags[BITSET_WORDS(V3D_MAX_FS_INPUTS)]; 65201e04c3fSmrg 65301e04c3fSmrg bool uses_center_w; 654ed98bd31Smaya bool writes_z; 6557ec681f3Smrg bool uses_implicit_point_line_varyings; 6567ec681f3Smrg 6577ec681f3Smrg /* True if a fragment shader reads gl_PrimitiveID */ 6587ec681f3Smrg bool fs_uses_primitive_id; 6597ec681f3Smrg 6607ec681f3Smrg /* If the fragment shader does anything that requires to force 6617ec681f3Smrg * per-sample MSAA, such as reading gl_SampleID. 6627ec681f3Smrg */ 6637ec681f3Smrg bool force_per_sample_msaa; 6647ec681f3Smrg 6657ec681f3Smrg /* Whether we are using the fallback scheduler. This will be set after 6667ec681f3Smrg * register allocation has failed once. 6677ec681f3Smrg */ 6687ec681f3Smrg bool fallback_scheduler; 6697ec681f3Smrg 6707ec681f3Smrg /* Disable TMU pipelining. This may increase the chances of being able 6717ec681f3Smrg * to compile shaders with high register pressure that require to emit 6727ec681f3Smrg * TMU spills. 6737ec681f3Smrg */ 6747ec681f3Smrg bool disable_tmu_pipelining; 6757ec681f3Smrg bool pipelined_any_tmu; 6767ec681f3Smrg 6777ec681f3Smrg /* Disable sorting of UBO loads with constant offset. This may 6787ec681f3Smrg * increase the chances of being able to compile shaders with high 6797ec681f3Smrg * register pressure. 6807ec681f3Smrg */ 6817ec681f3Smrg bool disable_constant_ubo_load_sorting; 6827ec681f3Smrg bool sorted_any_ubo_loads; 6837ec681f3Smrg 6847ec681f3Smrg /* Emits ldunif for each new uniform, even if the uniform was already 6857ec681f3Smrg * emitted in the same block. Useful to compile shaders with high 6867ec681f3Smrg * register pressure or to disable the optimization during uniform 6877ec681f3Smrg * spills. 
6887ec681f3Smrg */ 6897ec681f3Smrg bool disable_ldunif_opt; 6907ec681f3Smrg 6917ec681f3Smrg /* Disables loop unrolling to reduce register pressure. */ 6927ec681f3Smrg bool disable_loop_unrolling; 6937ec681f3Smrg bool unrolled_any_loops; 6947ec681f3Smrg 6957ec681f3Smrg /* Minimum number of threads we are willing to use to register allocate 6967ec681f3Smrg * a shader with the current compilation strategy. This only prevents 6977ec681f3Smrg * us from lowering the thread count to register allocate successfully, 6987ec681f3Smrg * which can be useful when we prefer doing other changes to the 6997ec681f3Smrg * compilation strategy before dropping thread count. 7007ec681f3Smrg */ 7017ec681f3Smrg uint32_t min_threads_for_reg_alloc; 7027ec681f3Smrg 7037ec681f3Smrg /* Whether TMU spills are allowed. If this is disabled it may cause 7047ec681f3Smrg * register allocation to fail. We set this to favor other compilation 7057ec681f3Smrg * strategies that can reduce register pressure and hopefully reduce or 7067ec681f3Smrg * eliminate TMU spills in the shader. 7077ec681f3Smrg */ 7087ec681f3Smrg bool tmu_spilling_allowed; 7097ec681f3Smrg 7107ec681f3Smrg /* The UBO index and block used with the last unifa load, as well as the 7117ec681f3Smrg * current unifa offset *after* emitting that load. This is used to skip 7127ec681f3Smrg * unifa writes (and their 3 delay slot) when the next UBO load reads 7137ec681f3Smrg * right after the previous one in the same block. 7147ec681f3Smrg */ 7157ec681f3Smrg struct qblock *current_unifa_block; 7167ec681f3Smrg int32_t current_unifa_index; 7177ec681f3Smrg uint32_t current_unifa_offset; 71801e04c3fSmrg 71901e04c3fSmrg /* State for whether we're executing on each channel currently. 0 if 72001e04c3fSmrg * yes, otherwise a block number + 1 that the channel jumped to. 
72101e04c3fSmrg */ 72201e04c3fSmrg struct qreg execute; 723ed98bd31Smaya bool in_control_flow; 72401e04c3fSmrg 7257ec681f3Smrg struct qreg line_x, point_x, point_y, primitive_id; 72601e04c3fSmrg 72701e04c3fSmrg /** 72801e04c3fSmrg * Instance ID, which comes in before the vertex attribute payload if 72901e04c3fSmrg * the shader record requests it. 73001e04c3fSmrg */ 73101e04c3fSmrg struct qreg iid; 73201e04c3fSmrg 73301e04c3fSmrg /** 7347ec681f3Smrg * Base Instance ID, which comes in before the vertex attribute payload 73501e04c3fSmrg * (after Instance ID) if the shader record requests it. 73601e04c3fSmrg */ 7377ec681f3Smrg struct qreg biid; 7387ec681f3Smrg 7397ec681f3Smrg /** 7407ec681f3Smrg * Vertex ID, which comes in before the vertex attribute payload 7417ec681f3Smrg * (after Base Instance) if the shader record requests it. 7427ec681f3Smrg */ 74301e04c3fSmrg struct qreg vid; 74401e04c3fSmrg 74501e04c3fSmrg /* Fragment shader payload regs. */ 74601e04c3fSmrg struct qreg payload_w, payload_w_centroid, payload_z; 74701e04c3fSmrg 748ed98bd31Smaya struct qreg cs_payload[2]; 749ed98bd31Smaya struct qreg cs_shared_offset; 750ed98bd31Smaya int local_invocation_index_bits; 751ed98bd31Smaya 7527ec681f3Smrg /* If the shader uses subgroup functionality */ 7537ec681f3Smrg bool has_subgroups; 7547ec681f3Smrg 755ed98bd31Smaya uint8_t vattr_sizes[V3D_MAX_VS_INPUTS / 4]; 756ed98bd31Smaya uint32_t vpm_output_size; 75701e04c3fSmrg 75801e04c3fSmrg /* Size in bytes of registers that have been spilled. This is how much 75901e04c3fSmrg * space needs to be available in the spill BO per thread per QPU. 76001e04c3fSmrg */ 76101e04c3fSmrg uint32_t spill_size; 762ed98bd31Smaya /* Shader-db stats */ 763ed98bd31Smaya uint32_t spills, fills, loops; 76401e04c3fSmrg /** 76501e04c3fSmrg * Register spilling's per-thread base address, shared between each 76601e04c3fSmrg * spill/fill's addressing calculations. 
76701e04c3fSmrg */ 76801e04c3fSmrg struct qreg spill_base; 76901e04c3fSmrg /* Bit vector of which temps may be spilled */ 77001e04c3fSmrg BITSET_WORD *spillable; 77101e04c3fSmrg 77201e04c3fSmrg /** 77301e04c3fSmrg * Array of the VARYING_SLOT_* of all FS QFILE_VARY reads. 77401e04c3fSmrg * 77501e04c3fSmrg * This includes those that aren't part of the VPM varyings, like 77601e04c3fSmrg * point/line coordinates. 77701e04c3fSmrg */ 77801e04c3fSmrg struct v3d_varying_slot input_slots[V3D_MAX_FS_INPUTS]; 77901e04c3fSmrg 78001e04c3fSmrg /** 78101e04c3fSmrg * An entry per outputs[] in the VS indicating what the VARYING_SLOT_* 78201e04c3fSmrg * of the output is. Used to emit from the VS in the order that the 78301e04c3fSmrg * FS needs. 78401e04c3fSmrg */ 78501e04c3fSmrg struct v3d_varying_slot *output_slots; 78601e04c3fSmrg 78701e04c3fSmrg struct pipe_shader_state *shader_state; 78801e04c3fSmrg struct v3d_key *key; 78901e04c3fSmrg struct v3d_fs_key *fs_key; 7907ec681f3Smrg struct v3d_gs_key *gs_key; 79101e04c3fSmrg struct v3d_vs_key *vs_key; 79201e04c3fSmrg 79301e04c3fSmrg /* Live ranges of temps. */ 79401e04c3fSmrg int *temp_start, *temp_end; 79501e04c3fSmrg bool live_intervals_valid; 79601e04c3fSmrg 79701e04c3fSmrg uint32_t *uniform_data; 79801e04c3fSmrg enum quniform_contents *uniform_contents; 79901e04c3fSmrg uint32_t uniform_array_size; 80001e04c3fSmrg uint32_t num_uniforms; 80101e04c3fSmrg uint32_t output_position_index; 80201e04c3fSmrg nir_variable *output_color_var[4]; 80301e04c3fSmrg uint32_t output_sample_mask_index; 80401e04c3fSmrg 80501e04c3fSmrg struct qreg undef; 80601e04c3fSmrg uint32_t num_temps; 80701e04c3fSmrg 80801e04c3fSmrg struct vir_cursor cursor; 80901e04c3fSmrg struct list_head blocks; 81001e04c3fSmrg int next_block_index; 81101e04c3fSmrg struct qblock *cur_block; 81201e04c3fSmrg struct qblock *loop_cont_block; 81301e04c3fSmrg struct qblock *loop_break_block; 8147ec681f3Smrg /** 8157ec681f3Smrg * Which temp, if any, do we currently have in the flags? 
8167ec681f3Smrg * This is set when processing a comparison instruction, and 8177ec681f3Smrg * reset to -1 by anything else that touches the flags. 8187ec681f3Smrg */ 8197ec681f3Smrg int32_t flags_temp; 8207ec681f3Smrg enum v3d_qpu_cond flags_cond; 82101e04c3fSmrg 82201e04c3fSmrg uint64_t *qpu_insts; 82301e04c3fSmrg uint32_t qpu_inst_count; 82401e04c3fSmrg uint32_t qpu_inst_size; 8257ec681f3Smrg uint32_t qpu_inst_stalled_count; 8267ec681f3Smrg uint32_t nop_count; 82701e04c3fSmrg 82801e04c3fSmrg /* For the FS, the number of varying inputs not counting the 82901e04c3fSmrg * point/line varyings payload 83001e04c3fSmrg */ 83101e04c3fSmrg uint32_t num_inputs; 83201e04c3fSmrg 83301e04c3fSmrg uint32_t program_id; 83401e04c3fSmrg uint32_t variant_id; 83501e04c3fSmrg 83601e04c3fSmrg /* Set to compile program in in 1x, 2x, or 4x threaded mode, where 83701e04c3fSmrg * SIG_THREAD_SWITCH is used to hide texturing latency at the cost of 83801e04c3fSmrg * limiting ourselves to the part of the physical reg space. 83901e04c3fSmrg * 84001e04c3fSmrg * On V3D 3.x, 2x or 4x divide the physical reg space by 2x or 4x. On 84101e04c3fSmrg * V3D 4.x, all shaders are 2x threaded, and 4x only divides the 84201e04c3fSmrg * physical reg space in half. 
84301e04c3fSmrg */ 84401e04c3fSmrg uint8_t threads; 84501e04c3fSmrg struct qinst *last_thrsw; 84601e04c3fSmrg bool last_thrsw_at_top_level; 84701e04c3fSmrg 8487ec681f3Smrg bool emitted_tlb_load; 8497ec681f3Smrg bool lock_scoreboard_on_first_thrsw; 8507ec681f3Smrg 8517ec681f3Smrg /* Total number of spilled registers in the program */ 8527ec681f3Smrg uint32_t spill_count; 8537ec681f3Smrg 8547ec681f3Smrg enum v3d_compilation_result compilation_result; 8557ec681f3Smrg 8567ec681f3Smrg bool tmu_dirty_rcl; 85701e04c3fSmrg}; 85801e04c3fSmrg 85901e04c3fSmrgstruct v3d_uniform_list { 86001e04c3fSmrg enum quniform_contents *contents; 86101e04c3fSmrg uint32_t *data; 86201e04c3fSmrg uint32_t count; 86301e04c3fSmrg}; 86401e04c3fSmrg 86501e04c3fSmrgstruct v3d_prog_data { 86601e04c3fSmrg struct v3d_uniform_list uniforms; 86701e04c3fSmrg 86801e04c3fSmrg uint32_t spill_size; 86901e04c3fSmrg 87001e04c3fSmrg uint8_t threads; 87101e04c3fSmrg 87201e04c3fSmrg /* For threads > 1, whether the program should be dispatched in the 87301e04c3fSmrg * after-final-THRSW state. 87401e04c3fSmrg */ 87501e04c3fSmrg bool single_seg; 8767ec681f3Smrg 8777ec681f3Smrg bool tmu_dirty_rcl; 8787ec681f3Smrg 8797ec681f3Smrg bool has_control_barrier; 88001e04c3fSmrg}; 88101e04c3fSmrg 88201e04c3fSmrgstruct v3d_vs_prog_data { 88301e04c3fSmrg struct v3d_prog_data base; 88401e04c3fSmrg 8857ec681f3Smrg bool uses_iid, uses_biid, uses_vid; 88601e04c3fSmrg 88701e04c3fSmrg /* Number of components read from each vertex attribute. */ 888ed98bd31Smaya uint8_t vattr_sizes[V3D_MAX_VS_INPUTS / 4]; 88901e04c3fSmrg 89001e04c3fSmrg /* Total number of components read, for the shader state record. */ 89101e04c3fSmrg uint32_t vpm_input_size; 89201e04c3fSmrg 89301e04c3fSmrg /* Total number of components written, for the shader state record. */ 89401e04c3fSmrg uint32_t vpm_output_size; 89501e04c3fSmrg 896ed98bd31Smaya /* Set if there should be separate VPM segments for input and output. 
897ed98bd31Smaya * If unset, vpm_input_size will be 0. 898ed98bd31Smaya */ 899ed98bd31Smaya bool separate_segments; 900ed98bd31Smaya 90101e04c3fSmrg /* Value to be programmed in VCM_CACHE_SIZE. */ 90201e04c3fSmrg uint8_t vcm_cache_size; 9037ec681f3Smrg 9047ec681f3Smrg /* Maps the nir->data.location to its 9057ec681f3Smrg * nir->data.driver_location. In general we are using the 9067ec681f3Smrg * driver location as index (like vattr_sizes above), so this 9077ec681f3Smrg * map is useful when what we have is the location 9087ec681f3Smrg * 9097ec681f3Smrg * Returns -1 if the location is not used 9107ec681f3Smrg */ 9117ec681f3Smrg int32_t driver_location_map[V3D_MAX_VS_INPUTS]; 9127ec681f3Smrg}; 9137ec681f3Smrg 9147ec681f3Smrgstruct v3d_gs_prog_data { 9157ec681f3Smrg struct v3d_prog_data base; 9167ec681f3Smrg 9177ec681f3Smrg /* Whether the program reads gl_PrimitiveIDIn */ 9187ec681f3Smrg bool uses_pid; 9197ec681f3Smrg 9207ec681f3Smrg /* Number of components read from each input varying. */ 9217ec681f3Smrg uint8_t input_sizes[V3D_MAX_GS_INPUTS / 4]; 9227ec681f3Smrg 9237ec681f3Smrg /* Number of inputs */ 9247ec681f3Smrg uint8_t num_inputs; 9257ec681f3Smrg struct v3d_varying_slot input_slots[V3D_MAX_GS_INPUTS]; 9267ec681f3Smrg 9277ec681f3Smrg /* Total number of components written, for the shader state record. */ 9287ec681f3Smrg uint32_t vpm_output_size; 9297ec681f3Smrg 9307ec681f3Smrg /* Maximum SIMD dispatch width to not exceed VPM output size limits 9317ec681f3Smrg * in the geometry shader. Notice that the final dispatch width has to 9327ec681f3Smrg * be decided at draw time and could be lower based on the VPM pressure 9337ec681f3Smrg * added by other shader stages. 
9347ec681f3Smrg */ 9357ec681f3Smrg uint8_t simd_width; 9367ec681f3Smrg 9377ec681f3Smrg /* Output primitive type */ 9387ec681f3Smrg uint8_t out_prim_type; 9397ec681f3Smrg 9407ec681f3Smrg /* Number of GS invocations */ 9417ec681f3Smrg uint8_t num_invocations; 9427ec681f3Smrg 9437ec681f3Smrg bool writes_psiz; 94401e04c3fSmrg}; 94501e04c3fSmrg 94601e04c3fSmrgstruct v3d_fs_prog_data { 94701e04c3fSmrg struct v3d_prog_data base; 94801e04c3fSmrg 9497ec681f3Smrg /* Whether the program reads gl_PrimitiveID */ 9507ec681f3Smrg bool uses_pid; 9517ec681f3Smrg 95201e04c3fSmrg struct v3d_varying_slot input_slots[V3D_MAX_FS_INPUTS]; 95301e04c3fSmrg 95401e04c3fSmrg /* Array of flat shade flags. 95501e04c3fSmrg * 95601e04c3fSmrg * Each entry is only 24 bits (high 8 bits 0), to match the hardware 95701e04c3fSmrg * packet layout. 95801e04c3fSmrg */ 95901e04c3fSmrg uint32_t flat_shade_flags[((V3D_MAX_FS_INPUTS - 1) / 24) + 1]; 96001e04c3fSmrg 96101e04c3fSmrg uint32_t noperspective_flags[((V3D_MAX_FS_INPUTS - 1) / 24) + 1]; 96201e04c3fSmrg 96301e04c3fSmrg uint32_t centroid_flags[((V3D_MAX_FS_INPUTS - 1) / 24) + 1]; 96401e04c3fSmrg 965ed98bd31Smaya uint8_t num_inputs; 96601e04c3fSmrg bool writes_z; 967ed98bd31Smaya bool disable_ez; 96801e04c3fSmrg bool uses_center_w; 9697ec681f3Smrg bool uses_implicit_point_line_varyings; 9707ec681f3Smrg bool lock_scoreboard_on_first_thrsw; 9717ec681f3Smrg bool force_per_sample_msaa; 97201e04c3fSmrg}; 97301e04c3fSmrg 974ed98bd31Smayastruct v3d_compute_prog_data { 975ed98bd31Smaya struct v3d_prog_data base; 976ed98bd31Smaya /* Size in bytes of the workgroup's shared space. 
*/ 977ed98bd31Smaya uint32_t shared_size; 9787ec681f3Smrg uint16_t local_size[3]; 9797ec681f3Smrg /* If the shader uses subgroup functionality */ 9807ec681f3Smrg bool has_subgroups; 9817ec681f3Smrg}; 9827ec681f3Smrg 9837ec681f3Smrgstruct vpm_config { 9847ec681f3Smrg uint32_t As; 9857ec681f3Smrg uint32_t Vc; 9867ec681f3Smrg uint32_t Gs; 9877ec681f3Smrg uint32_t Gd; 9887ec681f3Smrg uint32_t Gv; 9897ec681f3Smrg uint32_t Ve; 9907ec681f3Smrg uint32_t gs_width; 991ed98bd31Smaya}; 992ed98bd31Smaya 9937ec681f3Smrgbool 9947ec681f3Smrgv3d_compute_vpm_config(struct v3d_device_info *devinfo, 9957ec681f3Smrg struct v3d_vs_prog_data *vs_bin, 9967ec681f3Smrg struct v3d_vs_prog_data *vs, 9977ec681f3Smrg struct v3d_gs_prog_data *gs_bin, 9987ec681f3Smrg struct v3d_gs_prog_data *gs, 9997ec681f3Smrg struct vpm_config *vpm_cfg_bin, 10007ec681f3Smrg struct vpm_config *vpm_cfg); 10017ec681f3Smrg 1002ed98bd31Smayastatic inline bool 1003ed98bd31Smayavir_has_uniform(struct qinst *inst) 1004ed98bd31Smaya{ 1005ed98bd31Smaya return inst->uniform != ~0; 1006ed98bd31Smaya} 1007ed98bd31Smaya 100801e04c3fSmrgconst struct v3d_compiler *v3d_compiler_init(const struct v3d_device_info *devinfo); 100901e04c3fSmrgvoid v3d_compiler_free(const struct v3d_compiler *compiler); 10107ec681f3Smrgvoid v3d_optimize_nir(struct v3d_compile *c, struct nir_shader *s); 101101e04c3fSmrg 1012ed98bd31Smayauint64_t *v3d_compile(const struct v3d_compiler *compiler, 1013ed98bd31Smaya struct v3d_key *key, 1014ed98bd31Smaya struct v3d_prog_data **prog_data, 1015ed98bd31Smaya nir_shader *s, 1016ed98bd31Smaya void (*debug_output)(const char *msg, 1017ed98bd31Smaya void *debug_output_data), 1018ed98bd31Smaya void *debug_output_data, 1019ed98bd31Smaya int program_id, int variant_id, 1020ed98bd31Smaya uint32_t *final_assembly_size); 102101e04c3fSmrg 10227ec681f3Smrguint32_t v3d_prog_data_size(gl_shader_stage stage); 102301e04c3fSmrgvoid v3d_nir_to_vir(struct v3d_compile *c); 102401e04c3fSmrg 102501e04c3fSmrgvoid 
vir_compile_destroy(struct v3d_compile *c); 102601e04c3fSmrgconst char *vir_get_stage_name(struct v3d_compile *c); 102701e04c3fSmrgstruct qblock *vir_new_block(struct v3d_compile *c); 102801e04c3fSmrgvoid vir_set_emit_block(struct v3d_compile *c, struct qblock *block); 102901e04c3fSmrgvoid vir_link_blocks(struct qblock *predecessor, struct qblock *successor); 103001e04c3fSmrgstruct qblock *vir_entry_block(struct v3d_compile *c); 103101e04c3fSmrgstruct qblock *vir_exit_block(struct v3d_compile *c); 103201e04c3fSmrgstruct qinst *vir_add_inst(enum v3d_qpu_add_op op, struct qreg dst, 103301e04c3fSmrg struct qreg src0, struct qreg src1); 103401e04c3fSmrgstruct qinst *vir_mul_inst(enum v3d_qpu_mul_op op, struct qreg dst, 103501e04c3fSmrg struct qreg src0, struct qreg src1); 1036ed98bd31Smayastruct qinst *vir_branch_inst(struct v3d_compile *c, 1037ed98bd31Smaya enum v3d_qpu_branch_cond cond); 103801e04c3fSmrgvoid vir_remove_instruction(struct v3d_compile *c, struct qinst *qinst); 1039ed98bd31Smayauint32_t vir_get_uniform_index(struct v3d_compile *c, 1040ed98bd31Smaya enum quniform_contents contents, 1041ed98bd31Smaya uint32_t data); 104201e04c3fSmrgstruct qreg vir_uniform(struct v3d_compile *c, 104301e04c3fSmrg enum quniform_contents contents, 104401e04c3fSmrg uint32_t data); 104501e04c3fSmrgvoid vir_schedule_instructions(struct v3d_compile *c); 1046ed98bd31Smayavoid v3d_setup_spill_base(struct v3d_compile *c); 104701e04c3fSmrgstruct v3d_qpu_instr v3d_qpu_nop(void); 104801e04c3fSmrg 104901e04c3fSmrgstruct qreg vir_emit_def(struct v3d_compile *c, struct qinst *inst); 105001e04c3fSmrgstruct qinst *vir_emit_nondef(struct v3d_compile *c, struct qinst *inst); 105101e04c3fSmrgvoid vir_set_cond(struct qinst *inst, enum v3d_qpu_cond cond); 10527ec681f3Smrgenum v3d_qpu_cond vir_get_cond(struct qinst *inst); 10537ec681f3Smrgvoid vir_set_pf(struct v3d_compile *c, struct qinst *inst, enum v3d_qpu_pf pf); 10547ec681f3Smrgvoid vir_set_uf(struct v3d_compile *c, struct qinst *inst, enum 
v3d_qpu_uf uf); 105501e04c3fSmrgvoid vir_set_unpack(struct qinst *inst, int src, 105601e04c3fSmrg enum v3d_qpu_input_unpack unpack); 10577ec681f3Smrgvoid vir_set_pack(struct qinst *inst, enum v3d_qpu_output_pack pack); 105801e04c3fSmrg 105901e04c3fSmrgstruct qreg vir_get_temp(struct v3d_compile *c); 106001e04c3fSmrgvoid vir_calculate_live_intervals(struct v3d_compile *c); 106101e04c3fSmrgint vir_get_nsrc(struct qinst *inst); 106201e04c3fSmrgbool vir_has_side_effects(struct v3d_compile *c, struct qinst *inst); 106301e04c3fSmrgbool vir_get_add_op(struct qinst *inst, enum v3d_qpu_add_op *op); 106401e04c3fSmrgbool vir_get_mul_op(struct qinst *inst, enum v3d_qpu_mul_op *op); 106501e04c3fSmrgbool vir_is_raw_mov(struct qinst *inst); 10667ec681f3Smrgbool vir_is_tex(const struct v3d_device_info *devinfo, struct qinst *inst); 106701e04c3fSmrgbool vir_is_add(struct qinst *inst); 106801e04c3fSmrgbool vir_is_mul(struct qinst *inst); 106901e04c3fSmrgbool vir_writes_r3(const struct v3d_device_info *devinfo, struct qinst *inst); 107001e04c3fSmrgbool vir_writes_r4(const struct v3d_device_info *devinfo, struct qinst *inst); 107101e04c3fSmrgstruct qreg vir_follow_movs(struct v3d_compile *c, struct qreg reg); 107201e04c3fSmrguint8_t vir_channels_written(struct qinst *inst); 107301e04c3fSmrgstruct qreg ntq_get_src(struct v3d_compile *c, nir_src src, int i); 107401e04c3fSmrgvoid ntq_store_dest(struct v3d_compile *c, nir_dest *dest, int chan, 107501e04c3fSmrg struct qreg result); 10767ec681f3Smrgbool ntq_tmu_fifo_overflow(struct v3d_compile *c, uint32_t components); 10777ec681f3Smrgvoid ntq_add_pending_tmu_flush(struct v3d_compile *c, nir_dest *dest, 10787ec681f3Smrg uint32_t component_mask); 10797ec681f3Smrgvoid ntq_flush_tmu(struct v3d_compile *c); 108001e04c3fSmrgvoid vir_emit_thrsw(struct v3d_compile *c); 108101e04c3fSmrg 108201e04c3fSmrgvoid vir_dump(struct v3d_compile *c); 108301e04c3fSmrgvoid vir_dump_inst(struct v3d_compile *c, struct qinst *inst); 1084ed98bd31Smayavoid 
vir_dump_uniform(enum quniform_contents contents, uint32_t data); 108501e04c3fSmrg 108601e04c3fSmrgvoid vir_validate(struct v3d_compile *c); 108701e04c3fSmrg 108801e04c3fSmrgvoid vir_optimize(struct v3d_compile *c); 108901e04c3fSmrgbool vir_opt_algebraic(struct v3d_compile *c); 109001e04c3fSmrgbool vir_opt_constant_folding(struct v3d_compile *c); 109101e04c3fSmrgbool vir_opt_copy_propagate(struct v3d_compile *c); 109201e04c3fSmrgbool vir_opt_dead_code(struct v3d_compile *c); 109301e04c3fSmrgbool vir_opt_peephole_sf(struct v3d_compile *c); 1094ed98bd31Smayabool vir_opt_redundant_flags(struct v3d_compile *c); 109501e04c3fSmrgbool vir_opt_small_immediates(struct v3d_compile *c); 109601e04c3fSmrgbool vir_opt_vpm(struct v3d_compile *c); 10977ec681f3Smrgbool vir_opt_constant_alu(struct v3d_compile *c); 109801e04c3fSmrgvoid v3d_nir_lower_blend(nir_shader *s, struct v3d_compile *c); 109901e04c3fSmrgvoid v3d_nir_lower_io(nir_shader *s, struct v3d_compile *c); 11007ec681f3Smrgvoid v3d_nir_lower_line_smooth(nir_shader *shader); 11017ec681f3Smrgvoid v3d_nir_lower_logic_ops(nir_shader *s, struct v3d_compile *c); 11027ec681f3Smrgvoid v3d_nir_lower_robust_buffer_access(nir_shader *shader, struct v3d_compile *c); 1103ed98bd31Smayavoid v3d_nir_lower_scratch(nir_shader *s); 110401e04c3fSmrgvoid v3d_nir_lower_txf_ms(nir_shader *s, struct v3d_compile *c); 1105ed98bd31Smayavoid v3d_nir_lower_image_load_store(nir_shader *s); 110601e04c3fSmrgvoid vir_lower_uniforms(struct v3d_compile *c); 110701e04c3fSmrg 110801e04c3fSmrgvoid v3d33_vir_vpm_read_setup(struct v3d_compile *c, int num_components); 110901e04c3fSmrgvoid v3d33_vir_vpm_write_setup(struct v3d_compile *c); 111001e04c3fSmrgvoid v3d33_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr); 111101e04c3fSmrgvoid v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr); 1112ed98bd31Smayavoid v3d40_vir_emit_image_load_store(struct v3d_compile *c, 1113ed98bd31Smaya nir_intrinsic_instr *instr); 111401e04c3fSmrg 
111501e04c3fSmrgvoid v3d_vir_to_qpu(struct v3d_compile *c, struct qpu_reg *temp_registers); 111601e04c3fSmrguint32_t v3d_qpu_schedule_instructions(struct v3d_compile *c); 111701e04c3fSmrgvoid qpu_validate(struct v3d_compile *c); 111801e04c3fSmrgstruct qpu_reg *v3d_register_allocate(struct v3d_compile *c, bool *spilled); 111901e04c3fSmrgbool vir_init_reg_sets(struct v3d_compiler *compiler); 112001e04c3fSmrg 11217ec681f3Smrgint v3d_shaderdb_dump(struct v3d_compile *c, char **shaderdb_str); 11227ec681f3Smrg 1123ed98bd31Smayabool v3d_gl_format_is_return_32(GLenum format); 112401e04c3fSmrg 11257ec681f3Smrguint32_t 11267ec681f3Smrgv3d_get_op_for_atomic_add(nir_intrinsic_instr *instr, unsigned src); 11277ec681f3Smrg 112801e04c3fSmrgstatic inline bool 112901e04c3fSmrgquniform_contents_is_texture_p0(enum quniform_contents contents) 113001e04c3fSmrg{ 113101e04c3fSmrg return (contents >= QUNIFORM_TEXTURE_CONFIG_P0_0 && 113201e04c3fSmrg contents < (QUNIFORM_TEXTURE_CONFIG_P0_0 + 113301e04c3fSmrg V3D_MAX_TEXTURE_SAMPLERS)); 113401e04c3fSmrg} 113501e04c3fSmrg 1136ed98bd31Smayastatic inline bool 1137ed98bd31Smayavir_in_nonuniform_control_flow(struct v3d_compile *c) 1138ed98bd31Smaya{ 1139ed98bd31Smaya return c->execute.file != QFILE_NULL; 1140ed98bd31Smaya} 1141ed98bd31Smaya 114201e04c3fSmrgstatic inline struct qreg 114301e04c3fSmrgvir_uniform_ui(struct v3d_compile *c, uint32_t ui) 114401e04c3fSmrg{ 114501e04c3fSmrg return vir_uniform(c, QUNIFORM_CONSTANT, ui); 114601e04c3fSmrg} 114701e04c3fSmrg 114801e04c3fSmrgstatic inline struct qreg 114901e04c3fSmrgvir_uniform_f(struct v3d_compile *c, float f) 115001e04c3fSmrg{ 115101e04c3fSmrg return vir_uniform(c, QUNIFORM_CONSTANT, fui(f)); 115201e04c3fSmrg} 115301e04c3fSmrg 115401e04c3fSmrg#define VIR_ALU0(name, vir_inst, op) \ 115501e04c3fSmrgstatic inline struct qreg \ 115601e04c3fSmrgvir_##name(struct v3d_compile *c) \ 115701e04c3fSmrg{ \ 115801e04c3fSmrg return vir_emit_def(c, vir_inst(op, c->undef, \ 115901e04c3fSmrg c->undef, 
c->undef)); \ 116001e04c3fSmrg} \ 116101e04c3fSmrgstatic inline struct qinst * \ 116201e04c3fSmrgvir_##name##_dest(struct v3d_compile *c, struct qreg dest) \ 116301e04c3fSmrg{ \ 116401e04c3fSmrg return vir_emit_nondef(c, vir_inst(op, dest, \ 116501e04c3fSmrg c->undef, c->undef)); \ 116601e04c3fSmrg} 116701e04c3fSmrg 116801e04c3fSmrg#define VIR_ALU1(name, vir_inst, op) \ 116901e04c3fSmrgstatic inline struct qreg \ 117001e04c3fSmrgvir_##name(struct v3d_compile *c, struct qreg a) \ 117101e04c3fSmrg{ \ 117201e04c3fSmrg return vir_emit_def(c, vir_inst(op, c->undef, \ 117301e04c3fSmrg a, c->undef)); \ 117401e04c3fSmrg} \ 117501e04c3fSmrgstatic inline struct qinst * \ 117601e04c3fSmrgvir_##name##_dest(struct v3d_compile *c, struct qreg dest, \ 117701e04c3fSmrg struct qreg a) \ 117801e04c3fSmrg{ \ 117901e04c3fSmrg return vir_emit_nondef(c, vir_inst(op, dest, a, \ 118001e04c3fSmrg c->undef)); \ 118101e04c3fSmrg} 118201e04c3fSmrg 118301e04c3fSmrg#define VIR_ALU2(name, vir_inst, op) \ 118401e04c3fSmrgstatic inline struct qreg \ 118501e04c3fSmrgvir_##name(struct v3d_compile *c, struct qreg a, struct qreg b) \ 118601e04c3fSmrg{ \ 118701e04c3fSmrg return vir_emit_def(c, vir_inst(op, c->undef, a, b)); \ 118801e04c3fSmrg} \ 118901e04c3fSmrgstatic inline struct qinst * \ 119001e04c3fSmrgvir_##name##_dest(struct v3d_compile *c, struct qreg dest, \ 119101e04c3fSmrg struct qreg a, struct qreg b) \ 119201e04c3fSmrg{ \ 119301e04c3fSmrg return vir_emit_nondef(c, vir_inst(op, dest, a, b)); \ 119401e04c3fSmrg} 119501e04c3fSmrg 119601e04c3fSmrg#define VIR_NODST_0(name, vir_inst, op) \ 119701e04c3fSmrgstatic inline struct qinst * \ 119801e04c3fSmrgvir_##name(struct v3d_compile *c) \ 119901e04c3fSmrg{ \ 120001e04c3fSmrg return vir_emit_nondef(c, vir_inst(op, c->undef, \ 120101e04c3fSmrg c->undef, c->undef)); \ 120201e04c3fSmrg} 120301e04c3fSmrg 120401e04c3fSmrg#define VIR_NODST_1(name, vir_inst, op) \ 120501e04c3fSmrgstatic inline struct qinst * \ 120601e04c3fSmrgvir_##name(struct v3d_compile 
*c, struct qreg a) \ 120701e04c3fSmrg{ \ 120801e04c3fSmrg return vir_emit_nondef(c, vir_inst(op, c->undef, \ 120901e04c3fSmrg a, c->undef)); \ 121001e04c3fSmrg} 121101e04c3fSmrg 121201e04c3fSmrg#define VIR_NODST_2(name, vir_inst, op) \ 121301e04c3fSmrgstatic inline struct qinst * \ 121401e04c3fSmrgvir_##name(struct v3d_compile *c, struct qreg a, struct qreg b) \ 121501e04c3fSmrg{ \ 121601e04c3fSmrg return vir_emit_nondef(c, vir_inst(op, c->undef, \ 121701e04c3fSmrg a, b)); \ 121801e04c3fSmrg} 121901e04c3fSmrg 122001e04c3fSmrg#define VIR_SFU(name) \ 122101e04c3fSmrgstatic inline struct qreg \ 122201e04c3fSmrgvir_##name(struct v3d_compile *c, struct qreg a) \ 122301e04c3fSmrg{ \ 122401e04c3fSmrg if (c->devinfo->ver >= 41) { \ 122501e04c3fSmrg return vir_emit_def(c, vir_add_inst(V3D_QPU_A_##name, \ 122601e04c3fSmrg c->undef, \ 122701e04c3fSmrg a, c->undef)); \ 122801e04c3fSmrg } else { \ 122901e04c3fSmrg vir_FMOV_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_##name), a); \ 123001e04c3fSmrg return vir_FMOV(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_R4)); \ 123101e04c3fSmrg } \ 123201e04c3fSmrg} \ 123301e04c3fSmrgstatic inline struct qinst * \ 123401e04c3fSmrgvir_##name##_dest(struct v3d_compile *c, struct qreg dest, \ 123501e04c3fSmrg struct qreg a) \ 123601e04c3fSmrg{ \ 123701e04c3fSmrg if (c->devinfo->ver >= 41) { \ 123801e04c3fSmrg return vir_emit_nondef(c, vir_add_inst(V3D_QPU_A_##name, \ 123901e04c3fSmrg dest, \ 124001e04c3fSmrg a, c->undef)); \ 124101e04c3fSmrg } else { \ 124201e04c3fSmrg vir_FMOV_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_##name), a); \ 124301e04c3fSmrg return vir_FMOV_dest(c, dest, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_R4)); \ 124401e04c3fSmrg } \ 124501e04c3fSmrg} 124601e04c3fSmrg 124701e04c3fSmrg#define VIR_A_ALU2(name) VIR_ALU2(name, vir_add_inst, V3D_QPU_A_##name) 124801e04c3fSmrg#define VIR_M_ALU2(name) VIR_ALU2(name, vir_mul_inst, V3D_QPU_M_##name) 124901e04c3fSmrg#define VIR_A_ALU1(name) VIR_ALU1(name, vir_add_inst, V3D_QPU_A_##name) 
125001e04c3fSmrg#define VIR_M_ALU1(name) VIR_ALU1(name, vir_mul_inst, V3D_QPU_M_##name) 125101e04c3fSmrg#define VIR_A_ALU0(name) VIR_ALU0(name, vir_add_inst, V3D_QPU_A_##name) 125201e04c3fSmrg#define VIR_M_ALU0(name) VIR_ALU0(name, vir_mul_inst, V3D_QPU_M_##name) 125301e04c3fSmrg#define VIR_A_NODST_2(name) VIR_NODST_2(name, vir_add_inst, V3D_QPU_A_##name) 125401e04c3fSmrg#define VIR_M_NODST_2(name) VIR_NODST_2(name, vir_mul_inst, V3D_QPU_M_##name) 125501e04c3fSmrg#define VIR_A_NODST_1(name) VIR_NODST_1(name, vir_add_inst, V3D_QPU_A_##name) 125601e04c3fSmrg#define VIR_M_NODST_1(name) VIR_NODST_1(name, vir_mul_inst, V3D_QPU_M_##name) 125701e04c3fSmrg#define VIR_A_NODST_0(name) VIR_NODST_0(name, vir_add_inst, V3D_QPU_A_##name) 125801e04c3fSmrg 125901e04c3fSmrgVIR_A_ALU2(FADD) 126001e04c3fSmrgVIR_A_ALU2(VFPACK) 126101e04c3fSmrgVIR_A_ALU2(FSUB) 126201e04c3fSmrgVIR_A_ALU2(FMIN) 126301e04c3fSmrgVIR_A_ALU2(FMAX) 126401e04c3fSmrg 126501e04c3fSmrgVIR_A_ALU2(ADD) 126601e04c3fSmrgVIR_A_ALU2(SUB) 126701e04c3fSmrgVIR_A_ALU2(SHL) 126801e04c3fSmrgVIR_A_ALU2(SHR) 126901e04c3fSmrgVIR_A_ALU2(ASR) 127001e04c3fSmrgVIR_A_ALU2(ROR) 127101e04c3fSmrgVIR_A_ALU2(MIN) 127201e04c3fSmrgVIR_A_ALU2(MAX) 127301e04c3fSmrgVIR_A_ALU2(UMIN) 127401e04c3fSmrgVIR_A_ALU2(UMAX) 127501e04c3fSmrgVIR_A_ALU2(AND) 127601e04c3fSmrgVIR_A_ALU2(OR) 127701e04c3fSmrgVIR_A_ALU2(XOR) 127801e04c3fSmrgVIR_A_ALU2(VADD) 127901e04c3fSmrgVIR_A_ALU2(VSUB) 128001e04c3fSmrgVIR_A_NODST_2(STVPMV) 12817ec681f3SmrgVIR_A_NODST_2(STVPMD) 128201e04c3fSmrgVIR_A_ALU1(NOT) 128301e04c3fSmrgVIR_A_ALU1(NEG) 128401e04c3fSmrgVIR_A_ALU1(FLAPUSH) 128501e04c3fSmrgVIR_A_ALU1(FLBPUSH) 128601e04c3fSmrgVIR_A_ALU1(FLPOP) 12877ec681f3SmrgVIR_A_ALU0(FLAFIRST) 12887ec681f3SmrgVIR_A_ALU0(FLNAFIRST) 128901e04c3fSmrgVIR_A_ALU1(SETMSF) 129001e04c3fSmrgVIR_A_ALU1(SETREVF) 129101e04c3fSmrgVIR_A_ALU0(TIDX) 129201e04c3fSmrgVIR_A_ALU0(EIDX) 129301e04c3fSmrgVIR_A_ALU1(LDVPMV_IN) 129401e04c3fSmrgVIR_A_ALU1(LDVPMV_OUT) 12957ec681f3SmrgVIR_A_ALU1(LDVPMD_IN) 
12967ec681f3SmrgVIR_A_ALU1(LDVPMD_OUT) 12977ec681f3SmrgVIR_A_ALU2(LDVPMG_IN) 12987ec681f3SmrgVIR_A_ALU2(LDVPMG_OUT) 129901e04c3fSmrgVIR_A_ALU0(TMUWT) 130001e04c3fSmrg 13017ec681f3SmrgVIR_A_ALU0(IID) 130201e04c3fSmrgVIR_A_ALU0(FXCD) 130301e04c3fSmrgVIR_A_ALU0(XCD) 130401e04c3fSmrgVIR_A_ALU0(FYCD) 130501e04c3fSmrgVIR_A_ALU0(YCD) 130601e04c3fSmrgVIR_A_ALU0(MSF) 130701e04c3fSmrgVIR_A_ALU0(REVF) 1308ed98bd31SmayaVIR_A_ALU0(BARRIERID) 13097ec681f3SmrgVIR_A_ALU0(SAMPID) 131001e04c3fSmrgVIR_A_NODST_1(VPMSETUP) 131101e04c3fSmrgVIR_A_NODST_0(VPMWT) 131201e04c3fSmrgVIR_A_ALU2(FCMP) 131301e04c3fSmrgVIR_A_ALU2(VFMAX) 131401e04c3fSmrg 131501e04c3fSmrgVIR_A_ALU1(FROUND) 131601e04c3fSmrgVIR_A_ALU1(FTOIN) 131701e04c3fSmrgVIR_A_ALU1(FTRUNC) 131801e04c3fSmrgVIR_A_ALU1(FTOIZ) 131901e04c3fSmrgVIR_A_ALU1(FFLOOR) 132001e04c3fSmrgVIR_A_ALU1(FTOUZ) 132101e04c3fSmrgVIR_A_ALU1(FCEIL) 132201e04c3fSmrgVIR_A_ALU1(FTOC) 132301e04c3fSmrg 132401e04c3fSmrgVIR_A_ALU1(FDX) 132501e04c3fSmrgVIR_A_ALU1(FDY) 132601e04c3fSmrg 132701e04c3fSmrgVIR_A_ALU1(ITOF) 132801e04c3fSmrgVIR_A_ALU1(CLZ) 132901e04c3fSmrgVIR_A_ALU1(UTOF) 133001e04c3fSmrg 133101e04c3fSmrgVIR_M_ALU2(UMUL24) 133201e04c3fSmrgVIR_M_ALU2(FMUL) 133301e04c3fSmrgVIR_M_ALU2(SMUL24) 133401e04c3fSmrgVIR_M_NODST_2(MULTOP) 133501e04c3fSmrg 133601e04c3fSmrgVIR_M_ALU1(MOV) 133701e04c3fSmrgVIR_M_ALU1(FMOV) 133801e04c3fSmrg 133901e04c3fSmrgVIR_SFU(RECIP) 134001e04c3fSmrgVIR_SFU(RSQRT) 134101e04c3fSmrgVIR_SFU(EXP) 134201e04c3fSmrgVIR_SFU(LOG) 134301e04c3fSmrgVIR_SFU(SIN) 134401e04c3fSmrgVIR_SFU(RSQRT2) 134501e04c3fSmrg 134601e04c3fSmrgstatic inline struct qinst * 134701e04c3fSmrgvir_MOV_cond(struct v3d_compile *c, enum v3d_qpu_cond cond, 134801e04c3fSmrg struct qreg dest, struct qreg src) 134901e04c3fSmrg{ 135001e04c3fSmrg struct qinst *mov = vir_MOV_dest(c, dest, src); 135101e04c3fSmrg vir_set_cond(mov, cond); 135201e04c3fSmrg return mov; 135301e04c3fSmrg} 135401e04c3fSmrg 135501e04c3fSmrgstatic inline struct qreg 135601e04c3fSmrgvir_SEL(struct 
v3d_compile *c, enum v3d_qpu_cond cond, 135701e04c3fSmrg struct qreg src0, struct qreg src1) 135801e04c3fSmrg{ 135901e04c3fSmrg struct qreg t = vir_get_temp(c); 136001e04c3fSmrg vir_MOV_dest(c, t, src1); 136101e04c3fSmrg vir_MOV_cond(c, cond, t, src0); 136201e04c3fSmrg return t; 136301e04c3fSmrg} 136401e04c3fSmrg 136501e04c3fSmrgstatic inline struct qinst * 136601e04c3fSmrgvir_NOP(struct v3d_compile *c) 136701e04c3fSmrg{ 136801e04c3fSmrg return vir_emit_nondef(c, vir_add_inst(V3D_QPU_A_NOP, 136901e04c3fSmrg c->undef, c->undef, c->undef)); 137001e04c3fSmrg} 137101e04c3fSmrg 137201e04c3fSmrgstatic inline struct qreg 137301e04c3fSmrgvir_LDTMU(struct v3d_compile *c) 137401e04c3fSmrg{ 137501e04c3fSmrg if (c->devinfo->ver >= 41) { 137601e04c3fSmrg struct qinst *ldtmu = vir_add_inst(V3D_QPU_A_NOP, c->undef, 137701e04c3fSmrg c->undef, c->undef); 137801e04c3fSmrg ldtmu->qpu.sig.ldtmu = true; 137901e04c3fSmrg 138001e04c3fSmrg return vir_emit_def(c, ldtmu); 138101e04c3fSmrg } else { 138201e04c3fSmrg vir_NOP(c)->qpu.sig.ldtmu = true; 138301e04c3fSmrg return vir_MOV(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_R4)); 138401e04c3fSmrg } 138501e04c3fSmrg} 138601e04c3fSmrg 138701e04c3fSmrgstatic inline struct qreg 138801e04c3fSmrgvir_UMUL(struct v3d_compile *c, struct qreg src0, struct qreg src1) 138901e04c3fSmrg{ 139001e04c3fSmrg vir_MULTOP(c, src0, src1); 139101e04c3fSmrg return vir_UMUL24(c, src0, src1); 139201e04c3fSmrg} 139301e04c3fSmrg 139401e04c3fSmrgstatic inline struct qreg 13957ec681f3Smrgvir_TLBU_COLOR_READ(struct v3d_compile *c, uint32_t config) 139601e04c3fSmrg{ 13977ec681f3Smrg assert(c->devinfo->ver >= 41); /* XXX */ 13987ec681f3Smrg assert((config & 0xffffff00) == 0xffffff00); 13997ec681f3Smrg 14007ec681f3Smrg struct qinst *ldtlb = vir_add_inst(V3D_QPU_A_NOP, c->undef, 14017ec681f3Smrg c->undef, c->undef); 14027ec681f3Smrg ldtlb->qpu.sig.ldtlbu = true; 14037ec681f3Smrg ldtlb->uniform = vir_get_uniform_index(c, QUNIFORM_CONSTANT, config); 14047ec681f3Smrg return 
vir_emit_def(c, ldtlb); 140501e04c3fSmrg} 140601e04c3fSmrg 140701e04c3fSmrgstatic inline struct qreg 14087ec681f3Smrgvir_TLB_COLOR_READ(struct v3d_compile *c) 140901e04c3fSmrg{ 14107ec681f3Smrg assert(c->devinfo->ver >= 41); /* XXX */ 14117ec681f3Smrg 14127ec681f3Smrg struct qinst *ldtlb = vir_add_inst(V3D_QPU_A_NOP, c->undef, 14137ec681f3Smrg c->undef, c->undef); 14147ec681f3Smrg ldtlb->qpu.sig.ldtlb = true; 14157ec681f3Smrg return vir_emit_def(c, ldtlb); 141601e04c3fSmrg} 141701e04c3fSmrg 141801e04c3fSmrgstatic inline struct qinst * 1419ed98bd31Smayavir_BRANCH(struct v3d_compile *c, enum v3d_qpu_branch_cond cond) 142001e04c3fSmrg{ 142101e04c3fSmrg /* The actual uniform_data value will be set at scheduling time */ 1422ed98bd31Smaya return vir_emit_nondef(c, vir_branch_inst(c, cond)); 142301e04c3fSmrg} 142401e04c3fSmrg 142501e04c3fSmrg#define vir_for_each_block(block, c) \ 142601e04c3fSmrg list_for_each_entry(struct qblock, block, &c->blocks, link) 142701e04c3fSmrg 142801e04c3fSmrg#define vir_for_each_block_rev(block, c) \ 142901e04c3fSmrg list_for_each_entry_rev(struct qblock, block, &c->blocks, link) 143001e04c3fSmrg 143101e04c3fSmrg/* Loop over the non-NULL members of the successors array. */ 143201e04c3fSmrg#define vir_for_each_successor(succ, block) \ 143301e04c3fSmrg for (struct qblock *succ = block->successors[0]; \ 143401e04c3fSmrg succ != NULL; \ 143501e04c3fSmrg succ = (succ == block->successors[1] ? 
NULL : \ 143601e04c3fSmrg block->successors[1])) 143701e04c3fSmrg 143801e04c3fSmrg#define vir_for_each_inst(inst, block) \ 143901e04c3fSmrg list_for_each_entry(struct qinst, inst, &block->instructions, link) 144001e04c3fSmrg 144101e04c3fSmrg#define vir_for_each_inst_rev(inst, block) \ 144201e04c3fSmrg list_for_each_entry_rev(struct qinst, inst, &block->instructions, link) 144301e04c3fSmrg 144401e04c3fSmrg#define vir_for_each_inst_safe(inst, block) \ 144501e04c3fSmrg list_for_each_entry_safe(struct qinst, inst, &block->instructions, link) 144601e04c3fSmrg 144701e04c3fSmrg#define vir_for_each_inst_inorder(inst, c) \ 144801e04c3fSmrg vir_for_each_block(_block, c) \ 144901e04c3fSmrg vir_for_each_inst(inst, _block) 145001e04c3fSmrg 1451ed98bd31Smaya#define vir_for_each_inst_inorder_safe(inst, c) \ 1452ed98bd31Smaya vir_for_each_block(_block, c) \ 1453ed98bd31Smaya vir_for_each_inst_safe(inst, _block) 1454ed98bd31Smaya 145501e04c3fSmrg#endif /* V3D_COMPILER_H */ 1456