101e04c3fSmrg/*
201e04c3fSmrg * Copyright © 2016 Broadcom
301e04c3fSmrg *
401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a
501e04c3fSmrg * copy of this software and associated documentation files (the "Software"),
601e04c3fSmrg * to deal in the Software without restriction, including without limitation
701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the
901e04c3fSmrg * Software is furnished to do so, subject to the following conditions:
1001e04c3fSmrg *
1101e04c3fSmrg * The above copyright notice and this permission notice (including the next
1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the
1301e04c3fSmrg * Software.
1401e04c3fSmrg *
1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
2101e04c3fSmrg * IN THE SOFTWARE.
2201e04c3fSmrg */
2301e04c3fSmrg
2401e04c3fSmrg#ifndef V3D_COMPILER_H
2501e04c3fSmrg#define V3D_COMPILER_H
2601e04c3fSmrg
2701e04c3fSmrg#include <assert.h>
2801e04c3fSmrg#include <stdio.h>
2901e04c3fSmrg#include <stdlib.h>
3001e04c3fSmrg#include <stdbool.h>
3101e04c3fSmrg#include <stdint.h>
3201e04c3fSmrg#include <string.h>
3301e04c3fSmrg
3401e04c3fSmrg#include "util/macros.h"
3501e04c3fSmrg#include "common/v3d_debug.h"
3601e04c3fSmrg#include "common/v3d_device_info.h"
37ed98bd31Smaya#include "common/v3d_limits.h"
3801e04c3fSmrg#include "compiler/nir/nir.h"
3901e04c3fSmrg#include "util/list.h"
4001e04c3fSmrg#include "util/u_math.h"
4101e04c3fSmrg
4201e04c3fSmrg#include "qpu/qpu_instr.h"
4301e04c3fSmrg#include "pipe/p_state.h"
4401e04c3fSmrg
457ec681f3Smrg/**
467ec681f3Smrg * Maximum number of outstanding TMU operations we can queue for execution.
477ec681f3Smrg *
487ec681f3Smrg * This is mostly limited by the size of the TMU fifos. The Input and Config
497ec681f3Smrg * fifos can stall, but we prefer that than injecting TMU flushes manually
507ec681f3Smrg * in the driver, so we can ignore these, but we can't overflow the Output fifo,
517ec681f3Smrg * which has 16 / threads per-thread entries, meaning that the maximum number
527ec681f3Smrg * of outstanding LDTMUs we can ever have is 8, for a 2-way threaded shader.
537ec681f3Smrg * This means that at most we can have 8 outstanding TMU loads, if each load
547ec681f3Smrg * is just one component.
557ec681f3Smrg *
567ec681f3Smrg * NOTE: we could actually have a larger value here because TMU stores don't
577ec681f3Smrg * consume any entries in the Output fifo (so we could have any number of
587ec681f3Smrg * outstanding stores) and the driver keeps track of used Output fifo entries
 * and will flush if we ever need more than 8, but since loads are much more
607ec681f3Smrg * common than stores, it is probably not worth it.
617ec681f3Smrg */
627ec681f3Smrg#define MAX_TMU_QUEUE_SIZE 8
637ec681f3Smrg
647ec681f3Smrg/**
657ec681f3Smrg * Maximum offset distance in bytes between two consecutive constant UBO loads
667ec681f3Smrg * for the same UBO where we would favor updating the unifa address by emitting
677ec681f3Smrg * dummy ldunifa instructions to avoid writing the unifa register.
687ec681f3Smrg */
697ec681f3Smrg#define MAX_UNIFA_SKIP_DISTANCE 16
707ec681f3Smrg
7101e04c3fSmrgstruct nir_builder;
7201e04c3fSmrg
struct v3d_fs_inputs {
        /**
         * Array of the meanings of the VPM inputs this shader needs.
         *
         * It doesn't include those that aren't part of the VPM, like
         * point/line coordinates.
         */
        struct v3d_varying_slot *input_slots;
        /** Number of valid entries in input_slots. */
        uint32_t num_inputs;
};
8301e04c3fSmrg
/** Register files a qreg may refer to. */
enum qfile {
        /** An unused source or destination register. */
        QFILE_NULL,

        /** A physical register, such as the W coordinate payload. */
        QFILE_REG,
        /** One of the registers for fixed function interactions. */
        QFILE_MAGIC,

        /**
         *  A virtual register, that will be allocated to actual accumulator
         * or physical registers later.
         */
        QFILE_TEMP,

        /**
         * VPM reads use this with an index value to say what part of the VPM
         * is being read.
         */
        QFILE_VPM,

        /**
         * Stores an immediate value in the index field that will be used
         * directly by qpu_load_imm().
         */
        QFILE_LOAD_IMM,

        /**
         * Stores an immediate value in the index field that can be turned
         * into a small immediate field by qpu_encode_small_immediate().
         */
        QFILE_SMALL_IMM,
};
11701e04c3fSmrg
/**
 * A reference to a QPU register or a virtual temp register.
 */
struct qreg {
        /* Which register file (or special meaning) the index refers to. */
        enum qfile file;
        /* Register/temp number or immediate payload, depending on file
         * (see the enum qfile per-value documentation).
         */
        uint32_t index;
};
12501e04c3fSmrg
12601e04c3fSmrgstatic inline struct qreg vir_reg(enum qfile file, uint32_t index)
12701e04c3fSmrg{
12801e04c3fSmrg        return (struct qreg){file, index};
12901e04c3fSmrg}
13001e04c3fSmrg
131ed98bd31Smayastatic inline struct qreg vir_magic_reg(uint32_t index)
132ed98bd31Smaya{
133ed98bd31Smaya        return (struct qreg){QFILE_MAGIC, index};
134ed98bd31Smaya}
135ed98bd31Smaya
136ed98bd31Smayastatic inline struct qreg vir_nop_reg(void)
137ed98bd31Smaya{
138ed98bd31Smaya        return (struct qreg){QFILE_NULL, 0};
139ed98bd31Smaya}
140ed98bd31Smaya
/**
 * A reference to an actual register at the QPU level, for register
 * allocation.
 */
struct qpu_reg {
        /* True if index names a magic (fixed-function) register rather
         * than an entry in the physical register file.
         */
        bool magic;
        /* True if this operand is a small immediate -- presumably encoded
         * via qpu_encode_small_immediate(); confirm in the QPU encoder.
         */
        bool smimm;
        /* Register number within the selected file. */
        int index;
};
15001e04c3fSmrg
/** A single VIR instruction: a QPU instruction plus virtual operands. */
struct qinst {
        /** Entry in qblock->instructions */
        struct list_head link;

        /**
         * The instruction being wrapped.  Its condition codes, pack flags,
         * signals, etc. will all be used, with just the register references
         * being replaced by the contents of qinst->dst and qinst->src[].
         */
        struct v3d_qpu_instr qpu;

        /* Pre-register-allocation references to src/dst registers */
        struct qreg dst;
        struct qreg src[3];
        /* Marks the shader's final thread-switch instruction -- inferred
         * from the name; confirm against the thrsw emission code.
         */
        bool is_last_thrsw;

        /* If the instruction reads a uniform (other than through src[i].file
         * == QFILE_UNIF), that uniform's index in c->uniform_contents.  ~0
         * otherwise.
         */
        int uniform;
};
17301e04c3fSmrg
/**
 * The kinds of data a shader uniform stream entry can contain.  The driver
 * fills in the actual values at draw/dispatch time based on these tags.
 */
enum quniform_contents {
        /**
         * Indicates that a constant 32-bit value is copied from the program's
         * uniform contents.
         */
        QUNIFORM_CONSTANT,
        /**
         * Indicates that the program's uniform contents are used as an index
         * into the GL uniform storage.
         */
        QUNIFORM_UNIFORM,

        /** @{
         * Scaling factors from clip coordinates to relative to the viewport
         * center.
         *
         * This is used by the coordinate and vertex shaders to produce the
         * 32-bit entry consisting of 2 16-bit fields with 12.4 signed fixed
         * point offsets from the viewport center.
         */
        QUNIFORM_VIEWPORT_X_SCALE,
        QUNIFORM_VIEWPORT_Y_SCALE,
        /** @} */

        QUNIFORM_VIEWPORT_Z_OFFSET,
        QUNIFORM_VIEWPORT_Z_SCALE,

        QUNIFORM_USER_CLIP_PLANE,

        /**
         * A reference to a V3D 3.x texture config parameter 0 uniform.
         *
         * This is a uniform implicitly loaded with a QPU_W_TMU* write, which
         * defines texture type, miplevels, and such.  It will be found as a
         * parameter to the first QOP_TEX_[STRB] instruction in a sequence.
         */
        QUNIFORM_TEXTURE_CONFIG_P0_0,
        QUNIFORM_TEXTURE_CONFIG_P0_1,
        QUNIFORM_TEXTURE_CONFIG_P0_2,
        QUNIFORM_TEXTURE_CONFIG_P0_3,
        QUNIFORM_TEXTURE_CONFIG_P0_4,
        QUNIFORM_TEXTURE_CONFIG_P0_5,
        QUNIFORM_TEXTURE_CONFIG_P0_6,
        QUNIFORM_TEXTURE_CONFIG_P0_7,
        QUNIFORM_TEXTURE_CONFIG_P0_8,
        QUNIFORM_TEXTURE_CONFIG_P0_9,
        QUNIFORM_TEXTURE_CONFIG_P0_10,
        QUNIFORM_TEXTURE_CONFIG_P0_11,
        QUNIFORM_TEXTURE_CONFIG_P0_12,
        QUNIFORM_TEXTURE_CONFIG_P0_13,
        QUNIFORM_TEXTURE_CONFIG_P0_14,
        QUNIFORM_TEXTURE_CONFIG_P0_15,
        QUNIFORM_TEXTURE_CONFIG_P0_16,
        QUNIFORM_TEXTURE_CONFIG_P0_17,
        QUNIFORM_TEXTURE_CONFIG_P0_18,
        QUNIFORM_TEXTURE_CONFIG_P0_19,
        QUNIFORM_TEXTURE_CONFIG_P0_20,
        QUNIFORM_TEXTURE_CONFIG_P0_21,
        QUNIFORM_TEXTURE_CONFIG_P0_22,
        QUNIFORM_TEXTURE_CONFIG_P0_23,
        QUNIFORM_TEXTURE_CONFIG_P0_24,
        QUNIFORM_TEXTURE_CONFIG_P0_25,
        QUNIFORM_TEXTURE_CONFIG_P0_26,
        QUNIFORM_TEXTURE_CONFIG_P0_27,
        QUNIFORM_TEXTURE_CONFIG_P0_28,
        QUNIFORM_TEXTURE_CONFIG_P0_29,
        QUNIFORM_TEXTURE_CONFIG_P0_30,
        QUNIFORM_TEXTURE_CONFIG_P0_31,
        QUNIFORM_TEXTURE_CONFIG_P0_32,

        /**
         * A reference to a V3D 3.x texture config parameter 1 uniform.
         *
         * This is a uniform implicitly loaded with a QPU_W_TMU* write, which
         * has the pointer to the indirect texture state.  Our data[] field
         * will have a packed p1 value, but the address field will be just
         * which texture unit's texture should be referenced.
         */
        QUNIFORM_TEXTURE_CONFIG_P1,

        /* A V3D 4.x texture config parameter.  The high 8 bits will be
         * which texture or sampler is being sampled, and the driver must
         * replace the address field with the appropriate address.
         */
        QUNIFORM_TMU_CONFIG_P0,
        QUNIFORM_TMU_CONFIG_P1,

        QUNIFORM_IMAGE_TMU_CONFIG_P0,

        QUNIFORM_TEXTURE_FIRST_LEVEL,

        /* Texture dimension/level queries for the texture given by the data
         * value.
         */
        QUNIFORM_TEXTURE_WIDTH,
        QUNIFORM_TEXTURE_HEIGHT,
        QUNIFORM_TEXTURE_DEPTH,
        QUNIFORM_TEXTURE_ARRAY_SIZE,
        QUNIFORM_TEXTURE_LEVELS,
        QUNIFORM_TEXTURE_SAMPLES,

        QUNIFORM_UBO_ADDR,

        QUNIFORM_TEXRECT_SCALE_X,
        QUNIFORM_TEXRECT_SCALE_Y,

        /* Returns the base offset of the SSBO given by the data value. */
        QUNIFORM_SSBO_OFFSET,

        /* Returns the size of the SSBO or UBO given by the data value. */
        QUNIFORM_GET_SSBO_SIZE,
        QUNIFORM_GET_UBO_SIZE,

        /* Sizes (in pixels) of a shader image given by the data value. */
        QUNIFORM_IMAGE_WIDTH,
        QUNIFORM_IMAGE_HEIGHT,
        QUNIFORM_IMAGE_DEPTH,
        QUNIFORM_IMAGE_ARRAY_SIZE,

        QUNIFORM_LINE_WIDTH,

        /* The line width sent to hardware. This includes the expanded width
         * when anti-aliasing is enabled.
         */
        QUNIFORM_AA_LINE_WIDTH,

        /* Number of workgroups passed to glDispatchCompute in the dimension
         * selected by the data value.
         */
        QUNIFORM_NUM_WORK_GROUPS,

        /* Base workgroup offset passed to vkCmdDispatchBase in the dimension
         * selected by the data value.
         */
        QUNIFORM_WORK_GROUP_BASE,

        /**
         * Returns the offset of the scratch buffer for register spilling.
         */
        QUNIFORM_SPILL_OFFSET,
        QUNIFORM_SPILL_SIZE_PER_THREAD,

        /**
         * Returns the offset of the shared memory for compute shaders.
         *
         * This will be accessed using TMU general memory operations, so the
         * L2T cache will effectively be the shared memory area.
         */
        QUNIFORM_SHARED_OFFSET,

        /**
         * Returns the number of layers in the framebuffer.
         *
         * This is used to cap gl_Layer in geometry shaders to avoid
         * out-of-bounds accesses into the tile state during binning.
         */
        QUNIFORM_FB_LAYERS,

        /**
         * Current value of gl_ViewIndex for Multiview rendering.
         */
        QUNIFORM_VIEW_INDEX,
};
33401e04c3fSmrg
/**
 * Packs a texture/sampler unit number and a 24-bit value into a single
 * uniform data word: unit in the top 8 bits, value in the low 24 bits.
 * The value must fit in 24 bits.
 */
static inline uint32_t v3d_unit_data_create(uint32_t unit, uint32_t value)
{
        assert(value < (1 << 24));
        uint32_t packed = (unit << 24) | value;
        return packed;
}
340ed98bd31Smaya
/** Extracts the unit number (top 8 bits) from a packed unit data word. */
static inline uint32_t v3d_unit_data_get_unit(uint32_t data)
{
        return (data >> 24) & 0xff;
}
345ed98bd31Smaya
/** Extracts the 24-bit value/offset from a packed unit data word. */
static inline uint32_t v3d_unit_data_get_offset(uint32_t data)
{
        return data & ((1u << 24) - 1);
}
350ed98bd31Smaya
/* A packed varying slot: slot number in the high 6 bits, component in the
 * low 2 bits (see v3d_slot_from_slot_and_component()).
 */
struct v3d_varying_slot {
        uint8_t slot_and_component;
};
35401e04c3fSmrg
35501e04c3fSmrgstatic inline struct v3d_varying_slot
35601e04c3fSmrgv3d_slot_from_slot_and_component(uint8_t slot, uint8_t component)
35701e04c3fSmrg{
35801e04c3fSmrg        assert(slot < 255 / 4);
35901e04c3fSmrg        return (struct v3d_varying_slot){ (slot << 2) + component };
36001e04c3fSmrg}
36101e04c3fSmrg
36201e04c3fSmrgstatic inline uint8_t v3d_slot_get_slot(struct v3d_varying_slot slot)
36301e04c3fSmrg{
36401e04c3fSmrg        return slot.slot_and_component >> 2;
36501e04c3fSmrg}
36601e04c3fSmrg
36701e04c3fSmrgstatic inline uint8_t v3d_slot_get_component(struct v3d_varying_slot slot)
36801e04c3fSmrg{
36901e04c3fSmrg        return slot.slot_and_component & 3;
37001e04c3fSmrg}
37101e04c3fSmrg
/** Which graphics API environment the shader is being compiled for. */
enum v3d_execution_environment {
   V3D_ENVIRONMENT_OPENGL = 0,
   V3D_ENVIRONMENT_VULKAN,
};
3767ec681f3Smrg
/**
 * Shader key state common to all shader stages.
 */
struct v3d_key {
        /* Opaque pointer to the driver's shader state this key was built
         * for -- ownership not visible here; confirm with callers.
         */
        void *shader_state;
        /* Per-texture result swizzle. */
        struct {
                uint8_t swizzle[4];
        } tex[V3D_MAX_TEXTURE_SAMPLERS];
        /* Per-sampler return format description. */
        struct {
                uint8_t return_size;
                uint8_t return_channels;
        } sampler[V3D_MAX_TEXTURE_SAMPLERS];

        uint8_t num_tex_used;
        uint8_t num_samplers_used;
        /* Presumably a bitmask of enabled user clip planes -- confirm
         * against the lowering pass that consumes it.
         */
        uint8_t ucp_enables;
        bool is_last_geometry_stage;
        bool robust_buffer_access;

        /* OpenGL vs Vulkan; see enum v3d_execution_environment. */
        enum v3d_execution_environment environment;
};
39501e04c3fSmrg
/** Fragment shader compile key. */
struct v3d_fs_key {
        struct v3d_key base;
        /* Primitive type being rasterized. */
        bool is_points;
        bool is_lines;
        bool line_smoothing;
        bool point_coord_upper_left;
        /* Multisample rasterization state. */
        bool msaa;
        bool sample_coverage;
        bool sample_alpha_to_coverage;
        bool sample_alpha_to_one;
        /* Mask of which color render targets are present. */
        uint8_t cbufs;
        /* Mask of render targets that need their R/B channels swapped. */
        uint8_t swap_color_rb;
        /* Mask of which render targets need to be written as 32-bit floats */
        uint8_t f32_color_rb;
        /* Masks of which render targets need to be written as ints/uints.
         * Used by gallium to work around lost information in TGSI.
         */
        uint8_t int_color_rb;
        uint8_t uint_color_rb;

        /* Color format information per render target. Only set when logic
         * operations are enabled.
         */
        struct {
                enum pipe_format format;
                const uint8_t *swizzle;
        } color_fmt[V3D_MAX_DRAW_BUFFERS];

        uint8_t logicop_func;
        uint32_t point_sprite_mask;

        struct pipe_rt_blend_state blend;

        /* If the fragment shader reads gl_PrimitiveID then we have 2 scenarios:
         *
         * - If there is a geometry shader, then gl_PrimitiveID must be written
         *   by it and the fragment shader loads it as a regular explicit input
         *   varying. This is the only valid use case in GLES 3.1.
         *
         * - If there is not a geometry shader (allowed since GLES 3.2 and
         *   Vulkan 1.0), then gl_PrimitiveID must be implicitly written by
         *   hardware and is considered an implicit input varying in the
         *   fragment shader.
         */
        bool has_gs;
};
4437ec681f3Smrg
/** Geometry shader compile key. */
struct v3d_gs_key {
        struct v3d_key base;

        /* Outputs actually consumed by the next stage. */
        struct v3d_varying_slot used_outputs[V3D_MAX_FS_INPUTS];
        uint8_t num_used_outputs;

        /* True when compiling the coordinate-shader (binning) variant. */
        bool is_coord;
        bool per_vertex_point_size;
};
45301e04c3fSmrg
/** Vertex shader compile key. */
struct v3d_vs_key {
        struct v3d_key base;

        /* Outputs actually consumed by the next stage. */
        struct v3d_varying_slot used_outputs[V3D_MAX_ANY_STAGE_INPUTS];
        uint8_t num_used_outputs;

        /* A bit-mask indicating if we need to swap the R/B channels for
         * vertex attributes. Since the hardware doesn't provide any
         * means to swizzle vertex attributes we need to do it in the shader.
         */
        uint32_t va_swap_rb_mask;

        /* True when compiling the coordinate-shader (binning) variant. */
        bool is_coord;
        bool per_vertex_point_size;
        bool clamp_color;
};
47001e04c3fSmrg
/** A basic block of VIR instructions. */
struct qblock {
        /* Entry in the shader's list of blocks. */
        struct list_head link;

        /* The qinsts of this block, in program order. */
        struct list_head instructions;

        /* Control-flow graph edges. */
        struct set *predecessors;
        struct qblock *successors[2];

        /* Block number, unique within the shader. */
        int index;

        /* Instruction IPs for the first and last instruction of the block.
         * Set by qpu_schedule.c.
         */
        uint32_t start_qpu_ip;
        uint32_t end_qpu_ip;

        /* Instruction IP for the branch instruction of the block.  Set by
         * qpu_schedule.c.
         */
        uint32_t branch_qpu_ip;

        /** Offset within the uniform stream at the start of the block. */
        uint32_t start_uniform;
        /** Offset within the uniform stream of the branch instruction */
        uint32_t branch_uniform;

        /**
         * Has the terminating branch of this block already been emitted
         * by a break or continue?
         */
        bool branch_emitted;

        /** @{ used by v3d_vir_live_variables.c */
        BITSET_WORD *def;
        BITSET_WORD *defin;
        BITSET_WORD *defout;
        BITSET_WORD *use;
        BITSET_WORD *live_in;
        BITSET_WORD *live_out;
        int start_ip, end_ip;
        /** @} */
};
51401e04c3fSmrg
/** Which util/list.h add mode we should use when inserting an instruction. */
enum vir_cursor_mode {
        /* Insert after the cursor's link (list_add). */
        vir_cursor_add,
        /* Insert before the cursor's link (list_addtail). */
        vir_cursor_addtail,
};
52001e04c3fSmrg
/**
 * Tracking structure for where new instructions should be inserted.  Create
 * with one of the vir_after_inst()-style helper functions.
 *
 * This does not protect against removal of the block or instruction, so we
 * have an assert in instruction removal to try to catch it.
 */
struct vir_cursor {
        /* How to insert relative to link (see enum vir_cursor_mode). */
        enum vir_cursor_mode mode;
        /* The list node (instruction link or block instruction list) the
         * insertion is anchored to.
         */
        struct list_head *link;
};
53201e04c3fSmrg
53301e04c3fSmrgstatic inline struct vir_cursor
53401e04c3fSmrgvir_before_inst(struct qinst *inst)
53501e04c3fSmrg{
53601e04c3fSmrg        return (struct vir_cursor){ vir_cursor_addtail, &inst->link };
53701e04c3fSmrg}
53801e04c3fSmrg
53901e04c3fSmrgstatic inline struct vir_cursor
54001e04c3fSmrgvir_after_inst(struct qinst *inst)
54101e04c3fSmrg{
54201e04c3fSmrg        return (struct vir_cursor){ vir_cursor_add, &inst->link };
54301e04c3fSmrg}
54401e04c3fSmrg
54501e04c3fSmrgstatic inline struct vir_cursor
54601e04c3fSmrgvir_before_block(struct qblock *block)
54701e04c3fSmrg{
54801e04c3fSmrg        return (struct vir_cursor){ vir_cursor_add, &block->instructions };
54901e04c3fSmrg}
55001e04c3fSmrg
55101e04c3fSmrgstatic inline struct vir_cursor
55201e04c3fSmrgvir_after_block(struct qblock *block)
55301e04c3fSmrg{
55401e04c3fSmrg        return (struct vir_cursor){ vir_cursor_addtail, &block->instructions };
55501e04c3fSmrg}
55601e04c3fSmrg
/** Outcome of a compilation attempt, used to drive fallback strategies. */
enum v3d_compilation_result {
        V3D_COMPILATION_SUCCEEDED,
        V3D_COMPILATION_FAILED_REGISTER_ALLOCATION,
        V3D_COMPILATION_FAILED,
};
5627ec681f3Smrg
/**
 * Compiler state saved across compiler invocations, for any expensive global
 * setup.
 */
struct v3d_compiler {
        const struct v3d_device_info *devinfo;
        /* Register-allocation register set shared across compilations. */
        struct ra_regs *regs;
        /* Register classes, indexed by an arity-3 key -- presumably the
         * thread-count configuration; confirm in the RA setup code.
         */
        struct ra_class *reg_class_any[3];
        struct ra_class *reg_class_r5[3];
        struct ra_class *reg_class_phys[3];
        struct ra_class *reg_class_phys_or_acc[3];
};
5757ec681f3Smrg
/**
 * This holds partially interpolated inputs as provided by hardware
 * (The Vp = A*(x - x0) + B*(y - y0) term), as well as the C coefficient
 * required to compute the final interpolated value.
 */
struct v3d_interp_input {
   /* Partially interpolated value delivered by the hardware. */
   struct qreg vp;
   /* The C coefficient completing the interpolation equation. */
   struct qreg C;
   unsigned mode; /* interpolation mode */
};
58601e04c3fSmrg
58701e04c3fSmrgstruct v3d_compile {
58801e04c3fSmrg        const struct v3d_device_info *devinfo;
58901e04c3fSmrg        nir_shader *s;
59001e04c3fSmrg        nir_function_impl *impl;
59101e04c3fSmrg        struct exec_list *cf_node_list;
59201e04c3fSmrg        const struct v3d_compiler *compiler;
59301e04c3fSmrg
594ed98bd31Smaya        void (*debug_output)(const char *msg,
595ed98bd31Smaya                             void *debug_output_data);
596ed98bd31Smaya        void *debug_output_data;
597ed98bd31Smaya
59801e04c3fSmrg        /**
59901e04c3fSmrg         * Mapping from nir_register * or nir_ssa_def * to array of struct
60001e04c3fSmrg         * qreg for the values.
60101e04c3fSmrg         */
60201e04c3fSmrg        struct hash_table *def_ht;
60301e04c3fSmrg
60401e04c3fSmrg        /* For each temp, the instruction generating its value. */
60501e04c3fSmrg        struct qinst **defs;
60601e04c3fSmrg        uint32_t defs_array_size;
60701e04c3fSmrg
6087ec681f3Smrg        /* TMU pipelining tracking */
6097ec681f3Smrg        struct {
6107ec681f3Smrg                /* NIR registers that have been updated with a TMU operation
6117ec681f3Smrg                 * that has not been flushed yet.
6127ec681f3Smrg                 */
6137ec681f3Smrg                struct set *outstanding_regs;
6147ec681f3Smrg
6157ec681f3Smrg                uint32_t output_fifo_size;
6167ec681f3Smrg
6177ec681f3Smrg                struct {
6187ec681f3Smrg                        nir_dest *dest;
6197ec681f3Smrg                        uint8_t num_components;
6207ec681f3Smrg                        uint8_t component_mask;
6217ec681f3Smrg                } flush[MAX_TMU_QUEUE_SIZE];
6227ec681f3Smrg                uint32_t flush_count;
6237ec681f3Smrg        } tmu;
6247ec681f3Smrg
62501e04c3fSmrg        /**
62601e04c3fSmrg         * Inputs to the shader, arranged by TGSI declaration order.
62701e04c3fSmrg         *
62801e04c3fSmrg         * Not all fragment shader QFILE_VARY reads are present in this array.
62901e04c3fSmrg         */
63001e04c3fSmrg        struct qreg *inputs;
6317ec681f3Smrg        /**
6327ec681f3Smrg         * Partially interpolated inputs to the shader.
6337ec681f3Smrg         */
6347ec681f3Smrg        struct v3d_interp_input *interp;
63501e04c3fSmrg        struct qreg *outputs;
63601e04c3fSmrg        bool msaa_per_sample_output;
6377ec681f3Smrg        struct qreg color_reads[V3D_MAX_DRAW_BUFFERS * V3D_MAX_SAMPLES * 4];
6387ec681f3Smrg        struct qreg sample_colors[V3D_MAX_DRAW_BUFFERS * V3D_MAX_SAMPLES * 4];
63901e04c3fSmrg        uint32_t inputs_array_size;
64001e04c3fSmrg        uint32_t outputs_array_size;
64101e04c3fSmrg        uint32_t uniforms_array_size;
64201e04c3fSmrg
64301e04c3fSmrg        /* Booleans for whether the corresponding QFILE_VARY[i] is
64401e04c3fSmrg         * flat-shaded.  This includes gl_FragColor flat-shading, which is
64501e04c3fSmrg         * customized based on the shademodel_flat shader key.
64601e04c3fSmrg         */
64701e04c3fSmrg        uint32_t flat_shade_flags[BITSET_WORDS(V3D_MAX_FS_INPUTS)];
64801e04c3fSmrg
64901e04c3fSmrg        uint32_t noperspective_flags[BITSET_WORDS(V3D_MAX_FS_INPUTS)];
65001e04c3fSmrg
65101e04c3fSmrg        uint32_t centroid_flags[BITSET_WORDS(V3D_MAX_FS_INPUTS)];
65201e04c3fSmrg
65301e04c3fSmrg        bool uses_center_w;
654ed98bd31Smaya        bool writes_z;
6557ec681f3Smrg        bool uses_implicit_point_line_varyings;
6567ec681f3Smrg
6577ec681f3Smrg        /* True if a fragment shader reads gl_PrimitiveID */
6587ec681f3Smrg        bool fs_uses_primitive_id;
6597ec681f3Smrg
6607ec681f3Smrg        /* If the fragment shader does anything that requires to force
6617ec681f3Smrg         * per-sample MSAA, such as reading gl_SampleID.
6627ec681f3Smrg         */
6637ec681f3Smrg        bool force_per_sample_msaa;
6647ec681f3Smrg
6657ec681f3Smrg        /* Whether we are using the fallback scheduler. This will be set after
6667ec681f3Smrg         * register allocation has failed once.
6677ec681f3Smrg         */
6687ec681f3Smrg        bool fallback_scheduler;
6697ec681f3Smrg
6707ec681f3Smrg        /* Disable TMU pipelining. This may increase the chances of being able
6717ec681f3Smrg         * to compile shaders with high register pressure that require to emit
6727ec681f3Smrg         * TMU spills.
6737ec681f3Smrg         */
6747ec681f3Smrg        bool disable_tmu_pipelining;
6757ec681f3Smrg        bool pipelined_any_tmu;
6767ec681f3Smrg
6777ec681f3Smrg        /* Disable sorting of UBO loads with constant offset. This may
6787ec681f3Smrg         * increase the chances of being able to compile shaders with high
6797ec681f3Smrg         * register pressure.
6807ec681f3Smrg         */
6817ec681f3Smrg        bool disable_constant_ubo_load_sorting;
6827ec681f3Smrg        bool sorted_any_ubo_loads;
6837ec681f3Smrg
6847ec681f3Smrg        /* Emits ldunif for each new uniform, even if the uniform was already
6857ec681f3Smrg         * emitted in the same block. Useful to compile shaders with high
6867ec681f3Smrg         * register pressure or to disable the optimization during uniform
6877ec681f3Smrg         * spills.
6887ec681f3Smrg         */
6897ec681f3Smrg        bool disable_ldunif_opt;
6907ec681f3Smrg
6917ec681f3Smrg        /* Disables loop unrolling to reduce register pressure. */
6927ec681f3Smrg        bool disable_loop_unrolling;
6937ec681f3Smrg        bool unrolled_any_loops;
6947ec681f3Smrg
6957ec681f3Smrg        /* Minimum number of threads we are willing to use to register allocate
6967ec681f3Smrg         * a shader with the current compilation strategy. This only prevents
6977ec681f3Smrg         * us from lowering the thread count to register allocate successfully,
6987ec681f3Smrg         * which can be useful when we prefer doing other changes to the
6997ec681f3Smrg         * compilation strategy before dropping thread count.
7007ec681f3Smrg         */
7017ec681f3Smrg        uint32_t min_threads_for_reg_alloc;
7027ec681f3Smrg
7037ec681f3Smrg        /* Whether TMU spills are allowed. If this is disabled it may cause
7047ec681f3Smrg         * register allocation to fail. We set this to favor other compilation
7057ec681f3Smrg         * strategies that can reduce register pressure and hopefully reduce or
7067ec681f3Smrg         * eliminate TMU spills in the shader.
7077ec681f3Smrg         */
7087ec681f3Smrg        bool tmu_spilling_allowed;
7097ec681f3Smrg
7107ec681f3Smrg        /* The UBO index and block used with the last unifa load, as well as the
7117ec681f3Smrg         * current unifa offset *after* emitting that load. This is used to skip
7127ec681f3Smrg         * unifa writes (and their 3 delay slot) when the next UBO load reads
7137ec681f3Smrg         * right after the previous one in the same block.
7147ec681f3Smrg         */
7157ec681f3Smrg        struct qblock *current_unifa_block;
7167ec681f3Smrg        int32_t current_unifa_index;
7177ec681f3Smrg        uint32_t current_unifa_offset;
71801e04c3fSmrg
71901e04c3fSmrg        /* State for whether we're executing on each channel currently.  0 if
72001e04c3fSmrg         * yes, otherwise a block number + 1 that the channel jumped to.
72101e04c3fSmrg         */
72201e04c3fSmrg        struct qreg execute;
723ed98bd31Smaya        bool in_control_flow;
72401e04c3fSmrg
7257ec681f3Smrg        struct qreg line_x, point_x, point_y, primitive_id;
72601e04c3fSmrg
72701e04c3fSmrg        /**
72801e04c3fSmrg         * Instance ID, which comes in before the vertex attribute payload if
72901e04c3fSmrg         * the shader record requests it.
73001e04c3fSmrg         */
73101e04c3fSmrg        struct qreg iid;
73201e04c3fSmrg
73301e04c3fSmrg        /**
7347ec681f3Smrg         * Base Instance ID, which comes in before the vertex attribute payload
73501e04c3fSmrg         * (after Instance ID) if the shader record requests it.
73601e04c3fSmrg         */
7377ec681f3Smrg        struct qreg biid;
7387ec681f3Smrg
7397ec681f3Smrg        /**
7407ec681f3Smrg         * Vertex ID, which comes in before the vertex attribute payload
7417ec681f3Smrg         * (after Base Instance) if the shader record requests it.
7427ec681f3Smrg         */
74301e04c3fSmrg        struct qreg vid;
74401e04c3fSmrg
74501e04c3fSmrg        /* Fragment shader payload regs. */
74601e04c3fSmrg        struct qreg payload_w, payload_w_centroid, payload_z;
74701e04c3fSmrg
748ed98bd31Smaya        struct qreg cs_payload[2];
749ed98bd31Smaya        struct qreg cs_shared_offset;
750ed98bd31Smaya        int local_invocation_index_bits;
751ed98bd31Smaya
7527ec681f3Smrg        /* If the shader uses subgroup functionality */
7537ec681f3Smrg        bool has_subgroups;
7547ec681f3Smrg
755ed98bd31Smaya        uint8_t vattr_sizes[V3D_MAX_VS_INPUTS / 4];
756ed98bd31Smaya        uint32_t vpm_output_size;
75701e04c3fSmrg
75801e04c3fSmrg        /* Size in bytes of registers that have been spilled. This is how much
75901e04c3fSmrg         * space needs to be available in the spill BO per thread per QPU.
76001e04c3fSmrg         */
76101e04c3fSmrg        uint32_t spill_size;
762ed98bd31Smaya        /* Shader-db stats */
763ed98bd31Smaya        uint32_t spills, fills, loops;
76401e04c3fSmrg        /**
76501e04c3fSmrg         * Register spilling's per-thread base address, shared between each
76601e04c3fSmrg         * spill/fill's addressing calculations.
76701e04c3fSmrg         */
76801e04c3fSmrg        struct qreg spill_base;
76901e04c3fSmrg        /* Bit vector of which temps may be spilled */
77001e04c3fSmrg        BITSET_WORD *spillable;
77101e04c3fSmrg
77201e04c3fSmrg        /**
77301e04c3fSmrg         * Array of the VARYING_SLOT_* of all FS QFILE_VARY reads.
77401e04c3fSmrg         *
77501e04c3fSmrg         * This includes those that aren't part of the VPM varyings, like
77601e04c3fSmrg         * point/line coordinates.
77701e04c3fSmrg         */
77801e04c3fSmrg        struct v3d_varying_slot input_slots[V3D_MAX_FS_INPUTS];
77901e04c3fSmrg
78001e04c3fSmrg        /**
78101e04c3fSmrg         * An entry per outputs[] in the VS indicating what the VARYING_SLOT_*
78201e04c3fSmrg         * of the output is.  Used to emit from the VS in the order that the
78301e04c3fSmrg         * FS needs.
78401e04c3fSmrg         */
78501e04c3fSmrg        struct v3d_varying_slot *output_slots;
78601e04c3fSmrg
78701e04c3fSmrg        struct pipe_shader_state *shader_state;
78801e04c3fSmrg        struct v3d_key *key;
78901e04c3fSmrg        struct v3d_fs_key *fs_key;
7907ec681f3Smrg        struct v3d_gs_key *gs_key;
79101e04c3fSmrg        struct v3d_vs_key *vs_key;
79201e04c3fSmrg
79301e04c3fSmrg        /* Live ranges of temps. */
79401e04c3fSmrg        int *temp_start, *temp_end;
79501e04c3fSmrg        bool live_intervals_valid;
79601e04c3fSmrg
79701e04c3fSmrg        uint32_t *uniform_data;
79801e04c3fSmrg        enum quniform_contents *uniform_contents;
79901e04c3fSmrg        uint32_t uniform_array_size;
80001e04c3fSmrg        uint32_t num_uniforms;
80101e04c3fSmrg        uint32_t output_position_index;
80201e04c3fSmrg        nir_variable *output_color_var[4];
80301e04c3fSmrg        uint32_t output_sample_mask_index;
80401e04c3fSmrg
80501e04c3fSmrg        struct qreg undef;
80601e04c3fSmrg        uint32_t num_temps;
80701e04c3fSmrg
80801e04c3fSmrg        struct vir_cursor cursor;
80901e04c3fSmrg        struct list_head blocks;
81001e04c3fSmrg        int next_block_index;
81101e04c3fSmrg        struct qblock *cur_block;
81201e04c3fSmrg        struct qblock *loop_cont_block;
81301e04c3fSmrg        struct qblock *loop_break_block;
8147ec681f3Smrg        /**
8157ec681f3Smrg         * Which temp, if any, do we currently have in the flags?
8167ec681f3Smrg         * This is set when processing a comparison instruction, and
8177ec681f3Smrg         * reset to -1 by anything else that touches the flags.
8187ec681f3Smrg         */
8197ec681f3Smrg        int32_t flags_temp;
8207ec681f3Smrg        enum v3d_qpu_cond flags_cond;
82101e04c3fSmrg
82201e04c3fSmrg        uint64_t *qpu_insts;
82301e04c3fSmrg        uint32_t qpu_inst_count;
82401e04c3fSmrg        uint32_t qpu_inst_size;
8257ec681f3Smrg        uint32_t qpu_inst_stalled_count;
8267ec681f3Smrg        uint32_t nop_count;
82701e04c3fSmrg
82801e04c3fSmrg        /* For the FS, the number of varying inputs not counting the
82901e04c3fSmrg         * point/line varyings payload
83001e04c3fSmrg         */
83101e04c3fSmrg        uint32_t num_inputs;
83201e04c3fSmrg
83301e04c3fSmrg        uint32_t program_id;
83401e04c3fSmrg        uint32_t variant_id;
83501e04c3fSmrg
83601e04c3fSmrg        /* Set to compile program in in 1x, 2x, or 4x threaded mode, where
83701e04c3fSmrg         * SIG_THREAD_SWITCH is used to hide texturing latency at the cost of
83801e04c3fSmrg         * limiting ourselves to the part of the physical reg space.
83901e04c3fSmrg         *
84001e04c3fSmrg         * On V3D 3.x, 2x or 4x divide the physical reg space by 2x or 4x.  On
84101e04c3fSmrg         * V3D 4.x, all shaders are 2x threaded, and 4x only divides the
84201e04c3fSmrg         * physical reg space in half.
84301e04c3fSmrg         */
84401e04c3fSmrg        uint8_t threads;
84501e04c3fSmrg        struct qinst *last_thrsw;
84601e04c3fSmrg        bool last_thrsw_at_top_level;
84701e04c3fSmrg
8487ec681f3Smrg        bool emitted_tlb_load;
8497ec681f3Smrg        bool lock_scoreboard_on_first_thrsw;
8507ec681f3Smrg
8517ec681f3Smrg        /* Total number of spilled registers in the program */
8527ec681f3Smrg        uint32_t spill_count;
8537ec681f3Smrg
8547ec681f3Smrg        enum v3d_compilation_result compilation_result;
8557ec681f3Smrg
8567ec681f3Smrg        bool tmu_dirty_rcl;
85701e04c3fSmrg};
85801e04c3fSmrg
85901e04c3fSmrgstruct v3d_uniform_list {
86001e04c3fSmrg        enum quniform_contents *contents;
86101e04c3fSmrg        uint32_t *data;
86201e04c3fSmrg        uint32_t count;
86301e04c3fSmrg};
86401e04c3fSmrg
86501e04c3fSmrgstruct v3d_prog_data {
86601e04c3fSmrg        struct v3d_uniform_list uniforms;
86701e04c3fSmrg
86801e04c3fSmrg        uint32_t spill_size;
86901e04c3fSmrg
87001e04c3fSmrg        uint8_t threads;
87101e04c3fSmrg
87201e04c3fSmrg        /* For threads > 1, whether the program should be dispatched in the
87301e04c3fSmrg         * after-final-THRSW state.
87401e04c3fSmrg         */
87501e04c3fSmrg        bool single_seg;
8767ec681f3Smrg
8777ec681f3Smrg        bool tmu_dirty_rcl;
8787ec681f3Smrg
8797ec681f3Smrg        bool has_control_barrier;
88001e04c3fSmrg};
88101e04c3fSmrg
88201e04c3fSmrgstruct v3d_vs_prog_data {
88301e04c3fSmrg        struct v3d_prog_data base;
88401e04c3fSmrg
8857ec681f3Smrg        bool uses_iid, uses_biid, uses_vid;
88601e04c3fSmrg
88701e04c3fSmrg        /* Number of components read from each vertex attribute. */
888ed98bd31Smaya        uint8_t vattr_sizes[V3D_MAX_VS_INPUTS / 4];
88901e04c3fSmrg
89001e04c3fSmrg        /* Total number of components read, for the shader state record. */
89101e04c3fSmrg        uint32_t vpm_input_size;
89201e04c3fSmrg
89301e04c3fSmrg        /* Total number of components written, for the shader state record. */
89401e04c3fSmrg        uint32_t vpm_output_size;
89501e04c3fSmrg
896ed98bd31Smaya        /* Set if there should be separate VPM segments for input and output.
897ed98bd31Smaya         * If unset, vpm_input_size will be 0.
898ed98bd31Smaya         */
899ed98bd31Smaya        bool separate_segments;
900ed98bd31Smaya
90101e04c3fSmrg        /* Value to be programmed in VCM_CACHE_SIZE. */
90201e04c3fSmrg        uint8_t vcm_cache_size;
9037ec681f3Smrg
9047ec681f3Smrg        /* Maps the nir->data.location to its
9057ec681f3Smrg         * nir->data.driver_location. In general we are using the
9067ec681f3Smrg         * driver location as index (like vattr_sizes above), so this
9077ec681f3Smrg         * map is useful when what we have is the location
9087ec681f3Smrg         *
9097ec681f3Smrg         * Returns -1 if the location is not used
9107ec681f3Smrg         */
9117ec681f3Smrg        int32_t driver_location_map[V3D_MAX_VS_INPUTS];
9127ec681f3Smrg};
9137ec681f3Smrg
9147ec681f3Smrgstruct v3d_gs_prog_data {
9157ec681f3Smrg        struct v3d_prog_data base;
9167ec681f3Smrg
9177ec681f3Smrg        /* Whether the program reads gl_PrimitiveIDIn */
9187ec681f3Smrg        bool uses_pid;
9197ec681f3Smrg
9207ec681f3Smrg        /* Number of components read from each input varying. */
9217ec681f3Smrg        uint8_t input_sizes[V3D_MAX_GS_INPUTS / 4];
9227ec681f3Smrg
9237ec681f3Smrg        /* Number of inputs */
9247ec681f3Smrg        uint8_t num_inputs;
9257ec681f3Smrg        struct v3d_varying_slot input_slots[V3D_MAX_GS_INPUTS];
9267ec681f3Smrg
9277ec681f3Smrg        /* Total number of components written, for the shader state record. */
9287ec681f3Smrg        uint32_t vpm_output_size;
9297ec681f3Smrg
9307ec681f3Smrg        /* Maximum SIMD dispatch width to not exceed VPM output size limits
9317ec681f3Smrg         * in the geometry shader. Notice that the final dispatch width has to
9327ec681f3Smrg         * be decided at draw time and could be lower based on the VPM pressure
9337ec681f3Smrg         * added by other shader stages.
9347ec681f3Smrg         */
9357ec681f3Smrg        uint8_t simd_width;
9367ec681f3Smrg
9377ec681f3Smrg        /* Output primitive type */
9387ec681f3Smrg        uint8_t out_prim_type;
9397ec681f3Smrg
9407ec681f3Smrg        /* Number of GS invocations */
9417ec681f3Smrg        uint8_t num_invocations;
9427ec681f3Smrg
9437ec681f3Smrg        bool writes_psiz;
94401e04c3fSmrg};
94501e04c3fSmrg
94601e04c3fSmrgstruct v3d_fs_prog_data {
94701e04c3fSmrg        struct v3d_prog_data base;
94801e04c3fSmrg
9497ec681f3Smrg        /* Whether the program reads gl_PrimitiveID */
9507ec681f3Smrg        bool uses_pid;
9517ec681f3Smrg
95201e04c3fSmrg        struct v3d_varying_slot input_slots[V3D_MAX_FS_INPUTS];
95301e04c3fSmrg
95401e04c3fSmrg        /* Array of flat shade flags.
95501e04c3fSmrg         *
95601e04c3fSmrg         * Each entry is only 24 bits (high 8 bits 0), to match the hardware
95701e04c3fSmrg         * packet layout.
95801e04c3fSmrg         */
95901e04c3fSmrg        uint32_t flat_shade_flags[((V3D_MAX_FS_INPUTS - 1) / 24) + 1];
96001e04c3fSmrg
96101e04c3fSmrg        uint32_t noperspective_flags[((V3D_MAX_FS_INPUTS - 1) / 24) + 1];
96201e04c3fSmrg
96301e04c3fSmrg        uint32_t centroid_flags[((V3D_MAX_FS_INPUTS - 1) / 24) + 1];
96401e04c3fSmrg
965ed98bd31Smaya        uint8_t num_inputs;
96601e04c3fSmrg        bool writes_z;
967ed98bd31Smaya        bool disable_ez;
96801e04c3fSmrg        bool uses_center_w;
9697ec681f3Smrg        bool uses_implicit_point_line_varyings;
9707ec681f3Smrg        bool lock_scoreboard_on_first_thrsw;
9717ec681f3Smrg        bool force_per_sample_msaa;
97201e04c3fSmrg};
97301e04c3fSmrg
974ed98bd31Smayastruct v3d_compute_prog_data {
975ed98bd31Smaya        struct v3d_prog_data base;
976ed98bd31Smaya        /* Size in bytes of the workgroup's shared space. */
977ed98bd31Smaya        uint32_t shared_size;
9787ec681f3Smrg        uint16_t local_size[3];
9797ec681f3Smrg        /* If the shader uses subgroup functionality */
9807ec681f3Smrg        bool has_subgroups;
9817ec681f3Smrg};
9827ec681f3Smrg
9837ec681f3Smrgstruct vpm_config {
9847ec681f3Smrg   uint32_t As;
9857ec681f3Smrg   uint32_t Vc;
9867ec681f3Smrg   uint32_t Gs;
9877ec681f3Smrg   uint32_t Gd;
9887ec681f3Smrg   uint32_t Gv;
9897ec681f3Smrg   uint32_t Ve;
9907ec681f3Smrg   uint32_t gs_width;
991ed98bd31Smaya};
992ed98bd31Smaya
9937ec681f3Smrgbool
9947ec681f3Smrgv3d_compute_vpm_config(struct v3d_device_info *devinfo,
9957ec681f3Smrg                       struct v3d_vs_prog_data *vs_bin,
9967ec681f3Smrg                       struct v3d_vs_prog_data *vs,
9977ec681f3Smrg                       struct v3d_gs_prog_data *gs_bin,
9987ec681f3Smrg                       struct v3d_gs_prog_data *gs,
9997ec681f3Smrg                       struct vpm_config *vpm_cfg_bin,
10007ec681f3Smrg                       struct vpm_config *vpm_cfg);
10017ec681f3Smrg
1002ed98bd31Smayastatic inline bool
1003ed98bd31Smayavir_has_uniform(struct qinst *inst)
1004ed98bd31Smaya{
1005ed98bd31Smaya        return inst->uniform != ~0;
1006ed98bd31Smaya}
1007ed98bd31Smaya
100801e04c3fSmrgconst struct v3d_compiler *v3d_compiler_init(const struct v3d_device_info *devinfo);
100901e04c3fSmrgvoid v3d_compiler_free(const struct v3d_compiler *compiler);
10107ec681f3Smrgvoid v3d_optimize_nir(struct v3d_compile *c, struct nir_shader *s);
101101e04c3fSmrg
1012ed98bd31Smayauint64_t *v3d_compile(const struct v3d_compiler *compiler,
1013ed98bd31Smaya                      struct v3d_key *key,
1014ed98bd31Smaya                      struct v3d_prog_data **prog_data,
1015ed98bd31Smaya                      nir_shader *s,
1016ed98bd31Smaya                      void (*debug_output)(const char *msg,
1017ed98bd31Smaya                                           void *debug_output_data),
1018ed98bd31Smaya                      void *debug_output_data,
1019ed98bd31Smaya                      int program_id, int variant_id,
1020ed98bd31Smaya                      uint32_t *final_assembly_size);
102101e04c3fSmrg
10227ec681f3Smrguint32_t v3d_prog_data_size(gl_shader_stage stage);
102301e04c3fSmrgvoid v3d_nir_to_vir(struct v3d_compile *c);
102401e04c3fSmrg
102501e04c3fSmrgvoid vir_compile_destroy(struct v3d_compile *c);
102601e04c3fSmrgconst char *vir_get_stage_name(struct v3d_compile *c);
102701e04c3fSmrgstruct qblock *vir_new_block(struct v3d_compile *c);
102801e04c3fSmrgvoid vir_set_emit_block(struct v3d_compile *c, struct qblock *block);
102901e04c3fSmrgvoid vir_link_blocks(struct qblock *predecessor, struct qblock *successor);
103001e04c3fSmrgstruct qblock *vir_entry_block(struct v3d_compile *c);
103101e04c3fSmrgstruct qblock *vir_exit_block(struct v3d_compile *c);
103201e04c3fSmrgstruct qinst *vir_add_inst(enum v3d_qpu_add_op op, struct qreg dst,
103301e04c3fSmrg                           struct qreg src0, struct qreg src1);
103401e04c3fSmrgstruct qinst *vir_mul_inst(enum v3d_qpu_mul_op op, struct qreg dst,
103501e04c3fSmrg                           struct qreg src0, struct qreg src1);
1036ed98bd31Smayastruct qinst *vir_branch_inst(struct v3d_compile *c,
1037ed98bd31Smaya                              enum v3d_qpu_branch_cond cond);
103801e04c3fSmrgvoid vir_remove_instruction(struct v3d_compile *c, struct qinst *qinst);
1039ed98bd31Smayauint32_t vir_get_uniform_index(struct v3d_compile *c,
1040ed98bd31Smaya                               enum quniform_contents contents,
1041ed98bd31Smaya                               uint32_t data);
104201e04c3fSmrgstruct qreg vir_uniform(struct v3d_compile *c,
104301e04c3fSmrg                        enum quniform_contents contents,
104401e04c3fSmrg                        uint32_t data);
104501e04c3fSmrgvoid vir_schedule_instructions(struct v3d_compile *c);
1046ed98bd31Smayavoid v3d_setup_spill_base(struct v3d_compile *c);
104701e04c3fSmrgstruct v3d_qpu_instr v3d_qpu_nop(void);
104801e04c3fSmrg
104901e04c3fSmrgstruct qreg vir_emit_def(struct v3d_compile *c, struct qinst *inst);
105001e04c3fSmrgstruct qinst *vir_emit_nondef(struct v3d_compile *c, struct qinst *inst);
105101e04c3fSmrgvoid vir_set_cond(struct qinst *inst, enum v3d_qpu_cond cond);
10527ec681f3Smrgenum v3d_qpu_cond vir_get_cond(struct qinst *inst);
10537ec681f3Smrgvoid vir_set_pf(struct v3d_compile *c, struct qinst *inst, enum v3d_qpu_pf pf);
10547ec681f3Smrgvoid vir_set_uf(struct v3d_compile *c, struct qinst *inst, enum v3d_qpu_uf uf);
105501e04c3fSmrgvoid vir_set_unpack(struct qinst *inst, int src,
105601e04c3fSmrg                    enum v3d_qpu_input_unpack unpack);
10577ec681f3Smrgvoid vir_set_pack(struct qinst *inst, enum v3d_qpu_output_pack pack);
105801e04c3fSmrg
105901e04c3fSmrgstruct qreg vir_get_temp(struct v3d_compile *c);
106001e04c3fSmrgvoid vir_calculate_live_intervals(struct v3d_compile *c);
106101e04c3fSmrgint vir_get_nsrc(struct qinst *inst);
106201e04c3fSmrgbool vir_has_side_effects(struct v3d_compile *c, struct qinst *inst);
106301e04c3fSmrgbool vir_get_add_op(struct qinst *inst, enum v3d_qpu_add_op *op);
106401e04c3fSmrgbool vir_get_mul_op(struct qinst *inst, enum v3d_qpu_mul_op *op);
106501e04c3fSmrgbool vir_is_raw_mov(struct qinst *inst);
10667ec681f3Smrgbool vir_is_tex(const struct v3d_device_info *devinfo, struct qinst *inst);
106701e04c3fSmrgbool vir_is_add(struct qinst *inst);
106801e04c3fSmrgbool vir_is_mul(struct qinst *inst);
106901e04c3fSmrgbool vir_writes_r3(const struct v3d_device_info *devinfo, struct qinst *inst);
107001e04c3fSmrgbool vir_writes_r4(const struct v3d_device_info *devinfo, struct qinst *inst);
107101e04c3fSmrgstruct qreg vir_follow_movs(struct v3d_compile *c, struct qreg reg);
107201e04c3fSmrguint8_t vir_channels_written(struct qinst *inst);
107301e04c3fSmrgstruct qreg ntq_get_src(struct v3d_compile *c, nir_src src, int i);
107401e04c3fSmrgvoid ntq_store_dest(struct v3d_compile *c, nir_dest *dest, int chan,
107501e04c3fSmrg                    struct qreg result);
10767ec681f3Smrgbool ntq_tmu_fifo_overflow(struct v3d_compile *c, uint32_t components);
10777ec681f3Smrgvoid ntq_add_pending_tmu_flush(struct v3d_compile *c, nir_dest *dest,
10787ec681f3Smrg                               uint32_t component_mask);
10797ec681f3Smrgvoid ntq_flush_tmu(struct v3d_compile *c);
108001e04c3fSmrgvoid vir_emit_thrsw(struct v3d_compile *c);
108101e04c3fSmrg
108201e04c3fSmrgvoid vir_dump(struct v3d_compile *c);
108301e04c3fSmrgvoid vir_dump_inst(struct v3d_compile *c, struct qinst *inst);
1084ed98bd31Smayavoid vir_dump_uniform(enum quniform_contents contents, uint32_t data);
108501e04c3fSmrg
108601e04c3fSmrgvoid vir_validate(struct v3d_compile *c);
108701e04c3fSmrg
108801e04c3fSmrgvoid vir_optimize(struct v3d_compile *c);
108901e04c3fSmrgbool vir_opt_algebraic(struct v3d_compile *c);
109001e04c3fSmrgbool vir_opt_constant_folding(struct v3d_compile *c);
109101e04c3fSmrgbool vir_opt_copy_propagate(struct v3d_compile *c);
109201e04c3fSmrgbool vir_opt_dead_code(struct v3d_compile *c);
109301e04c3fSmrgbool vir_opt_peephole_sf(struct v3d_compile *c);
1094ed98bd31Smayabool vir_opt_redundant_flags(struct v3d_compile *c);
109501e04c3fSmrgbool vir_opt_small_immediates(struct v3d_compile *c);
109601e04c3fSmrgbool vir_opt_vpm(struct v3d_compile *c);
10977ec681f3Smrgbool vir_opt_constant_alu(struct v3d_compile *c);
109801e04c3fSmrgvoid v3d_nir_lower_blend(nir_shader *s, struct v3d_compile *c);
109901e04c3fSmrgvoid v3d_nir_lower_io(nir_shader *s, struct v3d_compile *c);
11007ec681f3Smrgvoid v3d_nir_lower_line_smooth(nir_shader *shader);
11017ec681f3Smrgvoid v3d_nir_lower_logic_ops(nir_shader *s, struct v3d_compile *c);
11027ec681f3Smrgvoid v3d_nir_lower_robust_buffer_access(nir_shader *shader, struct v3d_compile *c);
1103ed98bd31Smayavoid v3d_nir_lower_scratch(nir_shader *s);
110401e04c3fSmrgvoid v3d_nir_lower_txf_ms(nir_shader *s, struct v3d_compile *c);
1105ed98bd31Smayavoid v3d_nir_lower_image_load_store(nir_shader *s);
110601e04c3fSmrgvoid vir_lower_uniforms(struct v3d_compile *c);
110701e04c3fSmrg
110801e04c3fSmrgvoid v3d33_vir_vpm_read_setup(struct v3d_compile *c, int num_components);
110901e04c3fSmrgvoid v3d33_vir_vpm_write_setup(struct v3d_compile *c);
111001e04c3fSmrgvoid v3d33_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr);
111101e04c3fSmrgvoid v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr);
1112ed98bd31Smayavoid v3d40_vir_emit_image_load_store(struct v3d_compile *c,
1113ed98bd31Smaya                                     nir_intrinsic_instr *instr);
111401e04c3fSmrg
111501e04c3fSmrgvoid v3d_vir_to_qpu(struct v3d_compile *c, struct qpu_reg *temp_registers);
111601e04c3fSmrguint32_t v3d_qpu_schedule_instructions(struct v3d_compile *c);
111701e04c3fSmrgvoid qpu_validate(struct v3d_compile *c);
111801e04c3fSmrgstruct qpu_reg *v3d_register_allocate(struct v3d_compile *c, bool *spilled);
111901e04c3fSmrgbool vir_init_reg_sets(struct v3d_compiler *compiler);
112001e04c3fSmrg
11217ec681f3Smrgint v3d_shaderdb_dump(struct v3d_compile *c, char **shaderdb_str);
11227ec681f3Smrg
1123ed98bd31Smayabool v3d_gl_format_is_return_32(GLenum format);
112401e04c3fSmrg
11257ec681f3Smrguint32_t
11267ec681f3Smrgv3d_get_op_for_atomic_add(nir_intrinsic_instr *instr, unsigned src);
11277ec681f3Smrg
112801e04c3fSmrgstatic inline bool
112901e04c3fSmrgquniform_contents_is_texture_p0(enum quniform_contents contents)
113001e04c3fSmrg{
113101e04c3fSmrg        return (contents >= QUNIFORM_TEXTURE_CONFIG_P0_0 &&
113201e04c3fSmrg                contents < (QUNIFORM_TEXTURE_CONFIG_P0_0 +
113301e04c3fSmrg                            V3D_MAX_TEXTURE_SAMPLERS));
113401e04c3fSmrg}
113501e04c3fSmrg
1136ed98bd31Smayastatic inline bool
1137ed98bd31Smayavir_in_nonuniform_control_flow(struct v3d_compile *c)
1138ed98bd31Smaya{
1139ed98bd31Smaya        return c->execute.file != QFILE_NULL;
1140ed98bd31Smaya}
1141ed98bd31Smaya
114201e04c3fSmrgstatic inline struct qreg
114301e04c3fSmrgvir_uniform_ui(struct v3d_compile *c, uint32_t ui)
114401e04c3fSmrg{
114501e04c3fSmrg        return vir_uniform(c, QUNIFORM_CONSTANT, ui);
114601e04c3fSmrg}
114701e04c3fSmrg
114801e04c3fSmrgstatic inline struct qreg
114901e04c3fSmrgvir_uniform_f(struct v3d_compile *c, float f)
115001e04c3fSmrg{
115101e04c3fSmrg        return vir_uniform(c, QUNIFORM_CONSTANT, fui(f));
115201e04c3fSmrg}
115301e04c3fSmrg
115401e04c3fSmrg#define VIR_ALU0(name, vir_inst, op)                                     \
115501e04c3fSmrgstatic inline struct qreg                                                \
115601e04c3fSmrgvir_##name(struct v3d_compile *c)                                        \
115701e04c3fSmrg{                                                                        \
115801e04c3fSmrg        return vir_emit_def(c, vir_inst(op, c->undef,                    \
115901e04c3fSmrg                                        c->undef, c->undef));            \
116001e04c3fSmrg}                                                                        \
116101e04c3fSmrgstatic inline struct qinst *                                             \
116201e04c3fSmrgvir_##name##_dest(struct v3d_compile *c, struct qreg dest)               \
116301e04c3fSmrg{                                                                        \
116401e04c3fSmrg        return vir_emit_nondef(c, vir_inst(op, dest,                     \
116501e04c3fSmrg                                           c->undef, c->undef));         \
116601e04c3fSmrg}
116701e04c3fSmrg
116801e04c3fSmrg#define VIR_ALU1(name, vir_inst, op)                                     \
116901e04c3fSmrgstatic inline struct qreg                                                \
117001e04c3fSmrgvir_##name(struct v3d_compile *c, struct qreg a)                         \
117101e04c3fSmrg{                                                                        \
117201e04c3fSmrg        return vir_emit_def(c, vir_inst(op, c->undef,                    \
117301e04c3fSmrg                                        a, c->undef));                   \
117401e04c3fSmrg}                                                                        \
117501e04c3fSmrgstatic inline struct qinst *                                             \
117601e04c3fSmrgvir_##name##_dest(struct v3d_compile *c, struct qreg dest,               \
117701e04c3fSmrg                  struct qreg a)                                         \
117801e04c3fSmrg{                                                                        \
117901e04c3fSmrg        return vir_emit_nondef(c, vir_inst(op, dest, a,          \
118001e04c3fSmrg                                           c->undef));                   \
118101e04c3fSmrg}
118201e04c3fSmrg
118301e04c3fSmrg#define VIR_ALU2(name, vir_inst, op)                                       \
118401e04c3fSmrgstatic inline struct qreg                                                \
118501e04c3fSmrgvir_##name(struct v3d_compile *c, struct qreg a, struct qreg b)          \
118601e04c3fSmrg{                                                                        \
118701e04c3fSmrg        return vir_emit_def(c, vir_inst(op, c->undef, a, b));    \
118801e04c3fSmrg}                                                                        \
118901e04c3fSmrgstatic inline struct qinst *                                             \
119001e04c3fSmrgvir_##name##_dest(struct v3d_compile *c, struct qreg dest,               \
119101e04c3fSmrg                  struct qreg a, struct qreg b)                          \
119201e04c3fSmrg{                                                                        \
119301e04c3fSmrg        return vir_emit_nondef(c, vir_inst(op, dest, a, b));     \
119401e04c3fSmrg}
119501e04c3fSmrg
119601e04c3fSmrg#define VIR_NODST_0(name, vir_inst, op)                                 \
119701e04c3fSmrgstatic inline struct qinst *                                            \
119801e04c3fSmrgvir_##name(struct v3d_compile *c)                                       \
119901e04c3fSmrg{                                                                       \
120001e04c3fSmrg        return vir_emit_nondef(c, vir_inst(op, c->undef,                \
120101e04c3fSmrg                                           c->undef, c->undef));        \
120201e04c3fSmrg}
120301e04c3fSmrg
120401e04c3fSmrg#define VIR_NODST_1(name, vir_inst, op)                                               \
120501e04c3fSmrgstatic inline struct qinst *                                            \
120601e04c3fSmrgvir_##name(struct v3d_compile *c, struct qreg a)                        \
120701e04c3fSmrg{                                                                       \
120801e04c3fSmrg        return vir_emit_nondef(c, vir_inst(op, c->undef,        \
120901e04c3fSmrg                                           a, c->undef));               \
121001e04c3fSmrg}
121101e04c3fSmrg
121201e04c3fSmrg#define VIR_NODST_2(name, vir_inst, op)                                               \
121301e04c3fSmrgstatic inline struct qinst *                                            \
121401e04c3fSmrgvir_##name(struct v3d_compile *c, struct qreg a, struct qreg b)         \
121501e04c3fSmrg{                                                                       \
121601e04c3fSmrg        return vir_emit_nondef(c, vir_inst(op, c->undef,                \
121701e04c3fSmrg                                           a, b));                      \
121801e04c3fSmrg}
121901e04c3fSmrg
122001e04c3fSmrg#define VIR_SFU(name)                                                      \
122101e04c3fSmrgstatic inline struct qreg                                                \
122201e04c3fSmrgvir_##name(struct v3d_compile *c, struct qreg a)                         \
122301e04c3fSmrg{                                                                        \
122401e04c3fSmrg        if (c->devinfo->ver >= 41) {                                     \
122501e04c3fSmrg                return vir_emit_def(c, vir_add_inst(V3D_QPU_A_##name,    \
122601e04c3fSmrg                                                    c->undef,            \
122701e04c3fSmrg                                                    a, c->undef));       \
122801e04c3fSmrg        } else {                                                         \
122901e04c3fSmrg                vir_FMOV_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_##name), a); \
123001e04c3fSmrg                return vir_FMOV(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_R4)); \
123101e04c3fSmrg        }                                                                \
123201e04c3fSmrg}                                                                        \
123301e04c3fSmrgstatic inline struct qinst *                                             \
123401e04c3fSmrgvir_##name##_dest(struct v3d_compile *c, struct qreg dest,               \
123501e04c3fSmrg                  struct qreg a)                                         \
123601e04c3fSmrg{                                                                        \
123701e04c3fSmrg        if (c->devinfo->ver >= 41) {                                     \
123801e04c3fSmrg                return vir_emit_nondef(c, vir_add_inst(V3D_QPU_A_##name, \
123901e04c3fSmrg                                                       dest,             \
124001e04c3fSmrg                                                       a, c->undef));    \
124101e04c3fSmrg        } else {                                                         \
124201e04c3fSmrg                vir_FMOV_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_##name), a); \
124301e04c3fSmrg                return vir_FMOV_dest(c, dest, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_R4)); \
124401e04c3fSmrg        }                                                                \
124501e04c3fSmrg}
124601e04c3fSmrg
124701e04c3fSmrg#define VIR_A_ALU2(name) VIR_ALU2(name, vir_add_inst, V3D_QPU_A_##name)
124801e04c3fSmrg#define VIR_M_ALU2(name) VIR_ALU2(name, vir_mul_inst, V3D_QPU_M_##name)
124901e04c3fSmrg#define VIR_A_ALU1(name) VIR_ALU1(name, vir_add_inst, V3D_QPU_A_##name)
125001e04c3fSmrg#define VIR_M_ALU1(name) VIR_ALU1(name, vir_mul_inst, V3D_QPU_M_##name)
125101e04c3fSmrg#define VIR_A_ALU0(name) VIR_ALU0(name, vir_add_inst, V3D_QPU_A_##name)
125201e04c3fSmrg#define VIR_M_ALU0(name) VIR_ALU0(name, vir_mul_inst, V3D_QPU_M_##name)
125301e04c3fSmrg#define VIR_A_NODST_2(name) VIR_NODST_2(name, vir_add_inst, V3D_QPU_A_##name)
125401e04c3fSmrg#define VIR_M_NODST_2(name) VIR_NODST_2(name, vir_mul_inst, V3D_QPU_M_##name)
125501e04c3fSmrg#define VIR_A_NODST_1(name) VIR_NODST_1(name, vir_add_inst, V3D_QPU_A_##name)
125601e04c3fSmrg#define VIR_M_NODST_1(name) VIR_NODST_1(name, vir_mul_inst, V3D_QPU_M_##name)
125701e04c3fSmrg#define VIR_A_NODST_0(name) VIR_NODST_0(name, vir_add_inst, V3D_QPU_A_##name)
125801e04c3fSmrg
125901e04c3fSmrgVIR_A_ALU2(FADD)
126001e04c3fSmrgVIR_A_ALU2(VFPACK)
126101e04c3fSmrgVIR_A_ALU2(FSUB)
126201e04c3fSmrgVIR_A_ALU2(FMIN)
126301e04c3fSmrgVIR_A_ALU2(FMAX)
126401e04c3fSmrg
126501e04c3fSmrgVIR_A_ALU2(ADD)
126601e04c3fSmrgVIR_A_ALU2(SUB)
126701e04c3fSmrgVIR_A_ALU2(SHL)
126801e04c3fSmrgVIR_A_ALU2(SHR)
126901e04c3fSmrgVIR_A_ALU2(ASR)
127001e04c3fSmrgVIR_A_ALU2(ROR)
127101e04c3fSmrgVIR_A_ALU2(MIN)
127201e04c3fSmrgVIR_A_ALU2(MAX)
127301e04c3fSmrgVIR_A_ALU2(UMIN)
127401e04c3fSmrgVIR_A_ALU2(UMAX)
127501e04c3fSmrgVIR_A_ALU2(AND)
127601e04c3fSmrgVIR_A_ALU2(OR)
127701e04c3fSmrgVIR_A_ALU2(XOR)
127801e04c3fSmrgVIR_A_ALU2(VADD)
127901e04c3fSmrgVIR_A_ALU2(VSUB)
128001e04c3fSmrgVIR_A_NODST_2(STVPMV)
12817ec681f3SmrgVIR_A_NODST_2(STVPMD)
128201e04c3fSmrgVIR_A_ALU1(NOT)
128301e04c3fSmrgVIR_A_ALU1(NEG)
128401e04c3fSmrgVIR_A_ALU1(FLAPUSH)
128501e04c3fSmrgVIR_A_ALU1(FLBPUSH)
128601e04c3fSmrgVIR_A_ALU1(FLPOP)
12877ec681f3SmrgVIR_A_ALU0(FLAFIRST)
12887ec681f3SmrgVIR_A_ALU0(FLNAFIRST)
128901e04c3fSmrgVIR_A_ALU1(SETMSF)
129001e04c3fSmrgVIR_A_ALU1(SETREVF)
129101e04c3fSmrgVIR_A_ALU0(TIDX)
129201e04c3fSmrgVIR_A_ALU0(EIDX)
129301e04c3fSmrgVIR_A_ALU1(LDVPMV_IN)
129401e04c3fSmrgVIR_A_ALU1(LDVPMV_OUT)
12957ec681f3SmrgVIR_A_ALU1(LDVPMD_IN)
12967ec681f3SmrgVIR_A_ALU1(LDVPMD_OUT)
12977ec681f3SmrgVIR_A_ALU2(LDVPMG_IN)
12987ec681f3SmrgVIR_A_ALU2(LDVPMG_OUT)
129901e04c3fSmrgVIR_A_ALU0(TMUWT)
130001e04c3fSmrg
13017ec681f3SmrgVIR_A_ALU0(IID)
130201e04c3fSmrgVIR_A_ALU0(FXCD)
130301e04c3fSmrgVIR_A_ALU0(XCD)
130401e04c3fSmrgVIR_A_ALU0(FYCD)
130501e04c3fSmrgVIR_A_ALU0(YCD)
130601e04c3fSmrgVIR_A_ALU0(MSF)
130701e04c3fSmrgVIR_A_ALU0(REVF)
1308ed98bd31SmayaVIR_A_ALU0(BARRIERID)
13097ec681f3SmrgVIR_A_ALU0(SAMPID)
131001e04c3fSmrgVIR_A_NODST_1(VPMSETUP)
131101e04c3fSmrgVIR_A_NODST_0(VPMWT)
131201e04c3fSmrgVIR_A_ALU2(FCMP)
131301e04c3fSmrgVIR_A_ALU2(VFMAX)
131401e04c3fSmrg
131501e04c3fSmrgVIR_A_ALU1(FROUND)
131601e04c3fSmrgVIR_A_ALU1(FTOIN)
131701e04c3fSmrgVIR_A_ALU1(FTRUNC)
131801e04c3fSmrgVIR_A_ALU1(FTOIZ)
131901e04c3fSmrgVIR_A_ALU1(FFLOOR)
132001e04c3fSmrgVIR_A_ALU1(FTOUZ)
132101e04c3fSmrgVIR_A_ALU1(FCEIL)
132201e04c3fSmrgVIR_A_ALU1(FTOC)
132301e04c3fSmrg
132401e04c3fSmrgVIR_A_ALU1(FDX)
132501e04c3fSmrgVIR_A_ALU1(FDY)
132601e04c3fSmrg
132701e04c3fSmrgVIR_A_ALU1(ITOF)
132801e04c3fSmrgVIR_A_ALU1(CLZ)
132901e04c3fSmrgVIR_A_ALU1(UTOF)
133001e04c3fSmrg
133101e04c3fSmrgVIR_M_ALU2(UMUL24)
133201e04c3fSmrgVIR_M_ALU2(FMUL)
133301e04c3fSmrgVIR_M_ALU2(SMUL24)
133401e04c3fSmrgVIR_M_NODST_2(MULTOP)
133501e04c3fSmrg
133601e04c3fSmrgVIR_M_ALU1(MOV)
133701e04c3fSmrgVIR_M_ALU1(FMOV)
133801e04c3fSmrg
133901e04c3fSmrgVIR_SFU(RECIP)
134001e04c3fSmrgVIR_SFU(RSQRT)
134101e04c3fSmrgVIR_SFU(EXP)
134201e04c3fSmrgVIR_SFU(LOG)
134301e04c3fSmrgVIR_SFU(SIN)
134401e04c3fSmrgVIR_SFU(RSQRT2)
134501e04c3fSmrg
134601e04c3fSmrgstatic inline struct qinst *
134701e04c3fSmrgvir_MOV_cond(struct v3d_compile *c, enum v3d_qpu_cond cond,
134801e04c3fSmrg             struct qreg dest, struct qreg src)
134901e04c3fSmrg{
135001e04c3fSmrg        struct qinst *mov = vir_MOV_dest(c, dest, src);
135101e04c3fSmrg        vir_set_cond(mov, cond);
135201e04c3fSmrg        return mov;
135301e04c3fSmrg}
135401e04c3fSmrg
135501e04c3fSmrgstatic inline struct qreg
135601e04c3fSmrgvir_SEL(struct v3d_compile *c, enum v3d_qpu_cond cond,
135701e04c3fSmrg        struct qreg src0, struct qreg src1)
135801e04c3fSmrg{
135901e04c3fSmrg        struct qreg t = vir_get_temp(c);
136001e04c3fSmrg        vir_MOV_dest(c, t, src1);
136101e04c3fSmrg        vir_MOV_cond(c, cond, t, src0);
136201e04c3fSmrg        return t;
136301e04c3fSmrg}
136401e04c3fSmrg
136501e04c3fSmrgstatic inline struct qinst *
136601e04c3fSmrgvir_NOP(struct v3d_compile *c)
136701e04c3fSmrg{
136801e04c3fSmrg        return vir_emit_nondef(c, vir_add_inst(V3D_QPU_A_NOP,
136901e04c3fSmrg                                               c->undef, c->undef, c->undef));
137001e04c3fSmrg}
137101e04c3fSmrg
137201e04c3fSmrgstatic inline struct qreg
137301e04c3fSmrgvir_LDTMU(struct v3d_compile *c)
137401e04c3fSmrg{
137501e04c3fSmrg        if (c->devinfo->ver >= 41) {
137601e04c3fSmrg                struct qinst *ldtmu = vir_add_inst(V3D_QPU_A_NOP, c->undef,
137701e04c3fSmrg                                                   c->undef, c->undef);
137801e04c3fSmrg                ldtmu->qpu.sig.ldtmu = true;
137901e04c3fSmrg
138001e04c3fSmrg                return vir_emit_def(c, ldtmu);
138101e04c3fSmrg        } else {
138201e04c3fSmrg                vir_NOP(c)->qpu.sig.ldtmu = true;
138301e04c3fSmrg                return vir_MOV(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_R4));
138401e04c3fSmrg        }
138501e04c3fSmrg}
138601e04c3fSmrg
138701e04c3fSmrgstatic inline struct qreg
138801e04c3fSmrgvir_UMUL(struct v3d_compile *c, struct qreg src0, struct qreg src1)
138901e04c3fSmrg{
139001e04c3fSmrg        vir_MULTOP(c, src0, src1);
139101e04c3fSmrg        return vir_UMUL24(c, src0, src1);
139201e04c3fSmrg}
139301e04c3fSmrg
139401e04c3fSmrgstatic inline struct qreg
13957ec681f3Smrgvir_TLBU_COLOR_READ(struct v3d_compile *c, uint32_t config)
139601e04c3fSmrg{
13977ec681f3Smrg        assert(c->devinfo->ver >= 41); /* XXX */
13987ec681f3Smrg        assert((config & 0xffffff00) == 0xffffff00);
13997ec681f3Smrg
14007ec681f3Smrg        struct qinst *ldtlb = vir_add_inst(V3D_QPU_A_NOP, c->undef,
14017ec681f3Smrg                                           c->undef, c->undef);
14027ec681f3Smrg        ldtlb->qpu.sig.ldtlbu = true;
14037ec681f3Smrg        ldtlb->uniform = vir_get_uniform_index(c, QUNIFORM_CONSTANT, config);
14047ec681f3Smrg        return vir_emit_def(c, ldtlb);
140501e04c3fSmrg}
140601e04c3fSmrg
140701e04c3fSmrgstatic inline struct qreg
14087ec681f3Smrgvir_TLB_COLOR_READ(struct v3d_compile *c)
140901e04c3fSmrg{
14107ec681f3Smrg        assert(c->devinfo->ver >= 41); /* XXX */
14117ec681f3Smrg
14127ec681f3Smrg        struct qinst *ldtlb = vir_add_inst(V3D_QPU_A_NOP, c->undef,
14137ec681f3Smrg                                           c->undef, c->undef);
14147ec681f3Smrg        ldtlb->qpu.sig.ldtlb = true;
14157ec681f3Smrg        return vir_emit_def(c, ldtlb);
141601e04c3fSmrg}
141701e04c3fSmrg
141801e04c3fSmrgstatic inline struct qinst *
1419ed98bd31Smayavir_BRANCH(struct v3d_compile *c, enum v3d_qpu_branch_cond cond)
142001e04c3fSmrg{
142101e04c3fSmrg        /* The actual uniform_data value will be set at scheduling time */
1422ed98bd31Smaya        return vir_emit_nondef(c, vir_branch_inst(c, cond));
142301e04c3fSmrg}
142401e04c3fSmrg
142501e04c3fSmrg#define vir_for_each_block(block, c)                                    \
142601e04c3fSmrg        list_for_each_entry(struct qblock, block, &c->blocks, link)
142701e04c3fSmrg
142801e04c3fSmrg#define vir_for_each_block_rev(block, c)                                \
142901e04c3fSmrg        list_for_each_entry_rev(struct qblock, block, &c->blocks, link)
143001e04c3fSmrg
143101e04c3fSmrg/* Loop over the non-NULL members of the successors array. */
143201e04c3fSmrg#define vir_for_each_successor(succ, block)                             \
143301e04c3fSmrg        for (struct qblock *succ = block->successors[0];                \
143401e04c3fSmrg             succ != NULL;                                              \
143501e04c3fSmrg             succ = (succ == block->successors[1] ? NULL :              \
143601e04c3fSmrg                     block->successors[1]))
143701e04c3fSmrg
143801e04c3fSmrg#define vir_for_each_inst(inst, block)                                  \
143901e04c3fSmrg        list_for_each_entry(struct qinst, inst, &block->instructions, link)
144001e04c3fSmrg
144101e04c3fSmrg#define vir_for_each_inst_rev(inst, block)                                  \
144201e04c3fSmrg        list_for_each_entry_rev(struct qinst, inst, &block->instructions, link)
144301e04c3fSmrg
144401e04c3fSmrg#define vir_for_each_inst_safe(inst, block)                             \
144501e04c3fSmrg        list_for_each_entry_safe(struct qinst, inst, &block->instructions, link)
144601e04c3fSmrg
144701e04c3fSmrg#define vir_for_each_inst_inorder(inst, c)                              \
144801e04c3fSmrg        vir_for_each_block(_block, c)                                   \
144901e04c3fSmrg                vir_for_each_inst(inst, _block)
145001e04c3fSmrg
1451ed98bd31Smaya#define vir_for_each_inst_inorder_safe(inst, c)                         \
1452ed98bd31Smaya        vir_for_each_block(_block, c)                                   \
1453ed98bd31Smaya                vir_for_each_inst_safe(inst, _block)
1454ed98bd31Smaya
145501e04c3fSmrg#endif /* V3D_COMPILER_H */
1456