Home | History | Annotate | Line # | Download | only in ir3
      1 /*
      2  * Copyright (C) 2013 Rob Clark <robclark (at) freedesktop.org>
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     21  * SOFTWARE.
     22  *
     23  * Authors:
     24  *    Rob Clark <robclark (at) freedesktop.org>
     25  */
     26 
     27 #ifndef IR3_COMPILER_H_
     28 #define IR3_COMPILER_H_
     29 
     30 #include "util/disk_cache.h"
     31 #include "util/log.h"
     32 
     33 #include "freedreno_dev_info.h"
     34 
     35 #include "ir3.h"
     36 
/* Forward declarations of types defined elsewhere.
 *
 * struct ir3_shader is used (by pointer only) in the
 * ir3_disk_cache_init_shader_key() prototype in this header.
 *
 * NOTE(review): struct ir3_ra_reg_set is not referenced anywhere else in this
 * header; presumably kept for other users of this file -- confirm before
 * removing.
 */
struct ir3_ra_reg_set;
struct ir3_shader;
     39 
/* Per-device compiler state and configuration.
 *
 * Instances are obtained from ir3_compiler_create() and released with
 * ir3_compiler_destroy().  Most fields describe generation-specific
 * behaviour or hardware limits that the rest of the compiler consults.
 */
struct ir3_compiler {
   /* Device handle and device id.  NOTE(review): presumably the values
    * passed to ir3_compiler_create() -- confirm in the implementation.
    * dev_id is what ir3_pointer_size() queries via fd_dev_64b().
    */
   struct fd_device *dev;
   const struct fd_dev_id *dev_id;

   /* NOTE(review): presumably the GPU generation number (e.g. 3 for a3xx);
    * not established by anything visible in this header -- confirm.
    */
   uint8_t gen;

   /* NOTE(review): purpose not visible in this header; looks like a running
    * count of shaders created with this compiler -- confirm.
    */
   uint32_t shader_count;

   /* Shader disk cache handle; see the ir3_disk_cache_*() declarations. */
   struct disk_cache *disk_cache;

   /* If true, UBO accesses are assumed to be bounds-checked as defined by
    * VK_EXT_robustness2, and optimizations may have to be more conservative.
    */
   bool robust_ubo_access;

   /*
    * Configuration options for things that are handled differently on
    * different generations:
    */

   /* a4xx (and later) drops SP_FS_FLAT_SHAD_MODE_REG_* for flat-interpolate,
    * so we need to use ldlv.u32 to load the varying directly:
    */
   bool flat_bypass;

   /* On a3xx, we need to add one to the number of array levels: */
   bool levels_add_one;

   /* On a3xx, we need to scale up integer coords for isaml based on LoD: */
   bool unminify_coords;

   /* On a3xx, do txf_ms with isaml and scaled coords: */
   bool txf_ms_with_isaml;

   /* On a4xx, for array textures we need to add 0.5 to the array index
    * coordinate:
    */
   bool array_index_add_half;

   /* On a6xx, rewrite samgp to a sequence of samgq0-3 in vertex shaders: */
   bool samgq_workaround;

   /* On a650, vertex shader <-> tess control I/O uses LDL/STL. */
   bool tess_use_shared;

   /* The maximum number of constants, in vec4's, across the entire graphics
    * pipeline.
    */
   uint16_t max_const_pipeline;

   /* The maximum number of constants, in vec4's, for VS+HS+DS+GS. */
   uint16_t max_const_geom;

   /* The maximum number of constants, in vec4's, for FS. */
   uint16_t max_const_frag;

   /* A "safe" max constlen that can be applied to each shader in the
    * pipeline, which we guarantee will never exceed any combined limits.
    */
   uint16_t max_const_safe;

   /* The maximum number of constants, in vec4's, for compute shaders. */
   uint16_t max_const_compute;

   /* Number of instructions that the shader's base address and length
    * (instrlen divides instruction count by this) must be aligned to.
    */
   uint32_t instr_align;

   /* On a3xx, the unit of indirect const load is higher than on later gens
    * (in vec4 units):
    */
   uint32_t const_upload_unit;

   /* The base number of threads per wave.  Some stages may be able to double
    * this.
    */
   uint32_t threadsize_base;

   /* On at least a6xx, waves are always launched in pairs.  In calculations
    * about occupancy, we pretend that each wave pair is actually one wave,
    * which simplifies many of the calculations, but means we have to
    * multiply threadsize_base by this number.
    */
   uint32_t wave_granularity;

   /* The maximum number of simultaneous waves per core. */
   uint32_t max_waves;

   /* This is the theoretical maximum number of vec4 registers that one wave
    * of the base threadsize could use.  To get the actual size of the
    * register file in bytes one would need to compute:
    *
    * reg_size_vec4 * threadsize_base * wave_granularity * 16 (bytes per vec4)
    *
    * However, this number is more often what we actually need.  For example,
    * a max_reg of more than half of this makes a doubled threadsize
    * impossible (because double-sized waves take up twice as many
    * registers).  Also, the formula for the occupancy given a particular
    * register footprint is simpler.
    *
    * It is in vec4 units because the register file is allocated with vec4
    * granularity, so it is in the same units as max_reg.
    */
   uint32_t reg_size_vec4;

   /* The size of local memory, in bytes. */
   uint32_t local_mem_size;

   /* The number of total branch stack entries, divided by wave_granularity. */
   uint32_t branchstack_size;

   /* Whether clip+cull distances are supported. */
   bool has_clip_cull;

   /* Whether private memory is supported. */
   bool has_pvtmem;

   /* True if 16-bit descriptors are used for both 16-bit and 32-bit access. */
   bool storage_16bit;
};
    163 
/* Compiler object lifetime (declared here, defined elsewhere).
 *
 * ir3_compiler_create() takes the device, its device id and the
 * robust-UBO-access setting and returns a pointer to a struct ir3_compiler.
 * NOTE(review): presumably the arguments initialize the struct fields of the
 * same names and the result must be released with ir3_compiler_destroy();
 * failure behaviour (e.g. a NULL return) is not visible in this header --
 * confirm in the implementation.
 */
void ir3_compiler_destroy(struct ir3_compiler *compiler);
struct ir3_compiler *ir3_compiler_create(struct fd_device *dev,
                                         const struct fd_dev_id *dev_id,
                                         bool robust_ubo_access);
    168 
/* Shader disk-cache interface (declared here, defined elsewhere).
 *
 * All entry points take the compiler; the init_shader_key variant operates
 * on a whole shader, while retrieve/store operate on a single shader
 * variant.
 *
 * NOTE(review): presumably ir3_disk_cache_retrieve() returns true when the
 * variant was found in the cache and false on a miss -- the return-value
 * convention is not visible in this header, confirm in the implementation.
 */
void ir3_disk_cache_init(struct ir3_compiler *compiler);
void ir3_disk_cache_init_shader_key(struct ir3_compiler *compiler,
                                    struct ir3_shader *shader);
bool ir3_disk_cache_retrieve(struct ir3_compiler *compiler,
                             struct ir3_shader_variant *v);
void ir3_disk_cache_store(struct ir3_compiler *compiler,
                          struct ir3_shader_variant *v);
    176 
/* Compile entry point for one shader variant (declared here, defined
 * elsewhere).
 *
 * NOTE(review): the meaning of the int return value (presumably 0 on
 * success, non-zero on failure) is not visible in this header -- confirm.
 */
int ir3_compile_shader_nir(struct ir3_compiler *compiler,
                           struct ir3_shader_variant *so);
    179 
    180 /* gpu pointer size in units of 32bit registers/slots */
    181 static inline unsigned
    182 ir3_pointer_size(struct ir3_compiler *compiler)
    183 {
    184    return fd_dev_64b(compiler->dev_id) ? 2 : 1;
    185 }
    186 
/* Debug flags, combined as a bitmask in the global ir3_shader_debug.
 *
 * Flags whose use is visible in this header:
 *  - IR3_DBG_SHADER_{VS,TCS,TES,GS,FS,CS}: per-stage enables tested by
 *    shader_debug_enabled() for the matching shader stage.
 *  - IR3_DBG_DISASM: makes shader_debug_enabled() return true for every
 *    stage.
 *  - IR3_DBG_OPTMSGS: makes ir3_debug_print() log and print the IR.
 *
 * NOTE(review): the remaining flags are consumed outside this header; their
 * meaning is presumably what the names suggest (e.g. IR3_DBG_NOCACHE
 * disabling the disk cache) -- confirm against their users.
 */
enum ir3_shader_debug {
   IR3_DBG_SHADER_VS = BITFIELD_BIT(0),
   IR3_DBG_SHADER_TCS = BITFIELD_BIT(1),
   IR3_DBG_SHADER_TES = BITFIELD_BIT(2),
   IR3_DBG_SHADER_GS = BITFIELD_BIT(3),
   IR3_DBG_SHADER_FS = BITFIELD_BIT(4),
   IR3_DBG_SHADER_CS = BITFIELD_BIT(5),
   IR3_DBG_DISASM = BITFIELD_BIT(6),
   IR3_DBG_OPTMSGS = BITFIELD_BIT(7),
   IR3_DBG_FORCES2EN = BITFIELD_BIT(8),
   IR3_DBG_NOUBOOPT = BITFIELD_BIT(9),
   IR3_DBG_NOFP16 = BITFIELD_BIT(10),
   IR3_DBG_NOCACHE = BITFIELD_BIT(11),
   IR3_DBG_SPILLALL = BITFIELD_BIT(12),

   /* DEBUG-only options: */
   IR3_DBG_SCHEDMSGS = BITFIELD_BIT(20),
   IR3_DBG_RAMSGS = BITFIELD_BIT(21),

   /* Only used for the disk-caching logic: */
   IR3_DBG_ROBUST_UBO_ACCESS = BITFIELD_BIT(30),
};
    209 
/* Globals defined elsewhere.
 *
 * ir3_shader_debug holds the active bitmask of enum ir3_shader_debug flags;
 * it is read by shader_debug_enabled() and ir3_debug_print() in this header.
 *
 * NOTE(review): ir3_shader_override_path is not used in this header;
 * presumably a directory or file path for replacement shaders -- confirm
 * against its users.
 */
extern enum ir3_shader_debug ir3_shader_debug;
extern const char *ir3_shader_override_path;
    212 
    213 static inline bool
    214 shader_debug_enabled(gl_shader_stage type)
    215 {
    216    if (ir3_shader_debug & IR3_DBG_DISASM)
    217       return true;
    218 
    219    switch (type) {
    220    case MESA_SHADER_VERTEX:
    221       return !!(ir3_shader_debug & IR3_DBG_SHADER_VS);
    222    case MESA_SHADER_TESS_CTRL:
    223       return !!(ir3_shader_debug & IR3_DBG_SHADER_TCS);
    224    case MESA_SHADER_TESS_EVAL:
    225       return !!(ir3_shader_debug & IR3_DBG_SHADER_TES);
    226    case MESA_SHADER_GEOMETRY:
    227       return !!(ir3_shader_debug & IR3_DBG_SHADER_GS);
    228    case MESA_SHADER_FRAGMENT:
    229       return !!(ir3_shader_debug & IR3_DBG_SHADER_FS);
    230    case MESA_SHADER_COMPUTE:
    231       return !!(ir3_shader_debug & IR3_DBG_SHADER_CS);
    232    default:
    233       debug_assert(0);
    234       return false;
    235    }
    236 }
    237 
    238 static inline void
    239 ir3_debug_print(struct ir3 *ir, const char *when)
    240 {
    241    if (ir3_shader_debug & IR3_DBG_OPTMSGS) {
    242       mesa_logi("%s:", when);
    243       ir3_print(ir);
    244    }
    245 }
    246 
    247 #endif /* IR3_COMPILER_H_ */
    248