Home | History | Annotate | Line # | Download | only in ir3
      1 /*
      2  * Copyright (C) 2015 Rob Clark <robclark (at) freedesktop.org>
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     21  * SOFTWARE.
     22  *
     23  * Authors:
     24  *    Rob Clark <robclark (at) freedesktop.org>
     25  */
     26 
     27 #include "util/ralloc.h"
     28 
     29 #include "freedreno_dev_info.h"
     30 
     31 #include "ir3_compiler.h"
     32 
     33 static const struct debug_named_value shader_debug_options[] = {
     34    /* clang-format off */
     35    {"vs",         IR3_DBG_SHADER_VS,  "Print shader disasm for vertex shaders"},
     36    {"tcs",        IR3_DBG_SHADER_TCS, "Print shader disasm for tess ctrl shaders"},
     37    {"tes",        IR3_DBG_SHADER_TES, "Print shader disasm for tess eval shaders"},
     38    {"gs",         IR3_DBG_SHADER_GS,  "Print shader disasm for geometry shaders"},
     39    {"fs",         IR3_DBG_SHADER_FS,  "Print shader disasm for fragment shaders"},
     40    {"cs",         IR3_DBG_SHADER_CS,  "Print shader disasm for compute shaders"},
     41    {"disasm",     IR3_DBG_DISASM,     "Dump NIR and adreno shader disassembly"},
     42    {"optmsgs",    IR3_DBG_OPTMSGS,    "Enable optimizer debug messages"},
     43    {"forces2en",  IR3_DBG_FORCES2EN,  "Force s2en mode for tex sampler instructions"},
     44    {"nouboopt",   IR3_DBG_NOUBOOPT,   "Disable lowering UBO to uniform"},
     45    {"nofp16",     IR3_DBG_NOFP16,     "Don't lower mediump to fp16"},
     46    {"nocache",    IR3_DBG_NOCACHE,    "Disable shader cache"},
     47    {"spillall",   IR3_DBG_SPILLALL,   "Spill as much as possible to test the spiller"},
     48 #ifdef DEBUG
     49    /* DEBUG-only options: */
     50    {"schedmsgs",  IR3_DBG_SCHEDMSGS,  "Enable scheduler debug messages"},
     51    {"ramsgs",     IR3_DBG_RAMSGS,     "Enable register-allocation debug messages"},
     52 #endif
     53    DEBUG_NAMED_VALUE_END
     54    /* clang-format on */
     55 };
     56 
     57 DEBUG_GET_ONCE_FLAGS_OPTION(ir3_shader_debug, "IR3_SHADER_DEBUG",
     58                             shader_debug_options, 0)
     59 DEBUG_GET_ONCE_OPTION(ir3_shader_override_path, "IR3_SHADER_OVERRIDE_PATH",
     60                       NULL)
     61 
     62 enum ir3_shader_debug ir3_shader_debug = 0;
     63 const char *ir3_shader_override_path = NULL;
     64 
     65 void
     66 ir3_compiler_destroy(struct ir3_compiler *compiler)
     67 {
     68    disk_cache_destroy(compiler->disk_cache);
     69    ralloc_free(compiler);
     70 }
     71 
     72 struct ir3_compiler *
     73 ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id,
     74                     bool robust_ubo_access)
     75 {
     76    struct ir3_compiler *compiler = rzalloc(NULL, struct ir3_compiler);
     77 
     78    ir3_shader_debug = debug_get_option_ir3_shader_debug();
     79    ir3_shader_override_path =
     80       !__check_suid() ? debug_get_option_ir3_shader_override_path() : NULL;
     81 
     82    if (ir3_shader_override_path) {
     83       ir3_shader_debug |= IR3_DBG_NOCACHE;
     84    }
     85 
     86    compiler->dev = dev;
     87    compiler->dev_id = dev_id;
     88    compiler->gen = fd_dev_gen(dev_id);
     89    compiler->robust_ubo_access = robust_ubo_access;
     90 
     91    /* All known GPU's have 32k local memory (aka shared) */
     92    compiler->local_mem_size = 32 * 1024;
     93    /* TODO see if older GPU's were different here */
     94    compiler->branchstack_size = 64;
     95    compiler->wave_granularity = 2;
     96    compiler->max_waves = 16;
     97 
     98    if (compiler->gen >= 6) {
     99       compiler->samgq_workaround = true;
    100       /* a6xx split the pipeline state into geometry and fragment state, in
    101        * order to let the VS run ahead of the FS. As a result there are now
    102        * separate const files for the the fragment shader and everything
    103        * else, and separate limits. There seems to be a shared limit, but
    104        * it's higher than the vert or frag limits.
    105        *
    106        * TODO: The shared limit seems to be different on different on
    107        * different models.
    108        */
    109       compiler->max_const_pipeline = 640;
    110       compiler->max_const_frag = 512;
    111       compiler->max_const_geom = 512;
    112       compiler->max_const_safe = 128;
    113 
    114       /* Compute shaders don't share a const file with the FS. Instead they
    115        * have their own file, which is smaller than the FS one.
    116        *
    117        * TODO: is this true on earlier gen's?
    118        */
    119       compiler->max_const_compute = 256;
    120 
    121       /* TODO: implement clip+cull distances on earlier gen's */
    122       compiler->has_clip_cull = true;
    123 
    124       /* TODO: implement private memory on earlier gen's */
    125       compiler->has_pvtmem = true;
    126 
    127       compiler->tess_use_shared =
    128             fd_dev_info(compiler->dev_id)->a6xx.tess_use_shared;
    129 
    130       compiler->storage_16bit =
    131             fd_dev_info(compiler->dev_id)->a6xx.storage_16bit;
    132    } else {
    133       compiler->max_const_pipeline = 512;
    134       compiler->max_const_geom = 512;
    135       compiler->max_const_frag = 512;
    136       compiler->max_const_compute = 512;
    137 
    138       /* Note: this will have to change if/when we support tess+GS on
    139        * earlier gen's.
    140        */
    141       compiler->max_const_safe = 256;
    142    }
    143 
    144    if (compiler->gen >= 6) {
    145       compiler->reg_size_vec4 =
    146             fd_dev_info(compiler->dev_id)->a6xx.reg_size_vec4;
    147    } else if (compiler->gen >= 4) {
    148       /* On a4xx-a5xx, using r24.x and above requires using the smallest
    149        * threadsize.
    150        */
    151       compiler->reg_size_vec4 = 48;
    152    } else {
    153       /* TODO: confirm this */
    154       compiler->reg_size_vec4 = 96;
    155    }
    156 
    157    if (compiler->gen >= 6) {
    158       compiler->threadsize_base = 64;
    159    } else if (compiler->gen >= 4) {
    160       /* TODO: Confirm this for a4xx. For a5xx this is based on the Vulkan
    161        * 1.1 subgroupSize which is 32.
    162        */
    163       compiler->threadsize_base = 32;
    164    } else {
    165       compiler->threadsize_base = 8;
    166    }
    167 
    168    if (compiler->gen >= 4) {
    169       /* need special handling for "flat" */
    170       compiler->flat_bypass = true;
    171       compiler->levels_add_one = false;
    172       compiler->unminify_coords = false;
    173       compiler->txf_ms_with_isaml = false;
    174       compiler->array_index_add_half = true;
    175       compiler->instr_align = 16;
    176       compiler->const_upload_unit = 4;
    177    } else {
    178       /* no special handling for "flat" */
    179       compiler->flat_bypass = false;
    180       compiler->levels_add_one = true;
    181       compiler->unminify_coords = true;
    182       compiler->txf_ms_with_isaml = true;
    183       compiler->array_index_add_half = false;
    184       compiler->instr_align = 4;
    185       compiler->const_upload_unit = 8;
    186    }
    187 
    188    ir3_disk_cache_init(compiler);
    189 
    190    return compiler;
    191 }
    192