freedreno/common/freedreno_devices.py

7ec681f3Smrg#
7ec681f3Smrg# Copyright © 2021 Google, Inc.
7ec681f3Smrg#
7ec681f3Smrg# Permission is hereby granted, free of charge, to any person obtaining a
7ec681f3Smrg# copy of this software and associated documentation files (the "Software"),
7ec681f3Smrg# to deal in the Software without restriction, including without limitation
7ec681f3Smrg# the rights to use, copy, modify, merge, publish, distribute, sublicense,
7ec681f3Smrg# and/or sell copies of the Software, and to permit persons to whom the
7ec681f3Smrg# Software is furnished to do so, subject to the following conditions:
7ec681f3Smrg#
7ec681f3Smrg# The above copyright notice and this permission notice (including the next
7ec681f3Smrg# paragraph) shall be included in all copies or substantial portions of the
7ec681f3Smrg# Software.
7ec681f3Smrg#
7ec681f3Smrg# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
7ec681f3Smrg# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
7ec681f3Smrg# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
7ec681f3Smrg# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
7ec681f3Smrg# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
7ec681f3Smrg# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
7ec681f3Smrg# IN THE SOFTWARE.
7ec681f3Smrg
7ec681f3Smrgfrom mako.template import Template
7ec681f3Smrgimport sys
7ec681f3Smrg
def max_bitfield_val(high, low, shift):
    """Build a mask of ``high - low`` one-bits and shift it left by `shift`.

    NOTE(review): the width used here is ``high - low``, not the inclusive
    ``high - low + 1``.  Some inline comments in the tables below (e.g.
    ``992`` annotated as ``max_bitfield_val(4, 0, 5)``) only match the
    inclusive form -- confirm which is intended before changing anything,
    since the result feeds the emitted tile limits.
    """
    width = high - low
    mask = (1 << width) - 1
    return mask << shift
7ec681f3Smrg
class State(object):
    """Registry holding the device-info structs and the GPU id -> info map
       that the template at the end of this file renders.
    """
    def __init__(self):
        # List of unique device-info structs, multiple different GPU ids
        # can map to a single info struct in cases where the differences
        # are not sw visible, or the only differences are parameters
        # queried from the kernel (like GMEM size)
        self.gpu_infos = []

        # Table mapping GPU id to device-info struct
        self.gpus = {}

    def info_index(self, gpu_info):
        """Return the position of `gpu_info` within `self.gpu_infos`.

        Raises:
            ValueError: if `gpu_info` is not present in `self.gpu_infos`.
        """
        for index, info in enumerate(self.gpu_infos):
            if gpu_info == info:
                return index
        # Previously this raised the undefined name `Error`, which surfaced
        # as a NameError instead of a meaningful exception.
        raise ValueError("invalid info")
7ec681f3Smrg
# Module-wide registry: filled in by the table definitions below and handed
# to the template when it is rendered.
s = State()

def add_gpus(ids, info):
    """Map every GPU id in `ids` to the device-info object `info` in
       `s.gpus`, in iteration order.
    """
    s.gpus.update((gpu, info) for gpu in ids)
7ec681f3Smrg
class GPUId(object):
    """Identity of one GPU: numeric gpu_id, chip_id and a display name.

       gpu_id:  decimal id such as 630; stored as 0 when not given.
       chip_id: when not given it is derived from gpu_id, whose hundreds,
                tens and units digits are packed into bits 24, 16 and 8,
                with the low byte set to 0xff.
       name:    defaults to "FD<gpu_id>", which requires a non-zero gpu_id.

       Raises AssertionError when neither gpu_id nor chip_id is given, or
       when no name is given and gpu_id is zero/absent.
    """
    def __init__(self, gpu_id=None, chip_id=None, name=None):
        if chip_id is None:
            assert gpu_id is not None
            # Integer arithmetic throughout; avoids the float round-trip of
            # int(val / 100).
            core, remainder = divmod(gpu_id, 100)
            major, minor = divmod(remainder, 10)
            chip_id = (core << 24) | (major << 16) | (minor << 8) | 0xff
        self.chip_id = chip_id
        if gpu_id is None:
            gpu_id = 0
        self.gpu_id = gpu_id
        if name is None:
            assert gpu_id != 0
            name = "FD%d" % gpu_id
        self.name = name
7ec681f3Smrg
class Struct(object):
    """A helper class that stringifies itself to a 'C' struct initializer

       Every instance attribute becomes a ".name=value," designated
       initializer, in attribute-assignment order; values are converted
       with str(), so nested Struct attributes nest naturally.
    """
    def __str__(self):
        # Local deliberately not called `s`, so it cannot be confused with
        # the module-level registry of that name.
        fields = "".join(
            "." + name + "=" + str(value) + ","
            for name, value in vars(self).items()
        )
        return "{" + fields + "}"
7ec681f3Smrg
class GPUInfo(Struct):
    """Base class for any generation of adreno, consists of GMEM layout
       related parameters

       tile_max_h is normally only constrained by the corresponding
       bitfield size/shift (ie. VSC_BIN_SIZE, or similar).  Where a limit
       is given as a literal instead, a comment next to it describes the
       bitfield size/shift.

       NOTE(review): in the tables of this file it is tile_max_w that is
       written as a literal with such a comment -- presumably that is the
       field with the lower limit; confirm.

       Constructing an instance also appends it to `s.gpu_infos`.
    """
    def __init__(self, gmem_align_w, gmem_align_h,
                 tile_align_w, tile_align_h,
                 tile_max_w, tile_max_h, num_vsc_pipes):
        # Assignment order is kept stable: Struct.__str__ emits the fields
        # in the order they were set.
        fields = (
            ("gmem_align_w", gmem_align_w),
            ("gmem_align_h", gmem_align_h),
            ("tile_align_w", tile_align_w),
            ("tile_align_h", tile_align_h),
            ("tile_max_w", tile_max_w),
            ("tile_max_h", tile_max_h),
            ("num_vsc_pipes", num_vsc_pipes),
        )
        for field, value in fields:
            setattr(self, field, value)

        s.gpu_infos.append(self)

7ec681f3Smrg
7ec681f3Smrg
class A6xxGPUInfo(GPUInfo):
    """The a6xx generation has a lot more parameters, and is broken down
       into distinct sub-generations.  The template parameter avoids
       duplication of parameters that are unique to the sub-generation.

       `template` is a dict; its "magic" entry (itself a dict) is copied
       into `self.a6xx.magic`, every other entry is copied onto
       `self.a6xx`, overriding the defaults set here.
    """
    def __init__(self, template, num_sp_cores, num_ccu,
                 RB_UNKNOWN_8E04_blit, PC_POWER_CNTL):
        super().__init__(
            gmem_align_w=16, gmem_align_h=4,
            tile_align_w=32, tile_align_h=32,
            tile_max_w=1024,  # max_bitfield_val(5, 0, 5)
            tile_max_h=max_bitfield_val(14, 8, 4),
            num_vsc_pipes=32,
        )
        assert num_sp_cores == num_ccu

        self.num_sp_cores = num_sp_cores

        # 96 tile alignment seems correlated to 3 CCU
        if num_ccu == 3:
            self.tile_align_w = 96

        # Various "magic" register values: the sub-generation's values go
        # first, followed by the per-GPU ones.
        magic = Struct()
        for key, value in template["magic"].items():
            setattr(magic, key, value)
        magic.RB_UNKNOWN_8E04_blit = RB_UNKNOWN_8E04_blit
        magic.PC_POWER_CNTL = PC_POWER_CNTL

        a6xx = Struct()
        a6xx.magic = magic

        # Things that earlier gens have and later gens remove, provide
        # defaults here and let them be overridden by sub-gen template:
        a6xx.has_cp_reg_write = True
        a6xx.has_8bpp_ubwc = True

        for key, value in template.items():
            if key != "magic":  # already copied into a6xx.magic
                setattr(a6xx, key, value)

        self.a6xx = a6xx

7ec681f3Smrg
# a2xx is really two sub-generations, a20x and a22x, but we don't currently
# capture that in the device-info tables
add_gpus(
    [GPUId(n) for n in (200, 201, 205, 220)],
    GPUInfo(
        gmem_align_w=32, gmem_align_h=32,
        tile_align_w=32, tile_align_h=32,
        tile_max_w=512,
        tile_max_h=~0,  # TODO
        num_vsc_pipes=8,
    ),
)

# a3xx
add_gpus(
    [GPUId(n) for n in (305, 307, 320, 330)],
    GPUInfo(
        gmem_align_w=32, gmem_align_h=32,
        tile_align_w=32, tile_align_h=32,
        tile_max_w=992,  # max_bitfield_val(4, 0, 5)
        tile_max_h=max_bitfield_val(9, 5, 5),
        num_vsc_pipes=8,
    ),
)

# a4xx
add_gpus(
    [GPUId(n) for n in (405, 420, 430)],
    GPUInfo(
        gmem_align_w=32, gmem_align_h=32,
        tile_align_w=32, tile_align_h=32,
        tile_max_w=1024,  # max_bitfield_val(4, 0, 5)
        tile_max_h=max_bitfield_val(9, 5, 5),
        num_vsc_pipes=8,
    ),
)

# a5xx
add_gpus(
    [GPUId(n) for n in (508, 509, 510, 512, 530, 540)],
    GPUInfo(
        gmem_align_w=64, gmem_align_h=32,
        tile_align_w=64, tile_align_h=32,
        tile_max_w=1024,  # max_bitfield_val(7, 0, 5)
        tile_max_h=max_bitfield_val(16, 9, 5),
        num_vsc_pipes=16,
    ),
)
7ec681f3Smrg
7ec681f3Smrg# a6xx can be divided into distinct sub-generations, where certain device-
7ec681f3Smrg# info parameters are keyed to the sub-generation.  These templates reduce
7ec681f3Smrg# the copypaste
7ec681f3Smrg
# a615, a618, a630:
a6xx_gen1 = {
    "fibers_per_sp": 128 * 16,
    "reg_size_vec4": 96,
    "ccu_cntl_gmem_unk2": True,
    "indirect_draw_wfm_quirk": True,
    "depth_bounds_require_depth_test_quirk": True,
    "magic": {
        "TPL1_DBG_ECO_CNTL": 0x100000,
    },
}
7ec681f3Smrg
# a640, a680:
a6xx_gen2 = {
    "fibers_per_sp": 128 * 4 * 16,
    "reg_size_vec4": 96,
    "supports_multiview_mask": True,
    "has_z24uint_s8uint": True,
    "indirect_draw_wfm_quirk": True,
    "depth_bounds_require_depth_test_quirk": True,  # TODO: check if true
    "magic": {
        "TPL1_DBG_ECO_CNTL": 0,
    },
}
7ec681f3Smrg
# a650:
a6xx_gen3 = {
    "fibers_per_sp": 128 * 2 * 16,
    "reg_size_vec4": 64,
    "supports_multiview_mask": True,
    "has_z24uint_s8uint": True,
    "tess_use_shared": True,
    "storage_16bit": True,
    "has_tex_filter_cubic": True,
    "has_sample_locations": True,
    "has_ccu_flush_bug": True,
    "has_8bpp_ubwc": False,
    "magic": {
        # this seems to be a chicken bit that fixes cubic filtering:
        "TPL1_DBG_ECO_CNTL": 0x1000000,
    },
}
7ec681f3Smrg
# a635, a660:
a6xx_gen4 = {
    "fibers_per_sp": 128 * 2 * 16,
    "reg_size_vec4": 64,
    "supports_multiview_mask": True,
    "has_z24uint_s8uint": True,
    "tess_use_shared": True,
    "storage_16bit": True,
    "has_tex_filter_cubic": True,
    "has_sample_locations": True,
    "has_cp_reg_write": False,
    "has_8bpp_ubwc": False,
    "has_lpac": True,
    "has_shading_rate": True,
    "magic": {
        "TPL1_DBG_ECO_CNTL": 0x5008000,
    },
}
7ec681f3Smrg
# a6xx registrations: each entry pairs one or more GPU ids with the
# sub-generation template and the per-GPU parameters.
add_gpus(
    [GPUId(615), GPUId(618)],
    A6xxGPUInfo(
        template=a6xx_gen1,
        num_sp_cores=1,
        num_ccu=1,
        RB_UNKNOWN_8E04_blit=0x00100000,
        PC_POWER_CNTL=0,
    ),
)

add_gpus(
    [GPUId(630)],
    A6xxGPUInfo(
        template=a6xx_gen1,
        num_sp_cores=2,
        num_ccu=2,
        RB_UNKNOWN_8E04_blit=0x01000000,
        PC_POWER_CNTL=1,
    ),
)

add_gpus(
    [GPUId(640)],
    A6xxGPUInfo(
        template=a6xx_gen2,
        num_sp_cores=2,
        num_ccu=2,
        RB_UNKNOWN_8E04_blit=0x00100000,
        PC_POWER_CNTL=1,
    ),
)

add_gpus(
    [GPUId(680)],
    A6xxGPUInfo(
        template=a6xx_gen2,
        num_sp_cores=4,
        num_ccu=4,
        RB_UNKNOWN_8E04_blit=0x04100000,
        PC_POWER_CNTL=3,
    ),
)

add_gpus(
    [GPUId(650)],
    A6xxGPUInfo(
        template=a6xx_gen3,
        num_sp_cores=3,
        num_ccu=3,
        RB_UNKNOWN_8E04_blit=0x04100000,
        PC_POWER_CNTL=2,
    ),
)

add_gpus(
    [GPUId(chip_id=0x06030500, name="Adreno 7c Gen 3")],
    A6xxGPUInfo(
        template=a6xx_gen4,
        num_sp_cores=2,
        num_ccu=2,
        RB_UNKNOWN_8E04_blit=0x00100000,
        PC_POWER_CNTL=1,
    ),
)

add_gpus(
    [GPUId(660)],
    A6xxGPUInfo(
        template=a6xx_gen4,
        num_sp_cores=3,
        num_ccu=3,
        RB_UNKNOWN_8E04_blit=0x04100000,
        PC_POWER_CNTL=2,
    ),
)
7ec681f3Smrg
# Mako template for the generated C source.  It emits one
# `static const struct fd_dev_info __info<N>` per entry of s.gpu_infos
# (N being its index in that list), followed by the fd_dev_recs[] table
# with one row per entry of s.gpus.
template = """\
/* Copyright (C) 2021 Google, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "freedreno_dev_info.h"

/* Map python to C: */
#define True true
#define False false

%for info in s.gpu_infos:
static const struct fd_dev_info __info${s.info_index(info)} = ${str(info)};
%endfor

static const struct fd_dev_rec fd_dev_recs[] = {
%for id, info in s.gpus.items():
   { {${id.gpu_id}, ${hex(id.chip_id)}}, "${id.name}", &__info${s.info_index(info)} },
%endfor
};
"""

# Render the template against the populated registry and write the result
# to stdout.
print(Template(text=template).render(s=s))
7ec681f3Smrg