Home | History | Annotate | Line # | Download | only in amdgpu
      1 /*	$NetBSD: amdgpu_doorbell.h,v 1.2 2021/12/18 23:44:58 riastradh Exp $	*/
      2 
      3 /*
      4  * Copyright 2018 Advanced Micro Devices, Inc.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the "Software"),
      8  * to deal in the Software without restriction, including without limitation
      9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
     10  * and/or sell copies of the Software, and to permit persons to whom the
     11  * Software is furnished to do so, subject to the following conditions:
     12  *
     13  * The above copyright notice and this permission notice shall be included in
     14  * all copies or substantial portions of the Software.
     15  *
     16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
     20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
     21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
     22  * OTHER DEALINGS IN THE SOFTWARE.
     23  *
     24  */
     25 
     26 /*
     27  * GPU doorbell structures, functions & helpers
     28  */
        /*
         * struct amdgpu_doorbell - bookkeeping for the doorbell MMIO region.
         *
         * The access handle is platform-specific: a bus_space tag/handle pair
         * when __NetBSD__ is defined, a u32 __iomem pointer otherwise.
         */
     29 struct amdgpu_doorbell {
     30 	/* doorbell mmio */
     31 	resource_size_t		base;	/* presumably the start address of the region -- TODO confirm */
     32 	resource_size_t		size;	/* presumably the length of the region -- TODO confirm units */
     33 #ifdef __NetBSD__
     34 	bus_space_tag_t		bst;	/* bus_space tag (NetBSD only) */
     35 	bus_space_handle_t	bsh;	/* bus_space handle (NetBSD only) */
     36 #else
     37 	u32 __iomem		*ptr;	/* I/O-memory pointer (non-NetBSD builds) */
     38 #endif
     39 	u32			num_doorbells;	/* Number of doorbells actually reserved for amdgpu. */
     40 };
     41 
     42 /* Reserved doorbells for amdgpu (including multimedia).
     43  * KFD can use all the rest in the 2M doorbell bar.
     44  * For ASICs before vega10, doorbells are 32-bit, so the
     45  * index/offset is in dwords. For vega10 and later, doorbells
     46  * can be 64-bit, so the indices defined are in qwords.
     47  */
     48 struct amdgpu_doorbell_index {
     49 	uint32_t kiq;
     50 	uint32_t mec_ring0;
     51 	uint32_t mec_ring1;
     52 	uint32_t mec_ring2;
     53 	uint32_t mec_ring3;
     54 	uint32_t mec_ring4;
     55 	uint32_t mec_ring5;
     56 	uint32_t mec_ring6;
     57 	uint32_t mec_ring7;
     58 	uint32_t userqueue_start;
     59 	uint32_t userqueue_end;
     60 	uint32_t gfx_ring0;
     61 	uint32_t gfx_ring1;
     62 	uint32_t sdma_engine[8];	/* one entry per SDMA engine, up to 8 */
     63 	uint32_t ih;
        	/*
        	 * Anonymous union: the vcn and uvd_vce index sets share storage, so
        	 * only one of the two views is meaningful at a time.
        	 */
     64 	union {
     65 		struct {
     66 			uint32_t vcn_ring0_1;
     67 			uint32_t vcn_ring2_3;
     68 			uint32_t vcn_ring4_5;
     69 			uint32_t vcn_ring6_7;
     70 		} vcn;
     71 		struct {
     72 			uint32_t uvd_ring0_1;
     73 			uint32_t uvd_ring2_3;
     74 			uint32_t uvd_ring4_5;
     75 			uint32_t uvd_ring6_7;
     76 			uint32_t vce_ring0_1;
     77 			uint32_t vce_ring2_3;
     78 			uint32_t vce_ring4_5;
     79 			uint32_t vce_ring6_7;
     80 		} uvd_vce;
     81 	};
        	/*
        	 * Cf. the *_FIRST_NON_CP / *_LAST_NON_CP / *_MAX_ASSIGNMENT enum
        	 * constants in this file; presumably these fields hold the matching
        	 * values for the running ASIC -- TODO confirm against the code that
        	 * fills in this structure.
        	 */
     82 	uint32_t first_non_cp;
     83 	uint32_t last_non_cp;
     84 	uint32_t max_assignment;
     85 	/* Per-engine SDMA doorbell size in dwords */
     86 	uint32_t sdma_doorbell_range;
     87 };
     88 
        /*
         * Fixed doorbell index assignments for the un-prefixed (non-VEGA20,
         * non-NAVI10, non-DOORBELL64) layout.
         * NOTE(review): presumably the legacy 32-bit, dword-indexed layout used
         * by ASICs before vega10 -- confirm against the users of this enum.
         */
     89 typedef enum _AMDGPU_DOORBELL_ASSIGNMENT
     90 {
     91 	AMDGPU_DOORBELL_KIQ                     = 0x000,
     92 	AMDGPU_DOORBELL_HIQ                     = 0x001,
     93 	AMDGPU_DOORBELL_DIQ                     = 0x002,
     94 	AMDGPU_DOORBELL_MEC_RING0               = 0x010,
     95 	AMDGPU_DOORBELL_MEC_RING1               = 0x011,
     96 	AMDGPU_DOORBELL_MEC_RING2               = 0x012,
     97 	AMDGPU_DOORBELL_MEC_RING3               = 0x013,
     98 	AMDGPU_DOORBELL_MEC_RING4               = 0x014,
     99 	AMDGPU_DOORBELL_MEC_RING5               = 0x015,
    100 	AMDGPU_DOORBELL_MEC_RING6               = 0x016,
    101 	AMDGPU_DOORBELL_MEC_RING7               = 0x017,
    102 	AMDGPU_DOORBELL_GFX_RING0               = 0x020,
    103 	AMDGPU_DOORBELL_sDMA_ENGINE0            = 0x1E0,
    104 	AMDGPU_DOORBELL_sDMA_ENGINE1            = 0x1E1,
    105 	AMDGPU_DOORBELL_IH                      = 0x1E8,
    106 	AMDGPU_DOORBELL_MAX_ASSIGNMENT          = 0x3FF,	/* above every index assigned in this enum */
    107 	AMDGPU_DOORBELL_INVALID                 = 0xFFFF	/* outside the 0..MAX_ASSIGNMENT range */
    108 } AMDGPU_DOORBELL_ASSIGNMENT;
    109 
        /*
         * Doorbell index assignments named for VEGA20.
         * Entries are grouped by engine; the range comments inside the enum
         * give each group's index range in decimal.
         */
    110 typedef enum _AMDGPU_VEGA20_DOORBELL_ASSIGNMENT
    111 {
    112 	/* Compute + GFX: 0~255 */
    113 	AMDGPU_VEGA20_DOORBELL_KIQ                     = 0x000,
    114 	AMDGPU_VEGA20_DOORBELL_HIQ                     = 0x001,
    115 	AMDGPU_VEGA20_DOORBELL_DIQ                     = 0x002,
    116 	AMDGPU_VEGA20_DOORBELL_MEC_RING0               = 0x003,
    117 	AMDGPU_VEGA20_DOORBELL_MEC_RING1               = 0x004,
    118 	AMDGPU_VEGA20_DOORBELL_MEC_RING2               = 0x005,
    119 	AMDGPU_VEGA20_DOORBELL_MEC_RING3               = 0x006,
    120 	AMDGPU_VEGA20_DOORBELL_MEC_RING4               = 0x007,
    121 	AMDGPU_VEGA20_DOORBELL_MEC_RING5               = 0x008,
    122 	AMDGPU_VEGA20_DOORBELL_MEC_RING6               = 0x009,
    123 	AMDGPU_VEGA20_DOORBELL_MEC_RING7               = 0x00A,
    124 	AMDGPU_VEGA20_DOORBELL_USERQUEUE_START	       = 0x00B,
    125 	AMDGPU_VEGA20_DOORBELL_USERQUEUE_END	       = 0x08A,
    126 	AMDGPU_VEGA20_DOORBELL_GFX_RING0               = 0x08B,
    127 	/* SDMA: 256~335 (8 engines, start indices 0xA apart) */
    128 	AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE0            = 0x100,
    129 	AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE1            = 0x10A,
    130 	AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE2            = 0x114,
    131 	AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE3            = 0x11E,
    132 	AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE4            = 0x128,
    133 	AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE5            = 0x132,
    134 	AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE6            = 0x13C,
    135 	AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE7            = 0x146,
    136 	/* IH: 376~391 */
    137 	AMDGPU_VEGA20_DOORBELL_IH                      = 0x178,
    138 	/* MMSCH: 392~407
    139 	 * Overlaps the doorbell assignment with VCN, as they are mutually exclusive.
    140 	 * A VCN engine's doorbell is 32 bits wide and two VCN rings share one QWORD.
    141 	 */
    142 	AMDGPU_VEGA20_DOORBELL64_VCN0_1                  = 0x188, /* VCN0 */
    143 	AMDGPU_VEGA20_DOORBELL64_VCN2_3                  = 0x189,
    144 	AMDGPU_VEGA20_DOORBELL64_VCN4_5                  = 0x18A,
    145 	AMDGPU_VEGA20_DOORBELL64_VCN6_7                  = 0x18B,
    146 
    147 	AMDGPU_VEGA20_DOORBELL64_VCN8_9                  = 0x18C, /* VCN1 */
    148 	AMDGPU_VEGA20_DOORBELL64_VCNa_b                  = 0x18D,
    149 	AMDGPU_VEGA20_DOORBELL64_VCNc_d                  = 0x18E,
    150 	AMDGPU_VEGA20_DOORBELL64_VCNe_f                  = 0x18F,
    151 
        	/* The UVD and VCE entries reuse the VCN index values above (0x188-0x18F). */
    152 	AMDGPU_VEGA20_DOORBELL64_UVD_RING0_1             = 0x188,
    153 	AMDGPU_VEGA20_DOORBELL64_UVD_RING2_3             = 0x189,
    154 	AMDGPU_VEGA20_DOORBELL64_UVD_RING4_5             = 0x18A,
    155 	AMDGPU_VEGA20_DOORBELL64_UVD_RING6_7             = 0x18B,
    156 
    157 	AMDGPU_VEGA20_DOORBELL64_VCE_RING0_1             = 0x18C,
    158 	AMDGPU_VEGA20_DOORBELL64_VCE_RING2_3             = 0x18D,
    159 	AMDGPU_VEGA20_DOORBELL64_VCE_RING4_5             = 0x18E,
    160 	AMDGPU_VEGA20_DOORBELL64_VCE_RING6_7             = 0x18F,
    161 
        	/* Non-CP range: first SDMA index through the last VCE index. */
    162 	AMDGPU_VEGA20_DOORBELL64_FIRST_NON_CP            = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE0,
    163 	AMDGPU_VEGA20_DOORBELL64_LAST_NON_CP             = AMDGPU_VEGA20_DOORBELL64_VCE_RING6_7,
    164 
    165 	AMDGPU_VEGA20_DOORBELL_MAX_ASSIGNMENT            = 0x18F,	/* equals the highest index assigned above */
    166 	AMDGPU_VEGA20_DOORBELL_INVALID                   = 0xFFFF	/* outside the 0..MAX_ASSIGNMENT range */
    167 } AMDGPU_VEGA20_DOORBELL_ASSIGNMENT;
    168 
        /*
         * Doorbell index assignments named for NAVI10.
         * Compared with the VEGA20 table in this file, this one adds GFX_RING1,
         * lists only two SDMA engines and has no UVD/VCE entries.
         */
    169 typedef enum _AMDGPU_NAVI10_DOORBELL_ASSIGNMENT
    170 {
    171 	/* Compute + GFX: 0~255 */
    172 	AMDGPU_NAVI10_DOORBELL_KIQ			= 0x000,
    173 	AMDGPU_NAVI10_DOORBELL_HIQ			= 0x001,
    174 	AMDGPU_NAVI10_DOORBELL_DIQ			= 0x002,
    175 	AMDGPU_NAVI10_DOORBELL_MEC_RING0		= 0x003,
    176 	AMDGPU_NAVI10_DOORBELL_MEC_RING1		= 0x004,
    177 	AMDGPU_NAVI10_DOORBELL_MEC_RING2		= 0x005,
    178 	AMDGPU_NAVI10_DOORBELL_MEC_RING3		= 0x006,
    179 	AMDGPU_NAVI10_DOORBELL_MEC_RING4		= 0x007,
    180 	AMDGPU_NAVI10_DOORBELL_MEC_RING5		= 0x008,
    181 	AMDGPU_NAVI10_DOORBELL_MEC_RING6		= 0x009,
    182 	AMDGPU_NAVI10_DOORBELL_MEC_RING7		= 0x00A,
    183 	AMDGPU_NAVI10_DOORBELL_USERQUEUE_START		= 0x00B,
    184 	AMDGPU_NAVI10_DOORBELL_USERQUEUE_END		= 0x08A,
    185 	AMDGPU_NAVI10_DOORBELL_GFX_RING0		= 0x08B,
    186 	AMDGPU_NAVI10_DOORBELL_GFX_RING1		= 0x08C,
    187 	/* SDMA: 256~335 */
    188 	AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0		= 0x100,
    189 	AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE1		= 0x10A,
    190 	/* IH: 376~391 */
    191 	AMDGPU_NAVI10_DOORBELL_IH			= 0x178,
    192 	/* MMSCH: 392~407
    193 	 * Overlaps the doorbell assignment with VCN, as they are mutually exclusive.
    194 	 * A VCE engine's doorbell is 32 bits wide and two VCE rings share one QWORD.
    195 	 * NOTE(review): the line above says VCE, but only VCN entries follow --
    196 	 * confirm whether VCN was intended.
         */
    196 	AMDGPU_NAVI10_DOORBELL64_VCN0_1			= 0x188, /* lower 32 bits for VCN0 and upper 32 bits for VCN1 */
    197 	AMDGPU_NAVI10_DOORBELL64_VCN2_3			= 0x189,
    198 	AMDGPU_NAVI10_DOORBELL64_VCN4_5			= 0x18A,
    199 	AMDGPU_NAVI10_DOORBELL64_VCN6_7			= 0x18B,
    200 
        	/* Non-CP range: first SDMA index through the last VCN index. */
    201 	AMDGPU_NAVI10_DOORBELL64_FIRST_NON_CP		= AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0,
    202 	AMDGPU_NAVI10_DOORBELL64_LAST_NON_CP		= AMDGPU_NAVI10_DOORBELL64_VCN6_7,
    203 
    204 	AMDGPU_NAVI10_DOORBELL_MAX_ASSIGNMENT		= 0x18F,
    205 	AMDGPU_NAVI10_DOORBELL_INVALID			= 0xFFFF	/* outside the 0..MAX_ASSIGNMENT range */
    206 } AMDGPU_NAVI10_DOORBELL_ASSIGNMENT;
    207 
    208 /*
    209  * 64-bit doorbells: offsets are in QWORDs and occupy 2KB of doorbell space
    210  */
    211 typedef enum _AMDGPU_DOORBELL64_ASSIGNMENT
    212 {
    213 	/*
    214 	 * All compute-related doorbells (kiq, hiq, diq, traditional compute queue, user queue) should be located in
    215 	 * a contiguous range so that programming CP_MEC_DOORBELL_RANGE_LOWER/UPPER can cover this range.
    216 	 * Compute-related doorbells are allocated from 0x00 to 0x8a.
    217 	 */
    218 
    219 
    220 	/* kernel scheduling */
    221 	AMDGPU_DOORBELL64_KIQ                     = 0x00,
    222 
    223 	/* HSA interface queue and debug queue */
    224 	AMDGPU_DOORBELL64_HIQ                     = 0x01,
    225 	AMDGPU_DOORBELL64_DIQ                     = 0x02,
    226 
    227 	/* Compute engines */
    228 	AMDGPU_DOORBELL64_MEC_RING0               = 0x03,
    229 	AMDGPU_DOORBELL64_MEC_RING1               = 0x04,
    230 	AMDGPU_DOORBELL64_MEC_RING2               = 0x05,
    231 	AMDGPU_DOORBELL64_MEC_RING3               = 0x06,
    232 	AMDGPU_DOORBELL64_MEC_RING4               = 0x07,
    233 	AMDGPU_DOORBELL64_MEC_RING5               = 0x08,
    234 	AMDGPU_DOORBELL64_MEC_RING6               = 0x09,
    235 	AMDGPU_DOORBELL64_MEC_RING7               = 0x0a,
    236 
    237 	/* User queue doorbell range (128 doorbells, 0x0b through 0x8a inclusive) */
    238 	AMDGPU_DOORBELL64_USERQUEUE_START         = 0x0b,
    239 	AMDGPU_DOORBELL64_USERQUEUE_END           = 0x8a,
    240 
    241 	/* Graphics engine */
    242 	AMDGPU_DOORBELL64_GFX_RING0               = 0x8b,
    243 
    244 	/*
    245 	 * Other graphics doorbells can be allocated here: from 0x8c to 0xdf
    246 	 * Graphics voltage island aperture 1
    247 	 * default non-graphics QWORD index is 0xe0 - 0xFF inclusive
    248 	 */
    249 
    250 	/* For vega10 SR-IOV, the SDMA doorbells must be fixed as follows
    251 	 * to keep the same setting as the host driver; otherwise
    252 	 * conflicts will occur.
    253 	 */
    254 	AMDGPU_DOORBELL64_sDMA_ENGINE0            = 0xF0,
    255 	AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE0     = 0xF1,
    256 	AMDGPU_DOORBELL64_sDMA_ENGINE1            = 0xF2,
    257 	AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE1     = 0xF3,
    258 
    259 	/* Interrupt handler */
    260 	AMDGPU_DOORBELL64_IH                      = 0xF4,  /* For legacy interrupt ring buffer */
    261 	AMDGPU_DOORBELL64_IH_RING1                = 0xF5,  /* For page migration request log */
    262 	AMDGPU_DOORBELL64_IH_RING2                = 0xF6,  /* For page migration translation/invalidation log */
    263 
    264 	/* VCN engines use 32-bit doorbells */
    265 	AMDGPU_DOORBELL64_VCN0_1                  = 0xF8, /* lower 32 bits for VCN0 and upper 32 bits for VCN1 */
    266 	AMDGPU_DOORBELL64_VCN2_3                  = 0xF9,
    267 	AMDGPU_DOORBELL64_VCN4_5                  = 0xFA,
    268 	AMDGPU_DOORBELL64_VCN6_7                  = 0xFB,
    269 
    270 	/* Overlaps the doorbell assignment with VCN, as they are mutually exclusive.
    271 	 * A VCE engine's doorbell is 32 bits wide and two VCE rings share one QWORD.
    272 	 */
    273 	AMDGPU_DOORBELL64_UVD_RING0_1             = 0xF8,
    274 	AMDGPU_DOORBELL64_UVD_RING2_3             = 0xF9,
    275 	AMDGPU_DOORBELL64_UVD_RING4_5             = 0xFA,
    276 	AMDGPU_DOORBELL64_UVD_RING6_7             = 0xFB,
    277 
    278 	AMDGPU_DOORBELL64_VCE_RING0_1             = 0xFC,
    279 	AMDGPU_DOORBELL64_VCE_RING2_3             = 0xFD,
    280 	AMDGPU_DOORBELL64_VCE_RING4_5             = 0xFE,
    281 	AMDGPU_DOORBELL64_VCE_RING6_7             = 0xFF,
    282 
        	/* Non-CP range: first SDMA index through the last VCE index. */
    283 	AMDGPU_DOORBELL64_FIRST_NON_CP            = AMDGPU_DOORBELL64_sDMA_ENGINE0,
    284 	AMDGPU_DOORBELL64_LAST_NON_CP             = AMDGPU_DOORBELL64_VCE_RING6_7,
    285 
    286 	AMDGPU_DOORBELL64_MAX_ASSIGNMENT          = 0xFF,	/* equals the highest index assigned above */
    287 	AMDGPU_DOORBELL64_INVALID                 = 0xFFFF	/* outside the 0..MAX_ASSIGNMENT range */
    288 } AMDGPU_DOORBELL64_ASSIGNMENT;
    289 
        /*
         * Doorbell accessors; only the prototypes are visible here.
         * Going by the signatures, the rdoorbell variants return the 32-bit
         * (u32) or 64-bit (u64) value for doorbell `index` on `adev`, and the
         * wdoorbell variants take the value `v` to store -- presumably
         * reading/writing the doorbell MMIO region. Confirm against the
         * definitions, including whether `index` counts dwords or qwords.
         */
    290 u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index);
    291 void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v);
    292 u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index);
    293 void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v);
    294 
        /*
         * Shorthand wrappers that forward to the amdgpu_mm_*doorbell* functions.
         * Each expands to a call that passes a variable named `adev`; it is not
         * a macro parameter, so an `adev` must be in scope at the call site.
         */
    295 #define RDOORBELL32(index) amdgpu_mm_rdoorbell(adev, (index))
    296 #define WDOORBELL32(index, v) amdgpu_mm_wdoorbell(adev, (index), (v))
    297 #define RDOORBELL64(index) amdgpu_mm_rdoorbell64(adev, (index))
    298 #define WDOORBELL64(index, v) amdgpu_mm_wdoorbell64(adev, (index), (v))
    299 
    300