/*	$NetBSD: amdgpu_amdkfd_arcturus.c,v 1.2 2021/12/18 23:44:58 riastradh Exp $	*/

/*
 * Copyright 2019 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: amdgpu_amdkfd_arcturus.c,v 1.2 2021/12/18 23:44:58 riastradh Exp $");

#include <linux/module.h>
#include <linux/fdtable.h>
#include <linux/uaccess.h>
#include <linux/mmu_context.h>
#include <linux/firmware.h>
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
#include "sdma0/sdma0_4_2_2_offset.h"
#include "sdma0/sdma0_4_2_2_sh_mask.h"
#include "sdma1/sdma1_4_2_2_offset.h"
#include "sdma1/sdma1_4_2_2_sh_mask.h"
#include "sdma2/sdma2_4_2_2_offset.h"
#include "sdma2/sdma2_4_2_2_sh_mask.h"
#include "sdma3/sdma3_4_2_2_offset.h"
#include "sdma3/sdma3_4_2_2_sh_mask.h"
#include "sdma4/sdma4_4_2_2_offset.h"
#include "sdma4/sdma4_4_2_2_sh_mask.h"
#include "sdma5/sdma5_4_2_2_offset.h"
#include "sdma5/sdma5_4_2_2_sh_mask.h"
#include "sdma6/sdma6_4_2_2_offset.h"
#include "sdma6/sdma6_4_2_2_sh_mask.h"
#include "sdma7/sdma7_4_2_2_offset.h"
#include "sdma7/sdma7_4_2_2_sh_mask.h"
#include "v9_structs.h"
#include "soc15.h"
#include "soc15d.h"
#include "amdgpu_amdkfd_gfx_v9.h"
#include "gfxhub_v1_0.h"
#include "mmhub_v9_4.h"

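/*
 * Record one register in the caller-provided dump buffer: the byte
 * address (dword offset << 2) in column 0 and the live register value
 * in column 1.  Relies on "i" and "dump" in the enclosing scope.
 */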
#define HQD_N_REGS 56
#define DUMP_REG(addr) do {				\
		if (WARN_ON_ONCE(i >= HQD_N_REGS))	\
			break;				\
		(*dump)[i][0] = (addr) << 2;		\
		(*dump)[i++][1] = RREG32(addr);		\
	} while (0)

static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
{
	return (struct amdgpu_device *)kgd;
}

static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
{
	return (struct v9_sdma_mqd *)mqd;
}

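/*
 * Arcturus has eight SDMA engines, each with its own register
 * aperture.  Return the register offset of the RLC queue block for
 * the given engine and queue: the per-engine RLC0 base plus a
 * per-queue stride.
 */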
static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
				unsigned int engine_id,
				unsigned int queue_id)
{
	uint32_t sdma_engine_reg_base = 0;
	uint32_t sdma_rlc_reg_offset;

	switch (engine_id) {
	default:
		dev_warn(adev->dev,
			 "Invalid sdma engine id (%d), using engine id 0\n",
			 engine_id);
		/* fall through */
	case 0:
		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0,
				mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL;
		break;
	case 1:
		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA1, 0,
				mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL;
		break;
	case 2:
		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA2, 0,
				mmSDMA2_RLC0_RB_CNTL) - mmSDMA2_RLC0_RB_CNTL;
		break;
	case 3:
		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA3, 0,
				mmSDMA3_RLC0_RB_CNTL) - mmSDMA3_RLC0_RB_CNTL;
		break;
	case 4:
		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA4, 0,
				mmSDMA4_RLC0_RB_CNTL) - mmSDMA4_RLC0_RB_CNTL;
		break;
	case 5:
		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA5, 0,
				mmSDMA5_RLC0_RB_CNTL) - mmSDMA5_RLC0_RB_CNTL;
		break;
	case 6:
		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA6, 0,
				mmSDMA6_RLC0_RB_CNTL) - mmSDMA6_RLC0_RB_CNTL;
		break;
	case 7:
		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA7, 0,
				mmSDMA7_RLC0_RB_CNTL) - mmSDMA7_RLC0_RB_CNTL;
		break;
	}

	sdma_rlc_reg_offset = sdma_engine_reg_base
		+ queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL);

	pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
			queue_id, sdma_rlc_reg_offset);

	return sdma_rlc_reg_offset;
}

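/*
 * Load an SDMA queue's MQD (memory queue descriptor) into the RLC
 * queue registers: disable the ring buffer, wait for the queue
 * context to go idle, program the doorbell and ring pointers, then
 * re-enable the ring buffer.
 */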
static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
			     uint32_t __user *wptr, struct mm_struct *mm)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct v9_sdma_mqd *m;
	uint32_t sdma_rlc_reg_offset;
	unsigned long end_jiffies;
	uint32_t data;
	uint64_t data64;
	uint64_t __user *wptr64 = (uint64_t __user *)wptr;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
					    m->sdma_queue_id);

	/* Disable the ring buffer while the queue is reprogrammed. */
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
		m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));

	/* Wait up to two seconds for the queue context to go idle. */
	end_jiffies = msecs_to_jiffies(2000) + jiffies;
	while (true) {
		data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
		if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("SDMA RLC not idle in %s\n", __func__);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL_OFFSET,
	       m->sdmax_rlcx_doorbell_offset);

	data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
			     ENABLE, 1);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR,
				m->sdmax_rlcx_rb_rptr);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI,
				m->sdmax_rlcx_rb_rptr_hi);

	/*
	 * Restore the saved write pointer.  If the user-space wptr is
	 * unreadable, fall back to the read pointer so the queue
	 * resumes empty rather than replaying stale commands.
	 */
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
	if (read_user_wptr(mm, wptr64, data64)) {
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
		       lower_32_bits(data64));
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
		       upper_32_bits(data64));
	} else {
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
		       m->sdmax_rlcx_rb_rptr);
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
		       m->sdmax_rlcx_rb_rptr_hi);
	}
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0);

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI,
			m->sdmax_rlcx_rb_base_hi);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
			m->sdmax_rlcx_rb_rptr_addr_lo);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
			m->sdmax_rlcx_rb_rptr_addr_hi);

	/* Re-enable the ring buffer; the queue is now live. */
	data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
			     RB_ENABLE, 1);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data);

	return 0;
}

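/*
 * Snapshot the four contiguous ranges of RLC queue registers for
 * debugging.  *dump is allocated here and is presumably freed by the
 * caller.
 */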
static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
			     uint32_t engine_id, uint32_t queue_id,
			     uint32_t (**dump)[2], uint32_t *n_regs)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
			engine_id, queue_id);
	uint32_t i = 0, reg;
#undef HQD_N_REGS
#define HQD_N_REGS (19+6+7+10)

	*dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL);
	if (*dump == NULL)
		return -ENOMEM;

	for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN;
	     reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = mmSDMA0_RLC0_MIDCMD_DATA0;
	     reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);

	WARN_ON_ONCE(i != HQD_N_REGS);
	*n_regs = i;

	return 0;
}

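/*
 * A queue counts as occupied while its ring buffer is enabled in the
 * hardware.
 */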
static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct v9_sdma_mqd *m;
	uint32_t sdma_rlc_reg_offset;
	uint32_t sdma_rlc_rb_cntl;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
					    m->sdma_queue_id);

	sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);

	if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
		return true;

	return false;
}

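/*
 * Tear down an SDMA queue: disable the ring buffer, wait up to
 * utimeout milliseconds for the queue context to go idle, clear the
 * doorbell, and save the final read pointer back into the MQD so the
 * queue can be restored later.
 */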
static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
				unsigned int utimeout)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct v9_sdma_mqd *m;
	uint32_t sdma_rlc_reg_offset;
	uint32_t temp;
	unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
					    m->sdma_queue_id);

	/* Disable the ring buffer and wait for the engine to drain. */
	temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
	temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp);

	while (true) {
		temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
		if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("SDMA RLC not idle in %s\n", __func__);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
		RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) |
		SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);

	/* Save the final read pointer so the MQD can be reloaded later. */
	m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR);
	m->sdmax_rlcx_rb_rptr_hi =
		RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI);

	return 0;
}

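/*
 * Program the page-table base address for a KFD VMID into both the
 * MMHUB and the GFXHUB.
 */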
static void kgd_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
		uint64_t page_table_base)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);

	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
		pr_err("trying to set page table base for wrong VMID %u\n",
		       vmid);
		return;
	}

	mmhub_v9_4_setup_vm_pt_regs(adev, vmid, page_table_base);

	gfxhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);
}

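/*
 * Arcturus reuses the generic GFX v9 handlers for compute queues and
 * overrides only the SDMA queue operations (eight engines, each with
 * its own register aperture) and the page-table setup.
 */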
const struct kfd2kgd_calls arcturus_kfd2kgd = {
	.program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
	.set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
	.init_interrupts = kgd_gfx_v9_init_interrupts,
	.hqd_load = kgd_gfx_v9_hqd_load,
	.hiq_mqd_load = kgd_gfx_v9_hiq_mqd_load,
	.hqd_sdma_load = kgd_hqd_sdma_load,
	.hqd_dump = kgd_gfx_v9_hqd_dump,
	.hqd_sdma_dump = kgd_hqd_sdma_dump,
	.hqd_is_occupied = kgd_gfx_v9_hqd_is_occupied,
	.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
	.hqd_destroy = kgd_gfx_v9_hqd_destroy,
	.hqd_sdma_destroy = kgd_hqd_sdma_destroy,
	.address_watch_disable = kgd_gfx_v9_address_watch_disable,
	.address_watch_execute = kgd_gfx_v9_address_watch_execute,
	.wave_control_execute = kgd_gfx_v9_wave_control_execute,
	.address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset,
	.get_atc_vmid_pasid_mapping_info =
			kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
	.get_tile_config = kgd_gfx_v9_get_tile_config,
	.set_vm_context_page_table_base = kgd_set_vm_context_page_table_base,
	.get_hive_id = amdgpu_amdkfd_get_hive_id,
};