1 1.6 riastrad /* $NetBSD: amdgpu_amdkfd.c,v 1.6 2021/12/19 10:59:01 riastradh Exp $ */ 2 1.1 riastrad 3 1.1 riastrad /* 4 1.1 riastrad * Copyright 2014 Advanced Micro Devices, Inc. 5 1.1 riastrad * 6 1.1 riastrad * Permission is hereby granted, free of charge, to any person obtaining a 7 1.1 riastrad * copy of this software and associated documentation files (the "Software"), 8 1.1 riastrad * to deal in the Software without restriction, including without limitation 9 1.1 riastrad * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 1.1 riastrad * and/or sell copies of the Software, and to permit persons to whom the 11 1.1 riastrad * Software is furnished to do so, subject to the following conditions: 12 1.1 riastrad * 13 1.1 riastrad * The above copyright notice and this permission notice shall be included in 14 1.1 riastrad * all copies or substantial portions of the Software. 15 1.1 riastrad * 16 1.1 riastrad * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 1.1 riastrad * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 1.1 riastrad * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 1.1 riastrad * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 1.1 riastrad * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 1.1 riastrad * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 1.1 riastrad * OTHER DEALINGS IN THE SOFTWARE. 23 1.1 riastrad */ 24 1.1 riastrad 25 1.1 riastrad #include <sys/cdefs.h> 26 1.6 riastrad __KERNEL_RCSID(0, "$NetBSD: amdgpu_amdkfd.c,v 1.6 2021/12/19 10:59:01 riastradh Exp $"); 27 1.1 riastrad 28 1.1 riastrad #include "amdgpu_amdkfd.h" 29 1.1 riastrad #include "amd_shared.h" 30 1.4 riastrad 31 1.1 riastrad #include "amdgpu.h" 32 1.4 riastrad #include "amdgpu_gfx.h" 33 1.4 riastrad #include "amdgpu_dma_buf.h" 34 1.1 riastrad #include <linux/module.h> 35 1.4 riastrad #include <linux/dma-buf.h> 36 1.4 riastrad #include "amdgpu_xgmi.h" 37 1.1 riastrad 38 1.4 riastrad static const unsigned int compute_vmid_bitmap = 0xFF00; 39 1.1 riastrad 40 1.4 riastrad /* Total memory size in system memory and all GPU VRAM. Used to 41 1.4 riastrad * estimate worst case amount of memory to reserve for page tables 42 1.4 riastrad */ 43 1.4 riastrad uint64_t amdgpu_amdkfd_total_mem_size; 44 1.1 riastrad 45 1.4 riastrad int amdgpu_amdkfd_init(void) 46 1.1 riastrad { 47 1.4 riastrad struct sysinfo si; 48 1.4 riastrad int ret; 49 1.1 riastrad 50 1.4 riastrad si_meminfo(&si); 51 1.4 riastrad amdgpu_amdkfd_total_mem_size = si.totalram - si.totalhigh; 52 1.4 riastrad amdgpu_amdkfd_total_mem_size *= si.mem_unit; 53 1.1 riastrad 54 1.4 riastrad #ifdef CONFIG_HSA_AMD 55 1.4 riastrad ret = kgd2kfd_init(); 56 1.4 riastrad amdgpu_amdkfd_gpuvm_init_mem_limits(); 57 1.1 riastrad #else 58 1.4 riastrad ret = -ENOENT; 59 1.1 riastrad #endif 60 1.4 riastrad 61 1.4 riastrad return ret; 62 1.1 riastrad } 63 1.1 riastrad 64 1.1 riastrad void amdgpu_amdkfd_fini(void) 65 1.1 riastrad { 66 1.4 riastrad kgd2kfd_exit(); 67 1.1 riastrad } 68 1.1 riastrad 69 1.4 riastrad void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) 70 1.1 riastrad { 71 1.4 riastrad bool vf = amdgpu_sriov_vf(adev); 72 1.4 riastrad 73 1.4 riastrad adev->kfd.dev = kgd2kfd_probe((struct kgd_dev *)adev, 74 1.4 riastrad adev->pdev, adev->asic_type, vf); 75 1.4 riastrad 76 1.4 riastrad if (adev->kfd.dev) 77 1.4 riastrad amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size; 78 1.4 riastrad } 79 1.4 riastrad 80 1.4 riastrad /** 81 1.4 riastrad * amdgpu_doorbell_get_kfd_info - Report doorbell configuration required to 82 1.4 riastrad * setup amdkfd 83 1.4 riastrad * 84 1.4 riastrad * @adev: amdgpu_device pointer 85 1.4 riastrad * @aperture_base: output returning doorbell aperture base physical address 86 1.4 riastrad * @aperture_size: output returning doorbell aperture size in bytes 87 1.4 riastrad * @start_offset: output returning # of doorbell bytes reserved for amdgpu. 88 1.4 riastrad * 89 1.4 riastrad * amdgpu and amdkfd share the doorbell aperture. amdgpu sets it up, 90 1.4 riastrad * takes doorbells required for its own rings and reports the setup to amdkfd. 91 1.4 riastrad * amdgpu reserved doorbells are at the start of the doorbell aperture. 92 1.4 riastrad */ 93 1.4 riastrad static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev, 94 1.4 riastrad phys_addr_t *aperture_base, 95 1.4 riastrad size_t *aperture_size, 96 1.4 riastrad size_t *start_offset) 97 1.4 riastrad { 98 1.4 riastrad /* 99 1.4 riastrad * The first num_doorbells are used by amdgpu. 100 1.4 riastrad * amdkfd takes whatever's left in the aperture. 101 1.4 riastrad */ 102 1.4 riastrad if (adev->doorbell.size > adev->doorbell.num_doorbells * sizeof(u32)) { 103 1.4 riastrad *aperture_base = adev->doorbell.base; 104 1.4 riastrad *aperture_size = adev->doorbell.size; 105 1.4 riastrad *start_offset = adev->doorbell.num_doorbells * sizeof(u32); 106 1.4 riastrad } else { 107 1.4 riastrad *aperture_base = 0; 108 1.4 riastrad *aperture_size = 0; 109 1.4 riastrad *start_offset = 0; 110 1.4 riastrad } 111 1.1 riastrad } 112 1.1 riastrad 113 1.4 riastrad void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) 114 1.1 riastrad { 115 1.4 riastrad int i; 116 1.4 riastrad int last_valid_bit; 117 1.4 riastrad 118 1.4 riastrad if (adev->kfd.dev) { 119 1.1 riastrad struct kgd2kfd_shared_resources gpu_resources = { 120 1.4 riastrad .compute_vmid_bitmap = compute_vmid_bitmap, 121 1.4 riastrad .num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec, 122 1.4 riastrad .num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe, 123 1.4 riastrad .gpuvm_size = min(adev->vm_manager.max_pfn 124 1.4 riastrad << AMDGPU_GPU_PAGE_SHIFT, 125 1.4 riastrad AMDGPU_GMC_HOLE_START), 126 1.4 riastrad .drm_render_minor = adev->ddev->render->index, 127 1.4 riastrad .sdma_doorbell_idx = adev->doorbell_index.sdma_engine, 128 1.1 riastrad 129 1.1 riastrad }; 130 1.1 riastrad 131 1.4 riastrad /* this is going to have a few of the MSBs set that we need to 132 1.4 riastrad * clear 133 1.4 riastrad */ 134 1.4 riastrad bitmap_complement(gpu_resources.queue_bitmap, 135 1.4 riastrad adev->gfx.mec.queue_bitmap, 136 1.4 riastrad KGD_MAX_QUEUES); 137 1.4 riastrad 138 1.4 riastrad /* According to linux/bitmap.h we shouldn't use bitmap_clear if 139 1.4 riastrad * nbits is not compile time constant 140 1.4 riastrad */ 141 1.4 riastrad last_valid_bit = 1 /* only first MEC can have compute queues */ 142 1.4 riastrad * adev->gfx.mec.num_pipe_per_mec 143 1.4 riastrad * adev->gfx.mec.num_queue_per_pipe; 144 1.4 riastrad for (i = last_valid_bit; i < KGD_MAX_QUEUES; ++i) 145 1.4 riastrad clear_bit(i, gpu_resources.queue_bitmap); 146 1.4 riastrad 147 1.4 riastrad amdgpu_doorbell_get_kfd_info(adev, 148 1.1 riastrad &gpu_resources.doorbell_physical_address, 149 1.1 riastrad &gpu_resources.doorbell_aperture_size, 150 1.1 riastrad &gpu_resources.doorbell_start_offset); 151 1.1 riastrad 152 1.4 riastrad /* Since SOC15, BIF starts to statically use the 153 1.4 riastrad * lower 12 bits of doorbell addresses for routing 154 1.4 riastrad * based on settings in registers like 155 1.4 riastrad * SDMA0_DOORBELL_RANGE etc.. 156 1.4 riastrad * In order to route a doorbell to CP engine, the lower 157 1.4 riastrad * 12 bits of its address has to be outside the range 158 1.4 riastrad * set for SDMA, VCN, and IH blocks. 159 1.4 riastrad */ 160 1.4 riastrad if (adev->asic_type >= CHIP_VEGA10) { 161 1.4 riastrad gpu_resources.non_cp_doorbells_start = 162 1.4 riastrad adev->doorbell_index.first_non_cp; 163 1.4 riastrad gpu_resources.non_cp_doorbells_end = 164 1.4 riastrad adev->doorbell_index.last_non_cp; 165 1.4 riastrad } 166 1.4 riastrad 167 1.4 riastrad kgd2kfd_device_init(adev->kfd.dev, adev->ddev, &gpu_resources); 168 1.1 riastrad } 169 1.1 riastrad } 170 1.1 riastrad 171 1.4 riastrad void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev) 172 1.1 riastrad { 173 1.4 riastrad if (adev->kfd.dev) { 174 1.4 riastrad kgd2kfd_device_exit(adev->kfd.dev); 175 1.4 riastrad adev->kfd.dev = NULL; 176 1.1 riastrad } 177 1.1 riastrad } 178 1.1 riastrad 179 1.4 riastrad void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev, 180 1.1 riastrad const void *ih_ring_entry) 181 1.1 riastrad { 182 1.4 riastrad if (adev->kfd.dev) 183 1.4 riastrad kgd2kfd_interrupt(adev->kfd.dev, ih_ring_entry); 184 1.1 riastrad } 185 1.1 riastrad 186 1.4 riastrad void amdgpu_amdkfd_suspend(struct amdgpu_device *adev) 187 1.1 riastrad { 188 1.4 riastrad if (adev->kfd.dev) 189 1.4 riastrad kgd2kfd_suspend(adev->kfd.dev); 190 1.1 riastrad } 191 1.1 riastrad 192 1.4 riastrad int amdgpu_amdkfd_resume(struct amdgpu_device *adev) 193 1.1 riastrad { 194 1.1 riastrad int r = 0; 195 1.1 riastrad 196 1.4 riastrad if (adev->kfd.dev) 197 1.4 riastrad r = kgd2kfd_resume(adev->kfd.dev); 198 1.1 riastrad 199 1.1 riastrad return r; 200 1.1 riastrad } 201 1.1 riastrad 202 1.4 riastrad int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev) 203 1.1 riastrad { 204 1.4 riastrad int r = 0; 205 1.4 riastrad 206 1.4 riastrad if (adev->kfd.dev) 207 1.4 riastrad r = kgd2kfd_pre_reset(adev->kfd.dev); 208 1.4 riastrad 209 1.4 riastrad return r; 210 1.4 riastrad } 211 1.4 riastrad 212 1.4 riastrad int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev) 213 1.4 riastrad { 214 1.4 riastrad int r = 0; 215 1.4 riastrad 216 1.4 riastrad if (adev->kfd.dev) 217 1.4 riastrad r = kgd2kfd_post_reset(adev->kfd.dev); 218 1.4 riastrad 219 1.4 riastrad return r; 220 1.4 riastrad } 221 1.4 riastrad 222 1.4 riastrad void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd) 223 1.4 riastrad { 224 1.4 riastrad struct amdgpu_device *adev = (struct amdgpu_device *)kgd; 225 1.4 riastrad 226 1.4 riastrad if (amdgpu_device_should_recover_gpu(adev)) 227 1.4 riastrad amdgpu_device_gpu_recover(adev, NULL); 228 1.1 riastrad } 229 1.1 riastrad 230 1.4 riastrad int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size, 231 1.4 riastrad void **mem_obj, uint64_t *gpu_addr, 232 1.4 riastrad void **cpu_ptr, bool mqd_gfx9) 233 1.4 riastrad { 234 1.4 riastrad struct amdgpu_device *adev = (struct amdgpu_device *)kgd; 235 1.4 riastrad struct amdgpu_bo *bo = NULL; 236 1.4 riastrad struct amdgpu_bo_param bp; 237 1.1 riastrad int r; 238 1.4 riastrad void *cpu_ptr_tmp = NULL; 239 1.4 riastrad 240 1.4 riastrad memset(&bp, 0, sizeof(bp)); 241 1.4 riastrad bp.size = size; 242 1.4 riastrad bp.byte_align = PAGE_SIZE; 243 1.4 riastrad bp.domain = AMDGPU_GEM_DOMAIN_GTT; 244 1.4 riastrad bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC; 245 1.4 riastrad bp.type = ttm_bo_type_kernel; 246 1.4 riastrad bp.resv = NULL; 247 1.1 riastrad 248 1.4 riastrad if (mqd_gfx9) 249 1.4 riastrad bp.flags |= AMDGPU_GEM_CREATE_MQD_GFX9; 250 1.1 riastrad 251 1.4 riastrad r = amdgpu_bo_create(adev, &bp, &bo); 252 1.1 riastrad if (r) { 253 1.4 riastrad dev_err(adev->dev, 254 1.1 riastrad "failed to allocate BO for amdkfd (%d)\n", r); 255 1.1 riastrad return r; 256 1.1 riastrad } 257 1.1 riastrad 258 1.1 riastrad /* map the buffer */ 259 1.4 riastrad r = amdgpu_bo_reserve(bo, true); 260 1.1 riastrad if (r) { 261 1.4 riastrad dev_err(adev->dev, "(%d) failed to reserve bo for amdkfd\n", r); 262 1.1 riastrad goto allocate_mem_reserve_bo_failed; 263 1.1 riastrad } 264 1.1 riastrad 265 1.4 riastrad r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT); 266 1.1 riastrad if (r) { 267 1.4 riastrad dev_err(adev->dev, "(%d) failed to pin bo for amdkfd\n", r); 268 1.1 riastrad goto allocate_mem_pin_bo_failed; 269 1.1 riastrad } 270 1.1 riastrad 271 1.4 riastrad r = amdgpu_ttm_alloc_gart(&bo->tbo); 272 1.4 riastrad if (r) { 273 1.4 riastrad dev_err(adev->dev, "%p bind failed\n", bo); 274 1.4 riastrad goto allocate_mem_kmap_bo_failed; 275 1.4 riastrad } 276 1.4 riastrad 277 1.4 riastrad r = amdgpu_bo_kmap(bo, &cpu_ptr_tmp); 278 1.1 riastrad if (r) { 279 1.4 riastrad dev_err(adev->dev, 280 1.1 riastrad "(%d) failed to map bo to kernel for amdkfd\n", r); 281 1.1 riastrad goto allocate_mem_kmap_bo_failed; 282 1.1 riastrad } 283 1.1 riastrad 284 1.4 riastrad *mem_obj = bo; 285 1.4 riastrad *gpu_addr = amdgpu_bo_gpu_offset(bo); 286 1.4 riastrad *cpu_ptr = cpu_ptr_tmp; 287 1.4 riastrad 288 1.4 riastrad amdgpu_bo_unreserve(bo); 289 1.1 riastrad 290 1.1 riastrad return 0; 291 1.1 riastrad 292 1.1 riastrad allocate_mem_kmap_bo_failed: 293 1.4 riastrad amdgpu_bo_unpin(bo); 294 1.1 riastrad allocate_mem_pin_bo_failed: 295 1.4 riastrad amdgpu_bo_unreserve(bo); 296 1.1 riastrad allocate_mem_reserve_bo_failed: 297 1.4 riastrad amdgpu_bo_unref(&bo); 298 1.4 riastrad 299 1.4 riastrad return r; 300 1.4 riastrad } 301 1.4 riastrad 302 1.4 riastrad void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj) 303 1.4 riastrad { 304 1.4 riastrad struct amdgpu_bo *bo = (struct amdgpu_bo *) mem_obj; 305 1.4 riastrad 306 1.4 riastrad amdgpu_bo_reserve(bo, true); 307 1.4 riastrad amdgpu_bo_kunmap(bo); 308 1.4 riastrad amdgpu_bo_unpin(bo); 309 1.4 riastrad amdgpu_bo_unreserve(bo); 310 1.4 riastrad amdgpu_bo_unref(&(bo)); 311 1.4 riastrad } 312 1.4 riastrad 313 1.4 riastrad int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size, 314 1.4 riastrad void **mem_obj) 315 1.4 riastrad { 316 1.4 riastrad struct amdgpu_device *adev = (struct amdgpu_device *)kgd; 317 1.4 riastrad struct amdgpu_bo *bo = NULL; 318 1.4 riastrad struct amdgpu_bo_param bp; 319 1.4 riastrad int r; 320 1.4 riastrad 321 1.4 riastrad memset(&bp, 0, sizeof(bp)); 322 1.4 riastrad bp.size = size; 323 1.4 riastrad bp.byte_align = 1; 324 1.4 riastrad bp.domain = AMDGPU_GEM_DOMAIN_GWS; 325 1.4 riastrad bp.flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS; 326 1.4 riastrad bp.type = ttm_bo_type_device; 327 1.4 riastrad bp.resv = NULL; 328 1.4 riastrad 329 1.4 riastrad r = amdgpu_bo_create(adev, &bp, &bo); 330 1.4 riastrad if (r) { 331 1.4 riastrad dev_err(adev->dev, 332 1.4 riastrad "failed to allocate gws BO for amdkfd (%d)\n", r); 333 1.4 riastrad return r; 334 1.4 riastrad } 335 1.4 riastrad 336 1.4 riastrad *mem_obj = bo; 337 1.4 riastrad return 0; 338 1.4 riastrad } 339 1.4 riastrad 340 1.4 riastrad void amdgpu_amdkfd_free_gws(struct kgd_dev *kgd, void *mem_obj) 341 1.4 riastrad { 342 1.4 riastrad struct amdgpu_bo *bo = (struct amdgpu_bo *)mem_obj; 343 1.4 riastrad 344 1.4 riastrad amdgpu_bo_unref(&bo); 345 1.4 riastrad } 346 1.4 riastrad 347 1.4 riastrad uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd, 348 1.4 riastrad enum kgd_engine_type type) 349 1.4 riastrad { 350 1.4 riastrad struct amdgpu_device *adev = (struct amdgpu_device *)kgd; 351 1.4 riastrad 352 1.4 riastrad switch (type) { 353 1.4 riastrad case KGD_ENGINE_PFP: 354 1.4 riastrad return adev->gfx.pfp_fw_version; 355 1.4 riastrad 356 1.4 riastrad case KGD_ENGINE_ME: 357 1.4 riastrad return adev->gfx.me_fw_version; 358 1.4 riastrad 359 1.4 riastrad case KGD_ENGINE_CE: 360 1.4 riastrad return adev->gfx.ce_fw_version; 361 1.4 riastrad 362 1.4 riastrad case KGD_ENGINE_MEC1: 363 1.4 riastrad return adev->gfx.mec_fw_version; 364 1.4 riastrad 365 1.4 riastrad case KGD_ENGINE_MEC2: 366 1.4 riastrad return adev->gfx.mec2_fw_version; 367 1.4 riastrad 368 1.4 riastrad case KGD_ENGINE_RLC: 369 1.4 riastrad return adev->gfx.rlc_fw_version; 370 1.4 riastrad 371 1.4 riastrad case KGD_ENGINE_SDMA1: 372 1.4 riastrad return adev->sdma.instance[0].fw_version; 373 1.4 riastrad 374 1.4 riastrad case KGD_ENGINE_SDMA2: 375 1.4 riastrad return adev->sdma.instance[1].fw_version; 376 1.4 riastrad 377 1.4 riastrad default: 378 1.4 riastrad return 0; 379 1.4 riastrad } 380 1.4 riastrad 381 1.4 riastrad return 0; 382 1.4 riastrad } 383 1.4 riastrad 384 1.4 riastrad void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd, 385 1.4 riastrad struct kfd_local_mem_info *mem_info) 386 1.4 riastrad { 387 1.4 riastrad struct amdgpu_device *adev = (struct amdgpu_device *)kgd; 388 1.6 riastrad #ifdef __NetBSD__ 389 1.6 riastrad uint64_t address_mask = ~(uint64_t)0; /* XXX */ 390 1.6 riastrad #else 391 1.4 riastrad uint64_t address_mask = adev->dev->dma_mask ? ~*adev->dev->dma_mask : 392 1.4 riastrad ~((1ULL << 32) - 1); 393 1.6 riastrad #endif 394 1.4 riastrad resource_size_t aper_limit = adev->gmc.aper_base + adev->gmc.aper_size; 395 1.4 riastrad 396 1.4 riastrad memset(mem_info, 0, sizeof(*mem_info)); 397 1.4 riastrad if (!(adev->gmc.aper_base & address_mask || aper_limit & address_mask)) { 398 1.4 riastrad mem_info->local_mem_size_public = adev->gmc.visible_vram_size; 399 1.4 riastrad mem_info->local_mem_size_private = adev->gmc.real_vram_size - 400 1.4 riastrad adev->gmc.visible_vram_size; 401 1.4 riastrad } else { 402 1.4 riastrad mem_info->local_mem_size_public = 0; 403 1.4 riastrad mem_info->local_mem_size_private = adev->gmc.real_vram_size; 404 1.4 riastrad } 405 1.4 riastrad mem_info->vram_width = adev->gmc.vram_width; 406 1.4 riastrad 407 1.6 riastrad pr_debug("Address base: %pap limit %pap public 0x%"PRIx64" private 0x%"PRIx64"\n", 408 1.4 riastrad &adev->gmc.aper_base, &aper_limit, 409 1.4 riastrad mem_info->local_mem_size_public, 410 1.4 riastrad mem_info->local_mem_size_private); 411 1.4 riastrad 412 1.4 riastrad if (amdgpu_sriov_vf(adev)) 413 1.4 riastrad mem_info->mem_clk_max = adev->clock.default_mclk / 100; 414 1.4 riastrad else if (adev->powerplay.pp_funcs) { 415 1.4 riastrad if (amdgpu_emu_mode == 1) 416 1.4 riastrad mem_info->mem_clk_max = 0; 417 1.4 riastrad else 418 1.4 riastrad mem_info->mem_clk_max = amdgpu_dpm_get_mclk(adev, false) / 100; 419 1.4 riastrad } else 420 1.4 riastrad mem_info->mem_clk_max = 100; 421 1.4 riastrad } 422 1.4 riastrad 423 1.4 riastrad uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct kgd_dev *kgd) 424 1.4 riastrad { 425 1.4 riastrad struct amdgpu_device *adev = (struct amdgpu_device *)kgd; 426 1.4 riastrad 427 1.4 riastrad if (adev->gfx.funcs->get_gpu_clock_counter) 428 1.4 riastrad return adev->gfx.funcs->get_gpu_clock_counter(adev); 429 1.4 riastrad return 0; 430 1.4 riastrad } 431 1.4 riastrad 432 1.4 riastrad uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct kgd_dev *kgd) 433 1.4 riastrad { 434 1.4 riastrad struct amdgpu_device *adev = (struct amdgpu_device *)kgd; 435 1.1 riastrad 436 1.4 riastrad /* the sclk is in quantas of 10kHz */ 437 1.4 riastrad if (amdgpu_sriov_vf(adev)) 438 1.4 riastrad return adev->clock.default_sclk / 100; 439 1.4 riastrad else if (adev->powerplay.pp_funcs) 440 1.4 riastrad return amdgpu_dpm_get_sclk(adev, false) / 100; 441 1.4 riastrad else 442 1.4 riastrad return 100; 443 1.4 riastrad } 444 1.4 riastrad 445 1.4 riastrad void amdgpu_amdkfd_get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info) 446 1.4 riastrad { 447 1.4 riastrad struct amdgpu_device *adev = (struct amdgpu_device *)kgd; 448 1.4 riastrad struct amdgpu_cu_info acu_info = adev->gfx.cu_info; 449 1.4 riastrad 450 1.4 riastrad memset(cu_info, 0, sizeof(*cu_info)); 451 1.4 riastrad if (sizeof(cu_info->cu_bitmap) != sizeof(acu_info.bitmap)) 452 1.4 riastrad return; 453 1.4 riastrad 454 1.4 riastrad cu_info->cu_active_number = acu_info.number; 455 1.4 riastrad cu_info->cu_ao_mask = acu_info.ao_cu_mask; 456 1.4 riastrad memcpy(&cu_info->cu_bitmap[0], &acu_info.bitmap[0], 457 1.4 riastrad sizeof(acu_info.bitmap)); 458 1.4 riastrad cu_info->num_shader_engines = adev->gfx.config.max_shader_engines; 459 1.4 riastrad cu_info->num_shader_arrays_per_engine = adev->gfx.config.max_sh_per_se; 460 1.4 riastrad cu_info->num_cu_per_sh = adev->gfx.config.max_cu_per_sh; 461 1.4 riastrad cu_info->simd_per_cu = acu_info.simd_per_cu; 462 1.4 riastrad cu_info->max_waves_per_simd = acu_info.max_waves_per_simd; 463 1.4 riastrad cu_info->wave_front_size = acu_info.wave_front_size; 464 1.4 riastrad cu_info->max_scratch_slots_per_cu = acu_info.max_scratch_slots_per_cu; 465 1.4 riastrad cu_info->lds_size = acu_info.lds_size; 466 1.4 riastrad } 467 1.4 riastrad 468 1.4 riastrad int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd, 469 1.4 riastrad struct kgd_dev **dma_buf_kgd, 470 1.4 riastrad uint64_t *bo_size, void *metadata_buffer, 471 1.4 riastrad size_t buffer_size, uint32_t *metadata_size, 472 1.4 riastrad uint32_t *flags) 473 1.4 riastrad { 474 1.4 riastrad struct amdgpu_device *adev = (struct amdgpu_device *)kgd; 475 1.4 riastrad struct dma_buf *dma_buf; 476 1.4 riastrad struct drm_gem_object *obj; 477 1.4 riastrad struct amdgpu_bo *bo; 478 1.4 riastrad uint64_t metadata_flags; 479 1.4 riastrad int r = -EINVAL; 480 1.4 riastrad 481 1.4 riastrad dma_buf = dma_buf_get(dma_buf_fd); 482 1.4 riastrad if (IS_ERR(dma_buf)) 483 1.4 riastrad return PTR_ERR(dma_buf); 484 1.4 riastrad 485 1.4 riastrad if (dma_buf->ops != &amdgpu_dmabuf_ops) 486 1.4 riastrad /* Can't handle non-graphics buffers */ 487 1.4 riastrad goto out_put; 488 1.4 riastrad 489 1.4 riastrad obj = dma_buf->priv; 490 1.4 riastrad if (obj->dev->driver != adev->ddev->driver) 491 1.4 riastrad /* Can't handle buffers from different drivers */ 492 1.4 riastrad goto out_put; 493 1.4 riastrad 494 1.4 riastrad adev = obj->dev->dev_private; 495 1.4 riastrad bo = gem_to_amdgpu_bo(obj); 496 1.4 riastrad if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM | 497 1.4 riastrad AMDGPU_GEM_DOMAIN_GTT))) 498 1.4 riastrad /* Only VRAM and GTT BOs are supported */ 499 1.4 riastrad goto out_put; 500 1.4 riastrad 501 1.4 riastrad r = 0; 502 1.4 riastrad if (dma_buf_kgd) 503 1.4 riastrad *dma_buf_kgd = (struct kgd_dev *)adev; 504 1.4 riastrad if (bo_size) 505 1.4 riastrad *bo_size = amdgpu_bo_size(bo); 506 1.4 riastrad if (metadata_size) 507 1.4 riastrad *metadata_size = bo->metadata_size; 508 1.4 riastrad if (metadata_buffer) 509 1.4 riastrad r = amdgpu_bo_get_metadata(bo, metadata_buffer, buffer_size, 510 1.4 riastrad metadata_size, &metadata_flags); 511 1.4 riastrad if (flags) { 512 1.4 riastrad *flags = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ? 513 1.4 riastrad ALLOC_MEM_FLAGS_VRAM : ALLOC_MEM_FLAGS_GTT; 514 1.4 riastrad 515 1.4 riastrad if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) 516 1.4 riastrad *flags |= ALLOC_MEM_FLAGS_PUBLIC; 517 1.4 riastrad } 518 1.4 riastrad 519 1.4 riastrad out_put: 520 1.4 riastrad dma_buf_put(dma_buf); 521 1.1 riastrad return r; 522 1.1 riastrad } 523 1.1 riastrad 524 1.4 riastrad uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd) 525 1.4 riastrad { 526 1.4 riastrad struct amdgpu_device *adev = (struct amdgpu_device *)kgd; 527 1.4 riastrad 528 1.4 riastrad return amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); 529 1.4 riastrad } 530 1.4 riastrad 531 1.4 riastrad uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd) 532 1.4 riastrad { 533 1.4 riastrad struct amdgpu_device *adev = (struct amdgpu_device *)kgd; 534 1.4 riastrad 535 1.4 riastrad return adev->gmc.xgmi.hive_id; 536 1.4 riastrad } 537 1.4 riastrad uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src) 538 1.4 riastrad { 539 1.4 riastrad struct amdgpu_device *peer_adev = (struct amdgpu_device *)src; 540 1.4 riastrad struct amdgpu_device *adev = (struct amdgpu_device *)dst; 541 1.4 riastrad int ret = amdgpu_xgmi_get_hops_count(adev, peer_adev); 542 1.4 riastrad 543 1.4 riastrad if (ret < 0) { 544 1.4 riastrad DRM_ERROR("amdgpu: failed to get xgmi hops count between node %d and %d. ret = %d\n", 545 1.4 riastrad adev->gmc.xgmi.physical_node_id, 546 1.4 riastrad peer_adev->gmc.xgmi.physical_node_id, ret); 547 1.4 riastrad ret = 0; 548 1.4 riastrad } 549 1.4 riastrad return (uint8_t)ret; 550 1.4 riastrad } 551 1.4 riastrad 552 1.4 riastrad uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd) 553 1.4 riastrad { 554 1.4 riastrad struct amdgpu_device *adev = (struct amdgpu_device *)kgd; 555 1.4 riastrad 556 1.4 riastrad return adev->rmmio_remap.bus_addr; 557 1.4 riastrad } 558 1.4 riastrad 559 1.4 riastrad uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd) 560 1.4 riastrad { 561 1.4 riastrad struct amdgpu_device *adev = (struct amdgpu_device *)kgd; 562 1.4 riastrad 563 1.4 riastrad return adev->gds.gws_size; 564 1.4 riastrad } 565 1.4 riastrad 566 1.4 riastrad int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, 567 1.4 riastrad uint32_t vmid, uint64_t gpu_addr, 568 1.4 riastrad uint32_t *ib_cmd, uint32_t ib_len) 569 1.4 riastrad { 570 1.4 riastrad struct amdgpu_device *adev = (struct amdgpu_device *)kgd; 571 1.4 riastrad struct amdgpu_job *job; 572 1.4 riastrad struct amdgpu_ib *ib; 573 1.4 riastrad struct amdgpu_ring *ring; 574 1.4 riastrad struct dma_fence *f = NULL; 575 1.4 riastrad int ret; 576 1.4 riastrad 577 1.4 riastrad switch (engine) { 578 1.4 riastrad case KGD_ENGINE_MEC1: 579 1.4 riastrad ring = &adev->gfx.compute_ring[0]; 580 1.4 riastrad break; 581 1.4 riastrad case KGD_ENGINE_SDMA1: 582 1.4 riastrad ring = &adev->sdma.instance[0].ring; 583 1.4 riastrad break; 584 1.4 riastrad case KGD_ENGINE_SDMA2: 585 1.4 riastrad ring = &adev->sdma.instance[1].ring; 586 1.4 riastrad break; 587 1.4 riastrad default: 588 1.4 riastrad pr_err("Invalid engine in IB submission: %d\n", engine); 589 1.4 riastrad ret = -EINVAL; 590 1.4 riastrad goto err; 591 1.4 riastrad } 592 1.4 riastrad 593 1.4 riastrad ret = amdgpu_job_alloc(adev, 1, &job, NULL); 594 1.4 riastrad if (ret) 595 1.4 riastrad goto err; 596 1.4 riastrad 597 1.4 riastrad ib = &job->ibs[0]; 598 1.4 riastrad memset(ib, 0, sizeof(struct amdgpu_ib)); 599 1.4 riastrad 600 1.4 riastrad ib->gpu_addr = gpu_addr; 601 1.4 riastrad ib->ptr = ib_cmd; 602 1.4 riastrad ib->length_dw = ib_len; 603 1.4 riastrad /* This works for NO_HWS. TODO: need to handle without knowing VMID */ 604 1.4 riastrad job->vmid = vmid; 605 1.4 riastrad 606 1.4 riastrad ret = amdgpu_ib_schedule(ring, 1, ib, job, &f); 607 1.4 riastrad if (ret) { 608 1.4 riastrad DRM_ERROR("amdgpu: failed to schedule IB.\n"); 609 1.4 riastrad goto err_ib_sched; 610 1.4 riastrad } 611 1.4 riastrad 612 1.4 riastrad ret = dma_fence_wait(f, false); 613 1.4 riastrad 614 1.4 riastrad err_ib_sched: 615 1.4 riastrad dma_fence_put(f); 616 1.4 riastrad amdgpu_job_free(job); 617 1.4 riastrad err: 618 1.4 riastrad return ret; 619 1.4 riastrad } 620 1.4 riastrad 621 1.4 riastrad void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle) 622 1.4 riastrad { 623 1.4 riastrad struct amdgpu_device *adev = (struct amdgpu_device *)kgd; 624 1.4 riastrad 625 1.4 riastrad amdgpu_dpm_switch_power_profile(adev, 626 1.4 riastrad PP_SMC_POWER_PROFILE_COMPUTE, 627 1.4 riastrad !idle); 628 1.4 riastrad } 629 1.4 riastrad 630 1.4 riastrad bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid) 631 1.4 riastrad { 632 1.4 riastrad if (adev->kfd.dev) { 633 1.4 riastrad if ((1 << vmid) & compute_vmid_bitmap) 634 1.4 riastrad return true; 635 1.4 riastrad } 636 1.4 riastrad 637 1.4 riastrad return false; 638 1.4 riastrad } 639 1.4 riastrad 640 1.4 riastrad int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid) 641 1.4 riastrad { 642 1.4 riastrad struct amdgpu_device *adev = (struct amdgpu_device *)kgd; 643 1.4 riastrad 644 1.4 riastrad if (adev->family == AMDGPU_FAMILY_AI) { 645 1.4 riastrad int i; 646 1.4 riastrad 647 1.4 riastrad for (i = 0; i < adev->num_vmhubs; i++) 648 1.4 riastrad amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0); 649 1.4 riastrad } else { 650 1.4 riastrad amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0); 651 1.4 riastrad } 652 1.4 riastrad 653 1.4 riastrad return 0; 654 1.4 riastrad } 655 1.4 riastrad 656 1.4 riastrad int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid) 657 1.4 riastrad { 658 1.4 riastrad struct amdgpu_device *adev = (struct amdgpu_device *)kgd; 659 1.4 riastrad uint32_t flush_type = 0; 660 1.4 riastrad bool all_hub = false; 661 1.4 riastrad 662 1.4 riastrad if (adev->gmc.xgmi.num_physical_nodes && 663 1.4 riastrad adev->asic_type == CHIP_VEGA20) 664 1.4 riastrad flush_type = 2; 665 1.4 riastrad 666 1.4 riastrad if (adev->family == AMDGPU_FAMILY_AI) 667 1.4 riastrad all_hub = true; 668 1.4 riastrad 669 1.4 riastrad return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub); 670 1.4 riastrad } 671 1.4 riastrad 672 1.4 riastrad bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd) 673 1.4 riastrad { 674 1.4 riastrad struct amdgpu_device *adev = (struct amdgpu_device *)kgd; 675 1.4 riastrad 676 1.4 riastrad return adev->have_atomics_support; 677 1.4 riastrad } 678 1.4 riastrad 679 1.4 riastrad #ifndef CONFIG_HSA_AMD 680 1.4 riastrad bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm) 681 1.4 riastrad { 682 1.4 riastrad return false; 683 1.4 riastrad } 684 1.4 riastrad 685 1.4 riastrad void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo) 686 1.4 riastrad { 687 1.4 riastrad } 688 1.4 riastrad 689 1.4 riastrad void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, 690 1.4 riastrad struct amdgpu_vm *vm) 691 1.4 riastrad { 692 1.4 riastrad } 693 1.4 riastrad 694 1.4 riastrad struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f) 695 1.1 riastrad { 696 1.4 riastrad return NULL; 697 1.4 riastrad } 698 1.1 riastrad 699 1.4 riastrad int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm) 700 1.4 riastrad { 701 1.4 riastrad return 0; 702 1.4 riastrad } 703 1.1 riastrad 704 1.4 riastrad struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev, 705 1.4 riastrad unsigned int asic_type, bool vf) 706 1.4 riastrad { 707 1.4 riastrad return NULL; 708 1.1 riastrad } 709 1.1 riastrad 710 1.4 riastrad bool kgd2kfd_device_init(struct kfd_dev *kfd, 711 1.4 riastrad struct drm_device *ddev, 712 1.4 riastrad const struct kgd2kfd_shared_resources *gpu_resources) 713 1.1 riastrad { 714 1.4 riastrad return false; 715 1.4 riastrad } 716 1.1 riastrad 717 1.4 riastrad void kgd2kfd_device_exit(struct kfd_dev *kfd) 718 1.4 riastrad { 719 1.4 riastrad } 720 1.1 riastrad 721 1.4 riastrad void kgd2kfd_exit(void) 722 1.4 riastrad { 723 1.1 riastrad } 724 1.1 riastrad 725 1.4 riastrad void kgd2kfd_suspend(struct kfd_dev *kfd) 726 1.1 riastrad { 727 1.4 riastrad } 728 1.1 riastrad 729 1.4 riastrad int kgd2kfd_resume(struct kfd_dev *kfd) 730 1.4 riastrad { 731 1.4 riastrad return 0; 732 1.4 riastrad } 733 1.4 riastrad 734 1.4 riastrad int kgd2kfd_pre_reset(struct kfd_dev *kfd) 735 1.4 riastrad { 736 1.4 riastrad return 0; 737 1.4 riastrad } 738 1.4 riastrad 739 1.4 riastrad int kgd2kfd_post_reset(struct kfd_dev *kfd) 740 1.4 riastrad { 741 1.1 riastrad return 0; 742 1.1 riastrad } 743 1.1 riastrad 744 1.4 riastrad void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) 745 1.1 riastrad { 746 1.4 riastrad } 747 1.1 riastrad 748 1.4 riastrad void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd) 749 1.4 riastrad { 750 1.1 riastrad } 751 1.4 riastrad #endif 752