/*	$NetBSD: amdgpu_gfx.c,v 1.6 2021/12/19 12:31:45 riastradh Exp $	*/

/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: amdgpu_gfx.c,v 1.6 2021/12/19 12:31:45 riastradh Exp $");

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "amdgpu_rlc.h"
#include "amdgpu_ras.h"

/* delay 0.1 second to enable gfx off feature */
#define GFX_OFF_DELAY_ENABLE		msecs_to_jiffies(100)

/*
 * GPU GFX IP block helper functions.
 */

int amdgpu_gfx_mec_queue_to_bit(struct amdgpu_device *adev, int mec,
				int pipe, int queue)
{
	int bit = 0;

	bit += mec * adev->gfx.mec.num_pipe_per_mec
		* adev->gfx.mec.num_queue_per_pipe;
	bit += pipe * adev->gfx.mec.num_queue_per_pipe;
	bit += queue;

	return bit;
}

void amdgpu_gfx_bit_to_mec_queue(struct amdgpu_device *adev, int bit,
				 int *mec, int *pipe, int *queue)
{
	*queue = bit % adev->gfx.mec.num_queue_per_pipe;
	*pipe = (bit / adev->gfx.mec.num_queue_per_pipe)
		% adev->gfx.mec.num_pipe_per_mec;
	*mec = (bit / adev->gfx.mec.num_queue_per_pipe)
		/ adev->gfx.mec.num_pipe_per_mec;
}

bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev,
				     int mec, int pipe, int queue)
{
	return test_bit(amdgpu_gfx_mec_queue_to_bit(adev, mec, pipe, queue),
			adev->gfx.mec.queue_bitmap);
}

int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev,
			       int me, int pipe, int queue)
{
	int bit = 0;

	bit += me * adev->gfx.me.num_pipe_per_me
		* adev->gfx.me.num_queue_per_pipe;
	bit += pipe * adev->gfx.me.num_queue_per_pipe;
	bit += queue;

	return bit;
}

void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit,
				int *me, int *pipe, int *queue)
{
	*queue = bit % adev->gfx.me.num_queue_per_pipe;
	*pipe = (bit / adev->gfx.me.num_queue_per_pipe)
		% adev->gfx.me.num_pipe_per_me;
	*me = (bit / adev->gfx.me.num_queue_per_pipe)
		/ adev->gfx.me.num_pipe_per_me;
}

bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev,
				    int me, int pipe, int queue)
{
	return test_bit(amdgpu_gfx_me_queue_to_bit(adev, me, pipe, queue),
			adev->gfx.me.queue_bitmap);
}
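
/*
 * Illustrative note (editorial, not from the original source): the helpers
 * above linearize (mec, pipe, queue) into a single bit index as
 *
 *	bit = (mec * num_pipe_per_mec + pipe) * num_queue_per_pipe + queue
 *
 * and amdgpu_gfx_bit_to_mec_queue() is the inverse mapping.  With, say,
 * 4 pipes per MEC and 8 queues per pipe (numbers assumed here purely for
 * illustration), MEC 1 / pipe 2 / queue 5 maps to bit (1*4 + 2)*8 + 5 = 53,
 * and bit 53 decodes back to the same triple.  The "me" variants work the
 * same way for the graphics micro engine.
 */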

/**
 * amdgpu_gfx_scratch_get - Allocate a scratch register
 *
 * @adev: amdgpu_device pointer
 * @reg: scratch register mmio offset
 *
 * Allocate a CP scratch register for use by the driver (all asics).
 * Returns 0 on success or -EINVAL on failure.
 */
int amdgpu_gfx_scratch_get(struct amdgpu_device *adev, uint32_t *reg)
{
	int i;

	i = ffs(adev->gfx.scratch.free_mask);
	if (i != 0 && i <= adev->gfx.scratch.num_reg) {
		i--;
		adev->gfx.scratch.free_mask &= ~(1u << i);
		*reg = adev->gfx.scratch.reg_base + i;
		return 0;
	}
	return -EINVAL;
}

/**
 * amdgpu_gfx_scratch_free - Free a scratch register
 *
 * @adev: amdgpu_device pointer
 * @reg: scratch register mmio offset
 *
 * Free a CP scratch register allocated for use by the driver (all asics)
 */
void amdgpu_gfx_scratch_free(struct amdgpu_device *adev, uint32_t reg)
{
	adev->gfx.scratch.free_mask |= 1u << (reg - adev->gfx.scratch.reg_base);
}

/**
 * amdgpu_gfx_parse_disable_cu - Parse the disable_cu module parameter
 *
 * @mask: array in which the per-shader array disable masks will be stored
 * @max_se: number of SEs
 * @max_sh: number of SHs
 *
 * The bitmask of CUs to be disabled in the shader array determined by se and
 * sh is stored in mask[se * max_sh + sh].
 */
void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_sh)
{
	unsigned se, sh, cu;
	const char *p;

	memset(mask, 0, sizeof(*mask) * max_se * max_sh);

	if (!amdgpu_disable_cu || !*amdgpu_disable_cu)
		return;

	p = amdgpu_disable_cu;
	for (;;) {
		char *next;
		int ret = sscanf(p, "%u.%u.%u", &se, &sh, &cu);
		if (ret < 3) {
			DRM_ERROR("amdgpu: could not parse disable_cu\n");
			return;
		}

		if (se < max_se && sh < max_sh && cu < 16) {
			DRM_INFO("amdgpu: disabling CU %u.%u.%u\n", se, sh, cu);
			mask[se * max_sh + sh] |= 1u << cu;
		} else {
			DRM_ERROR("amdgpu: disable_cu %u.%u.%u is out of range\n",
				  se, sh, cu);
		}

		next = strchr(p, ',');
		if (!next)
			break;
		p = next + 1;
	}
}
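
/*
 * Example (editorial; the format is inferred from the parsing loop above):
 * amdgpu_disable_cu is a comma-separated list of "se.sh.cu" triples, e.g.
 *
 *	amdgpu.disable_cu=1.0.3,1.0.4
 *
 * which sets bits 3 and 4 in mask[1 * max_sh + 0], i.e. disables CUs 3 and 4
 * of shader array 0 on shader engine 1, assuming those indices are in range.
 */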

static bool amdgpu_gfx_is_multipipe_capable(struct amdgpu_device *adev)
{
	if (amdgpu_compute_multipipe != -1) {
		DRM_INFO("amdgpu: forcing compute pipe policy %d\n",
			 amdgpu_compute_multipipe);
		return amdgpu_compute_multipipe == 1;
	}

	/* FIXME: spreading the queues across pipes causes perf regressions
	 * on POLARIS11 compute workloads */
	if (adev->asic_type == CHIP_POLARIS11)
		return false;

	return adev->gfx.mec.num_mec > 1;
}

void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
{
	int i, queue, pipe, mec;
	bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);

	/* policy for amdgpu compute queue ownership */
	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
		queue = i % adev->gfx.mec.num_queue_per_pipe;
		pipe = (i / adev->gfx.mec.num_queue_per_pipe)
			% adev->gfx.mec.num_pipe_per_mec;
		mec = (i / adev->gfx.mec.num_queue_per_pipe)
			/ adev->gfx.mec.num_pipe_per_mec;

		/* we've run out of HW */
		if (mec >= adev->gfx.mec.num_mec)
			break;

		if (multipipe_policy) {
			/* policy: amdgpu owns the first two queues of the first MEC */
			if (mec == 0 && queue < 2)
				set_bit(i, adev->gfx.mec.queue_bitmap);
		} else {
			/* policy: amdgpu owns all queues in the first pipe */
			if (mec == 0 && pipe == 0)
				set_bit(i, adev->gfx.mec.queue_bitmap);
		}
	}

	/* update the number of active compute rings */
	adev->gfx.num_compute_rings =
		bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

	/* If you hit this case and edited the policy, you probably just
	 * need to increase AMDGPU_MAX_COMPUTE_RINGS */
	if (WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS))
		adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
}

void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
{
	int i, queue, me;

	for (i = 0; i < AMDGPU_MAX_GFX_QUEUES; ++i) {
		queue = i % adev->gfx.me.num_queue_per_pipe;
		me = (i / adev->gfx.me.num_queue_per_pipe)
			/ adev->gfx.me.num_pipe_per_me;

		if (me >= adev->gfx.me.num_me)
			break;
		/* policy: amdgpu owns the first queue per pipe at this stage;
		 * will extend to multiple queues per pipe later */
		if (me == 0 && queue < 1)
			set_bit(i, adev->gfx.me.queue_bitmap);
	}

	/* update the number of active graphics rings */
	adev->gfx.num_gfx_rings =
		bitmap_weight(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
}
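
/*
 * Worked example (editorial; the queue counts are assumed, not taken from a
 * specific ASIC): with 2 MECs, 4 pipes per MEC and 8 queues per pipe, the
 * loop above walks 64 compute queues.  Under the multipipe policy amdgpu
 * takes the first two queues of every pipe of MEC 0 (bits 0,1, 8,9, 16,17,
 * 24,25); under the single-pipe policy it takes all eight queues of MEC 0 /
 * pipe 0 (bits 0..7).  Either way num_compute_rings ends up as 8 here.
 */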

static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
				  struct amdgpu_ring *ring)
{
	int queue_bit;
	int mec, pipe, queue;

	queue_bit = adev->gfx.mec.num_mec
		* adev->gfx.mec.num_pipe_per_mec
		* adev->gfx.mec.num_queue_per_pipe;

	while (queue_bit-- >= 0) {
		if (test_bit(queue_bit, adev->gfx.mec.queue_bitmap))
			continue;

		amdgpu_gfx_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);

		/*
		 * 1. Using pipes 2/3 from MEC 2 seems to cause problems.
		 * 2. It must use queue id 0, because CGPG_IDLE/SAVE/LOAD/RUN
		 *    can only be issued on queue 0.
		 */
		if ((mec == 1 && pipe > 1) || queue != 0)
			continue;

		ring->me = mec + 1;
		ring->pipe = pipe;
		ring->queue = queue;

		return 0;
	}

	dev_err(adev->dev, "Failed to find a queue for KIQ\n");
	return -EINVAL;
}

int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
			     struct amdgpu_ring *ring,
			     struct amdgpu_irq_src *irq)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	int r = 0;

	spin_lock_init(&kiq->ring_lock);

	r = amdgpu_device_wb_get(adev, &kiq->reg_val_offs);
	if (r)
		return r;

	ring->adev = NULL;
	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->doorbell_index = adev->doorbell_index.kiq;

	r = amdgpu_gfx_kiq_acquire(adev, ring);
	if (r)
		return r;

	ring->eop_gpu_addr = kiq->eop_gpu_addr;
	snprintf(ring->name, sizeof(ring->name), "kiq_%d.%d.%d",
		 ring->me, ring->pipe, ring->queue);
	r = amdgpu_ring_init(adev, ring, 1024,
			     irq, AMDGPU_CP_KIQ_IRQ_DRIVER0);
	if (r)
		dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);

	return r;
}

void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring)
{
	amdgpu_device_wb_free(ring->adev, ring->adev->gfx.kiq.reg_val_offs);
	amdgpu_ring_fini(ring);
	spin_lock_destroy(&ring->adev->gfx.kiq.ring_lock);
}

void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;

	amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
}

int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
			unsigned hpd_size)
{
	int r;
	u32 *hpd;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;

	r = amdgpu_bo_create_kernel(adev, hpd_size, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
				    &kiq->eop_gpu_addr, (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
		return r;
	}

	memset(hpd, 0, hpd_size);

	r = amdgpu_bo_reserve(kiq->eop_obj, true);
	if (unlikely(r != 0))
		dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
	amdgpu_bo_kunmap(kiq->eop_obj);
	amdgpu_bo_unreserve(kiq->eop_obj);

	return 0;
}

/* create MQD for each compute/gfx queue */
int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
			   unsigned mqd_size)
{
	struct amdgpu_ring *ring = NULL;
	int r, i;

	/* create MQD for KIQ */
	ring = &adev->gfx.kiq.ring;
	if (!ring->mqd_obj) {
		/* Originally the KIQ MQD was placed in the GTT domain, but for
		 * SRIOV the VRAM domain is a must; otherwise the hypervisor's
		 * SAVE_VF step fails after the driver is unloaded, because the
		 * MQD has already been deallocated and gart-unbound.  To avoid
		 * that divergence we use the VRAM domain for the KIQ MQD on
		 * both SRIOV and bare-metal.
		 */
		r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
					    AMDGPU_GEM_DOMAIN_VRAM, &ring->mqd_obj,
					    &ring->mqd_gpu_addr, &ring->mqd_ptr);
		if (r) {
			dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
			return r;
		}

		/* prepare MQD backup */
		adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS] = kmalloc(mqd_size, GFP_KERNEL);
		if (!adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS])
			dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
	}

	if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
		/* create MQD for each KGQ */
		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
			ring = &adev->gfx.gfx_ring[i];
			if (!ring->mqd_obj) {
				r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
							    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
							    &ring->mqd_gpu_addr, &ring->mqd_ptr);
				if (r) {
					dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
					return r;
				}

				/* prepare MQD backup */
				adev->gfx.me.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL);
				if (!adev->gfx.me.mqd_backup[i])
					dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
			}
		}
	}

	/* create MQD for each KCQ */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		if (!ring->mqd_obj) {
			r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
						    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
						    &ring->mqd_gpu_addr, &ring->mqd_ptr);
			if (r) {
				dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
				return r;
			}

			/* prepare MQD backup */
			adev->gfx.mec.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL);
			if (!adev->gfx.mec.mqd_backup[i])
				dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
		}
	}

	return 0;
}
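
/*
 * Editorial note on the backup slots used above (inferred from the code):
 * compute ring i keeps its MQD backup in adev->gfx.mec.mqd_backup[i], gfx
 * ring i in adev->gfx.me.mqd_backup[i], and the KIQ ring uses the extra slot
 * adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS].  The same indices are
 * released again in amdgpu_gfx_mqd_sw_fini() below.
 */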

void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = NULL;
	int i;

	if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
			ring = &adev->gfx.gfx_ring[i];
			kfree(adev->gfx.me.mqd_backup[i]);
			amdgpu_bo_free_kernel(&ring->mqd_obj,
					      &ring->mqd_gpu_addr,
					      &ring->mqd_ptr);
		}
	}

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		kfree(adev->gfx.mec.mqd_backup[i]);
		amdgpu_bo_free_kernel(&ring->mqd_obj,
				      &ring->mqd_gpu_addr,
				      &ring->mqd_ptr);
	}

	ring = &adev->gfx.kiq.ring;
	kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]);
	amdgpu_bo_free_kernel(&ring->mqd_obj,
			      &ring->mqd_gpu_addr,
			      &ring->mqd_ptr);
}

int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	struct amdgpu_ring *kiq_ring = &kiq->ring;
	int i;

	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
		return -EINVAL;

	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
					adev->gfx.num_compute_rings))
		return -ENOMEM;

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.compute_ring[i],
					   RESET_QUEUES, 0, 0);

	return amdgpu_ring_test_ring(kiq_ring);
}

int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
	uint64_t queue_mask = 0;
	int r, i;

	if (!kiq->pmf || !kiq->pmf->kiq_map_queues || !kiq->pmf->kiq_set_resources)
		return -EINVAL;

	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of queue_mask needs updating */
		if (WARN_ON(i > (sizeof(queue_mask)*8))) {
			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
			break;
		}

		queue_mask |= (1ull << i);
	}

	DRM_INFO("kiq ring mec %d pipe %d q %d\n", kiq_ring->me, kiq_ring->pipe,
		 kiq_ring->queue);

	r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
					adev->gfx.num_compute_rings +
					kiq->pmf->set_resources_size);
	if (r) {
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		return r;
	}

	kiq->pmf->kiq_set_resources(kiq_ring, queue_mask);
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		kiq->pmf->kiq_map_queues(kiq_ring, &adev->gfx.compute_ring[i]);

	r = amdgpu_ring_test_helper(kiq_ring);
	if (r)
		DRM_ERROR("KCQ enable failed\n");

	return r;
}

/* amdgpu_gfx_off_ctrl - Handle gfx off feature enable/disable
 *
 * @adev: amdgpu_device pointer
 * @enable: true to enable the gfx off feature, false to disable it
 *
 * 1. The gfx off feature will be enabled by the gfx IP after gfx CG/PG is
 *    enabled.
 * 2. Other clients can send a request to disable the gfx off feature; such
 *    requests should be honored.
 * 3. Other clients can cancel their request to disable the gfx off feature.
 * 4. Other clients should not request enabling the gfx off feature before
 *    having requested to disable it.
 */

void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
{
	if (!(adev->pm.pp_feature & PP_GFXOFF_MASK))
		return;

	mutex_lock(&adev->gfx.gfx_off_mutex);

	if (!enable)
		adev->gfx.gfx_off_req_count++;
	else if (adev->gfx.gfx_off_req_count > 0)
		adev->gfx.gfx_off_req_count--;

	if (enable && !adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
		schedule_delayed_work(&adev->gfx.gfx_off_delay_work, GFX_OFF_DELAY_ENABLE);
	} else if (!enable && adev->gfx.gfx_off_state) {
		if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false))
			adev->gfx.gfx_off_state = false;
	}

	mutex_unlock(&adev->gfx.gfx_off_mutex);
}
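
/*
 * Usage sketch (illustrative, not from the original source): a caller that
 * needs the GFX block to stay powered, e.g. around direct register access,
 * brackets the access with a disable/enable pair:
 *
 *	amdgpu_gfx_off_ctrl(adev, false);
 *	...access GFX registers...
 *	amdgpu_gfx_off_ctrl(adev, true);
 *
 * The false call bumps gfx_off_req_count; the true call drops it again, and
 * GFXOFF is only re-armed (via the delayed work item) once every outstanding
 * disable request has been balanced.
 */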

int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev)
{
	int r;
	struct ras_fs_if fs_info = {
		.sysfs_name = "gfx_err_count",
		.debugfs_name = "gfx_err_inject",
	};
	struct ras_ih_if ih_info = {
		.cb = amdgpu_gfx_process_ras_data_cb,
	};

	if (!adev->gfx.ras_if) {
		adev->gfx.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
		if (!adev->gfx.ras_if)
			return -ENOMEM;
		adev->gfx.ras_if->block = AMDGPU_RAS_BLOCK__GFX;
		adev->gfx.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
		adev->gfx.ras_if->sub_block_index = 0;
		strcpy(adev->gfx.ras_if->name, "gfx");
	}
	fs_info.head = ih_info.head = *adev->gfx.ras_if;

	r = amdgpu_ras_late_init(adev, adev->gfx.ras_if,
				 &fs_info, &ih_info);
	if (r)
		goto free;

	if (amdgpu_ras_is_supported(adev, adev->gfx.ras_if->block)) {
		r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
		if (r)
			goto late_fini;
	} else {
		/* free gfx ras_if if ras is not supported */
		r = 0;
		goto free;
	}

	return 0;
late_fini:
	amdgpu_ras_late_fini(adev, adev->gfx.ras_if, &ih_info);
free:
	kfree(adev->gfx.ras_if);
	adev->gfx.ras_if = NULL;
	return r;
}

void amdgpu_gfx_ras_fini(struct amdgpu_device *adev)
{
	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) &&
	    adev->gfx.ras_if) {
		struct ras_common_if *ras_if = adev->gfx.ras_if;
		struct ras_ih_if ih_info = {
			.head = *ras_if,
			.cb = amdgpu_gfx_process_ras_data_cb,
		};

		amdgpu_ras_late_fini(adev, ras_if, &ih_info);
		kfree(ras_if);
	}
}

int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
				   void *err_data,
				   struct amdgpu_iv_entry *entry)
{
	/* TODO ue will trigger an interrupt.
	 *
	 * When Full RAS is enabled, the per-IP interrupt sources should
	 * be disabled and the driver should only look for the aggregated
	 * interrupt via sync flood
	 */
	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
		kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
		if (adev->gfx.funcs->query_ras_error_count)
			adev->gfx.funcs->query_ras_error_count(adev, err_data);
		amdgpu_ras_reset_gpu(adev);
	}
	return AMDGPU_RAS_SUCCESS;
}

int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
				struct amdgpu_irq_src *source,
				struct amdgpu_iv_entry *entry)
{
	struct ras_common_if *ras_if = adev->gfx.ras_if;
	struct ras_dispatch_if ih_data = {
		.entry = entry,
	};

	if (!ras_if)
		return 0;

	ih_data.head = *ras_if;

	DRM_ERROR("CP ECC ERROR IRQ\n");
	amdgpu_ras_interrupt_dispatch(adev, &ih_data);
	return 0;
}

uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
{
	signed long r, cnt = 0;
	unsigned long flags;
	uint32_t seq;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	struct amdgpu_ring *ring = &kiq->ring;

	BUG_ON(!ring->funcs->emit_rreg);

	spin_lock_irqsave(&kiq->ring_lock, flags);
	amdgpu_ring_alloc(ring, 32);
	amdgpu_ring_emit_rreg(ring, reg);
	amdgpu_fence_emit_polling(ring, &seq);
	amdgpu_ring_commit(ring);
	spin_unlock_irqrestore(&kiq->ring_lock, flags);

	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);

	/* Don't wait any longer in the GPU reset case, because doing so may
	 * block the gpu_recover() routine forever; e.g. this virt_kiq_rreg
	 * is triggered in TTM, and ttm_bo_lock_delayed_workqueue() will
	 * never return if we keep waiting in virt_kiq_rreg, which causes
	 * gpu_recover() to hang there.
	 *
	 * Also don't wait any longer when called from IRQ context.
	 */
	if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
		goto failed_kiq_read;

	might_sleep();
	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
	}

	if (cnt > MAX_KIQ_REG_TRY)
		goto failed_kiq_read;

	return adev->wb.wb[kiq->reg_val_offs];

failed_kiq_read:
	pr_err("failed to read reg:%x\n", reg);
	return ~0;
}

void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
{
	signed long r, cnt = 0;
	unsigned long flags;
	uint32_t seq;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	struct amdgpu_ring *ring = &kiq->ring;

	BUG_ON(!ring->funcs->emit_wreg);

	spin_lock_irqsave(&kiq->ring_lock, flags);
	amdgpu_ring_alloc(ring, 32);
	amdgpu_ring_emit_wreg(ring, reg, v);
	amdgpu_fence_emit_polling(ring, &seq);
	amdgpu_ring_commit(ring);
	spin_unlock_irqrestore(&kiq->ring_lock, flags);

	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);

	/* Don't wait any longer in the GPU reset case, because doing so may
	 * block the gpu_recover() routine forever; e.g. this virt_kiq_rreg
	 * is triggered in TTM, and ttm_bo_lock_delayed_workqueue() will
	 * never return if we keep waiting in virt_kiq_rreg, which causes
	 * gpu_recover() to hang there.
	 *
	 * Also don't wait any longer when called from IRQ context.
	 */
	if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
		goto failed_kiq_write;

	might_sleep();
	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
	}

	if (cnt > MAX_KIQ_REG_TRY)
		goto failed_kiq_write;

	return;

failed_kiq_write:
	pr_err("failed to write reg:%x\n", reg);
}