/* $NetBSD: amdgpu_gmc.c,v 1.1.1.1 2021/12/18 20:11:09 riastradh Exp $ */

/*
 * Copyright 2018 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: amdgpu_gmc.c,v 1.1.1.1 2021/12/18 20:11:09 riastradh Exp $");

#include <linux/io-64-nonatomic-lo-hi.h>

#include "amdgpu.h"
#include "amdgpu_ras.h"
#include "amdgpu_xgmi.h"

/**
 * amdgpu_gmc_get_pde_for_bo - get the PDE for a BO
 *
 * @bo: the BO to get the PDE for
 * @level: the level in the PD hierarchy
 * @addr: resulting addr
 * @flags: resulting flags
 *
 * Get the address and flags to be used for a PDE (Page Directory Entry).
 */
void amdgpu_gmc_get_pde_for_bo(struct amdgpu_bo *bo, int level,
                               uint64_t *addr, uint64_t *flags)
{
        struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
        struct ttm_dma_tt *ttm;

        switch (bo->tbo.mem.mem_type) {
        case TTM_PL_TT:
                ttm = container_of(bo->tbo.ttm, struct ttm_dma_tt, ttm);
                *addr = ttm->dma_address[0];
                break;
        case TTM_PL_VRAM:
                *addr = amdgpu_bo_gpu_offset(bo);
                break;
        default:
                *addr = 0;
                break;
        }
        *flags = amdgpu_ttm_tt_pde_flags(bo->tbo.ttm, &bo->tbo.mem);
        amdgpu_gmc_get_vm_pde(adev, level, addr, flags);
}

/**
 * amdgpu_gmc_pd_addr - return the address of the root directory
 *
 * @bo: the page directory BO
 *
 * Returns:
 * The address of the root page directory, including the required PDE flags
 * on Vega10 and later ASICs.
 */
uint64_t amdgpu_gmc_pd_addr(struct amdgpu_bo *bo)
{
        struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
        uint64_t pd_addr;

        /* TODO: move that into ASIC specific code */
        if (adev->asic_type >= CHIP_VEGA10) {
                uint64_t flags = AMDGPU_PTE_VALID;

                amdgpu_gmc_get_pde_for_bo(bo, -1, &pd_addr, &flags);
                pd_addr |= flags;
        } else {
                pd_addr = amdgpu_bo_gpu_offset(bo);
        }
        return pd_addr;
}

/**
 * amdgpu_gmc_set_pte_pde - update the page tables using CPU
 *
 * @adev: amdgpu_device pointer
 * @cpu_pt_addr: cpu address of the page table
 * @gpu_page_idx: entry in the page table to update
 * @addr: dst addr to write into pte/pde
 * @flags: access flags
 *
 * Update the page tables using CPU.
 *
 * Returns:
 * 0 on success.
 */
int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr,
                           uint32_t gpu_page_idx, uint64_t addr,
                           uint64_t flags)
{
        void __iomem *ptr = (void *)cpu_pt_addr;
        uint64_t value;

        /*
         * The following is for PTE only. GART does not have PDEs.
         */
        value = addr & 0x0000FFFFFFFFF000ULL;
        value |= flags;
        writeq(value, ptr + (gpu_page_idx * 8));
        return 0;
}
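
/*
 * Worked example (hypothetical values): for addr = 0x0000123456789000 and
 * flags = AMDGPU_PTE_VALID, the 64-bit entry written at offset
 * gpu_page_idx * 8 is (addr & 0x0000FFFFFFFFF000ULL) | flags, i.e.
 * 0x0000123456789000 | AMDGPU_PTE_VALID.
 */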

/**
 * amdgpu_gmc_agp_addr - return the address in the AGP address space
 *
 * @bo: TTM BO which needs the address, must be in GTT domain
 *
 * Tries to figure out how to access the BO through the AGP aperture. Returns
 * AMDGPU_BO_INVALID_OFFSET if that is not possible.
 */
uint64_t amdgpu_gmc_agp_addr(struct ttm_buffer_object *bo)
{
        struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
        struct ttm_dma_tt *ttm;

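        /* Only single page BOs that are not CPU cached can use the AGP aperture. */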
        if (bo->num_pages != 1 || bo->ttm->caching_state == tt_cached)
                return AMDGPU_BO_INVALID_OFFSET;

        ttm = container_of(bo->ttm, struct ttm_dma_tt, ttm);
        if (ttm->dma_address[0] + PAGE_SIZE >= adev->gmc.agp_size)
                return AMDGPU_BO_INVALID_OFFSET;

        return adev->gmc.agp_start + ttm->dma_address[0];
}

/**
 * amdgpu_gmc_vram_location - try to find VRAM location
 *
 * @adev: amdgpu device structure holding all necessary information
 * @mc: memory controller structure holding memory information
 * @base: base address at which to put VRAM
 *
 * Function will try to place VRAM at the base address provided
 * as parameter.
 */
void amdgpu_gmc_vram_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc,
                              u64 base)
{
        uint64_t limit = (uint64_t)amdgpu_vram_limit << 20;

        mc->vram_start = base;
        mc->vram_end = mc->vram_start + mc->mc_vram_size - 1;
        if (limit && limit < mc->real_vram_size)
                mc->real_vram_size = limit;

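        /*
         * The FB aperture only covers local VRAM when the GPU is not part
         * of an XGMI hive; in the XGMI case fb_start/fb_end are set up
         * elsewhere.
         */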
        if (mc->xgmi.num_physical_nodes == 0) {
                mc->fb_start = mc->vram_start;
                mc->fb_end = mc->vram_end;
        }
        dev_info(adev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n",
                 mc->mc_vram_size >> 20, mc->vram_start,
                 mc->vram_end, mc->real_vram_size >> 20);
}

/**
 * amdgpu_gmc_gart_location - try to find GART location
 *
 * @adev: amdgpu device structure holding all necessary information
 * @mc: memory controller structure holding memory information
 *
 * Function will try to place the GART before or after VRAM.
 *
 * If the GART size is bigger than the space left, we adjust the GART size.
 * Thus this function will never fail.
 */
void amdgpu_gmc_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
{
        const uint64_t four_gb = 0x100000000ULL;
        u64 size_af, size_bf;
        /* To avoid the hole, limit the max mc address to AMDGPU_GMC_HOLE_START */
        u64 max_mc_address = min(adev->gmc.mc_mask, AMDGPU_GMC_HOLE_START - 1);

        mc->gart_size += adev->pm.smu_prv_buffer_size;

        /* VCE doesn't like it when BOs cross a 4GB segment, so align
         * the GART base on a 4GB boundary as well.
         */
        size_bf = mc->fb_start;
        size_af = max_mc_address + 1 - ALIGN(mc->fb_end + 1, four_gb);

        if (mc->gart_size > max(size_bf, size_af)) {
                dev_warn(adev->dev, "limiting GART\n");
                mc->gart_size = max(size_bf, size_af);
        }

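        /*
         * Place the GART in the hole before the FB when it fits there and
         * that hole is the smaller of the two, or when it does not fit
         * after the FB at all; otherwise place it at the top of the usable
         * address space.
         */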
        if ((size_bf >= mc->gart_size && size_bf < size_af) ||
            (size_af < mc->gart_size))
                mc->gart_start = 0;
        else
                mc->gart_start = max_mc_address - mc->gart_size + 1;

        mc->gart_start &= ~(four_gb - 1);
        mc->gart_end = mc->gart_start + mc->gart_size - 1;
        dev_info(adev->dev, "GART: %lluM 0x%016llX - 0x%016llX\n",
                 mc->gart_size >> 20, mc->gart_start, mc->gart_end);
}

/**
 * amdgpu_gmc_agp_location - try to find AGP location
 * @adev: amdgpu device structure holding all necessary information
 * @mc: memory controller structure holding memory information
 *
 * Function will try to find a place for the AGP BAR in the MC address
 * space.
 *
 * The AGP BAR will be assigned the largest available hole in the address
 * space. Should be called after VRAM and GART locations are set up.
 */
void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
{
        const uint64_t sixteen_gb = 1ULL << 34;
        const uint64_t sixteen_gb_mask = ~(sixteen_gb - 1);
        u64 size_af, size_bf;

        if (amdgpu_sriov_vf(adev)) {
                mc->agp_start = 0xffffffffffff;
                mc->agp_end = 0x0;
                mc->agp_size = 0;

                return;
        }

        if (mc->fb_start > mc->gart_start) {
                size_bf = (mc->fb_start & sixteen_gb_mask) -
                          ALIGN(mc->gart_end + 1, sixteen_gb);
                size_af = mc->mc_mask + 1 - ALIGN(mc->fb_end + 1, sixteen_gb);
        } else {
                size_bf = mc->fb_start & sixteen_gb_mask;
                size_af = (mc->gart_start & sixteen_gb_mask) -
                          ALIGN(mc->fb_end + 1, sixteen_gb);
        }

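        /* Use the larger of the two holes for the AGP aperture. */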
        if (size_bf > size_af) {
                mc->agp_start = (mc->fb_start - size_bf) & sixteen_gb_mask;
                mc->agp_size = size_bf;
        } else {
                mc->agp_start = ALIGN(mc->fb_end + 1, sixteen_gb);
                mc->agp_size = size_af;
        }

        mc->agp_end = mc->agp_start + mc->agp_size - 1;
        dev_info(adev->dev, "AGP: %lluM 0x%016llX - 0x%016llX\n",
                 mc->agp_size >> 20, mc->agp_start, mc->agp_end);
}

/**
 * amdgpu_gmc_filter_faults - filter VM faults
 *
 * @adev: amdgpu device structure
 * @addr: address of the VM fault
 * @pasid: PASID of the process causing the fault
 * @timestamp: timestamp of the fault
 *
 * Returns:
 * True if the fault was filtered and should not be processed further.
 * False if the fault is a new one and needs to be handled.
 */
bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr,
                              uint16_t pasid, uint64_t timestamp)
{
        struct amdgpu_gmc *gmc = &adev->gmc;

        uint64_t stamp, key = addr << 4 | pasid;
        struct amdgpu_gmc_fault *fault;
        uint32_t hash;

        /* If we don't have space left in the ring buffer return immediately */
        stamp = max(timestamp, AMDGPU_GMC_FAULT_TIMEOUT + 1) -
                AMDGPU_GMC_FAULT_TIMEOUT;
        if (gmc->fault_ring[gmc->last_fault].timestamp >= stamp)
                return true;

        /* Try to find the fault in the hash */
        hash = hash_64(key, AMDGPU_GMC_FAULT_HASH_ORDER);
        fault = &gmc->fault_ring[gmc->fault_hash[hash].idx];
        while (fault->timestamp >= stamp) {
                uint64_t tmp;

                if (fault->key == key)
                        return true;

                tmp = fault->timestamp;
                fault = &gmc->fault_ring[fault->next];

                /* Check if the entry was reused */
                if (fault->timestamp >= tmp)
                        break;
        }

        /* Add the fault to the ring */
        fault = &gmc->fault_ring[gmc->last_fault];
        fault->key = key;
        fault->timestamp = timestamp;

        /* And update the hash */
        fault->next = gmc->fault_hash[hash].idx;
        gmc->fault_hash[hash].idx = gmc->last_fault++;
        return false;
}

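/**
 * amdgpu_gmc_ras_late_init - GMC RAS late init
 *
 * @adev: amdgpu_device pointer
 *
 * Run the RAS late init callbacks of the UMC and MMHUB blocks, if any,
 * followed by the XGMI RAS late init.
 *
 * Returns:
 * 0 on success, negative error code on failure.
 */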
int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
{
        int r;

        if (adev->umc.funcs && adev->umc.funcs->ras_late_init) {
                r = adev->umc.funcs->ras_late_init(adev);
                if (r)
                        return r;
        }

        if (adev->mmhub.funcs && adev->mmhub.funcs->ras_late_init) {
                r = adev->mmhub.funcs->ras_late_init(adev);
                if (r)
                        return r;
        }

        return amdgpu_xgmi_ras_late_init(adev);
}

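/**
 * amdgpu_gmc_ras_fini - GMC RAS teardown
 *
 * @adev: amdgpu_device pointer
 *
 * Tear down the RAS state of the UMC, MMHUB and XGMI blocks.
 */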
void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
{
        amdgpu_umc_ras_fini(adev);
        amdgpu_mmhub_ras_fini(adev);
        amdgpu_xgmi_ras_fini(adev);
}

/*
 * The latest engine allocation on gfx9/10 is:
 * Engine 2, 3: firmware
 * Engine 0, 1, 4~16: amdgpu ring,
 * subject to change when ring number changes
 * Engine 17: Gart flushes
 */
#define GFXHUB_FREE_VM_INV_ENGS_BITMAP		0x1FFF3
#define MMHUB_FREE_VM_INV_ENGS_BITMAP		0x1FFF3

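/**
 * amdgpu_gmc_allocate_vm_inv_eng - assign VM invalidation engines to rings
 *
 * @adev: amdgpu_device pointer
 *
 * Give each ring a VM invalidation engine on its VM hub, taken from the
 * free-engine bitmaps above.
 *
 * Returns:
 * 0 on success, -EINVAL if a hub has no invalidation engine left.
 */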
int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)
{
        struct amdgpu_ring *ring;
        unsigned vm_inv_engs[AMDGPU_MAX_VMHUBS] =
                {GFXHUB_FREE_VM_INV_ENGS_BITMAP, MMHUB_FREE_VM_INV_ENGS_BITMAP,
                 GFXHUB_FREE_VM_INV_ENGS_BITMAP};
        unsigned i;
        unsigned vmhub, inv_eng;

        for (i = 0; i < adev->num_rings; ++i) {
                ring = adev->rings[i];
                vmhub = ring->funcs->vmhub;

                inv_eng = ffs(vm_inv_engs[vmhub]);
                if (!inv_eng) {
                        dev_err(adev->dev, "no VM inv eng for ring %s\n",
                                ring->name);
                        return -EINVAL;
                }

                ring->vm_inv_eng = inv_eng - 1;
                vm_inv_engs[vmhub] &= ~(1 << ring->vm_inv_eng);

                dev_info(adev->dev, "ring %s uses VM inv eng %u on hub %u\n",
                         ring->name, ring->vm_inv_eng, ring->funcs->vmhub);
        }

        return 0;
}