/*	$NetBSD: radeon_gart.c,v 1.13 2021/12/18 23:45:43 riastradh Exp $	*/

/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: radeon_gart.c,v 1.13 2021/12/18 23:45:43 riastradh Exp $");

#include <linux/pci.h>
#include <linux/vmalloc.h>

#include <drm/radeon_drm.h>
#ifdef CONFIG_X86
#include <asm/set_memory.h>
#endif
#include "radeon.h"

/*
 * GART
 * The GART (Graphics Aperture Remapping Table) is an aperture
 * in the GPU's address space. System pages can be mapped into
 * the aperture and look like contiguous pages from the GPU's
 * perspective. A page table maps the pages in the aperture
 * to the actual backing pages in system memory.
 *
 * Radeon GPUs support both an internal GART, as described above,
 * and AGP. AGP works similarly, but the GART table is configured
 * and maintained by the northbridge rather than the driver.
 * Radeon hw has a separate AGP aperture that is programmed to
 * point to the AGP aperture provided by the northbridge and the
 * requests are passed through to the northbridge aperture.
 * Both AGP and internal GART can be used at the same time, however
 * that is not currently supported by the driver.
 *
 * This file handles the common internal GART management.
 */
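
/*
 * Illustrative translation (not asic-specific): a GPU access at GART
 * aperture offset N is served by table entry N / RADEON_GPU_PAGE_SIZE,
 * which holds the bus address of the backing system page, so scattered
 * system pages appear contiguous to the GPU.
 */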

/*
 * Common GART table functions.
 */
/**
 * radeon_gart_table_ram_alloc - allocate system ram for gart page table
 *
 * @rdev: radeon_device pointer
 *
 * Allocate system memory for GART page table
 * (r1xx-r3xx, non-pcie r4xx, rs400). These asics require the
 * gart table to be in system memory.
 * Returns 0 for success, -ENOMEM for failure.
 */
int radeon_gart_table_ram_alloc(struct radeon_device *rdev)
{
#ifdef __NetBSD__
	int rsegs;
	int error;

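	/*
	 * A GART table in system memory is set up in four bus_dma steps:
	 * allocate DMA-safe memory (bus_dmamem_alloc), create a map for
	 * it (bus_dmamap_create), map it into kernel virtual address
	 * space for CPU access (bus_dmamem_map), and load the map to
	 * obtain the bus address (bus_dmamap_load). Each failure
	 * unwinds the steps already taken, in reverse order.
	 */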
	error = bus_dmamem_alloc(rdev->ddev->dmat, rdev->gart.table_size,
	    PAGE_SIZE, 0, &rdev->gart.rg_table_seg, 1, &rsegs, BUS_DMA_WAITOK);
	if (error)
		goto fail0;
	KASSERT(rsegs == 1);
	error = bus_dmamap_create(rdev->ddev->dmat, rdev->gart.table_size, 1,
	    rdev->gart.table_size, 0, BUS_DMA_WAITOK,
	    &rdev->gart.rg_table_map);
	if (error)
		goto fail1;
	error = bus_dmamem_map(rdev->ddev->dmat, &rdev->gart.rg_table_seg, 1,
	    rdev->gart.table_size, &rdev->gart.ptr,
	    BUS_DMA_WAITOK|BUS_DMA_NOCACHE);
	if (error)
		goto fail2;
	error = bus_dmamap_load(rdev->ddev->dmat, rdev->gart.rg_table_map,
	    rdev->gart.ptr, rdev->gart.table_size, NULL, BUS_DMA_WAITOK);
	if (error)
		goto fail3;

	memset((void *)rdev->gart.ptr, 0, rdev->gart.table_size);

	/* Success! */
	rdev->gart.table_addr = rdev->gart.rg_table_map->dm_segs[0].ds_addr;
	return 0;

fail4: __unused
	bus_dmamap_unload(rdev->ddev->dmat, rdev->gart.rg_table_map);
fail3:	bus_dmamem_unmap(rdev->ddev->dmat, rdev->gart.ptr,
	    rdev->gart.table_size);
fail2:	bus_dmamap_destroy(rdev->ddev->dmat, rdev->gart.rg_table_map);
fail1:	bus_dmamem_free(rdev->ddev->dmat, &rdev->gart.rg_table_seg, 1);
fail0:	KASSERT(error);
	/* XXX errno NetBSD->Linux */
	return -error;
#else
	void *ptr;

	ptr = pci_alloc_consistent(rdev->pdev, rdev->gart.table_size,
				   &rdev->gart.table_addr);
	if (ptr == NULL) {
		return -ENOMEM;
	}
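	/*
	 * On the integrated chipsets handled below, the northbridge
	 * fetches GART entries straight from system memory, so the
	 * table is mapped uncached to keep CPU writes immediately
	 * visible to the device.
	 */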
#ifdef CONFIG_X86
	if (rdev->family == CHIP_RS400 || rdev->family == CHIP_RS480 ||
	    rdev->family == CHIP_RS690 || rdev->family == CHIP_RS740) {
		set_memory_uc((unsigned long)ptr,
			      rdev->gart.table_size >> PAGE_SHIFT);
	}
#endif
	rdev->gart.ptr = ptr;
	memset((void *)rdev->gart.ptr, 0, rdev->gart.table_size);
	return 0;
#endif
}

/**
 * radeon_gart_table_ram_free - free system ram for gart page table
 *
 * @rdev: radeon_device pointer
 *
 * Free system memory for GART page table
 * (r1xx-r3xx, non-pcie r4xx, rs400). These asics require the
 * gart table to be in system memory.
 */
void radeon_gart_table_ram_free(struct radeon_device *rdev)
{
	if (rdev->gart.ptr == NULL) {
		return;
	}
#ifdef __NetBSD__
	bus_dmamap_unload(rdev->ddev->dmat, rdev->gart.rg_table_map);
	bus_dmamem_unmap(rdev->ddev->dmat, rdev->gart.ptr,
	    rdev->gart.table_size);
	bus_dmamap_destroy(rdev->ddev->dmat, rdev->gart.rg_table_map);
	bus_dmamem_free(rdev->ddev->dmat, &rdev->gart.rg_table_seg, 1);
#else
#ifdef CONFIG_X86
	if (rdev->family == CHIP_RS400 || rdev->family == CHIP_RS480 ||
	    rdev->family == CHIP_RS690 || rdev->family == CHIP_RS740) {
		set_memory_wb((unsigned long)rdev->gart.ptr,
			      rdev->gart.table_size >> PAGE_SHIFT);
	}
#endif
	pci_free_consistent(rdev->pdev, rdev->gart.table_size,
			    (void *)rdev->gart.ptr,
			    rdev->gart.table_addr);
#endif
	/* Clear the stale pointer on both paths so a repeat call is a no-op. */
	rdev->gart.ptr = NULL;
	rdev->gart.table_addr = 0;
}

/**
 * radeon_gart_table_vram_alloc - allocate vram for gart page table
 *
 * @rdev: radeon_device pointer
 *
 * Allocate video memory for GART page table
 * (pcie r4xx, r5xx+). These asics require the
 * gart table to be in video memory.
 * Returns 0 for success, error for failure.
 */
int radeon_gart_table_vram_alloc(struct radeon_device *rdev)
{
	int r;

	if (rdev->gart.robj == NULL) {
		r = radeon_bo_create(rdev, rdev->gart.table_size,
				     PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM,
				     0, NULL, NULL, &rdev->gart.robj);
		if (r) {
			return r;
		}
	}
	return 0;
}

/**
 * radeon_gart_table_vram_pin - pin gart page table in vram
 *
 * @rdev: radeon_device pointer
 *
 * Pin the GART page table in vram so it will not be moved
 * by the memory manager (pcie r4xx, r5xx+). These asics require the
 * gart table to be in video memory.
 * Returns 0 for success, error for failure.
 */
int radeon_gart_table_vram_pin(struct radeon_device *rdev)
{
	uint64_t gpu_addr;
	int r;

	r = radeon_bo_reserve(rdev->gart.robj, false);
	if (unlikely(r != 0))
		return r;
	r = radeon_bo_pin(rdev->gart.robj,
			  RADEON_GEM_DOMAIN_VRAM, &gpu_addr);
	if (r) {
		radeon_bo_unreserve(rdev->gart.robj);
		return r;
	}
	r = radeon_bo_kmap(rdev->gart.robj, &rdev->gart.ptr);
	if (r)
		radeon_bo_unpin(rdev->gart.robj);
	radeon_bo_unreserve(rdev->gart.robj);
	rdev->gart.table_addr = gpu_addr;

	if (!r) {
		int i;

		/* We might have dropped some GART table updates while it wasn't
		 * mapped, restore all entries
		 */
		for (i = 0; i < rdev->gart.num_gpu_pages; i++)
			radeon_gart_set_page(rdev, i, rdev->gart.pages_entry[i]);
		mb();
		radeon_gart_tlb_flush(rdev);
	}

	return r;
}

/**
 * radeon_gart_table_vram_unpin - unpin gart page table in vram
 *
 * @rdev: radeon_device pointer
 *
 * Unpin the GART page table in vram (pcie r4xx, r5xx+).
 * These asics require the gart table to be in video memory.
 */
void radeon_gart_table_vram_unpin(struct radeon_device *rdev)
{
	int r;

	if (rdev->gart.robj == NULL) {
		return;
	}
	r = radeon_bo_reserve(rdev->gart.robj, false);
	if (likely(r == 0)) {
		radeon_bo_kunmap(rdev->gart.robj);
		radeon_bo_unpin(rdev->gart.robj);
		radeon_bo_unreserve(rdev->gart.robj);
		rdev->gart.ptr = NULL;
	}
}

/**
 * radeon_gart_table_vram_free - free gart page table vram
 *
 * @rdev: radeon_device pointer
 *
 * Free the video memory used for the GART page table
 * (pcie r4xx, r5xx+). These asics require the gart table to
 * be in video memory.
 */
void radeon_gart_table_vram_free(struct radeon_device *rdev)
{
	if (rdev->gart.robj == NULL) {
		return;
	}
	radeon_bo_unref(&rdev->gart.robj);
}

#ifdef __NetBSD__
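/**
 * radeon_gart_pre_update - prepare for CPU updates of the gart table
 *
 * @rdev: radeon_device pointer
 * @gpu_pgstart: first GPU page about to be rewritten
 * @gpu_npages: number of GPU pages about to be rewritten
 *
 * NetBSD: if the table lives in system memory, sync the DMA map over
 * the affected entries (BUS_DMASYNC_POSTWRITE) before the CPU
 * rewrites them.
 */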
static void
radeon_gart_pre_update(struct radeon_device *rdev, unsigned gpu_pgstart,
    unsigned gpu_npages)
{

	if (rdev->gart.rg_table_map != NULL) {
		const unsigned entsize =
		    rdev->gart.table_size / rdev->gart.num_gpu_pages;

		bus_dmamap_sync(rdev->ddev->dmat, rdev->gart.rg_table_map,
		    gpu_pgstart*entsize, gpu_npages*entsize,
		    BUS_DMASYNC_POSTWRITE);
	}
}

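/**
 * radeon_gart_post_update - publish CPU updates of the gart table
 *
 * @rdev: radeon_device pointer
 * @gpu_pgstart: first GPU page that was rewritten
 * @gpu_npages: number of GPU pages that were rewritten
 *
 * NetBSD: if the table lives in system memory, flush the CPU's writes
 * to the affected entries out to memory (BUS_DMASYNC_PREWRITE); if the
 * table is mapped, also flush the GPU's TLB so it sees the new entries.
 */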
static void
radeon_gart_post_update(struct radeon_device *rdev, unsigned gpu_pgstart,
    unsigned gpu_npages)
{

	if (rdev->gart.rg_table_map != NULL) {
		const unsigned entsize =
		    rdev->gart.table_size / rdev->gart.num_gpu_pages;

		bus_dmamap_sync(rdev->ddev->dmat, rdev->gart.rg_table_map,
		    gpu_pgstart*entsize, gpu_npages*entsize,
		    BUS_DMASYNC_PREWRITE);
	}
	if (rdev->gart.ptr != NULL) {
		mb();
		radeon_gart_tlb_flush(rdev);
	}
}
#endif

/*
 * Common gart functions.
 */
#ifdef __NetBSD__
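/**
 * radeon_gart_unbind - unbind pages from the gart page table
 *
 * @rdev: radeon_device pointer
 * @gpu_start: byte offset into the GPU's gart aperture
 * @npages: number of CPU pages to unbind
 *
 * NetBSD counterpart of the Linux version in the #else branch below:
 * unbinds the requested pages from the gart page table and replaces
 * them with the dummy page, bracketing the update with the bus_dma
 * syncs above.
 */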
void
radeon_gart_unbind(struct radeon_device *rdev, unsigned gpu_start,
    unsigned npages)
{
	const unsigned gpu_per_cpu = (PAGE_SIZE / RADEON_GPU_PAGE_SIZE);
	const unsigned gpu_npages = (npages * gpu_per_cpu);
	const unsigned gpu_pgstart = (gpu_start / RADEON_GPU_PAGE_SIZE);
	const unsigned pgstart = (gpu_pgstart / gpu_per_cpu);
	unsigned pgno, gpu_pgno;

	KASSERT(pgstart == (gpu_start / PAGE_SIZE));
	KASSERT(npages <= rdev->gart.num_cpu_pages);
	KASSERT(gpu_npages <= rdev->gart.num_gpu_pages);

	if (!rdev->gart.ready) {
		WARN(1, "trying to unbind memory from uninitialized GART !\n");
		return;
	}

	radeon_gart_pre_update(rdev, gpu_pgstart, gpu_npages);
	for (pgno = 0; pgno < npages; pgno++) {
		if (rdev->gart.pages[pgstart + pgno] == NULL)
			continue;
		rdev->gart.pages[pgstart + pgno] = NULL;
		for (gpu_pgno = 0; gpu_pgno < gpu_per_cpu; gpu_pgno++) {
			const unsigned t = gpu_pgstart + gpu_per_cpu*pgno +
			    gpu_pgno;
			rdev->gart.pages_entry[t] = rdev->dummy_page.entry;
			if (rdev->gart.ptr == NULL)
				continue;
			radeon_gart_set_page(rdev, t, rdev->dummy_page.entry);
		}
	}
	radeon_gart_post_update(rdev, gpu_pgstart, gpu_npages);
}
#else
/**
 * radeon_gart_unbind - unbind pages from the gart page table
 *
 * @rdev: radeon_device pointer
 * @offset: offset into the GPU's gart aperture
 * @pages: number of pages to unbind
 *
 * Unbinds the requested pages from the gart page table and
 * replaces them with the dummy page (all asics).
 */
void radeon_gart_unbind(struct radeon_device *rdev, unsigned offset,
			int pages)
{
	unsigned t;
	unsigned p;
	int i, j;

	if (!rdev->gart.ready) {
		WARN(1, "trying to unbind memory from uninitialized GART !\n");
		return;
	}
	t = offset / RADEON_GPU_PAGE_SIZE;
	p = t / (PAGE_SIZE / RADEON_GPU_PAGE_SIZE);
	for (i = 0; i < pages; i++, p++) {
		if (rdev->gart.pages[p]) {
			rdev->gart.pages[p] = NULL;
			for (j = 0; j < (PAGE_SIZE / RADEON_GPU_PAGE_SIZE); j++, t++) {
				rdev->gart.pages_entry[t] = rdev->dummy_page.entry;
				if (rdev->gart.ptr) {
					radeon_gart_set_page(rdev, t,
							     rdev->dummy_page.entry);
				}
			}
		}
	}
	if (rdev->gart.ptr) {
		mb();
		radeon_gart_tlb_flush(rdev);
	}
}
#endif

#ifdef __NetBSD__
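/**
 * radeon_gart_bind - bind pages into the gart page table
 *
 * @rdev: radeon_device pointer
 * @gpu_start: byte offset into the GPU's gart aperture
 * @npages: number of CPU pages to bind
 * @pages: pages to bind
 * @dmamap: loaded DMA map with one PAGE_SIZE segment per CPU page
 * @flags: RADEON_GART_PAGE_* flags
 *
 * NetBSD counterpart of the Linux version in the #else branch below:
 * binds the requested pages to the gart page table, bracketing the
 * update with the bus_dma syncs above.
 * Returns 0 for success, -EINVAL for failure.
 */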
int
radeon_gart_bind(struct radeon_device *rdev, unsigned gpu_start,
    unsigned npages, struct page **pages, bus_dmamap_t dmamap, uint32_t flags)
{
	const unsigned gpu_per_cpu = (PAGE_SIZE / RADEON_GPU_PAGE_SIZE);
	const unsigned gpu_npages = (npages * gpu_per_cpu);
	const unsigned gpu_pgstart = (gpu_start / RADEON_GPU_PAGE_SIZE);
	const unsigned pgstart = (gpu_pgstart / gpu_per_cpu);
	unsigned pgno, gpu_pgno;
	uint64_t page_entry;

	KASSERT(pgstart == (gpu_start / PAGE_SIZE));
	KASSERT(npages == dmamap->dm_nsegs);
	KASSERT(npages <= rdev->gart.num_cpu_pages);
	KASSERT(gpu_npages <= rdev->gart.num_gpu_pages);

	if (!rdev->gart.ready) {
		WARN(1, "trying to bind memory to uninitialized GART !\n");
		return -EINVAL;
	}

	radeon_gart_pre_update(rdev, gpu_pgstart, gpu_npages);
	for (pgno = 0; pgno < npages; pgno++) {
		const bus_addr_t addr = dmamap->dm_segs[pgno].ds_addr;

		KASSERT(dmamap->dm_segs[pgno].ds_len == PAGE_SIZE);
		rdev->gart.pages[pgstart + pgno] = pages[pgno];
		for (gpu_pgno = 0; gpu_pgno < gpu_per_cpu; gpu_pgno++) {
			const unsigned i = gpu_pgstart + gpu_per_cpu*pgno +
			    gpu_pgno;
			page_entry = radeon_gart_get_page_entry(
			    addr + gpu_pgno*RADEON_GPU_PAGE_SIZE, flags);
			rdev->gart.pages_entry[i] = page_entry;
			if (rdev->gart.ptr == NULL)
				continue;
			radeon_gart_set_page(rdev, i, page_entry);
		}
	}
	radeon_gart_post_update(rdev, gpu_pgstart, gpu_npages);

	return 0;
}
#else
/**
 * radeon_gart_bind - bind pages into the gart page table
 *
 * @rdev: radeon_device pointer
 * @offset: offset into the GPU's gart aperture
 * @pages: number of pages to bind
 * @pagelist: pages to bind
 * @dma_addr: DMA addresses of pages
 * @flags: RADEON_GART_PAGE_* flags
 *
 * Binds the requested pages to the gart page table
 * (all asics).
 * Returns 0 for success, -EINVAL for failure.
 */
int radeon_gart_bind(struct radeon_device *rdev, unsigned offset,
		     int pages, struct page **pagelist, dma_addr_t *dma_addr,
		     uint32_t flags)
{
	unsigned t;
	unsigned p;
	uint64_t page_base, page_entry;
	int i, j;

	if (!rdev->gart.ready) {
		WARN(1, "trying to bind memory to uninitialized GART !\n");
		return -EINVAL;
	}
	t = offset / RADEON_GPU_PAGE_SIZE;
	p = t / (PAGE_SIZE / RADEON_GPU_PAGE_SIZE);

	for (i = 0; i < pages; i++, p++) {
		rdev->gart.pages[p] = pagelist[i];
		page_base = dma_addr[i];
		for (j = 0; j < (PAGE_SIZE / RADEON_GPU_PAGE_SIZE); j++, t++) {
			page_entry = radeon_gart_get_page_entry(page_base, flags);
			rdev->gart.pages_entry[t] = page_entry;
			if (rdev->gart.ptr) {
				radeon_gart_set_page(rdev, t, page_entry);
			}
			page_base += RADEON_GPU_PAGE_SIZE;
		}
	}
	if (rdev->gart.ptr) {
		mb();
		radeon_gart_tlb_flush(rdev);
	}
	return 0;
}
#endif

/**
 * radeon_gart_init - init the driver info for managing the gart
 *
 * @rdev: radeon_device pointer
 *
 * Allocate the dummy page and init the gart driver info (all asics).
 * Returns 0 for success, error for failure.
 */
int radeon_gart_init(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.pages) {
		return 0;
	}
	/* We need PAGE_SIZE >= RADEON_GPU_PAGE_SIZE */
	if (PAGE_SIZE < RADEON_GPU_PAGE_SIZE) {
		DRM_ERROR("Page size is smaller than GPU page size!\n");
		return -EINVAL;
	}
	r = radeon_dummy_page_init(rdev);
	if (r)
		return r;
	/* Compute table size */
	rdev->gart.num_cpu_pages = rdev->mc.gtt_size / PAGE_SIZE;
	rdev->gart.num_gpu_pages = rdev->mc.gtt_size / RADEON_GPU_PAGE_SIZE;
	DRM_INFO("GART: num cpu pages %u, num gpu pages %u\n",
		 rdev->gart.num_cpu_pages, rdev->gart.num_gpu_pages);
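	/*
	 * Illustrative numbers: a 512 MiB GTT with 4 KiB system and GPU
	 * pages gives 131072 pages of each kind; the two counts differ
	 * only when PAGE_SIZE exceeds RADEON_GPU_PAGE_SIZE, in which
	 * case each CPU page spans several GART entries.
	 */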
	/* Allocate pages table */
	rdev->gart.pages = vzalloc(array_size(sizeof(void *),
				   rdev->gart.num_cpu_pages));
	if (rdev->gart.pages == NULL) {
		radeon_gart_fini(rdev);
		return -ENOMEM;
	}
	rdev->gart.pages_entry = vmalloc(array_size(sizeof(uint64_t),
						    rdev->gart.num_gpu_pages));
	if (rdev->gart.pages_entry == NULL) {
		radeon_gart_fini(rdev);
		return -ENOMEM;
	}
	/* set GART entry to point to the dummy page by default */
	for (i = 0; i < rdev->gart.num_gpu_pages; i++)
		rdev->gart.pages_entry[i] = rdev->dummy_page.entry;
	return 0;
}

/**
 * radeon_gart_fini - tear down the driver info for managing the gart
 *
 * @rdev: radeon_device pointer
 *
 * Tear down the gart driver info and free the dummy page (all asics).
 */
void radeon_gart_fini(struct radeon_device *rdev)
{
	if (rdev->gart.ready) {
		/* unbind pages */
		radeon_gart_unbind(rdev, 0, rdev->gart.num_cpu_pages);
	}
	rdev->gart.ready = false;
	vfree(rdev->gart.pages);
	vfree(rdev->gart.pages_entry);
	rdev->gart.pages = NULL;
	rdev->gart.pages_entry = NULL;

	radeon_dummy_page_fini(rdev);
}