/*	$NetBSD: amdgpu_gart.c,v 1.3.2.2 2018/09/06 06:56:10 pgoyette Exp $	*/

/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: amdgpu_gart.c,v 1.3.2.2 2018/09/06 06:56:10 pgoyette Exp $");

#include <drm/drmP.h>
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"

/*
 * GART
 * The GART (Graphics Aperture Remapping Table) is an aperture
 * in the GPU's address space.  System pages can be mapped into
 * the aperture and look like contiguous pages from the GPU's
 * perspective.  A page table maps the pages in the aperture
 * to the actual backing pages in system memory.
 *
 * Radeon GPUs support both an internal GART, as described above,
 * and AGP.  AGP works similarly, but the GART table is configured
 * and maintained by the northbridge rather than the driver.
 * Radeon hw has a separate AGP aperture that is programmed to
 * point to the AGP aperture provided by the northbridge and the
 * requests are passed through to the northbridge aperture.
 * Both AGP and internal GART can be used at the same time, however
 * that is not currently supported by the driver.
 *
 * This file handles the common internal GART management.
 */
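
/*
 * Worked sizing example (illustrative only; the numbers are assumptions,
 * not taken from this file): with a 256 MiB GTT aperture and a 4 KiB
 * AMDGPU_GPU_PAGE_SIZE, the table holds 256 MiB / 4 KiB = 65536 entries.
 * Assuming the 8-byte PTEs written by amdgpu_gart_set_pte_pde(), that is
 * a 512 KiB page table:
 *
 *	num_gpu_pages = (256 << 20) / AMDGPU_GPU_PAGE_SIZE;	// 65536
 *	table_size    = num_gpu_pages * 8;			// 512 KiB
 */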

/*
 * Common GART table functions.
 */
/**
 * amdgpu_gart_table_ram_alloc - allocate system ram for gart page table
 *
 * @adev: amdgpu_device pointer
 *
 * Allocate system memory for GART page table
 * (r1xx-r3xx, non-pcie r4xx, rs400).  These asics require the
 * gart table to be in system memory.
 * Returns 0 for success, -ENOMEM for failure.
 */
int amdgpu_gart_table_ram_alloc(struct amdgpu_device *adev)
{
#ifdef __NetBSD__
	int rsegs;
	int error;

	error = bus_dmamem_alloc(adev->ddev->dmat, adev->gart.table_size,
	    PAGE_SIZE, 0, &adev->gart.ag_table_seg, 1, &rsegs, BUS_DMA_WAITOK);
	if (error)
		goto fail0;
	KASSERT(rsegs == 1);
	error = bus_dmamap_create(adev->ddev->dmat, adev->gart.table_size, 1,
	    adev->gart.table_size, 0, BUS_DMA_WAITOK,
	    &adev->gart.ag_table_map);
	if (error)
		goto fail1;
	error = bus_dmamem_map(adev->ddev->dmat, &adev->gart.ag_table_seg, 1,
	    adev->gart.table_size, &adev->gart.ptr,
	    BUS_DMA_WAITOK|BUS_DMA_NOCACHE);
	if (error)
		goto fail2;
	error = bus_dmamap_load(adev->ddev->dmat, adev->gart.ag_table_map,
	    adev->gart.ptr, adev->gart.table_size, NULL, BUS_DMA_WAITOK);
	if (error)
		goto fail3;

	/* Success!  */
	adev->gart.table_addr = adev->gart.ag_table_map->dm_segs[0].ds_addr;
	return 0;

fail4: __unused
	bus_dmamap_unload(adev->ddev->dmat, adev->gart.ag_table_map);
fail3:	bus_dmamem_unmap(adev->ddev->dmat, adev->gart.ptr,
	    adev->gart.table_size);
fail2:	bus_dmamap_destroy(adev->ddev->dmat, adev->gart.ag_table_map);
fail1:	bus_dmamem_free(adev->ddev->dmat, &adev->gart.ag_table_seg, 1);
fail0:	KASSERT(error);
	/* XXX errno NetBSD->Linux */
	return -error;
#else  /* __NetBSD__ */
	void *ptr;

	ptr = pci_alloc_consistent(adev->pdev, adev->gart.table_size,
				   &adev->gart.table_addr);
	if (ptr == NULL) {
		return -ENOMEM;
	}
#ifdef CONFIG_X86
	if (0) {
		set_memory_uc((unsigned long)ptr,
			      adev->gart.table_size >> PAGE_SHIFT);
	}
#endif
	adev->gart.ptr = ptr;
	memset((void *)adev->gart.ptr, 0, adev->gart.table_size);
	return 0;
#endif	/* __NetBSD__ */
}
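
/*
 * The NetBSD path above follows the standard bus_dma(9) setup sequence,
 * and teardown (amdgpu_gart_table_ram_free() below) mirrors it in
 * reverse.  A minimal sketch of the idiom, with error handling elided:
 *
 *	bus_dmamem_alloc(tag, size, align, 0, &seg, 1, &rsegs, BUS_DMA_WAITOK);
 *	bus_dmamap_create(tag, size, 1, size, 0, BUS_DMA_WAITOK, &map);
 *	bus_dmamem_map(tag, &seg, 1, size, &kva, BUS_DMA_WAITOK);
 *	bus_dmamap_load(tag, map, kva, size, NULL, BUS_DMA_WAITOK);
 *	...
 *	bus_dmamap_unload(tag, map);
 *	bus_dmamem_unmap(tag, kva, size);
 *	bus_dmamap_destroy(tag, map);
 *	bus_dmamem_free(tag, &seg, 1);
 */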

/**
 * amdgpu_gart_table_ram_free - free system ram for gart page table
 *
 * @adev: amdgpu_device pointer
 *
 * Free system memory for GART page table
 * (r1xx-r3xx, non-pcie r4xx, rs400).  These asics require the
 * gart table to be in system memory.
 */
void amdgpu_gart_table_ram_free(struct amdgpu_device *adev)
{
	if (adev->gart.ptr == NULL) {
		return;
	}
#ifdef __NetBSD__
	bus_dmamap_unload(adev->ddev->dmat, adev->gart.ag_table_map);
	bus_dmamem_unmap(adev->ddev->dmat, adev->gart.ptr,
	    adev->gart.table_size);
	bus_dmamap_destroy(adev->ddev->dmat, adev->gart.ag_table_map);
	bus_dmamem_free(adev->ddev->dmat, &adev->gart.ag_table_seg, 1);
#else
#ifdef CONFIG_X86
	if (0) {
		set_memory_wb((unsigned long)adev->gart.ptr,
			      adev->gart.table_size >> PAGE_SHIFT);
	}
#endif
	pci_free_consistent(adev->pdev, adev->gart.table_size,
			    (void *)adev->gart.ptr,
			    adev->gart.table_addr);
	adev->gart.ptr = NULL;
	adev->gart.table_addr = 0;
#endif
}

/**
 * amdgpu_gart_table_vram_alloc - allocate vram for gart page table
 *
 * @adev: amdgpu_device pointer
 *
 * Allocate video memory for GART page table
 * (pcie r4xx, r5xx+).  These asics require the
 * gart table to be in video memory.
 * Returns 0 for success, error for failure.
 */
int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev)
{
	int r;

	if (adev->gart.robj == NULL) {
		r = amdgpu_bo_create(adev, adev->gart.table_size,
				     PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM,
				     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
				     NULL, NULL, &adev->gart.robj);
		if (r) {
			return r;
		}
	}
	return 0;
}

/**
 * amdgpu_gart_table_vram_pin - pin gart page table in vram
 *
 * @adev: amdgpu_device pointer
 *
 * Pin the GART page table in vram so it will not be moved
 * by the memory manager (pcie r4xx, r5xx+).  These asics require the
 * gart table to be in video memory.
 * Returns 0 for success, error for failure.
 */
int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev)
{
	uint64_t gpu_addr;
	int r;

	r = amdgpu_bo_reserve(adev->gart.robj, false);
	if (unlikely(r != 0))
		return r;
	r = amdgpu_bo_pin(adev->gart.robj,
				AMDGPU_GEM_DOMAIN_VRAM, &gpu_addr);
	if (r) {
		amdgpu_bo_unreserve(adev->gart.robj);
		return r;
	}
	r = amdgpu_bo_kmap(adev->gart.robj, &adev->gart.ptr);
	if (r)
		amdgpu_bo_unpin(adev->gart.robj);
	amdgpu_bo_unreserve(adev->gart.robj);
	adev->gart.table_addr = gpu_addr;
	return r;
}

/**
 * amdgpu_gart_table_vram_unpin - unpin gart page table in vram
 *
 * @adev: amdgpu_device pointer
 *
 * Unpin the GART page table in vram (pcie r4xx, r5xx+).
 * These asics require the gart table to be in video memory.
 */
void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev)
{
	int r;

	if (adev->gart.robj == NULL) {
		return;
	}
	r = amdgpu_bo_reserve(adev->gart.robj, false);
	if (likely(r == 0)) {
		amdgpu_bo_kunmap(adev->gart.robj);
		amdgpu_bo_unpin(adev->gart.robj);
		amdgpu_bo_unreserve(adev->gart.robj);
		adev->gart.ptr = NULL;
	}
}

/**
 * amdgpu_gart_table_vram_free - free gart page table vram
 *
 * @adev: amdgpu_device pointer
 *
 * Free the video memory used for the GART page table
 * (pcie r4xx, r5xx+).  These asics require the gart table to
 * be in video memory.
 */
void amdgpu_gart_table_vram_free(struct amdgpu_device *adev)
{
	if (adev->gart.robj == NULL) {
		return;
	}
	amdgpu_bo_unref(&adev->gart.robj);
}

#ifdef __NetBSD__
/*
 * amdgpu_gart_pre_update: sync the given range of GART entries before
 * the CPU rewrites them, closing out the device's read of the table.
 */
static void
amdgpu_gart_pre_update(struct amdgpu_device *adev, unsigned gpu_pgstart,
    unsigned gpu_npages)
{

	if (adev->gart.ag_table_map != NULL) {
		const unsigned entsize =
		    adev->gart.table_size / adev->gart.num_gpu_pages;

		bus_dmamap_sync(adev->ddev->dmat, adev->gart.ag_table_map,
		    gpu_pgstart*entsize, gpu_npages*entsize,
		    BUS_DMASYNC_POSTWRITE);
	}
}

/*
 * amdgpu_gart_post_update: flush the CPU's updates to the given range
 * of GART entries and invalidate the GPU's TLB.
 */
static void
amdgpu_gart_post_update(struct amdgpu_device *adev, unsigned gpu_pgstart,
    unsigned gpu_npages)
{

	if (adev->gart.ag_table_map != NULL) {
		const unsigned entsize =
		    adev->gart.table_size / adev->gart.num_gpu_pages;

		bus_dmamap_sync(adev->ddev->dmat, adev->gart.ag_table_map,
		    gpu_pgstart*entsize, gpu_npages*entsize,
		    BUS_DMASYNC_PREWRITE);
	}
	membar_sync();		/* XXX overkill */
	amdgpu_gart_flush_gpu_tlb(adev, 0);
}
#endif
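
/*
 * Illustrative use of the helpers above (this is the pattern the bind
 * and unbind paths below follow): bracket CPU updates of the table with
 * POSTWRITE/PREWRITE syncs, then flush the GPU TLB so stale
 * translations are dropped.
 *
 *	amdgpu_gart_pre_update(adev, first_gpu_page, n_gpu_pages);
 *	for (i = 0; i < n_gpu_pages; i++)
 *		amdgpu_gart_set_pte_pde(adev, adev->gart.ptr,
 *		    first_gpu_page + i, addr + i*AMDGPU_GPU_PAGE_SIZE, flags);
 *	amdgpu_gart_post_update(adev, first_gpu_page, n_gpu_pages);
 */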

/*
 * Common gart functions.
 */
#ifdef __NetBSD__
/**
 * amdgpu_gart_unbind - unbind pages from the gart page table
 *
 * @adev: amdgpu_device pointer
 * @gpu_start: offset into the GPU's gart aperture, in bytes
 * @npages: number of CPU pages to unbind
 *
 * Unbinds the requested pages from the gart page table and
 * replaces them with the dummy page (all asics).
 */
void
amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t gpu_start,
    unsigned npages)
{
	const unsigned gpu_per_cpu = (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE);
	const unsigned gpu_npages = (npages * gpu_per_cpu);
	const uint64_t gpu_pgstart = (gpu_start / AMDGPU_GPU_PAGE_SIZE);
	const uint64_t pgstart = (gpu_pgstart / gpu_per_cpu);
	uint64_t pgno, gpu_pgno;
	uint32_t flags = AMDGPU_PTE_SYSTEM;

	KASSERT(pgstart == (gpu_start / PAGE_SIZE));
	KASSERT(npages <= adev->gart.num_cpu_pages);
	KASSERT(gpu_npages <= adev->gart.num_gpu_pages);

	if (!adev->gart.ready) {
		WARN(1, "trying to unbind memory from uninitialized GART !\n");
		return;
	}

	amdgpu_gart_pre_update(adev, gpu_pgstart, gpu_npages);
	for (pgno = 0; pgno < npages; pgno++) {
		if (adev->gart.pages[pgstart + pgno] == NULL)
			continue;
		adev->gart.pages[pgstart + pgno] = NULL;
		adev->gart.pages_addr[pgstart + pgno] = adev->dummy_page.addr;

		if (adev->gart.ptr == NULL)
			continue;
		for (gpu_pgno = 0; gpu_pgno < gpu_per_cpu; gpu_pgno++) {
			amdgpu_gart_set_pte_pde(adev, adev->gart.ptr,
			    gpu_pgstart + gpu_per_cpu*pgno + gpu_pgno,
			    adev->dummy_page.addr, flags);
		}
	}
	amdgpu_gart_post_update(adev, gpu_pgstart, gpu_npages);
}
#else  /* __NetBSD__ */
/**
 * amdgpu_gart_unbind - unbind pages from the gart page table
 *
 * @adev: amdgpu_device pointer
 * @offset: offset into the GPU's gart aperture
 * @pages: number of pages to unbind
 *
 * Unbinds the requested pages from the gart page table and
 * replaces them with the dummy page (all asics).
 */
void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
			int pages)
{
	unsigned t;
	unsigned p;
	int i, j;
	u64 page_base;
	uint32_t flags = AMDGPU_PTE_SYSTEM;

	if (!adev->gart.ready) {
		WARN(1, "trying to unbind memory from uninitialized GART !\n");
		return;
	}

	t = offset / AMDGPU_GPU_PAGE_SIZE;
	p = t / (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE);
	for (i = 0; i < pages; i++, p++) {
		if (adev->gart.pages[p]) {
			adev->gart.pages[p] = NULL;
			adev->gart.pages_addr[p] = adev->dummy_page.addr;
			page_base = adev->gart.pages_addr[p];
			if (!adev->gart.ptr)
				continue;

			for (j = 0; j < (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); j++, t++) {
				amdgpu_gart_set_pte_pde(adev, adev->gart.ptr,
							t, page_base, flags);
				page_base += AMDGPU_GPU_PAGE_SIZE;
			}
		}
	}
	mb();
	amdgpu_gart_flush_gpu_tlb(adev, 0);
}
#endif	/* __NetBSD__ */

#ifdef __NetBSD__
/**
 * amdgpu_gart_bind - bind pages into the gart page table
 *
 * @adev: amdgpu_device pointer
 * @gpu_start: offset into the GPU's gart aperture, in bytes
 * @npages: number of CPU pages to bind
 * @pages: pages to bind
 * @dmamap: DMA map carrying one PAGE_SIZE segment per page
 * @flags: PTE flags for the mapping
 *
 * Binds the requested pages to the gart page table (all asics).
 * Returns 0 for success, -EINVAL for failure.
 */
int
amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t gpu_start,
    unsigned npages, struct page **pages, bus_dmamap_t dmamap, uint32_t flags)
{
	const unsigned gpu_per_cpu = (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE);
	const unsigned gpu_npages = (npages * gpu_per_cpu);
	const uint64_t gpu_pgstart = (gpu_start / AMDGPU_GPU_PAGE_SIZE);
	const uint64_t pgstart = (gpu_pgstart / gpu_per_cpu);
	uint64_t pgno, gpu_pgno;

	KASSERT(pgstart == (gpu_start / PAGE_SIZE));
	KASSERT(npages == dmamap->dm_nsegs);
	KASSERT(npages <= adev->gart.num_cpu_pages);
	KASSERT(gpu_npages <= adev->gart.num_gpu_pages);

	if (!adev->gart.ready) {
		WARN(1, "trying to bind memory to uninitialized GART !\n");
		return -EINVAL;
	}

	amdgpu_gart_pre_update(adev, gpu_pgstart, gpu_npages);
	for (pgno = 0; pgno < npages; pgno++) {
		const bus_addr_t addr = dmamap->dm_segs[pgno].ds_addr;

		KASSERT(dmamap->dm_segs[pgno].ds_len == PAGE_SIZE);
		adev->gart.pages[pgstart + pgno] = pages[pgno];
		adev->gart.pages_addr[pgstart + pgno] = addr;

		if (adev->gart.ptr == NULL)
			continue;

		for (gpu_pgno = 0; gpu_pgno < gpu_per_cpu; gpu_pgno++) {
			amdgpu_gart_set_pte_pde(adev, adev->gart.ptr,
			    gpu_pgstart + gpu_per_cpu*pgno + gpu_pgno,
			    addr + gpu_pgno*AMDGPU_GPU_PAGE_SIZE, flags);
		}
	}
	amdgpu_gart_post_update(adev, gpu_pgstart, gpu_npages);

	return 0;
}
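
/*
 * Note the contract asserted above: the caller's dmamap must carry
 * exactly one PAGE_SIZE segment per CPU page (e.g., a map created with
 * maxsegsz and boundary both set to PAGE_SIZE and loaded with
 * page-aligned memory), so that segment pgno backs CPU page
 * pgstart + pgno.
 */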
#else  /* __NetBSD__ */
/**
 * amdgpu_gart_bind - bind pages into the gart page table
 *
 * @adev: amdgpu_device pointer
 * @offset: offset into the GPU's gart aperture
 * @pages: number of pages to bind
 * @pagelist: pages to bind
 * @dma_addr: DMA addresses of pages
 * @flags: PTE flags for the mapping
 *
 * Binds the requested pages to the gart page table
 * (all asics).
 * Returns 0 for success, -EINVAL for failure.
 */
int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
		     int pages, struct page **pagelist, dma_addr_t *dma_addr,
		     uint32_t flags)
{
	unsigned t;
	unsigned p;
	uint64_t page_base;
	int i, j;

	if (!adev->gart.ready) {
		WARN(1, "trying to bind memory to uninitialized GART !\n");
		return -EINVAL;
	}

	t = offset / AMDGPU_GPU_PAGE_SIZE;
	p = t / (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE);

	for (i = 0; i < pages; i++, p++) {
		adev->gart.pages_addr[p] = dma_addr[i];
		adev->gart.pages[p] = pagelist[i];
		if (adev->gart.ptr) {
			page_base = adev->gart.pages_addr[p];
			for (j = 0; j < (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); j++, t++) {
				amdgpu_gart_set_pte_pde(adev, adev->gart.ptr, t, page_base, flags);
				page_base += AMDGPU_GPU_PAGE_SIZE;
			}
		}
	}
	mb();
	amdgpu_gart_flush_gpu_tlb(adev, 0);
	return 0;
}
#endif
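
/*
 * Index arithmetic in the bind/unbind paths, worked through
 * (illustrative): with PAGE_SIZE and AMDGPU_GPU_PAGE_SIZE both 4 KiB,
 * offset 0x10000 gives GART entry t = 16 and CPU page p = 16, and each
 * CPU page covers exactly one GART entry.  On a hypothetical 16 KiB
 * PAGE_SIZE kernel the same offset gives t = 16, p = 4, and each CPU
 * page fills four consecutive GART entries, which is what the inner
 * j loop does.
 */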

/**
 * amdgpu_gart_init - init the driver info for managing the gart
 *
 * @adev: amdgpu_device pointer
 *
 * Allocate the dummy page and init the gart driver info (all asics).
 * Returns 0 for success, error for failure.
 */
int amdgpu_gart_init(struct amdgpu_device *adev)
{
	int r, i;

	if (adev->gart.pages) {
		return 0;
	}
	/* We need PAGE_SIZE >= AMDGPU_GPU_PAGE_SIZE */
	if (PAGE_SIZE < AMDGPU_GPU_PAGE_SIZE) {
		DRM_ERROR("Page size is smaller than GPU page size!\n");
		return -EINVAL;
	}
	r = amdgpu_dummy_page_init(adev);
	if (r)
		return r;
	/* Compute table size */
	adev->gart.num_cpu_pages = adev->mc.gtt_size / PAGE_SIZE;
	adev->gart.num_gpu_pages = adev->mc.gtt_size / AMDGPU_GPU_PAGE_SIZE;
	DRM_INFO("GART: num cpu pages %u, num gpu pages %u\n",
		 adev->gart.num_cpu_pages, adev->gart.num_gpu_pages);
	/* Allocate pages table */
	adev->gart.pages = vzalloc(sizeof(void *) * adev->gart.num_cpu_pages);
	if (adev->gart.pages == NULL) {
		amdgpu_gart_fini(adev);
		return -ENOMEM;
	}
	adev->gart.pages_addr = vzalloc(sizeof(dma_addr_t) *
					adev->gart.num_cpu_pages);
	if (adev->gart.pages_addr == NULL) {
		amdgpu_gart_fini(adev);
		return -ENOMEM;
	}
	/* set GART entry to point to the dummy page by default */
	for (i = 0; i < adev->gart.num_cpu_pages; i++) {
		adev->gart.pages_addr[i] = adev->dummy_page.addr;
	}
	return 0;
}
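
/*
 * Scale check (illustrative, assuming an LP64 kernel with 4 KiB
 * PAGE_SIZE): a 1 GiB mc.gtt_size gives num_cpu_pages = num_gpu_pages
 * = 262144, so the pages and pages_addr arrays above cost 2 MiB each,
 * and every GART entry initially translates to the single dummy page.
 */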

/**
 * amdgpu_gart_fini - tear down the driver info for managing the gart
 *
 * @adev: amdgpu_device pointer
 *
 * Tear down the gart driver info and free the dummy page (all asics).
 */
void amdgpu_gart_fini(struct amdgpu_device *adev)
{
	if (adev->gart.pages && adev->gart.pages_addr && adev->gart.ready) {
		/* unbind pages */
		amdgpu_gart_unbind(adev, 0, adev->gart.num_cpu_pages);
	}
	adev->gart.ready = false;
	vfree(adev->gart.pages);
	vfree(adev->gart.pages_addr);
	adev->gart.pages = NULL;
	adev->gart.pages_addr = NULL;

	amdgpu_dummy_page_fini(adev);
}