17ec681f3Smrg/* 27ec681f3Smrg * Copyright © 2014-2017 Broadcom 37ec681f3Smrg * 47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a 57ec681f3Smrg * copy of this software and associated documentation files (the "Software"), 67ec681f3Smrg * to deal in the Software without restriction, including without limitation 77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the 97ec681f3Smrg * Software is furnished to do so, subject to the following conditions: 107ec681f3Smrg * 117ec681f3Smrg * The above copyright notice and this permission notice (including the next 127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the 137ec681f3Smrg * Software. 147ec681f3Smrg * 157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 207ec681f3Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 217ec681f3Smrg * IN THE SOFTWARE. 227ec681f3Smrg */ 237ec681f3Smrg 247ec681f3Smrg/* @file 257ec681f3Smrg * 267ec681f3Smrg * v3d driver code interacting v3dv3 simulator/fpga library. 277ec681f3Smrg * 287ec681f3Smrg * This is compiled per V3D version we support, since the register definitions 297ec681f3Smrg * conflict. 307ec681f3Smrg */ 317ec681f3Smrg 327ec681f3Smrg#include <errno.h> 337ec681f3Smrg#include <stdbool.h> 347ec681f3Smrg#include <stdio.h> 357ec681f3Smrg#include <string.h> 367ec681f3Smrg#include <sys/mman.h> 377ec681f3Smrg#include "util/macros.h" 387ec681f3Smrg#include "util/u_mm.h" 397ec681f3Smrg#include "broadcom/common/v3d_macros.h" 407ec681f3Smrg#include "v3d_simulator_wrapper.h" 417ec681f3Smrg#include "drm-shim/drm_shim.h" 427ec681f3Smrg#include "drm-uapi/v3d_drm.h" 437ec681f3Smrg#include "v3d.h" 447ec681f3Smrg 457ec681f3Smrg#define HW_REGISTER_RO(x) (x) 467ec681f3Smrg#define HW_REGISTER_RW(x) (x) 477ec681f3Smrg#if V3D_VERSION >= 41 487ec681f3Smrg#include "libs/core/v3d/registers/4.1.34.0/v3d.h" 497ec681f3Smrg#else 507ec681f3Smrg#include "libs/core/v3d/registers/3.3.0.0/v3d.h" 517ec681f3Smrg#endif 527ec681f3Smrg 537ec681f3Smrg#define V3D_WRITE(reg, val) v3d_hw_write_reg(v3d.hw, reg, val) 547ec681f3Smrg#define V3D_READ(reg) v3d_hw_read_reg(v3d.hw, reg) 557ec681f3Smrg 567ec681f3Smrgstatic void 577ec681f3Smrgv3d_flush_l3() 587ec681f3Smrg{ 597ec681f3Smrg if (!v3d_hw_has_gca(v3d.hw)) 607ec681f3Smrg return; 617ec681f3Smrg 627ec681f3Smrg#if V3D_VERSION < 40 637ec681f3Smrg uint32_t gca_ctrl = V3D_READ(V3D_GCA_CACHE_CTRL); 647ec681f3Smrg 657ec681f3Smrg V3D_WRITE(V3D_GCA_CACHE_CTRL, gca_ctrl | V3D_GCA_CACHE_CTRL_FLUSH_SET); 667ec681f3Smrg V3D_WRITE(V3D_GCA_CACHE_CTRL, gca_ctrl & ~V3D_GCA_CACHE_CTRL_FLUSH_SET); 677ec681f3Smrg#endif 687ec681f3Smrg} 697ec681f3Smrg 707ec681f3Smrg/* Invalidates the L2 cache. This is a read-only cache. */ 717ec681f3Smrgstatic void 727ec681f3Smrgv3d_flush_l2(void) 737ec681f3Smrg{ 747ec681f3Smrg V3D_WRITE(V3D_CTL_0_L2CACTL, 757ec681f3Smrg V3D_CTL_0_L2CACTL_L2CCLR_SET | 767ec681f3Smrg V3D_CTL_0_L2CACTL_L2CENA_SET); 777ec681f3Smrg} 787ec681f3Smrg 797ec681f3Smrg/* Invalidates texture L2 cachelines */ 807ec681f3Smrgstatic void 817ec681f3Smrgv3d_flush_l2t(void) 827ec681f3Smrg{ 837ec681f3Smrg V3D_WRITE(V3D_CTL_0_L2TFLSTA, 0); 847ec681f3Smrg V3D_WRITE(V3D_CTL_0_L2TFLEND, ~0); 857ec681f3Smrg V3D_WRITE(V3D_CTL_0_L2TCACTL, 867ec681f3Smrg V3D_CTL_0_L2TCACTL_L2TFLS_SET | 877ec681f3Smrg (0 << V3D_CTL_0_L2TCACTL_L2TFLM_LSB)); 887ec681f3Smrg} 897ec681f3Smrg 907ec681f3Smrg/* Invalidates the slice caches. These are read-only caches. */ 917ec681f3Smrgstatic void 927ec681f3Smrgv3d_flush_slices(void) 937ec681f3Smrg{ 947ec681f3Smrg V3D_WRITE(V3D_CTL_0_SLCACTL, ~0); 957ec681f3Smrg} 967ec681f3Smrg 977ec681f3Smrgstatic void 987ec681f3Smrgv3d_flush_caches(void) 997ec681f3Smrg{ 1007ec681f3Smrg v3d_flush_l3(); 1017ec681f3Smrg v3d_flush_l2(); 1027ec681f3Smrg v3d_flush_l2t(); 1037ec681f3Smrg v3d_flush_slices(); 1047ec681f3Smrg} 1057ec681f3Smrg 1067ec681f3Smrgstatic void 1077ec681f3Smrgv3d_simulator_copy_in_handle(struct shim_fd *shim_fd, int handle) 1087ec681f3Smrg{ 1097ec681f3Smrg if (!handle) 1107ec681f3Smrg return; 1117ec681f3Smrg 1127ec681f3Smrg struct v3d_bo *bo = v3d_bo_lookup(shim_fd, handle); 1137ec681f3Smrg 1147ec681f3Smrg memcpy(bo->sim_vaddr, bo->gem_vaddr, bo->base.size); 1157ec681f3Smrg} 1167ec681f3Smrg 1177ec681f3Smrgstatic void 1187ec681f3Smrgv3d_simulator_copy_out_handle(struct shim_fd *shim_fd, int handle) 1197ec681f3Smrg{ 1207ec681f3Smrg if (!handle) 1217ec681f3Smrg return; 1227ec681f3Smrg 1237ec681f3Smrg struct v3d_bo *bo = v3d_bo_lookup(shim_fd, handle); 1247ec681f3Smrg 1257ec681f3Smrg memcpy(bo->gem_vaddr, bo->sim_vaddr, bo->base.size); 1267ec681f3Smrg} 1277ec681f3Smrg 1287ec681f3Smrgstatic int 1297ec681f3Smrgv3dX(v3d_ioctl_submit_cl)(int fd, unsigned long request, void *arg) 1307ec681f3Smrg{ 1317ec681f3Smrg struct shim_fd *shim_fd = drm_shim_fd_lookup(fd); 1327ec681f3Smrg struct drm_v3d_submit_cl *submit = arg; 1337ec681f3Smrg uint32_t *bo_handles = (uint32_t *)(uintptr_t)submit->bo_handles; 1347ec681f3Smrg 1357ec681f3Smrg for (int i = 0; i < submit->bo_handle_count; i++) 1367ec681f3Smrg v3d_simulator_copy_in_handle(shim_fd, bo_handles[i]); 1377ec681f3Smrg 1387ec681f3Smrg v3d_flush_caches(); 1397ec681f3Smrg 1407ec681f3Smrg if (submit->qma) { 1417ec681f3Smrg V3D_WRITE(V3D_CLE_0_CT0QMA, submit->qma); 1427ec681f3Smrg V3D_WRITE(V3D_CLE_0_CT0QMS, submit->qms); 1437ec681f3Smrg } 1447ec681f3Smrg#if V3D_VERSION >= 41 1457ec681f3Smrg if (submit->qts) { 1467ec681f3Smrg V3D_WRITE(V3D_CLE_0_CT0QTS, 1477ec681f3Smrg V3D_CLE_0_CT0QTS_CTQTSEN_SET | 1487ec681f3Smrg submit->qts); 1497ec681f3Smrg } 1507ec681f3Smrg#endif 1517ec681f3Smrg 1527ec681f3Smrg fprintf(stderr, "submit %x..%x!\n", submit->bcl_start, submit->bcl_end); 1537ec681f3Smrg 1547ec681f3Smrg V3D_WRITE(V3D_CLE_0_CT0QBA, submit->bcl_start); 1557ec681f3Smrg V3D_WRITE(V3D_CLE_0_CT0QEA, submit->bcl_end); 1567ec681f3Smrg 1577ec681f3Smrg /* Wait for bin to complete before firing render, as it seems the 1587ec681f3Smrg * simulator doesn't implement the semaphores. 1597ec681f3Smrg */ 1607ec681f3Smrg while (V3D_READ(V3D_CLE_0_CT0CA) != 1617ec681f3Smrg V3D_READ(V3D_CLE_0_CT0EA)) { 1627ec681f3Smrg v3d_hw_tick(v3d.hw); 1637ec681f3Smrg } 1647ec681f3Smrg 1657ec681f3Smrg fprintf(stderr, "submit %x..%x!\n", submit->rcl_start, submit->rcl_end); 1667ec681f3Smrg 1677ec681f3Smrg v3d_flush_caches(); 1687ec681f3Smrg 1697ec681f3Smrg V3D_WRITE(V3D_CLE_0_CT1QBA, submit->rcl_start); 1707ec681f3Smrg V3D_WRITE(V3D_CLE_0_CT1QEA, submit->rcl_end); 1717ec681f3Smrg 1727ec681f3Smrg while (V3D_READ(V3D_CLE_0_CT1CA) != 1737ec681f3Smrg V3D_READ(V3D_CLE_0_CT1EA)) { 1747ec681f3Smrg v3d_hw_tick(v3d.hw); 1757ec681f3Smrg } 1767ec681f3Smrg 1777ec681f3Smrg for (int i = 0; i < submit->bo_handle_count; i++) 1787ec681f3Smrg v3d_simulator_copy_out_handle(shim_fd, bo_handles[i]); 1797ec681f3Smrg 1807ec681f3Smrg return 0; 1817ec681f3Smrg} 1827ec681f3Smrg 1837ec681f3Smrgstatic int 1847ec681f3Smrgv3dX(v3d_ioctl_submit_tfu)(int fd, unsigned long request, void *arg) 1857ec681f3Smrg{ 1867ec681f3Smrg struct shim_fd *shim_fd = drm_shim_fd_lookup(fd); 1877ec681f3Smrg struct drm_v3d_submit_tfu *submit = arg; 1887ec681f3Smrg 1897ec681f3Smrg v3d_simulator_copy_in_handle(shim_fd, submit->bo_handles[0]); 1907ec681f3Smrg v3d_simulator_copy_in_handle(shim_fd, submit->bo_handles[1]); 1917ec681f3Smrg v3d_simulator_copy_in_handle(shim_fd, submit->bo_handles[2]); 1927ec681f3Smrg v3d_simulator_copy_in_handle(shim_fd, submit->bo_handles[3]); 1937ec681f3Smrg 1947ec681f3Smrg int last_vtct = V3D_READ(V3D_TFU_CS) & V3D_TFU_CS_CVTCT_SET; 1957ec681f3Smrg 1967ec681f3Smrg V3D_WRITE(V3D_TFU_IIA, submit->iia); 1977ec681f3Smrg V3D_WRITE(V3D_TFU_IIS, submit->iis); 1987ec681f3Smrg V3D_WRITE(V3D_TFU_ICA, submit->ica); 1997ec681f3Smrg V3D_WRITE(V3D_TFU_IUA, submit->iua); 2007ec681f3Smrg V3D_WRITE(V3D_TFU_IOA, submit->ioa); 2017ec681f3Smrg V3D_WRITE(V3D_TFU_IOS, submit->ios); 2027ec681f3Smrg V3D_WRITE(V3D_TFU_COEF0, submit->coef[0]); 2037ec681f3Smrg V3D_WRITE(V3D_TFU_COEF1, submit->coef[1]); 2047ec681f3Smrg V3D_WRITE(V3D_TFU_COEF2, submit->coef[2]); 2057ec681f3Smrg V3D_WRITE(V3D_TFU_COEF3, submit->coef[3]); 2067ec681f3Smrg 2077ec681f3Smrg V3D_WRITE(V3D_TFU_ICFG, submit->icfg); 2087ec681f3Smrg 2097ec681f3Smrg while ((V3D_READ(V3D_TFU_CS) & V3D_TFU_CS_CVTCT_SET) == last_vtct) { 2107ec681f3Smrg v3d_hw_tick(v3d.hw); 2117ec681f3Smrg } 2127ec681f3Smrg 2137ec681f3Smrg v3d_simulator_copy_out_handle(shim_fd, submit->bo_handles[0]); 2147ec681f3Smrg 2157ec681f3Smrg return 0; 2167ec681f3Smrg} 2177ec681f3Smrg 2187ec681f3Smrgstatic int 2197ec681f3Smrgv3dX(v3d_ioctl_create_bo)(int fd, unsigned long request, void *arg) 2207ec681f3Smrg{ 2217ec681f3Smrg struct shim_fd *shim_fd = drm_shim_fd_lookup(fd); 2227ec681f3Smrg struct drm_v3d_create_bo *create = arg; 2237ec681f3Smrg struct v3d_bo *bo = calloc(1, sizeof(*bo)); 2247ec681f3Smrg 2257ec681f3Smrg drm_shim_bo_init(&bo->base, create->size); 2267ec681f3Smrg bo->offset = util_vma_heap_alloc(&v3d.heap, create->size, 4096); 2277ec681f3Smrg if (bo->offset == 0) 2287ec681f3Smrg return -ENOMEM; 2297ec681f3Smrg 2307ec681f3Smrg bo->sim_vaddr = v3d.mem + bo->offset - v3d.mem_base; 2317ec681f3Smrg#if 0 2327ec681f3Smrg /* Place a mapping of the BO inside of the simulator's address space 2337ec681f3Smrg * for V3D memory. This lets us avoid copy in/out for simpenrose, but 2347ec681f3Smrg * I'm betting we'll need something else for FPGA. 2357ec681f3Smrg */ 2367ec681f3Smrg void *sim_addr = v3d.mem + bo->block->ofs; 2377ec681f3Smrg void *mmap_ret = mmap(sim_addr, create->size, PROT_READ | PROT_WRITE, 2387ec681f3Smrg MAP_SHARED | MAP_FIXED, bo->base.fd, 0); 2397ec681f3Smrg assert(mmap_ret == sim_addr); 2407ec681f3Smrg#else 2417ec681f3Smrg /* Make a simulator-private mapping of the shim GEM object. */ 2427ec681f3Smrg bo->gem_vaddr = mmap(NULL, bo->base.size, 2437ec681f3Smrg PROT_READ | PROT_WRITE, 2447ec681f3Smrg MAP_SHARED, 2457ec681f3Smrg bo->base.fd, 0); 2467ec681f3Smrg if (bo->gem_vaddr == MAP_FAILED) { 2477ec681f3Smrg fprintf(stderr, "v3d: mmap of shim bo failed\n"); 2487ec681f3Smrg abort(); 2497ec681f3Smrg } 2507ec681f3Smrg#endif 2517ec681f3Smrg 2527ec681f3Smrg create->offset = bo->offset; 2537ec681f3Smrg create->handle = drm_shim_bo_get_handle(shim_fd, &bo->base); 2547ec681f3Smrg 2557ec681f3Smrg drm_shim_bo_put(&bo->base); 2567ec681f3Smrg 2577ec681f3Smrg return 0; 2587ec681f3Smrg} 2597ec681f3Smrg 2607ec681f3Smrgstatic int 2617ec681f3Smrgv3dX(v3d_ioctl_get_param)(int fd, unsigned long request, void *arg) 2627ec681f3Smrg{ 2637ec681f3Smrg struct drm_v3d_get_param *gp = arg; 2647ec681f3Smrg static const uint32_t reg_map[] = { 2657ec681f3Smrg [DRM_V3D_PARAM_V3D_UIFCFG] = V3D_HUB_CTL_UIFCFG, 2667ec681f3Smrg [DRM_V3D_PARAM_V3D_HUB_IDENT1] = V3D_HUB_CTL_IDENT1, 2677ec681f3Smrg [DRM_V3D_PARAM_V3D_HUB_IDENT2] = V3D_HUB_CTL_IDENT2, 2687ec681f3Smrg [DRM_V3D_PARAM_V3D_HUB_IDENT3] = V3D_HUB_CTL_IDENT3, 2697ec681f3Smrg [DRM_V3D_PARAM_V3D_CORE0_IDENT0] = V3D_CTL_0_IDENT0, 2707ec681f3Smrg [DRM_V3D_PARAM_V3D_CORE0_IDENT1] = V3D_CTL_0_IDENT1, 2717ec681f3Smrg [DRM_V3D_PARAM_V3D_CORE0_IDENT2] = V3D_CTL_0_IDENT2, 2727ec681f3Smrg }; 2737ec681f3Smrg 2747ec681f3Smrg switch (gp->param) { 2757ec681f3Smrg case DRM_V3D_PARAM_SUPPORTS_TFU: 2767ec681f3Smrg gp->value = 1; 2777ec681f3Smrg return 0; 2787ec681f3Smrg } 2797ec681f3Smrg 2807ec681f3Smrg if (gp->param < ARRAY_SIZE(reg_map) && reg_map[gp->param]) { 2817ec681f3Smrg gp->value = V3D_READ(reg_map[gp->param]); 2827ec681f3Smrg return 0; 2837ec681f3Smrg } 2847ec681f3Smrg 2857ec681f3Smrg fprintf(stderr, "Unknown DRM_IOCTL_V3D_GET_PARAM %d\n", gp->param); 2867ec681f3Smrg return -1; 2877ec681f3Smrg} 2887ec681f3Smrg 2897ec681f3Smrgstatic ioctl_fn_t driver_ioctls[] = { 2907ec681f3Smrg [DRM_V3D_SUBMIT_CL] = v3dX(v3d_ioctl_submit_cl), 2917ec681f3Smrg [DRM_V3D_SUBMIT_TFU] = v3dX(v3d_ioctl_submit_tfu), 2927ec681f3Smrg [DRM_V3D_WAIT_BO] = v3d_ioctl_wait_bo, 2937ec681f3Smrg [DRM_V3D_CREATE_BO] = v3dX(v3d_ioctl_create_bo), 2947ec681f3Smrg [DRM_V3D_GET_PARAM] = v3dX(v3d_ioctl_get_param), 2957ec681f3Smrg [DRM_V3D_MMAP_BO] = v3d_ioctl_mmap_bo, 2967ec681f3Smrg [DRM_V3D_GET_BO_OFFSET] = v3d_ioctl_get_bo_offset, 2977ec681f3Smrg}; 2987ec681f3Smrg 2997ec681f3Smrgstatic void 3007ec681f3Smrgv3d_isr(uint32_t hub_status) 3017ec681f3Smrg{ 3027ec681f3Smrg /* Check the per-core bits */ 3037ec681f3Smrg if (hub_status & (1 << 0)) { 3047ec681f3Smrg uint32_t core_status = V3D_READ(V3D_CTL_0_INT_STS); 3057ec681f3Smrg 3067ec681f3Smrg if (core_status & V3D_CTL_0_INT_STS_INT_GMPV_SET) { 3077ec681f3Smrg fprintf(stderr, "GMP violation at 0x%08x\n", 3087ec681f3Smrg V3D_READ(V3D_GMP_0_VIO_ADDR)); 3097ec681f3Smrg abort(); 3107ec681f3Smrg } else { 3117ec681f3Smrg fprintf(stderr, 3127ec681f3Smrg "Unexpected ISR with core status 0x%08x\n", 3137ec681f3Smrg core_status); 3147ec681f3Smrg } 3157ec681f3Smrg abort(); 3167ec681f3Smrg } 3177ec681f3Smrg 3187ec681f3Smrg return; 3197ec681f3Smrg} 3207ec681f3Smrg 3217ec681f3Smrgstatic void 3227ec681f3Smrgv3dX(simulator_init_regs)(void) 3237ec681f3Smrg{ 3247ec681f3Smrg#if V3D_VERSION == 33 3257ec681f3Smrg /* Set OVRTMUOUT to match kernel behavior. 3267ec681f3Smrg * 3277ec681f3Smrg * This means that the texture sampler uniform configuration's tmu 3287ec681f3Smrg * output type field is used, instead of using the hardware default 3297ec681f3Smrg * behavior based on the texture type. If you want the default 3307ec681f3Smrg * behavior, you can still put "2" in the indirect texture state's 3317ec681f3Smrg * output_type field. 3327ec681f3Smrg */ 3337ec681f3Smrg V3D_WRITE(V3D_CTL_0_MISCCFG, V3D_CTL_1_MISCCFG_OVRTMUOUT_SET); 3347ec681f3Smrg#endif 3357ec681f3Smrg 3367ec681f3Smrg uint32_t core_interrupts = V3D_CTL_0_INT_STS_INT_GMPV_SET; 3377ec681f3Smrg V3D_WRITE(V3D_CTL_0_INT_MSK_SET, ~core_interrupts); 3387ec681f3Smrg V3D_WRITE(V3D_CTL_0_INT_MSK_CLR, core_interrupts); 3397ec681f3Smrg 3407ec681f3Smrg v3d_hw_set_isr(v3d.hw, v3d_isr); 3417ec681f3Smrg} 3427ec681f3Smrg 3437ec681f3Smrgstatic void 3447ec681f3Smrgv3d_bo_free(struct shim_bo *shim_bo) 3457ec681f3Smrg{ 3467ec681f3Smrg struct v3d_bo *bo = v3d_bo(shim_bo); 3477ec681f3Smrg 3487ec681f3Smrg if (bo->gem_vaddr) 3497ec681f3Smrg munmap(bo->gem_vaddr, shim_bo->size); 3507ec681f3Smrg 3517ec681f3Smrg util_vma_heap_free(&v3d.heap, bo->offset, bo->base.size); 3527ec681f3Smrg} 3537ec681f3Smrg 3547ec681f3Smrgvoid 3557ec681f3Smrgv3dX(drm_shim_driver_init)(void) 3567ec681f3Smrg{ 3577ec681f3Smrg shim_device.driver_ioctls = driver_ioctls; 3587ec681f3Smrg shim_device.driver_ioctl_count = ARRAY_SIZE(driver_ioctls); 3597ec681f3Smrg 3607ec681f3Smrg shim_device.driver_bo_free = v3d_bo_free; 3617ec681f3Smrg 3627ec681f3Smrg /* Allocate a gig of memory to play in. */ 3637ec681f3Smrg v3d_hw_alloc_mem(v3d.hw, 1024 * 1024 * 1024); 3647ec681f3Smrg v3d.mem_base = 3657ec681f3Smrg v3d_hw_get_mem(v3d.hw, &v3d.mem_size, 3667ec681f3Smrg &v3d.mem); 3677ec681f3Smrg util_vma_heap_init(&v3d.heap, 4096, v3d.mem_size - 4096); 3687ec681f3Smrg 3697ec681f3Smrg v3dX(simulator_init_regs)(); 3707ec681f3Smrg} 371