/*
 * Copyright © 2014-2017 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/**
 * @file v3d_simulator_hw.c
 *
 * Implements the actual HW interaction between the GL driver's VC5 simulator
 * interface and the simulator library.
 *
 * The register headers for different V3D versions have conflicting defines,
 * so all register interactions live in this file, which is compiled once per
 * V3D version we support.
 */

#ifdef USE_V3D_SIMULATOR

#include "v3d_screen.h"
#include "v3d_context.h"
#include "v3d_simulator_wrapper.h"

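/* The Broadcom register headers wrap each register address in an access-type
 * annotation; the simulator only needs the raw address, so define these as
 * identity macros before including them.
 */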
#define HW_REGISTER_RO(x) (x)
#define HW_REGISTER_RW(x) (x)
#if V3D_VERSION >= 41
#include "libs/core/v3d/registers/4.1.34.0/v3d.h"
#else
#include "libs/core/v3d/registers/3.3.0.0/v3d.h"
#endif

#define V3D_WRITE(reg, val) v3d_hw_write_reg(v3d, reg, val)
#define V3D_READ(reg) v3d_hw_read_reg(v3d, reg)

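/* Invalidates the L3 cache, which lives behind the GCA.  4.x hardware has no
 * GCA, so this is a no-op there.
 */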
static void
v3d_invalidate_l3(struct v3d_hw *v3d)
{
        if (!v3d_hw_has_gca(v3d))
                return;

#if V3D_VERSION < 40
        uint32_t gca_ctrl = V3D_READ(V3D_GCA_CACHE_CTRL);

        V3D_WRITE(V3D_GCA_CACHE_CTRL, gca_ctrl | V3D_GCA_CACHE_CTRL_FLUSH_SET);
        V3D_WRITE(V3D_GCA_CACHE_CTRL, gca_ctrl & ~V3D_GCA_CACHE_CTRL_FLUSH_SET);
#endif
}

/* Invalidates the L2C cache.  This is a read-only cache for uniforms and
 * instructions.
 */
static void
v3d_invalidate_l2c(struct v3d_hw *v3d)
{
        if (V3D_VERSION >= 33)
                return;

        V3D_WRITE(V3D_CTL_0_L2CACTL,
                  V3D_CTL_0_L2CACTL_L2CCLR_SET |
                  V3D_CTL_0_L2CACTL_L2CENA_SET);
}

/* Invalidates texture L2 cachelines */
static void
v3d_invalidate_l2t(struct v3d_hw *v3d)
{
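        /* Set the flush range to cover all of memory, then kick off a flush
         * (L2TFLM mode 0).
         */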
        V3D_WRITE(V3D_CTL_0_L2TFLSTA, 0);
        V3D_WRITE(V3D_CTL_0_L2TFLEND, ~0);
        V3D_WRITE(V3D_CTL_0_L2TCACTL,
                  V3D_CTL_0_L2TCACTL_L2TFLS_SET |
                  (0 << V3D_CTL_0_L2TCACTL_L2TFLM_LSB));
}

/* Invalidates the slice caches.  These are read-only caches. */
static void
v3d_invalidate_slices(struct v3d_hw *v3d)
{
        V3D_WRITE(V3D_CTL_0_SLCACTL, ~0);
}

static void
v3d_invalidate_caches(struct v3d_hw *v3d)
{
        v3d_invalidate_l3(v3d);
        v3d_invalidate_l2c(v3d);
        v3d_invalidate_l2t(v3d);
        v3d_invalidate_slices(v3d);
}

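/* GMP table offset of the last submitted job, stashed so the out-of-memory
 * interrupt handler can reload the GMP after handing the binner new memory.
 */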
static uint32_t g_gmp_ofs;
static void
v3d_reload_gmp(struct v3d_hw *v3d)
{
        /* Completely reset the GMP. */
        V3D_WRITE(V3D_GMP_0_CFG,
                  V3D_GMP_0_CFG_PROTENABLE_SET);
        V3D_WRITE(V3D_GMP_0_TABLE_ADDR, g_gmp_ofs);
        V3D_WRITE(V3D_GMP_0_CLEAR_LOAD, ~0);
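        /* Spin until the GMP has finished loading its protection table. */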
        while (V3D_READ(V3D_GMP_0_STATUS) &
               V3D_GMP_0_STATUS_CFG_BUSY_SET) {
                ;
        }
}

int
v3dX(simulator_submit_tfu_ioctl)(struct v3d_hw *v3d,
                                 struct drm_v3d_submit_tfu *args)
{
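        /* The conversion count (CVTCT) advances when a TFU conversion
         * completes, so latch the current value to tell when ours is done.
         */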
        int last_vtct = V3D_READ(V3D_TFU_CS) & V3D_TFU_CS_CVTCT_SET;

        V3D_WRITE(V3D_TFU_IIA, args->iia);
        V3D_WRITE(V3D_TFU_IIS, args->iis);
        V3D_WRITE(V3D_TFU_ICA, args->ica);
        V3D_WRITE(V3D_TFU_IUA, args->iua);
        V3D_WRITE(V3D_TFU_IOA, args->ioa);
        V3D_WRITE(V3D_TFU_IOS, args->ios);
        V3D_WRITE(V3D_TFU_COEF0, args->coef[0]);
        V3D_WRITE(V3D_TFU_COEF1, args->coef[1]);
        V3D_WRITE(V3D_TFU_COEF2, args->coef[2]);
        V3D_WRITE(V3D_TFU_COEF3, args->coef[3]);

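        /* ICFG kicks off the conversion, so it's written last. */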
        V3D_WRITE(V3D_TFU_ICFG, args->icfg);

        while ((V3D_READ(V3D_TFU_CS) & V3D_TFU_CS_CVTCT_SET) == last_vtct) {
                v3d_hw_tick(v3d);
        }

        return 0;
}

int
v3dX(simulator_get_param_ioctl)(struct v3d_hw *v3d,
                                struct drm_v3d_get_param *args)
{
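        /* Map each identification param onto the register that backs it. */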
        static const uint32_t reg_map[] = {
                [DRM_V3D_PARAM_V3D_UIFCFG] = V3D_HUB_CTL_UIFCFG,
                [DRM_V3D_PARAM_V3D_HUB_IDENT1] = V3D_HUB_CTL_IDENT1,
                [DRM_V3D_PARAM_V3D_HUB_IDENT2] = V3D_HUB_CTL_IDENT2,
                [DRM_V3D_PARAM_V3D_HUB_IDENT3] = V3D_HUB_CTL_IDENT3,
                [DRM_V3D_PARAM_V3D_CORE0_IDENT0] = V3D_CTL_0_IDENT0,
                [DRM_V3D_PARAM_V3D_CORE0_IDENT1] = V3D_CTL_0_IDENT1,
                [DRM_V3D_PARAM_V3D_CORE0_IDENT2] = V3D_CTL_0_IDENT2,
        };

        switch (args->param) {
        case DRM_V3D_PARAM_SUPPORTS_TFU:
                args->value = 1;
                return 0;
        }

        if (args->param < ARRAY_SIZE(reg_map) && reg_map[args->param]) {
                args->value = V3D_READ(reg_map[args->param]);
                return 0;
        }

        fprintf(stderr, "Unknown DRM_IOCTL_V3D_GET_PARAM(%lld)\n",
                (long long)args->param);
        abort();
}

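/* The simulator's ISR callback doesn't get a context pointer, so stash the
 * HW handle here for v3d_isr() to look up.
 */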
static struct v3d_hw *v3d_isr_hw;

static void
v3d_isr(uint32_t hub_status)
{
        struct v3d_hw *v3d = v3d_isr_hw;

        /* Check the per-core bits */
        if (hub_status & (1 << 0)) {
                uint32_t core_status = V3D_READ(V3D_CTL_0_INT_STS);
                V3D_WRITE(V3D_CTL_0_INT_CLR, core_status);

                if (core_status & V3D_CTL_0_INT_STS_INT_OUTOMEM_SET) {
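                        /* The binner ran out of memory: allocate a fresh
                         * spill buffer, reload the GMP so the new memory is
                         * accessible, and point the PTB's overflow pool at
                         * it.
                         */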
                        uint32_t size = 256 * 1024;
                        uint32_t offset = v3d_simulator_get_spill(size);

                        v3d_reload_gmp(v3d);

                        V3D_WRITE(V3D_PTB_0_BPOA, offset);
                        V3D_WRITE(V3D_PTB_0_BPOS, size);
                        return;
                }

                if (core_status & V3D_CTL_0_INT_STS_INT_GMPV_SET) {
                        fprintf(stderr, "GMP violation at 0x%08x\n",
                                V3D_READ(V3D_GMP_0_VIO_ADDR));
                } else {
                        fprintf(stderr,
                                "Unexpected ISR with core status 0x%08x\n",
                                core_status);
                }
                abort();
        }
}

void
v3dX(simulator_init_regs)(struct v3d_hw *v3d)
{
#if V3D_VERSION == 33
        /* Set OVRTMUOUT to match kernel behavior.
         *
         * This means that the texture sampler uniform configuration's tmu
         * output type field is used, instead of using the hardware default
         * behavior based on the texture type.  If you want the default
         * behavior, you can still put "2" in the indirect texture state's
         * output_type field.
         */
        V3D_WRITE(V3D_CTL_0_MISCCFG, V3D_CTL_1_MISCCFG_OVRTMUOUT_SET);
#endif

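        /* Mask all core interrupts except the ones v3d_isr() handles: GMP
         * violations and binner out-of-memory.
         */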
        uint32_t core_interrupts = (V3D_CTL_0_INT_STS_INT_GMPV_SET |
                                    V3D_CTL_0_INT_STS_INT_OUTOMEM_SET);
        V3D_WRITE(V3D_CTL_0_INT_MSK_SET, ~core_interrupts);
        V3D_WRITE(V3D_CTL_0_INT_MSK_CLR, core_interrupts);

        v3d_isr_hw = v3d;
        v3d_hw_set_isr(v3d, v3d_isr);
}

void
v3dX(simulator_submit_cl_ioctl)(struct v3d_hw *v3d,
                                struct drm_v3d_submit_cl *submit,
                                uint32_t gmp_ofs)
{
        g_gmp_ofs = gmp_ofs;
        v3d_reload_gmp(v3d);

        v3d_invalidate_caches(v3d);

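        /* Set up the tile alloc buffer for binning, if the job provides
         * one.
         */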
        if (submit->qma) {
                V3D_WRITE(V3D_CLE_0_CT0QMA, submit->qma);
                V3D_WRITE(V3D_CLE_0_CT0QMS, submit->qms);
        }
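        /* Point the CLE at the tile state data array (4.1+ only). */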
#if V3D_VERSION >= 41
        if (submit->qts) {
                V3D_WRITE(V3D_CLE_0_CT0QTS,
                          V3D_CLE_0_CT0QTS_CTQTSEN_SET |
                          submit->qts);
        }
#endif
        V3D_WRITE(V3D_CLE_0_CT0QBA, submit->bcl_start);
        V3D_WRITE(V3D_CLE_0_CT0QEA, submit->bcl_end);

        /* Wait for bin to complete before firing render.  The kernel's
         * scheduler implements this using the GPU scheduler blocking on the
         * bin fence completing.  (We don't use HW semaphores.)
         */
        while (V3D_READ(V3D_CLE_0_CT0CA) !=
               V3D_READ(V3D_CLE_0_CT0EA)) {
                v3d_hw_tick(v3d);
        }

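        /* Invalidate the read caches between the passes so rendering sees
         * the binner's output.
         */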
        v3d_invalidate_caches(v3d);

        V3D_WRITE(V3D_CLE_0_CT1QBA, submit->rcl_start);
        V3D_WRITE(V3D_CLE_0_CT1QEA, submit->rcl_end);

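        /* Wait for the render CL to finish on both CLE instances. */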
        while (V3D_READ(V3D_CLE_0_CT1CA) !=
               V3D_READ(V3D_CLE_0_CT1EA) ||
               V3D_READ(V3D_CLE_1_CT1CA) !=
               V3D_READ(V3D_CLE_1_CT1EA)) {
                v3d_hw_tick(v3d);
        }
}

#endif /* USE_V3D_SIMULATOR */