1/*
2 * Copyright © 2021 Raspberry Pi
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
/**
 * Gallium query object support for performance counters
 *
 * This file implements the queries on the V3D performance counters.
 */
29
30#include "v3d_query.h"
31
32struct v3d_query_perfcnt
33{
34        struct v3d_query base;
35
36        unsigned num_queries;
37        struct v3d_perfmon_state *perfmon;
38};
39
/**
 * Human readable names of the V3D performance counters.
 *
 * The position of a name in this table is the counter number used by the
 * rest of this file: counter N is exposed as the driver query
 * PIPE_QUERY_DRIVER_SPECIFIC + N and N is what gets stored in the perfmon
 * counter list.  Entries must therefore never be reordered or removed.
 *
 * Both the strings and the pointer table itself are read-only.
 *
 * NOTE(review): "PTB-primitives-discared-reversed" looks like a typo for
 * "discarded".  It is left unchanged on purpose because the names are handed
 * out to applications, which may look counters up by name.
 */
static const char *const v3d_counter_names[] = {
        "FEP-valid-primitives-no-rendered-pixels",
        "FEP-valid-primitives-rendered-pixels",
        "FEP-clipped-quads",
        "FEP-valid-quads",
        "TLB-quads-not-passing-stencil-test",
        "TLB-quads-not-passing-z-and-stencil-test",
        "TLB-quads-passing-z-and-stencil-test",
        "TLB-quads-with-zero-coverage",
        "TLB-quads-with-non-zero-coverage",
        "TLB-quads-written-to-color-buffer",
        "PTB-primitives-discarded-outside-viewport",
        "PTB-primitives-need-clipping",
        "PTB-primitives-discared-reversed",
        "QPU-total-idle-clk-cycles",
        "QPU-total-active-clk-cycles-vertex-coord-shading",
        "QPU-total-active-clk-cycles-fragment-shading",
        "QPU-total-clk-cycles-executing-valid-instr",
        "QPU-total-clk-cycles-waiting-TMU",
        "QPU-total-clk-cycles-waiting-scoreboard",
        "QPU-total-clk-cycles-waiting-varyings",
        "QPU-total-instr-cache-hit",
        "QPU-total-instr-cache-miss",
        "QPU-total-uniform-cache-hit",
        "QPU-total-uniform-cache-miss",
        "TMU-total-text-quads-access",
        "TMU-total-text-cache-miss",
        "VPM-total-clk-cycles-VDW-stalled",
        "VPM-total-clk-cycles-VCD-stalled",
        "CLE-bin-thread-active-cycles",
        "CLE-render-thread-active-cycles",
        "L2T-total-cache-hit",
        "L2T-total-cache-miss",
        "cycle-count",
        "QPU-total-clk-cycles-waiting-vertex-coord-shading",
        "QPU-total-clk-cycles-waiting-fragment-shading",
        "PTB-primitives-binned",
        "AXI-writes-seen-watch-0",
        "AXI-reads-seen-watch-0",
        "AXI-writes-stalled-seen-watch-0",
        "AXI-reads-stalled-seen-watch-0",
        "AXI-write-bytes-seen-watch-0",
        "AXI-read-bytes-seen-watch-0",
        "AXI-writes-seen-watch-1",
        "AXI-reads-seen-watch-1",
        "AXI-writes-stalled-seen-watch-1",
        "AXI-reads-stalled-seen-watch-1",
        "AXI-write-bytes-seen-watch-1",
        "AXI-read-bytes-seen-watch-1",
        "TLB-partial-quads-written-to-color-buffer",
        "TMU-total-config-access",
        "L2T-no-id-stalled",
        "L2T-command-queue-stalled",
        "L2T-TMU-writes",
        "TMU-active-cycles",
        "TMU-stalled-cycles",
        "CLE-thread-active-cycles",
        "L2T-TMU-reads",
        "L2T-CLE-reads",
        "L2T-VCD-reads",
        "L2T-TMU-config-reads",
        "L2T-SLC0-reads",
        "L2T-SLC1-reads",
        "L2T-SLC2-reads",
        "L2T-TMU-write-miss",
        "L2T-TMU-read-miss",
        "L2T-CLE-read-miss",
        "L2T-VCD-read-miss",
        "L2T-TMU-config-read-miss",
        "L2T-SLC0-read-miss",
        "L2T-SLC1-read-miss",
        "L2T-SLC2-read-miss",
        "core-memory-writes",
        "L2T-memory-writes",
        "PTB-memory-writes",
        "TLB-memory-writes",
        "core-memory-reads",
        "L2T-memory-reads",
        "PTB-memory-reads",
        "PSE-memory-reads",
        "TLB-memory-reads",
        "GMP-memory-reads",
        "PTB-memory-words-writes",
        "TLB-memory-words-writes",
        "PSE-memory-words-reads",
        "TLB-memory-words-reads",
        "TMU-MRU-hits",
        "compute-active-cycles",
};
129
130static void
131kperfmon_destroy(struct v3d_context *v3d, struct v3d_perfmon_state *perfmon)
132{
133        struct drm_v3d_perfmon_destroy destroyreq;
134
135        destroyreq.id = perfmon->kperfmon_id;
136        int ret = v3d_ioctl(v3d->fd, DRM_IOCTL_V3D_PERFMON_DESTROY, &destroyreq);
137        if (ret != 0)
138                fprintf(stderr, "failed to destroy perfmon %d: %s\n",
139                        perfmon->kperfmon_id, strerror(errno));
140}
141
142int
143v3d_get_driver_query_group_info_perfcnt(struct v3d_screen *screen, unsigned index,
144                                        struct pipe_driver_query_group_info *info)
145{
146        if (!screen->has_perfmon)
147                return 0;
148
149        if (!info)
150                return 1;
151
152        if (index > 0)
153                return 0;
154
155        info->name = "V3D counters";
156        info->max_active_queries = DRM_V3D_MAX_PERF_COUNTERS;
157        info->num_queries = ARRAY_SIZE(v3d_counter_names);
158
159        return 1;
160}
161
162int
163v3d_get_driver_query_info_perfcnt(struct v3d_screen *screen, unsigned index,
164                                  struct pipe_driver_query_info *info)
165{
166        if (!screen->has_perfmon)
167                return 0;
168
169        if (!info)
170                return ARRAY_SIZE(v3d_counter_names);
171
172        if (index >= ARRAY_SIZE(v3d_counter_names))
173                return 0;
174
175        info->group_id = 0;
176        info->name = v3d_counter_names[index];
177        info->query_type = PIPE_QUERY_DRIVER_SPECIFIC + index;
178        info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE;
179        info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
180        info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH;
181
182        return 1;
183}
184
185static void
186v3d_destroy_query_perfcnt(struct v3d_context *v3d, struct v3d_query *query)
187{
188        struct v3d_query_perfcnt *pquery = (struct v3d_query_perfcnt *)query;
189
190        assert(pquery->perfmon);
191
192        if (v3d->active_perfmon == pquery->perfmon) {
193                fprintf(stderr, "Query is active; end query before destroying\n");
194                return;
195        }
196        if (pquery->perfmon->kperfmon_id)
197                kperfmon_destroy(v3d, pquery->perfmon);
198
199        v3d_fence_unreference(&pquery->perfmon->last_job_fence);
200        free(pquery->perfmon);
201        free(query);
202}
203
204static bool
205v3d_begin_query_perfcnt(struct v3d_context *v3d, struct v3d_query *query)
206{
207        struct v3d_query_perfcnt *pquery = (struct v3d_query_perfcnt *)query;
208        struct drm_v3d_perfmon_create createreq = { 0 };
209        int i, ret;
210
211        /* Only one perfmon can be activated per context */
212        if (v3d->active_perfmon) {
213                fprintf(stderr,
214                        "Another query is already active; "
215                        "finish it before starting a new one\n");
216                return false;
217        }
218
219        assert(pquery->perfmon);
220
221        /* Reset the counters by destroying the previously allocated perfmon */
222        if (pquery->perfmon->kperfmon_id)
223                kperfmon_destroy(v3d, pquery->perfmon);
224
225        for (i = 0; i < pquery->num_queries; i++)
226                createreq.counters[i] = pquery->perfmon->counters[i];
227
228        createreq.ncounters = pquery->num_queries;
229        ret = v3d_ioctl(v3d->fd, DRM_IOCTL_V3D_PERFMON_CREATE, &createreq);
230        if (ret != 0)
231                return false;
232
233        pquery->perfmon->kperfmon_id = createreq.id;
234        pquery->perfmon->job_submitted = false;
235        v3d_fence_unreference(&pquery->perfmon->last_job_fence);
236
237        /* Ensure all pending jobs are flushed before activating the
238         * perfmon
239         */
240        v3d_flush((struct pipe_context *)v3d);
241        v3d->active_perfmon = pquery->perfmon;
242
243        return true;
244}
245
246static bool
247v3d_end_query_perfcnt(struct v3d_context *v3d, struct v3d_query *query)
248{
249        struct v3d_query_perfcnt *pquery = (struct v3d_query_perfcnt *)query;
250
251        assert(pquery->perfmon);
252
253        if (v3d->active_perfmon != pquery->perfmon) {
254                fprintf(stderr, "This query is not active\n");
255                return false;
256        }
257
258        /* Ensure all pending jobs are flushed before deactivating the
259         * perfmon
260         */
261        v3d_flush((struct pipe_context *)v3d);
262
263        /* Get a copy of latest submitted job's fence to wait for its
264         * completion
265         */
266        if (v3d->active_perfmon->job_submitted)
267                v3d->active_perfmon->last_job_fence = v3d_fence_create(v3d);
268
269        v3d->active_perfmon = NULL;
270
271        return true;
272}
273
274static bool
275v3d_get_query_result_perfcnt(struct v3d_context *v3d, struct v3d_query *query,
276                             bool wait, union pipe_query_result *vresult)
277{
278        struct v3d_query_perfcnt *pquery = (struct v3d_query_perfcnt *)query;
279        struct drm_v3d_perfmon_get_values req = { 0 };
280        int i, ret;
281
282        assert(pquery->perfmon);
283
284        if (pquery->perfmon->job_submitted) {
285                if (!v3d_fence_wait(v3d->screen,
286                                    pquery->perfmon->last_job_fence,
287                                    wait ? PIPE_TIMEOUT_INFINITE : 0))
288                        return false;
289
290                req.id = pquery->perfmon->kperfmon_id;
291                req.values_ptr = (uintptr_t)pquery->perfmon->values;
292                ret = v3d_ioctl(v3d->fd, DRM_IOCTL_V3D_PERFMON_GET_VALUES, &req);
293                if (ret != 0) {
294                        fprintf(stderr, "Can't request perfmon counters values\n");
295                        return false;
296                }
297        }
298
299        for (i = 0; i < pquery->num_queries; i++)
300                vresult->batch[i].u64 = pquery->perfmon->values[i];
301
302        return true;
303}
304
305static const struct v3d_query_funcs perfcnt_query_funcs = {
306        .destroy_query = v3d_destroy_query_perfcnt,
307        .begin_query = v3d_begin_query_perfcnt,
308        .end_query = v3d_end_query_perfcnt,
309        .get_query_result = v3d_get_query_result_perfcnt,
310};
311
312struct pipe_query *
313v3d_create_batch_query_perfcnt(struct v3d_context *v3d, unsigned num_queries,
314                               unsigned *query_types)
315{
316        struct v3d_query_perfcnt *pquery = NULL;
317        struct v3d_query *query;
318        struct v3d_perfmon_state *perfmon = NULL;
319        int i;
320
321        /* Validate queries */
322        for (i = 0; i < num_queries; i++) {
323                if (query_types[i] < PIPE_QUERY_DRIVER_SPECIFIC ||
324                    query_types[i] >= PIPE_QUERY_DRIVER_SPECIFIC +
325                    ARRAY_SIZE(v3d_counter_names)) {
326                        fprintf(stderr, "Invalid query type\n");
327                        return NULL;
328                }
329        }
330
331        pquery = calloc(1, sizeof(*pquery));
332        if (!pquery)
333                return NULL;
334
335        perfmon = calloc(1, sizeof(*perfmon));
336        if (!perfmon) {
337                free(pquery);
338                return NULL;
339        }
340
341        for (i = 0; i < num_queries; i++)
342                perfmon->counters[i] = query_types[i] - PIPE_QUERY_DRIVER_SPECIFIC;
343
344        pquery->perfmon = perfmon;
345        pquery->num_queries = num_queries;
346
347        query = &pquery->base;
348        query->funcs = &perfcnt_query_funcs;
349
350        /* Note that struct pipe_query isn't actually defined anywhere. */
351         return (struct pipe_query *)query;
352}
353