1b8e80941Smrg/*
2b8e80941Smrg * Copyright © 2014 Broadcom
3b8e80941Smrg *
4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5b8e80941Smrg * copy of this software and associated documentation files (the "Software"),
6b8e80941Smrg * to deal in the Software without restriction, including without limitation
7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the
9b8e80941Smrg * Software is furnished to do so, subject to the following conditions:
10b8e80941Smrg *
11b8e80941Smrg * The above copyright notice and this permission notice (including the next
12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the
13b8e80941Smrg * Software.
14b8e80941Smrg *
15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21b8e80941Smrg * IN THE SOFTWARE.
22b8e80941Smrg */
23b8e80941Smrg
24b8e80941Smrg/**
25b8e80941Smrg * Expose V3D HW perf counters.
26b8e80941Smrg *
27b8e80941Smrg * We also have code to fake support for occlusion queries.
28b8e80941Smrg * Since we expose support for GL 2.0, we have to expose occlusion queries,
29b8e80941Smrg * but the spec allows you to expose 0 query counter bits, so we just return 0
30b8e80941Smrg * as the result of all our queries.
31b8e80941Smrg */
32b8e80941Smrg#include "vc4_context.h"
33b8e80941Smrg
/**
 * Driver-private query object.  Pointers to it are handed out cast to the
 * opaque struct pipe_query type and cast back in every query hook.
 */
struct vc4_query {
        /* Number of HW counters in the batch; only set for HW queries. */
        unsigned num_queries;

        /* Perfmon state for HW counter queries, NULL for any other query. */
        struct vc4_hwperfmon *hwperfmon;
};
39b8e80941Smrg
/**
 * Names of the V3D HW performance counters exposed as driver-specific
 * queries.
 *
 * The position of a name in this table is the counter's index:
 * vc4_get_driver_query_info() reports entry N as query type
 * PIPE_QUERY_DRIVER_SPECIFIC + N, and vc4_create_batch_query() converts that
 * query type back into event number N for the perfmon create ioctl.
 *
 * The strings are handed out verbatim through pipe_driver_query_info::name,
 * so the existing spellings (including "discared") are kept as they are.
 *
 * Both the strings and the table itself are read-only.
 */
static const char *const v3d_counter_names[] = {
        "FEP-valid-primitives-no-rendered-pixels",
        "FEP-valid-primitives-rendered-pixels",
        "FEP-clipped-quads",
        "FEP-valid-quads",
        "TLB-quads-not-passing-stencil-test",
        "TLB-quads-not-passing-z-and-stencil-test",
        "TLB-quads-passing-z-and-stencil-test",
        "TLB-quads-with-zero-coverage",
        "TLB-quads-with-non-zero-coverage",
        "TLB-quads-written-to-color-buffer",
        "PTB-primitives-discarded-outside-viewport",
        "PTB-primitives-need-clipping",
        "PTB-primitives-discared-reversed",
        "QPU-total-idle-clk-cycles",
        "QPU-total-clk-cycles-vertex-coord-shading",
        "QPU-total-clk-cycles-fragment-shading",
        "QPU-total-clk-cycles-executing-valid-instr",
        "QPU-total-clk-cycles-waiting-TMU",
        "QPU-total-clk-cycles-waiting-scoreboard",
        "QPU-total-clk-cycles-waiting-varyings",
        "QPU-total-instr-cache-hit",
        "QPU-total-instr-cache-miss",
        "QPU-total-uniform-cache-hit",
        "QPU-total-uniform-cache-miss",
        "TMU-total-text-quads-processed",
        "TMU-total-text-cache-miss",
        "VPM-total-clk-cycles-VDW-stalled",
        "VPM-total-clk-cycles-VCD-stalled",
        "L2C-total-cache-hit",
        "L2C-total-cache-miss",
};
72b8e80941Smrg
73b8e80941Smrgint vc4_get_driver_query_group_info(struct pipe_screen *pscreen,
74b8e80941Smrg                                    unsigned index,
75b8e80941Smrg                                    struct pipe_driver_query_group_info *info)
76b8e80941Smrg{
77b8e80941Smrg        struct vc4_screen *screen = vc4_screen(pscreen);
78b8e80941Smrg
79b8e80941Smrg        if (!screen->has_perfmon_ioctl)
80b8e80941Smrg                return 0;
81b8e80941Smrg
82b8e80941Smrg        if (!info)
83b8e80941Smrg                return 1;
84b8e80941Smrg
85b8e80941Smrg        if (index > 0)
86b8e80941Smrg                return 0;
87b8e80941Smrg
88b8e80941Smrg        info->name = "V3D counters";
89b8e80941Smrg        info->max_active_queries = DRM_VC4_MAX_PERF_COUNTERS;
90b8e80941Smrg        info->num_queries = ARRAY_SIZE(v3d_counter_names);
91b8e80941Smrg        return 1;
92b8e80941Smrg}
93b8e80941Smrg
94b8e80941Smrgint vc4_get_driver_query_info(struct pipe_screen *pscreen, unsigned index,
95b8e80941Smrg                              struct pipe_driver_query_info *info)
96b8e80941Smrg{
97b8e80941Smrg        struct vc4_screen *screen = vc4_screen(pscreen);
98b8e80941Smrg
99b8e80941Smrg        if (!screen->has_perfmon_ioctl)
100b8e80941Smrg                return 0;
101b8e80941Smrg
102b8e80941Smrg        if (!info)
103b8e80941Smrg                return ARRAY_SIZE(v3d_counter_names);
104b8e80941Smrg
105b8e80941Smrg        if (index >= ARRAY_SIZE(v3d_counter_names))
106b8e80941Smrg                return 0;
107b8e80941Smrg
108b8e80941Smrg        info->group_id = 0;
109b8e80941Smrg        info->name = v3d_counter_names[index];
110b8e80941Smrg        info->query_type = PIPE_QUERY_DRIVER_SPECIFIC + index;
111b8e80941Smrg        info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE;
112b8e80941Smrg        info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
113b8e80941Smrg        info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH;
114b8e80941Smrg        return 1;
115b8e80941Smrg}
116b8e80941Smrg
117b8e80941Smrgstatic struct pipe_query *
118b8e80941Smrgvc4_create_batch_query(struct pipe_context *pctx, unsigned num_queries,
119b8e80941Smrg                       unsigned *query_types)
120b8e80941Smrg{
121b8e80941Smrg        struct vc4_query *query = calloc(1, sizeof(*query));
122b8e80941Smrg        struct vc4_hwperfmon *hwperfmon;
123b8e80941Smrg        unsigned i, nhwqueries = 0;
124b8e80941Smrg
125b8e80941Smrg        if (!query)
126b8e80941Smrg                return NULL;
127b8e80941Smrg
128b8e80941Smrg        for (i = 0; i < num_queries; i++) {
129b8e80941Smrg                if (query_types[i] >= PIPE_QUERY_DRIVER_SPECIFIC)
130b8e80941Smrg                        nhwqueries++;
131b8e80941Smrg        }
132b8e80941Smrg
133b8e80941Smrg        /* We can't mix HW and non-HW queries. */
134b8e80941Smrg        if (nhwqueries && nhwqueries != num_queries)
135b8e80941Smrg                goto err_free_query;
136b8e80941Smrg
137b8e80941Smrg        if (!nhwqueries)
138b8e80941Smrg                return (struct pipe_query *)query;
139b8e80941Smrg
140b8e80941Smrg        hwperfmon = calloc(1, sizeof(*hwperfmon));
141b8e80941Smrg        if (!hwperfmon)
142b8e80941Smrg                goto err_free_query;
143b8e80941Smrg
144b8e80941Smrg        for (i = 0; i < num_queries; i++)
145b8e80941Smrg                hwperfmon->events[i] = query_types[i] -
146b8e80941Smrg                                       PIPE_QUERY_DRIVER_SPECIFIC;
147b8e80941Smrg
148b8e80941Smrg        query->hwperfmon = hwperfmon;
149b8e80941Smrg        query->num_queries = num_queries;
150b8e80941Smrg
151b8e80941Smrg        /* Note that struct pipe_query isn't actually defined anywhere. */
152b8e80941Smrg        return (struct pipe_query *)query;
153b8e80941Smrg
154b8e80941Smrgerr_free_query:
155b8e80941Smrg        free(query);
156b8e80941Smrg
157b8e80941Smrg        return NULL;
158b8e80941Smrg}
159b8e80941Smrg
/**
 * Creates a single query by wrapping it in a one-element batch.
 *
 * The index argument is not used.
 */
static struct pipe_query *
vc4_create_query(struct pipe_context *ctx, unsigned query_type, unsigned index)
{
        unsigned types[1] = { query_type };

        return vc4_create_batch_query(ctx, 1, types);
}
165b8e80941Smrg
166b8e80941Smrgstatic void
167b8e80941Smrgvc4_destroy_query(struct pipe_context *pctx, struct pipe_query *pquery)
168b8e80941Smrg{
169b8e80941Smrg        struct vc4_context *ctx = vc4_context(pctx);
170b8e80941Smrg        struct vc4_query *query = (struct vc4_query *)pquery;
171b8e80941Smrg
172b8e80941Smrg        if (query->hwperfmon && query->hwperfmon->id) {
173b8e80941Smrg                if (query->hwperfmon->id) {
174b8e80941Smrg                        struct drm_vc4_perfmon_destroy req = { };
175b8e80941Smrg
176b8e80941Smrg                        req.id = query->hwperfmon->id;
177b8e80941Smrg                        vc4_ioctl(ctx->fd, DRM_IOCTL_VC4_PERFMON_DESTROY,
178b8e80941Smrg                                  &req);
179b8e80941Smrg                }
180b8e80941Smrg
181b8e80941Smrg                free(query->hwperfmon);
182b8e80941Smrg        }
183b8e80941Smrg
184b8e80941Smrg        free(query);
185b8e80941Smrg}
186b8e80941Smrg
187b8e80941Smrgstatic boolean
188b8e80941Smrgvc4_begin_query(struct pipe_context *pctx, struct pipe_query *pquery)
189b8e80941Smrg{
190b8e80941Smrg        struct vc4_query *query = (struct vc4_query *)pquery;
191b8e80941Smrg        struct vc4_context *ctx = vc4_context(pctx);
192b8e80941Smrg        struct drm_vc4_perfmon_create req = { };
193b8e80941Smrg        unsigned i;
194b8e80941Smrg        int ret;
195b8e80941Smrg
196b8e80941Smrg        if (!query->hwperfmon)
197b8e80941Smrg                return true;
198b8e80941Smrg
199b8e80941Smrg        /* Only one perfmon can be activated per context. */
200b8e80941Smrg        if (ctx->perfmon)
201b8e80941Smrg                return false;
202b8e80941Smrg
203b8e80941Smrg        /* Reset the counters by destroying the previously allocated perfmon */
204b8e80941Smrg        if (query->hwperfmon->id) {
205b8e80941Smrg                struct drm_vc4_perfmon_destroy destroyreq = { };
206b8e80941Smrg
207b8e80941Smrg                destroyreq.id = query->hwperfmon->id;
208b8e80941Smrg                vc4_ioctl(ctx->fd, DRM_IOCTL_VC4_PERFMON_DESTROY, &destroyreq);
209b8e80941Smrg        }
210b8e80941Smrg
211b8e80941Smrg        for (i = 0; i < query->num_queries; i++)
212b8e80941Smrg                req.events[i] = query->hwperfmon->events[i];
213b8e80941Smrg
214b8e80941Smrg        req.ncounters = query->num_queries;
215b8e80941Smrg        ret = vc4_ioctl(ctx->fd, DRM_IOCTL_VC4_PERFMON_CREATE, &req);
216b8e80941Smrg        if (ret)
217b8e80941Smrg                return false;
218b8e80941Smrg
219b8e80941Smrg        query->hwperfmon->id = req.id;
220b8e80941Smrg
221b8e80941Smrg        /* Make sure all pendings jobs are flushed before activating the
222b8e80941Smrg         * perfmon.
223b8e80941Smrg         */
224b8e80941Smrg        vc4_flush(pctx);
225b8e80941Smrg        ctx->perfmon = query->hwperfmon;
226b8e80941Smrg        return true;
227b8e80941Smrg}
228b8e80941Smrg
229b8e80941Smrgstatic bool
230b8e80941Smrgvc4_end_query(struct pipe_context *pctx, struct pipe_query *pquery)
231b8e80941Smrg{
232b8e80941Smrg        struct vc4_query *query = (struct vc4_query *)pquery;
233b8e80941Smrg        struct vc4_context *ctx = vc4_context(pctx);
234b8e80941Smrg
235b8e80941Smrg        if (!query->hwperfmon)
236b8e80941Smrg                return true;
237b8e80941Smrg
238b8e80941Smrg        if (ctx->perfmon != query->hwperfmon)
239b8e80941Smrg                return false;
240b8e80941Smrg
241b8e80941Smrg        /* Make sure all pendings jobs are flushed before deactivating the
242b8e80941Smrg         * perfmon.
243b8e80941Smrg         */
244b8e80941Smrg        vc4_flush(pctx);
245b8e80941Smrg        ctx->perfmon = NULL;
246b8e80941Smrg        return true;
247b8e80941Smrg}
248b8e80941Smrg
249b8e80941Smrgstatic boolean
250b8e80941Smrgvc4_get_query_result(struct pipe_context *pctx, struct pipe_query *pquery,
251b8e80941Smrg                     boolean wait, union pipe_query_result *vresult)
252b8e80941Smrg{
253b8e80941Smrg        struct vc4_context *ctx = vc4_context(pctx);
254b8e80941Smrg        struct vc4_query *query = (struct vc4_query *)pquery;
255b8e80941Smrg        struct drm_vc4_perfmon_get_values req;
256b8e80941Smrg        unsigned i;
257b8e80941Smrg        int ret;
258b8e80941Smrg
259b8e80941Smrg        if (!query->hwperfmon) {
260b8e80941Smrg                vresult->u64 = 0;
261b8e80941Smrg                return true;
262b8e80941Smrg        }
263b8e80941Smrg
264b8e80941Smrg        if (!vc4_wait_seqno(ctx->screen, query->hwperfmon->last_seqno,
265b8e80941Smrg                            wait ? PIPE_TIMEOUT_INFINITE : 0, "perfmon"))
266b8e80941Smrg                return false;
267b8e80941Smrg
268b8e80941Smrg        req.id = query->hwperfmon->id;
269b8e80941Smrg        req.values_ptr = (uintptr_t)query->hwperfmon->counters;
270b8e80941Smrg        ret = vc4_ioctl(ctx->fd, DRM_IOCTL_VC4_PERFMON_GET_VALUES, &req);
271b8e80941Smrg        if (ret)
272b8e80941Smrg                return false;
273b8e80941Smrg
274b8e80941Smrg        for (i = 0; i < query->num_queries; i++)
275b8e80941Smrg                vresult->batch[i].u64 = query->hwperfmon->counters[i];
276b8e80941Smrg
277b8e80941Smrg        return true;
278b8e80941Smrg}
279b8e80941Smrg
280b8e80941Smrgstatic void
281b8e80941Smrgvc4_set_active_query_state(struct pipe_context *pctx, boolean enable)
282b8e80941Smrg{
283b8e80941Smrg}
284b8e80941Smrg
285b8e80941Smrgvoid
286b8e80941Smrgvc4_query_init(struct pipe_context *pctx)
287b8e80941Smrg{
288b8e80941Smrg        pctx->create_query = vc4_create_query;
289b8e80941Smrg        pctx->create_batch_query = vc4_create_batch_query;
290b8e80941Smrg        pctx->destroy_query = vc4_destroy_query;
291b8e80941Smrg        pctx->begin_query = vc4_begin_query;
292b8e80941Smrg        pctx->end_query = vc4_end_query;
293b8e80941Smrg        pctx->get_query_result = vc4_get_query_result;
294b8e80941Smrg        pctx->set_active_query_state = vc4_set_active_query_state;
295b8e80941Smrg}
296