101e04c3fSmrg/*
201e04c3fSmrg * Copyright © 2014 Broadcom
301e04c3fSmrg *
401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a
501e04c3fSmrg * copy of this software and associated documentation files (the "Software"),
601e04c3fSmrg * to deal in the Software without restriction, including without limitation
701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the
901e04c3fSmrg * Software is furnished to do so, subject to the following conditions:
1001e04c3fSmrg *
1101e04c3fSmrg * The above copyright notice and this permission notice (including the next
1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the
1301e04c3fSmrg * Software.
1401e04c3fSmrg *
1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
2101e04c3fSmrg * IN THE SOFTWARE.
2201e04c3fSmrg */
2301e04c3fSmrg
2401e04c3fSmrg/**
2501e04c3fSmrg * Expose V3D HW perf counters.
2601e04c3fSmrg *
2701e04c3fSmrg * We also have code to fake support for occlusion queries.
2801e04c3fSmrg * Since we expose support for GL 2.0, we have to expose occlusion queries,
2901e04c3fSmrg * but the spec allows you to expose 0 query counter bits, so we just return 0
3001e04c3fSmrg * as the result of all our queries.
3101e04c3fSmrg */
3201e04c3fSmrg#include "vc4_context.h"
3301e04c3fSmrg
/**
 * Driver-private query object, handed to gallium cast to a pipe_query.
 *
 * Queries that are not backed by HW perf counters leave hwperfmon NULL;
 * their result is always reported as 0.
 */
struct vc4_query {
        /* Number of HW counters in the batch (only set for HW queries). */
        unsigned num_queries;

        /* HW perfmon state, or NULL for a faked (non-HW) query. */
        struct vc4_hwperfmon *hwperfmon;
};
3901e04c3fSmrg
/*
 * Names of the V3D HW performance counters exposed as driver queries.
 *
 * The position of a name in this table is the counter's event index:
 * counter N is advertised as query type PIPE_QUERY_DRIVER_SPECIFIC + N, so
 * entries must never be reordered or removed.
 */
static const char *const v3d_counter_names[] = {
        /* 0-3: FEP counters */
        "FEP-valid-primitives-no-rendered-pixels",
        "FEP-valid-primitives-rendered-pixels",
        "FEP-clipped-quads",
        "FEP-valid-quads",

        /* 4-9: TLB counters */
        "TLB-quads-not-passing-stencil-test",
        "TLB-quads-not-passing-z-and-stencil-test",
        "TLB-quads-passing-z-and-stencil-test",
        "TLB-quads-with-zero-coverage",
        "TLB-quads-with-non-zero-coverage",
        "TLB-quads-written-to-color-buffer",

        /* 10-12: PTB counters */
        "PTB-primitives-discarded-outside-viewport",
        "PTB-primitives-need-clipping",
        "PTB-primitives-discared-reversed",

        /* 13-23: QPU counters */
        "QPU-total-idle-clk-cycles",
        "QPU-total-clk-cycles-vertex-coord-shading",
        "QPU-total-clk-cycles-fragment-shading",
        "QPU-total-clk-cycles-executing-valid-instr",
        "QPU-total-clk-cycles-waiting-TMU",
        "QPU-total-clk-cycles-waiting-scoreboard",
        "QPU-total-clk-cycles-waiting-varyings",
        "QPU-total-instr-cache-hit",
        "QPU-total-instr-cache-miss",
        "QPU-total-uniform-cache-hit",
        "QPU-total-uniform-cache-miss",

        /* 24-25: TMU counters */
        "TMU-total-text-quads-processed",
        "TMU-total-text-cache-miss",

        /* 26-27: VPM counters */
        "VPM-total-clk-cycles-VDW-stalled",
        "VPM-total-clk-cycles-VCD-stalled",

        /* 28-29: L2C counters */
        "L2C-total-cache-hit",
        "L2C-total-cache-miss",
};
7201e04c3fSmrg
7301e04c3fSmrgint vc4_get_driver_query_group_info(struct pipe_screen *pscreen,
7401e04c3fSmrg                                    unsigned index,
7501e04c3fSmrg                                    struct pipe_driver_query_group_info *info)
7601e04c3fSmrg{
7701e04c3fSmrg        struct vc4_screen *screen = vc4_screen(pscreen);
7801e04c3fSmrg
7901e04c3fSmrg        if (!screen->has_perfmon_ioctl)
8001e04c3fSmrg                return 0;
8101e04c3fSmrg
8201e04c3fSmrg        if (!info)
8301e04c3fSmrg                return 1;
8401e04c3fSmrg
8501e04c3fSmrg        if (index > 0)
8601e04c3fSmrg                return 0;
8701e04c3fSmrg
8801e04c3fSmrg        info->name = "V3D counters";
8901e04c3fSmrg        info->max_active_queries = DRM_VC4_MAX_PERF_COUNTERS;
9001e04c3fSmrg        info->num_queries = ARRAY_SIZE(v3d_counter_names);
9101e04c3fSmrg        return 1;
9201e04c3fSmrg}
9301e04c3fSmrg
9401e04c3fSmrgint vc4_get_driver_query_info(struct pipe_screen *pscreen, unsigned index,
9501e04c3fSmrg                              struct pipe_driver_query_info *info)
9601e04c3fSmrg{
9701e04c3fSmrg        struct vc4_screen *screen = vc4_screen(pscreen);
9801e04c3fSmrg
9901e04c3fSmrg        if (!screen->has_perfmon_ioctl)
10001e04c3fSmrg                return 0;
10101e04c3fSmrg
10201e04c3fSmrg        if (!info)
10301e04c3fSmrg                return ARRAY_SIZE(v3d_counter_names);
10401e04c3fSmrg
10501e04c3fSmrg        if (index >= ARRAY_SIZE(v3d_counter_names))
10601e04c3fSmrg                return 0;
10701e04c3fSmrg
10801e04c3fSmrg        info->group_id = 0;
10901e04c3fSmrg        info->name = v3d_counter_names[index];
11001e04c3fSmrg        info->query_type = PIPE_QUERY_DRIVER_SPECIFIC + index;
11101e04c3fSmrg        info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE;
11201e04c3fSmrg        info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
11301e04c3fSmrg        info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH;
11401e04c3fSmrg        return 1;
11501e04c3fSmrg}
11601e04c3fSmrg
11701e04c3fSmrgstatic struct pipe_query *
11801e04c3fSmrgvc4_create_batch_query(struct pipe_context *pctx, unsigned num_queries,
11901e04c3fSmrg                       unsigned *query_types)
12001e04c3fSmrg{
12101e04c3fSmrg        struct vc4_query *query = calloc(1, sizeof(*query));
12201e04c3fSmrg        struct vc4_hwperfmon *hwperfmon;
12301e04c3fSmrg        unsigned i, nhwqueries = 0;
12401e04c3fSmrg
12501e04c3fSmrg        if (!query)
12601e04c3fSmrg                return NULL;
12701e04c3fSmrg
12801e04c3fSmrg        for (i = 0; i < num_queries; i++) {
12901e04c3fSmrg                if (query_types[i] >= PIPE_QUERY_DRIVER_SPECIFIC)
13001e04c3fSmrg                        nhwqueries++;
13101e04c3fSmrg        }
13201e04c3fSmrg
13301e04c3fSmrg        /* We can't mix HW and non-HW queries. */
13401e04c3fSmrg        if (nhwqueries && nhwqueries != num_queries)
13501e04c3fSmrg                goto err_free_query;
13601e04c3fSmrg
13701e04c3fSmrg        if (!nhwqueries)
13801e04c3fSmrg                return (struct pipe_query *)query;
13901e04c3fSmrg
14001e04c3fSmrg        hwperfmon = calloc(1, sizeof(*hwperfmon));
14101e04c3fSmrg        if (!hwperfmon)
14201e04c3fSmrg                goto err_free_query;
14301e04c3fSmrg
14401e04c3fSmrg        for (i = 0; i < num_queries; i++)
14501e04c3fSmrg                hwperfmon->events[i] = query_types[i] -
14601e04c3fSmrg                                       PIPE_QUERY_DRIVER_SPECIFIC;
14701e04c3fSmrg
14801e04c3fSmrg        query->hwperfmon = hwperfmon;
14901e04c3fSmrg        query->num_queries = num_queries;
15001e04c3fSmrg
15101e04c3fSmrg        /* Note that struct pipe_query isn't actually defined anywhere. */
15201e04c3fSmrg        return (struct pipe_query *)query;
15301e04c3fSmrg
15401e04c3fSmrgerr_free_query:
15501e04c3fSmrg        free(query);
15601e04c3fSmrg
15701e04c3fSmrg        return NULL;
15801e04c3fSmrg}
15901e04c3fSmrg
/**
 * Create a single query: a batch query of exactly one type.
 *
 * The index argument is not used.
 */
static struct pipe_query *
vc4_create_query(struct pipe_context *ctx, unsigned query_type, unsigned index)
{
        unsigned types[1] = { query_type };

        return vc4_create_batch_query(ctx, 1, types);
}
16501e04c3fSmrg
16601e04c3fSmrgstatic void
16701e04c3fSmrgvc4_destroy_query(struct pipe_context *pctx, struct pipe_query *pquery)
16801e04c3fSmrg{
16901e04c3fSmrg        struct vc4_context *ctx = vc4_context(pctx);
17001e04c3fSmrg        struct vc4_query *query = (struct vc4_query *)pquery;
17101e04c3fSmrg
17201e04c3fSmrg        if (query->hwperfmon && query->hwperfmon->id) {
17301e04c3fSmrg                if (query->hwperfmon->id) {
17401e04c3fSmrg                        struct drm_vc4_perfmon_destroy req = { };
17501e04c3fSmrg
17601e04c3fSmrg                        req.id = query->hwperfmon->id;
17701e04c3fSmrg                        vc4_ioctl(ctx->fd, DRM_IOCTL_VC4_PERFMON_DESTROY,
17801e04c3fSmrg                                  &req);
17901e04c3fSmrg                }
18001e04c3fSmrg
18101e04c3fSmrg                free(query->hwperfmon);
18201e04c3fSmrg        }
18301e04c3fSmrg
18401e04c3fSmrg        free(query);
18501e04c3fSmrg}
18601e04c3fSmrg
1877ec681f3Smrgstatic bool
18801e04c3fSmrgvc4_begin_query(struct pipe_context *pctx, struct pipe_query *pquery)
18901e04c3fSmrg{
19001e04c3fSmrg        struct vc4_query *query = (struct vc4_query *)pquery;
19101e04c3fSmrg        struct vc4_context *ctx = vc4_context(pctx);
19201e04c3fSmrg        struct drm_vc4_perfmon_create req = { };
19301e04c3fSmrg        unsigned i;
19401e04c3fSmrg        int ret;
19501e04c3fSmrg
19601e04c3fSmrg        if (!query->hwperfmon)
19701e04c3fSmrg                return true;
19801e04c3fSmrg
19901e04c3fSmrg        /* Only one perfmon can be activated per context. */
20001e04c3fSmrg        if (ctx->perfmon)
20101e04c3fSmrg                return false;
20201e04c3fSmrg
20301e04c3fSmrg        /* Reset the counters by destroying the previously allocated perfmon */
20401e04c3fSmrg        if (query->hwperfmon->id) {
20501e04c3fSmrg                struct drm_vc4_perfmon_destroy destroyreq = { };
20601e04c3fSmrg
20701e04c3fSmrg                destroyreq.id = query->hwperfmon->id;
20801e04c3fSmrg                vc4_ioctl(ctx->fd, DRM_IOCTL_VC4_PERFMON_DESTROY, &destroyreq);
20901e04c3fSmrg        }
21001e04c3fSmrg
21101e04c3fSmrg        for (i = 0; i < query->num_queries; i++)
21201e04c3fSmrg                req.events[i] = query->hwperfmon->events[i];
21301e04c3fSmrg
21401e04c3fSmrg        req.ncounters = query->num_queries;
21501e04c3fSmrg        ret = vc4_ioctl(ctx->fd, DRM_IOCTL_VC4_PERFMON_CREATE, &req);
21601e04c3fSmrg        if (ret)
21701e04c3fSmrg                return false;
21801e04c3fSmrg
21901e04c3fSmrg        query->hwperfmon->id = req.id;
22001e04c3fSmrg
22101e04c3fSmrg        /* Make sure all pendings jobs are flushed before activating the
22201e04c3fSmrg         * perfmon.
22301e04c3fSmrg         */
22401e04c3fSmrg        vc4_flush(pctx);
22501e04c3fSmrg        ctx->perfmon = query->hwperfmon;
22601e04c3fSmrg        return true;
22701e04c3fSmrg}
22801e04c3fSmrg
22901e04c3fSmrgstatic bool
23001e04c3fSmrgvc4_end_query(struct pipe_context *pctx, struct pipe_query *pquery)
23101e04c3fSmrg{
23201e04c3fSmrg        struct vc4_query *query = (struct vc4_query *)pquery;
23301e04c3fSmrg        struct vc4_context *ctx = vc4_context(pctx);
23401e04c3fSmrg
23501e04c3fSmrg        if (!query->hwperfmon)
23601e04c3fSmrg                return true;
23701e04c3fSmrg
23801e04c3fSmrg        if (ctx->perfmon != query->hwperfmon)
23901e04c3fSmrg                return false;
24001e04c3fSmrg
24101e04c3fSmrg        /* Make sure all pendings jobs are flushed before deactivating the
24201e04c3fSmrg         * perfmon.
24301e04c3fSmrg         */
24401e04c3fSmrg        vc4_flush(pctx);
24501e04c3fSmrg        ctx->perfmon = NULL;
24601e04c3fSmrg        return true;
24701e04c3fSmrg}
24801e04c3fSmrg
2497ec681f3Smrgstatic bool
25001e04c3fSmrgvc4_get_query_result(struct pipe_context *pctx, struct pipe_query *pquery,
2517ec681f3Smrg                     bool wait, union pipe_query_result *vresult)
25201e04c3fSmrg{
25301e04c3fSmrg        struct vc4_context *ctx = vc4_context(pctx);
25401e04c3fSmrg        struct vc4_query *query = (struct vc4_query *)pquery;
25501e04c3fSmrg        struct drm_vc4_perfmon_get_values req;
25601e04c3fSmrg        unsigned i;
25701e04c3fSmrg        int ret;
25801e04c3fSmrg
25901e04c3fSmrg        if (!query->hwperfmon) {
26001e04c3fSmrg                vresult->u64 = 0;
26101e04c3fSmrg                return true;
26201e04c3fSmrg        }
26301e04c3fSmrg
26401e04c3fSmrg        if (!vc4_wait_seqno(ctx->screen, query->hwperfmon->last_seqno,
26501e04c3fSmrg                            wait ? PIPE_TIMEOUT_INFINITE : 0, "perfmon"))
26601e04c3fSmrg                return false;
26701e04c3fSmrg
26801e04c3fSmrg        req.id = query->hwperfmon->id;
26901e04c3fSmrg        req.values_ptr = (uintptr_t)query->hwperfmon->counters;
27001e04c3fSmrg        ret = vc4_ioctl(ctx->fd, DRM_IOCTL_VC4_PERFMON_GET_VALUES, &req);
27101e04c3fSmrg        if (ret)
27201e04c3fSmrg                return false;
27301e04c3fSmrg
27401e04c3fSmrg        for (i = 0; i < query->num_queries; i++)
27501e04c3fSmrg                vresult->batch[i].u64 = query->hwperfmon->counters[i];
27601e04c3fSmrg
27701e04c3fSmrg        return true;
27801e04c3fSmrg}
27901e04c3fSmrg
/**
 * Hook for enabling/disabling active queries; this driver keeps no state
 * for it, so the call is a no-op.
 */
static void
vc4_set_active_query_state(struct pipe_context *pctx, bool enable)
{
        /* Intentionally empty. */
        (void)pctx;
        (void)enable;
}
28401e04c3fSmrg
28501e04c3fSmrgvoid
28601e04c3fSmrgvc4_query_init(struct pipe_context *pctx)
28701e04c3fSmrg{
28801e04c3fSmrg        pctx->create_query = vc4_create_query;
28901e04c3fSmrg        pctx->create_batch_query = vc4_create_batch_query;
29001e04c3fSmrg        pctx->destroy_query = vc4_destroy_query;
29101e04c3fSmrg        pctx->begin_query = vc4_begin_query;
29201e04c3fSmrg        pctx->end_query = vc4_end_query;
29301e04c3fSmrg        pctx->get_query_result = vc4_get_query_result;
29401e04c3fSmrg        pctx->set_active_query_state = vc4_set_active_query_state;
29501e04c3fSmrg}
296