101e04c3fSmrg/* 201e04c3fSmrg * Copyright © 2014 Broadcom 301e04c3fSmrg * 401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a 501e04c3fSmrg * copy of this software and associated documentation files (the "Software"), 601e04c3fSmrg * to deal in the Software without restriction, including without limitation 701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the 901e04c3fSmrg * Software is furnished to do so, subject to the following conditions: 1001e04c3fSmrg * 1101e04c3fSmrg * The above copyright notice and this permission notice (including the next 1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the 1301e04c3fSmrg * Software. 1401e04c3fSmrg * 1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 2101e04c3fSmrg * IN THE SOFTWARE. 2201e04c3fSmrg */ 2301e04c3fSmrg 2401e04c3fSmrg/** 2501e04c3fSmrg * Expose V3D HW perf counters. 2601e04c3fSmrg * 2701e04c3fSmrg * We also have code to fake support for occlusion queries. 2801e04c3fSmrg * Since we expose support for GL 2.0, we have to expose occlusion queries, 2901e04c3fSmrg * but the spec allows you to expose 0 query counter bits, so we just return 0 3001e04c3fSmrg * as the result of all our queries. 3101e04c3fSmrg */ 3201e04c3fSmrg#include "vc4_context.h" 3301e04c3fSmrg 3401e04c3fSmrgstruct vc4_query 3501e04c3fSmrg{ 3601e04c3fSmrg unsigned num_queries; 3701e04c3fSmrg struct vc4_hwperfmon *hwperfmon; 3801e04c3fSmrg}; 3901e04c3fSmrg 4001e04c3fSmrgstatic const char *v3d_counter_names[] = { 4101e04c3fSmrg "FEP-valid-primitives-no-rendered-pixels", 4201e04c3fSmrg "FEP-valid-primitives-rendered-pixels", 4301e04c3fSmrg "FEP-clipped-quads", 4401e04c3fSmrg "FEP-valid-quads", 4501e04c3fSmrg "TLB-quads-not-passing-stencil-test", 4601e04c3fSmrg "TLB-quads-not-passing-z-and-stencil-test", 4701e04c3fSmrg "TLB-quads-passing-z-and-stencil-test", 4801e04c3fSmrg "TLB-quads-with-zero-coverage", 4901e04c3fSmrg "TLB-quads-with-non-zero-coverage", 5001e04c3fSmrg "TLB-quads-written-to-color-buffer", 5101e04c3fSmrg "PTB-primitives-discarded-outside-viewport", 5201e04c3fSmrg "PTB-primitives-need-clipping", 5301e04c3fSmrg "PTB-primitives-discared-reversed", 5401e04c3fSmrg "QPU-total-idle-clk-cycles", 5501e04c3fSmrg "QPU-total-clk-cycles-vertex-coord-shading", 5601e04c3fSmrg "QPU-total-clk-cycles-fragment-shading", 5701e04c3fSmrg "QPU-total-clk-cycles-executing-valid-instr", 5801e04c3fSmrg "QPU-total-clk-cycles-waiting-TMU", 5901e04c3fSmrg "QPU-total-clk-cycles-waiting-scoreboard", 6001e04c3fSmrg "QPU-total-clk-cycles-waiting-varyings", 6101e04c3fSmrg "QPU-total-instr-cache-hit", 6201e04c3fSmrg "QPU-total-instr-cache-miss", 6301e04c3fSmrg "QPU-total-uniform-cache-hit", 6401e04c3fSmrg "QPU-total-uniform-cache-miss", 6501e04c3fSmrg "TMU-total-text-quads-processed", 6601e04c3fSmrg "TMU-total-text-cache-miss", 6701e04c3fSmrg "VPM-total-clk-cycles-VDW-stalled", 6801e04c3fSmrg "VPM-total-clk-cycles-VCD-stalled", 6901e04c3fSmrg "L2C-total-cache-hit", 7001e04c3fSmrg "L2C-total-cache-miss", 7101e04c3fSmrg}; 7201e04c3fSmrg 7301e04c3fSmrgint vc4_get_driver_query_group_info(struct pipe_screen *pscreen, 7401e04c3fSmrg unsigned index, 7501e04c3fSmrg struct pipe_driver_query_group_info *info) 7601e04c3fSmrg{ 7701e04c3fSmrg struct vc4_screen *screen = vc4_screen(pscreen); 7801e04c3fSmrg 7901e04c3fSmrg if (!screen->has_perfmon_ioctl) 8001e04c3fSmrg return 0; 8101e04c3fSmrg 8201e04c3fSmrg if (!info) 8301e04c3fSmrg return 1; 8401e04c3fSmrg 8501e04c3fSmrg if (index > 0) 8601e04c3fSmrg return 0; 8701e04c3fSmrg 8801e04c3fSmrg info->name = "V3D counters"; 8901e04c3fSmrg info->max_active_queries = DRM_VC4_MAX_PERF_COUNTERS; 9001e04c3fSmrg info->num_queries = ARRAY_SIZE(v3d_counter_names); 9101e04c3fSmrg return 1; 9201e04c3fSmrg} 9301e04c3fSmrg 9401e04c3fSmrgint vc4_get_driver_query_info(struct pipe_screen *pscreen, unsigned index, 9501e04c3fSmrg struct pipe_driver_query_info *info) 9601e04c3fSmrg{ 9701e04c3fSmrg struct vc4_screen *screen = vc4_screen(pscreen); 9801e04c3fSmrg 9901e04c3fSmrg if (!screen->has_perfmon_ioctl) 10001e04c3fSmrg return 0; 10101e04c3fSmrg 10201e04c3fSmrg if (!info) 10301e04c3fSmrg return ARRAY_SIZE(v3d_counter_names); 10401e04c3fSmrg 10501e04c3fSmrg if (index >= ARRAY_SIZE(v3d_counter_names)) 10601e04c3fSmrg return 0; 10701e04c3fSmrg 10801e04c3fSmrg info->group_id = 0; 10901e04c3fSmrg info->name = v3d_counter_names[index]; 11001e04c3fSmrg info->query_type = PIPE_QUERY_DRIVER_SPECIFIC + index; 11101e04c3fSmrg info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE; 11201e04c3fSmrg info->type = PIPE_DRIVER_QUERY_TYPE_UINT64; 11301e04c3fSmrg info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH; 11401e04c3fSmrg return 1; 11501e04c3fSmrg} 11601e04c3fSmrg 11701e04c3fSmrgstatic struct pipe_query * 11801e04c3fSmrgvc4_create_batch_query(struct pipe_context *pctx, unsigned num_queries, 11901e04c3fSmrg unsigned *query_types) 12001e04c3fSmrg{ 12101e04c3fSmrg struct vc4_query *query = calloc(1, sizeof(*query)); 12201e04c3fSmrg struct vc4_hwperfmon *hwperfmon; 12301e04c3fSmrg unsigned i, nhwqueries = 0; 12401e04c3fSmrg 12501e04c3fSmrg if (!query) 12601e04c3fSmrg return NULL; 12701e04c3fSmrg 12801e04c3fSmrg for (i = 0; i < num_queries; i++) { 12901e04c3fSmrg if (query_types[i] >= PIPE_QUERY_DRIVER_SPECIFIC) 13001e04c3fSmrg nhwqueries++; 13101e04c3fSmrg } 13201e04c3fSmrg 13301e04c3fSmrg /* We can't mix HW and non-HW queries. */ 13401e04c3fSmrg if (nhwqueries && nhwqueries != num_queries) 13501e04c3fSmrg goto err_free_query; 13601e04c3fSmrg 13701e04c3fSmrg if (!nhwqueries) 13801e04c3fSmrg return (struct pipe_query *)query; 13901e04c3fSmrg 14001e04c3fSmrg hwperfmon = calloc(1, sizeof(*hwperfmon)); 14101e04c3fSmrg if (!hwperfmon) 14201e04c3fSmrg goto err_free_query; 14301e04c3fSmrg 14401e04c3fSmrg for (i = 0; i < num_queries; i++) 14501e04c3fSmrg hwperfmon->events[i] = query_types[i] - 14601e04c3fSmrg PIPE_QUERY_DRIVER_SPECIFIC; 14701e04c3fSmrg 14801e04c3fSmrg query->hwperfmon = hwperfmon; 14901e04c3fSmrg query->num_queries = num_queries; 15001e04c3fSmrg 15101e04c3fSmrg /* Note that struct pipe_query isn't actually defined anywhere. */ 15201e04c3fSmrg return (struct pipe_query *)query; 15301e04c3fSmrg 15401e04c3fSmrgerr_free_query: 15501e04c3fSmrg free(query); 15601e04c3fSmrg 15701e04c3fSmrg return NULL; 15801e04c3fSmrg} 15901e04c3fSmrg 16001e04c3fSmrgstatic struct pipe_query * 16101e04c3fSmrgvc4_create_query(struct pipe_context *ctx, unsigned query_type, unsigned index) 16201e04c3fSmrg{ 16301e04c3fSmrg return vc4_create_batch_query(ctx, 1, &query_type); 16401e04c3fSmrg} 16501e04c3fSmrg 16601e04c3fSmrgstatic void 16701e04c3fSmrgvc4_destroy_query(struct pipe_context *pctx, struct pipe_query *pquery) 16801e04c3fSmrg{ 16901e04c3fSmrg struct vc4_context *ctx = vc4_context(pctx); 17001e04c3fSmrg struct vc4_query *query = (struct vc4_query *)pquery; 17101e04c3fSmrg 17201e04c3fSmrg if (query->hwperfmon && query->hwperfmon->id) { 17301e04c3fSmrg if (query->hwperfmon->id) { 17401e04c3fSmrg struct drm_vc4_perfmon_destroy req = { }; 17501e04c3fSmrg 17601e04c3fSmrg req.id = query->hwperfmon->id; 17701e04c3fSmrg vc4_ioctl(ctx->fd, DRM_IOCTL_VC4_PERFMON_DESTROY, 17801e04c3fSmrg &req); 17901e04c3fSmrg } 18001e04c3fSmrg 18101e04c3fSmrg free(query->hwperfmon); 18201e04c3fSmrg } 18301e04c3fSmrg 18401e04c3fSmrg free(query); 18501e04c3fSmrg} 18601e04c3fSmrg 1877ec681f3Smrgstatic bool 18801e04c3fSmrgvc4_begin_query(struct pipe_context *pctx, struct pipe_query *pquery) 18901e04c3fSmrg{ 19001e04c3fSmrg struct vc4_query *query = (struct vc4_query *)pquery; 19101e04c3fSmrg struct vc4_context *ctx = vc4_context(pctx); 19201e04c3fSmrg struct drm_vc4_perfmon_create req = { }; 19301e04c3fSmrg unsigned i; 19401e04c3fSmrg int ret; 19501e04c3fSmrg 19601e04c3fSmrg if (!query->hwperfmon) 19701e04c3fSmrg return true; 19801e04c3fSmrg 19901e04c3fSmrg /* Only one perfmon can be activated per context. */ 20001e04c3fSmrg if (ctx->perfmon) 20101e04c3fSmrg return false; 20201e04c3fSmrg 20301e04c3fSmrg /* Reset the counters by destroying the previously allocated perfmon */ 20401e04c3fSmrg if (query->hwperfmon->id) { 20501e04c3fSmrg struct drm_vc4_perfmon_destroy destroyreq = { }; 20601e04c3fSmrg 20701e04c3fSmrg destroyreq.id = query->hwperfmon->id; 20801e04c3fSmrg vc4_ioctl(ctx->fd, DRM_IOCTL_VC4_PERFMON_DESTROY, &destroyreq); 20901e04c3fSmrg } 21001e04c3fSmrg 21101e04c3fSmrg for (i = 0; i < query->num_queries; i++) 21201e04c3fSmrg req.events[i] = query->hwperfmon->events[i]; 21301e04c3fSmrg 21401e04c3fSmrg req.ncounters = query->num_queries; 21501e04c3fSmrg ret = vc4_ioctl(ctx->fd, DRM_IOCTL_VC4_PERFMON_CREATE, &req); 21601e04c3fSmrg if (ret) 21701e04c3fSmrg return false; 21801e04c3fSmrg 21901e04c3fSmrg query->hwperfmon->id = req.id; 22001e04c3fSmrg 22101e04c3fSmrg /* Make sure all pendings jobs are flushed before activating the 22201e04c3fSmrg * perfmon. 22301e04c3fSmrg */ 22401e04c3fSmrg vc4_flush(pctx); 22501e04c3fSmrg ctx->perfmon = query->hwperfmon; 22601e04c3fSmrg return true; 22701e04c3fSmrg} 22801e04c3fSmrg 22901e04c3fSmrgstatic bool 23001e04c3fSmrgvc4_end_query(struct pipe_context *pctx, struct pipe_query *pquery) 23101e04c3fSmrg{ 23201e04c3fSmrg struct vc4_query *query = (struct vc4_query *)pquery; 23301e04c3fSmrg struct vc4_context *ctx = vc4_context(pctx); 23401e04c3fSmrg 23501e04c3fSmrg if (!query->hwperfmon) 23601e04c3fSmrg return true; 23701e04c3fSmrg 23801e04c3fSmrg if (ctx->perfmon != query->hwperfmon) 23901e04c3fSmrg return false; 24001e04c3fSmrg 24101e04c3fSmrg /* Make sure all pendings jobs are flushed before deactivating the 24201e04c3fSmrg * perfmon. 24301e04c3fSmrg */ 24401e04c3fSmrg vc4_flush(pctx); 24501e04c3fSmrg ctx->perfmon = NULL; 24601e04c3fSmrg return true; 24701e04c3fSmrg} 24801e04c3fSmrg 2497ec681f3Smrgstatic bool 25001e04c3fSmrgvc4_get_query_result(struct pipe_context *pctx, struct pipe_query *pquery, 2517ec681f3Smrg bool wait, union pipe_query_result *vresult) 25201e04c3fSmrg{ 25301e04c3fSmrg struct vc4_context *ctx = vc4_context(pctx); 25401e04c3fSmrg struct vc4_query *query = (struct vc4_query *)pquery; 25501e04c3fSmrg struct drm_vc4_perfmon_get_values req; 25601e04c3fSmrg unsigned i; 25701e04c3fSmrg int ret; 25801e04c3fSmrg 25901e04c3fSmrg if (!query->hwperfmon) { 26001e04c3fSmrg vresult->u64 = 0; 26101e04c3fSmrg return true; 26201e04c3fSmrg } 26301e04c3fSmrg 26401e04c3fSmrg if (!vc4_wait_seqno(ctx->screen, query->hwperfmon->last_seqno, 26501e04c3fSmrg wait ? PIPE_TIMEOUT_INFINITE : 0, "perfmon")) 26601e04c3fSmrg return false; 26701e04c3fSmrg 26801e04c3fSmrg req.id = query->hwperfmon->id; 26901e04c3fSmrg req.values_ptr = (uintptr_t)query->hwperfmon->counters; 27001e04c3fSmrg ret = vc4_ioctl(ctx->fd, DRM_IOCTL_VC4_PERFMON_GET_VALUES, &req); 27101e04c3fSmrg if (ret) 27201e04c3fSmrg return false; 27301e04c3fSmrg 27401e04c3fSmrg for (i = 0; i < query->num_queries; i++) 27501e04c3fSmrg vresult->batch[i].u64 = query->hwperfmon->counters[i]; 27601e04c3fSmrg 27701e04c3fSmrg return true; 27801e04c3fSmrg} 27901e04c3fSmrg 28001e04c3fSmrgstatic void 2817ec681f3Smrgvc4_set_active_query_state(struct pipe_context *pctx, bool enable) 28201e04c3fSmrg{ 28301e04c3fSmrg} 28401e04c3fSmrg 28501e04c3fSmrgvoid 28601e04c3fSmrgvc4_query_init(struct pipe_context *pctx) 28701e04c3fSmrg{ 28801e04c3fSmrg pctx->create_query = vc4_create_query; 28901e04c3fSmrg pctx->create_batch_query = vc4_create_batch_query; 29001e04c3fSmrg pctx->destroy_query = vc4_destroy_query; 29101e04c3fSmrg pctx->begin_query = vc4_begin_query; 29201e04c3fSmrg pctx->end_query = vc4_end_query; 29301e04c3fSmrg pctx->get_query_result = vc4_get_query_result; 29401e04c3fSmrg pctx->set_active_query_state = vc4_set_active_query_state; 29501e04c3fSmrg} 296