1/* 2 * Copyright © 2021 Raspberry Pi 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24/** 25 * Gallium query object support for performance counters 26 * 27 * This contains the performance V3D counters queries. 28 */ 29 30#include "v3d_query.h" 31 32struct v3d_query_perfcnt 33{ 34 struct v3d_query base; 35 36 unsigned num_queries; 37 struct v3d_perfmon_state *perfmon; 38}; 39 40static const char *v3d_counter_names[] = { 41 "FEP-valid-primitives-no-rendered-pixels", 42 "FEP-valid-primitives-rendered-pixels", 43 "FEP-clipped-quads", 44 "FEP-valid-quads", 45 "TLB-quads-not-passing-stencil-test", 46 "TLB-quads-not-passing-z-and-stencil-test", 47 "TLB-quads-passing-z-and-stencil-test", 48 "TLB-quads-with-zero-coverage", 49 "TLB-quads-with-non-zero-coverage", 50 "TLB-quads-written-to-color-buffer", 51 "PTB-primitives-discarded-outside-viewport", 52 "PTB-primitives-need-clipping", 53 "PTB-primitives-discared-reversed", 54 "QPU-total-idle-clk-cycles", 55 "QPU-total-active-clk-cycles-vertex-coord-shading", 56 "QPU-total-active-clk-cycles-fragment-shading", 57 "QPU-total-clk-cycles-executing-valid-instr", 58 "QPU-total-clk-cycles-waiting-TMU", 59 "QPU-total-clk-cycles-waiting-scoreboard", 60 "QPU-total-clk-cycles-waiting-varyings", 61 "QPU-total-instr-cache-hit", 62 "QPU-total-instr-cache-miss", 63 "QPU-total-uniform-cache-hit", 64 "QPU-total-uniform-cache-miss", 65 "TMU-total-text-quads-access", 66 "TMU-total-text-cache-miss", 67 "VPM-total-clk-cycles-VDW-stalled", 68 "VPM-total-clk-cycles-VCD-stalled", 69 "CLE-bin-thread-active-cycles", 70 "CLE-render-thread-active-cycles", 71 "L2T-total-cache-hit", 72 "L2T-total-cache-miss", 73 "cycle-count", 74 "QPU-total-clk-cycles-waiting-vertex-coord-shading", 75 "QPU-total-clk-cycles-waiting-fragment-shading", 76 "PTB-primitives-binned", 77 "AXI-writes-seen-watch-0", 78 "AXI-reads-seen-watch-0", 79 "AXI-writes-stalled-seen-watch-0", 80 "AXI-reads-stalled-seen-watch-0", 81 "AXI-write-bytes-seen-watch-0", 82 "AXI-read-bytes-seen-watch-0", 83 "AXI-writes-seen-watch-1", 84 "AXI-reads-seen-watch-1", 85 "AXI-writes-stalled-seen-watch-1", 86 "AXI-reads-stalled-seen-watch-1", 87 "AXI-write-bytes-seen-watch-1", 88 "AXI-read-bytes-seen-watch-1", 89 "TLB-partial-quads-written-to-color-buffer", 90 "TMU-total-config-access", 91 "L2T-no-id-stalled", 92 "L2T-command-queue-stalled", 93 "L2T-TMU-writes", 94 "TMU-active-cycles", 95 "TMU-stalled-cycles", 96 "CLE-thread-active-cycles", 97 "L2T-TMU-reads", 98 "L2T-CLE-reads", 99 "L2T-VCD-reads", 100 "L2T-TMU-config-reads", 101 "L2T-SLC0-reads", 102 "L2T-SLC1-reads", 103 "L2T-SLC2-reads", 104 "L2T-TMU-write-miss", 105 "L2T-TMU-read-miss", 106 "L2T-CLE-read-miss", 107 "L2T-VCD-read-miss", 108 "L2T-TMU-config-read-miss", 109 "L2T-SLC0-read-miss", 110 "L2T-SLC1-read-miss", 111 "L2T-SLC2-read-miss", 112 "core-memory-writes", 113 "L2T-memory-writes", 114 "PTB-memory-writes", 115 "TLB-memory-writes", 116 "core-memory-reads", 117 "L2T-memory-reads", 118 "PTB-memory-reads", 119 "PSE-memory-reads", 120 "TLB-memory-reads", 121 "GMP-memory-reads", 122 "PTB-memory-words-writes", 123 "TLB-memory-words-writes", 124 "PSE-memory-words-reads", 125 "TLB-memory-words-reads", 126 "TMU-MRU-hits", 127 "compute-active-cycles", 128}; 129 130static void 131kperfmon_destroy(struct v3d_context *v3d, struct v3d_perfmon_state *perfmon) 132{ 133 struct drm_v3d_perfmon_destroy destroyreq; 134 135 destroyreq.id = perfmon->kperfmon_id; 136 int ret = v3d_ioctl(v3d->fd, DRM_IOCTL_V3D_PERFMON_DESTROY, &destroyreq); 137 if (ret != 0) 138 fprintf(stderr, "failed to destroy perfmon %d: %s\n", 139 perfmon->kperfmon_id, strerror(errno)); 140} 141 142int 143v3d_get_driver_query_group_info_perfcnt(struct v3d_screen *screen, unsigned index, 144 struct pipe_driver_query_group_info *info) 145{ 146 if (!screen->has_perfmon) 147 return 0; 148 149 if (!info) 150 return 1; 151 152 if (index > 0) 153 return 0; 154 155 info->name = "V3D counters"; 156 info->max_active_queries = DRM_V3D_MAX_PERF_COUNTERS; 157 info->num_queries = ARRAY_SIZE(v3d_counter_names); 158 159 return 1; 160} 161 162int 163v3d_get_driver_query_info_perfcnt(struct v3d_screen *screen, unsigned index, 164 struct pipe_driver_query_info *info) 165{ 166 if (!screen->has_perfmon) 167 return 0; 168 169 if (!info) 170 return ARRAY_SIZE(v3d_counter_names); 171 172 if (index >= ARRAY_SIZE(v3d_counter_names)) 173 return 0; 174 175 info->group_id = 0; 176 info->name = v3d_counter_names[index]; 177 info->query_type = PIPE_QUERY_DRIVER_SPECIFIC + index; 178 info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE; 179 info->type = PIPE_DRIVER_QUERY_TYPE_UINT64; 180 info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH; 181 182 return 1; 183} 184 185static void 186v3d_destroy_query_perfcnt(struct v3d_context *v3d, struct v3d_query *query) 187{ 188 struct v3d_query_perfcnt *pquery = (struct v3d_query_perfcnt *)query; 189 190 assert(pquery->perfmon); 191 192 if (v3d->active_perfmon == pquery->perfmon) { 193 fprintf(stderr, "Query is active; end query before destroying\n"); 194 return; 195 } 196 if (pquery->perfmon->kperfmon_id) 197 kperfmon_destroy(v3d, pquery->perfmon); 198 199 v3d_fence_unreference(&pquery->perfmon->last_job_fence); 200 free(pquery->perfmon); 201 free(query); 202} 203 204static bool 205v3d_begin_query_perfcnt(struct v3d_context *v3d, struct v3d_query *query) 206{ 207 struct v3d_query_perfcnt *pquery = (struct v3d_query_perfcnt *)query; 208 struct drm_v3d_perfmon_create createreq = { 0 }; 209 int i, ret; 210 211 /* Only one perfmon can be activated per context */ 212 if (v3d->active_perfmon) { 213 fprintf(stderr, 214 "Another query is already active; " 215 "finish it before starting a new one\n"); 216 return false; 217 } 218 219 assert(pquery->perfmon); 220 221 /* Reset the counters by destroying the previously allocated perfmon */ 222 if (pquery->perfmon->kperfmon_id) 223 kperfmon_destroy(v3d, pquery->perfmon); 224 225 for (i = 0; i < pquery->num_queries; i++) 226 createreq.counters[i] = pquery->perfmon->counters[i]; 227 228 createreq.ncounters = pquery->num_queries; 229 ret = v3d_ioctl(v3d->fd, DRM_IOCTL_V3D_PERFMON_CREATE, &createreq); 230 if (ret != 0) 231 return false; 232 233 pquery->perfmon->kperfmon_id = createreq.id; 234 pquery->perfmon->job_submitted = false; 235 v3d_fence_unreference(&pquery->perfmon->last_job_fence); 236 237 /* Ensure all pending jobs are flushed before activating the 238 * perfmon 239 */ 240 v3d_flush((struct pipe_context *)v3d); 241 v3d->active_perfmon = pquery->perfmon; 242 243 return true; 244} 245 246static bool 247v3d_end_query_perfcnt(struct v3d_context *v3d, struct v3d_query *query) 248{ 249 struct v3d_query_perfcnt *pquery = (struct v3d_query_perfcnt *)query; 250 251 assert(pquery->perfmon); 252 253 if (v3d->active_perfmon != pquery->perfmon) { 254 fprintf(stderr, "This query is not active\n"); 255 return false; 256 } 257 258 /* Ensure all pending jobs are flushed before deactivating the 259 * perfmon 260 */ 261 v3d_flush((struct pipe_context *)v3d); 262 263 /* Get a copy of latest submitted job's fence to wait for its 264 * completion 265 */ 266 if (v3d->active_perfmon->job_submitted) 267 v3d->active_perfmon->last_job_fence = v3d_fence_create(v3d); 268 269 v3d->active_perfmon = NULL; 270 271 return true; 272} 273 274static bool 275v3d_get_query_result_perfcnt(struct v3d_context *v3d, struct v3d_query *query, 276 bool wait, union pipe_query_result *vresult) 277{ 278 struct v3d_query_perfcnt *pquery = (struct v3d_query_perfcnt *)query; 279 struct drm_v3d_perfmon_get_values req = { 0 }; 280 int i, ret; 281 282 assert(pquery->perfmon); 283 284 if (pquery->perfmon->job_submitted) { 285 if (!v3d_fence_wait(v3d->screen, 286 pquery->perfmon->last_job_fence, 287 wait ? PIPE_TIMEOUT_INFINITE : 0)) 288 return false; 289 290 req.id = pquery->perfmon->kperfmon_id; 291 req.values_ptr = (uintptr_t)pquery->perfmon->values; 292 ret = v3d_ioctl(v3d->fd, DRM_IOCTL_V3D_PERFMON_GET_VALUES, &req); 293 if (ret != 0) { 294 fprintf(stderr, "Can't request perfmon counters values\n"); 295 return false; 296 } 297 } 298 299 for (i = 0; i < pquery->num_queries; i++) 300 vresult->batch[i].u64 = pquery->perfmon->values[i]; 301 302 return true; 303} 304 305static const struct v3d_query_funcs perfcnt_query_funcs = { 306 .destroy_query = v3d_destroy_query_perfcnt, 307 .begin_query = v3d_begin_query_perfcnt, 308 .end_query = v3d_end_query_perfcnt, 309 .get_query_result = v3d_get_query_result_perfcnt, 310}; 311 312struct pipe_query * 313v3d_create_batch_query_perfcnt(struct v3d_context *v3d, unsigned num_queries, 314 unsigned *query_types) 315{ 316 struct v3d_query_perfcnt *pquery = NULL; 317 struct v3d_query *query; 318 struct v3d_perfmon_state *perfmon = NULL; 319 int i; 320 321 /* Validate queries */ 322 for (i = 0; i < num_queries; i++) { 323 if (query_types[i] < PIPE_QUERY_DRIVER_SPECIFIC || 324 query_types[i] >= PIPE_QUERY_DRIVER_SPECIFIC + 325 ARRAY_SIZE(v3d_counter_names)) { 326 fprintf(stderr, "Invalid query type\n"); 327 return NULL; 328 } 329 } 330 331 pquery = calloc(1, sizeof(*pquery)); 332 if (!pquery) 333 return NULL; 334 335 perfmon = calloc(1, sizeof(*perfmon)); 336 if (!perfmon) { 337 free(pquery); 338 return NULL; 339 } 340 341 for (i = 0; i < num_queries; i++) 342 perfmon->counters[i] = query_types[i] - PIPE_QUERY_DRIVER_SPECIFIC; 343 344 pquery->perfmon = perfmon; 345 pquery->num_queries = num_queries; 346 347 query = &pquery->base; 348 query->funcs = &perfcnt_query_funcs; 349 350 /* Note that struct pipe_query isn't actually defined anywhere. */ 351 return (struct pipe_query *)query; 352} 353