/*
 * Copyright (C) 2017 Rob Clark <robclark@freedesktop.org>
 * Copyright © 2018 Google, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
23 * 24 * Authors: 25 * Rob Clark <robclark@freedesktop.org> 26 */ 27 28/* NOTE: see https://github.com/freedreno/freedreno/wiki/A5xx-Queries */ 29 30#include "freedreno_query_acc.h" 31#include "freedreno_resource.h" 32 33#include "fd6_context.h" 34#include "fd6_emit.h" 35#include "fd6_format.h" 36#include "fd6_query.h" 37 38struct PACKED fd6_query_sample { 39 uint64_t start; 40 uint64_t result; 41 uint64_t stop; 42}; 43 44/* offset of a single field of an array of fd6_query_sample: */ 45#define query_sample_idx(aq, idx, field) \ 46 fd_resource((aq)->prsc)->bo, \ 47 (idx * sizeof(struct fd6_query_sample)) + \ 48 offsetof(struct fd6_query_sample, field), \ 49 0, 0 50 51/* offset of a single field of fd6_query_sample: */ 52#define query_sample(aq, field) \ 53 query_sample_idx(aq, 0, field) 54 55/* 56 * Occlusion Query: 57 * 58 * OCCLUSION_COUNTER and OCCLUSION_PREDICATE differ only in how they 59 * interpret results 60 */ 61 62static void 63occlusion_resume(struct fd_acc_query *aq, struct fd_batch *batch) 64{ 65 struct fd_ringbuffer *ring = batch->draw; 66 67 OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_CONTROL, 1); 68 OUT_RING(ring, A6XX_RB_SAMPLE_COUNT_CONTROL_COPY); 69 70 OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_ADDR_LO, 2); 71 OUT_RELOCW(ring, query_sample(aq, start)); 72 73 fd6_event_write(batch, ring, ZPASS_DONE, false); 74 75 fd6_context(batch->ctx)->samples_passed_queries++; 76} 77 78static void 79occlusion_pause(struct fd_acc_query *aq, struct fd_batch *batch) 80{ 81 struct fd_ringbuffer *ring = batch->draw; 82 83 OUT_PKT7(ring, CP_MEM_WRITE, 4); 84 OUT_RELOCW(ring, query_sample(aq, stop)); 85 OUT_RING(ring, 0xffffffff); 86 OUT_RING(ring, 0xffffffff); 87 88 OUT_PKT7(ring, CP_WAIT_MEM_WRITES, 0); 89 90 OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_CONTROL, 1); 91 OUT_RING(ring, A6XX_RB_SAMPLE_COUNT_CONTROL_COPY); 92 93 OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_ADDR_LO, 2); 94 OUT_RELOCW(ring, query_sample(aq, stop)); 95 96 OUT_PKT7(ring, CP_EVENT_WRITE, 1); 97 
OUT_RING(ring, ZPASS_DONE); 98 fd_reset_wfi(batch); 99 100 OUT_PKT7(ring, CP_WAIT_REG_MEM, 6); 101 OUT_RING(ring, 0x00000014); // XXX 102 OUT_RELOC(ring, query_sample(aq, stop)); 103 OUT_RING(ring, 0xffffffff); 104 OUT_RING(ring, 0xffffffff); 105 OUT_RING(ring, 0x00000010); // XXX 106 107 /* result += stop - start: */ 108 OUT_PKT7(ring, CP_MEM_TO_MEM, 9); 109 OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | 110 CP_MEM_TO_MEM_0_NEG_C); 111 OUT_RELOCW(ring, query_sample(aq, result)); /* dst */ 112 OUT_RELOC(ring, query_sample(aq, result)); /* srcA */ 113 OUT_RELOC(ring, query_sample(aq, stop)); /* srcB */ 114 OUT_RELOC(ring, query_sample(aq, start)); /* srcC */ 115 116 fd6_context(batch->ctx)->samples_passed_queries--; 117} 118 119static void 120occlusion_counter_result(struct fd_acc_query *aq, void *buf, 121 union pipe_query_result *result) 122{ 123 struct fd6_query_sample *sp = buf; 124 result->u64 = sp->result; 125} 126 127static void 128occlusion_predicate_result(struct fd_acc_query *aq, void *buf, 129 union pipe_query_result *result) 130{ 131 struct fd6_query_sample *sp = buf; 132 result->b = !!sp->result; 133} 134 135static const struct fd_acc_sample_provider occlusion_counter = { 136 .query_type = PIPE_QUERY_OCCLUSION_COUNTER, 137 .active = FD_STAGE_DRAW, 138 .size = sizeof(struct fd6_query_sample), 139 .resume = occlusion_resume, 140 .pause = occlusion_pause, 141 .result = occlusion_counter_result, 142}; 143 144static const struct fd_acc_sample_provider occlusion_predicate = { 145 .query_type = PIPE_QUERY_OCCLUSION_PREDICATE, 146 .active = FD_STAGE_DRAW, 147 .size = sizeof(struct fd6_query_sample), 148 .resume = occlusion_resume, 149 .pause = occlusion_pause, 150 .result = occlusion_predicate_result, 151}; 152 153static const struct fd_acc_sample_provider occlusion_predicate_conservative = { 154 .query_type = PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE, 155 .active = FD_STAGE_DRAW, 156 .size = sizeof(struct fd6_query_sample), 157 .resume = occlusion_resume, 158 .pause 
= occlusion_pause, 159 .result = occlusion_predicate_result, 160}; 161 162/* 163 * Timestamp Queries: 164 */ 165 166static void 167timestamp_resume(struct fd_acc_query *aq, struct fd_batch *batch) 168{ 169 struct fd_ringbuffer *ring = batch->draw; 170 171 OUT_PKT7(ring, CP_EVENT_WRITE, 4); 172 OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_AND_INV_EVENT) | 173 CP_EVENT_WRITE_0_TIMESTAMP); 174 OUT_RELOCW(ring, query_sample(aq, start)); 175 OUT_RING(ring, 0x00000000); 176 177 fd_reset_wfi(batch); 178} 179 180static void 181timestamp_pause(struct fd_acc_query *aq, struct fd_batch *batch) 182{ 183 struct fd_ringbuffer *ring = batch->draw; 184 185 OUT_PKT7(ring, CP_EVENT_WRITE, 4); 186 OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_AND_INV_EVENT) | 187 CP_EVENT_WRITE_0_TIMESTAMP); 188 OUT_RELOCW(ring, query_sample(aq, stop)); 189 OUT_RING(ring, 0x00000000); 190 191 fd_reset_wfi(batch); 192 fd_wfi(batch, ring); 193 194 /* result += stop - start: */ 195 OUT_PKT7(ring, CP_MEM_TO_MEM, 9); 196 OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | 197 CP_MEM_TO_MEM_0_NEG_C); 198 OUT_RELOCW(ring, query_sample(aq, result)); /* dst */ 199 OUT_RELOC(ring, query_sample(aq, result)); /* srcA */ 200 OUT_RELOC(ring, query_sample(aq, stop)); /* srcB */ 201 OUT_RELOC(ring, query_sample(aq, start)); /* srcC */ 202} 203 204static uint64_t 205ticks_to_ns(uint32_t ts) 206{ 207 /* This is based on the 19.2MHz always-on rbbm timer. 208 * 209 * TODO we should probably query this value from kernel.. 
210 */ 211 return ts * (1000000000 / 19200000); 212} 213 214static void 215time_elapsed_accumulate_result(struct fd_acc_query *aq, void *buf, 216 union pipe_query_result *result) 217{ 218 struct fd6_query_sample *sp = buf; 219 result->u64 = ticks_to_ns(sp->result); 220} 221 222static void 223timestamp_accumulate_result(struct fd_acc_query *aq, void *buf, 224 union pipe_query_result *result) 225{ 226 struct fd6_query_sample *sp = buf; 227 result->u64 = ticks_to_ns(sp->result); 228} 229 230static const struct fd_acc_sample_provider time_elapsed = { 231 .query_type = PIPE_QUERY_TIME_ELAPSED, 232 .active = FD_STAGE_DRAW | FD_STAGE_CLEAR, 233 .size = sizeof(struct fd6_query_sample), 234 .resume = timestamp_resume, 235 .pause = timestamp_pause, 236 .result = time_elapsed_accumulate_result, 237}; 238 239/* NOTE: timestamp query isn't going to give terribly sensible results 240 * on a tiler. But it is needed by qapitrace profile heatmap. If you 241 * add in a binning pass, the results get even more non-sensical. So 242 * we just return the timestamp on the first tile and hope that is 243 * kind of good enough. 244 */ 245 246static const struct fd_acc_sample_provider timestamp = { 247 .query_type = PIPE_QUERY_TIMESTAMP, 248 .active = FD_STAGE_ALL, 249 .size = sizeof(struct fd6_query_sample), 250 .resume = timestamp_resume, 251 .pause = timestamp_pause, 252 .result = timestamp_accumulate_result, 253}; 254 255/* 256 * Performance Counter (batch) queries: 257 * 258 * Only one of these is active at a time, per design of the gallium 259 * batch_query API design. On perfcntr query tracks N query_types, 260 * each of which has a 'fd_batch_query_entry' that maps it back to 261 * the associated group and counter. 
 */

struct fd_batch_query_entry {
	uint8_t gid;        /* group-id */
	uint8_t cid;        /* countable-id within the group */
};

struct fd_batch_query_data {
	struct fd_screen *screen;
	unsigned num_query_entries;
	/* flexible array, one entry per tracked query_type: */
	struct fd_batch_query_entry query_entries[];
};

static void
perfcntr_resume(struct fd_acc_query *aq, struct fd_batch *batch)
{
	struct fd_batch_query_data *data = aq->query_data;
	struct fd_screen *screen = data->screen;
	struct fd_ringbuffer *ring = batch->draw;

	/* Tracks how many counter slots have been claimed per group so
	 * far, so each entry in a group gets its own hw counter:
	 */
	unsigned counters_per_group[screen->num_perfcntr_groups];
	memset(counters_per_group, 0, sizeof(counters_per_group));

	fd_wfi(batch, ring);

	/* configure performance counters for the requested queries: */
	for (unsigned i = 0; i < data->num_query_entries; i++) {
		struct fd_batch_query_entry *entry = &data->query_entries[i];
		const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
		unsigned counter_idx = counters_per_group[entry->gid]++;

		debug_assert(counter_idx < g->num_counters);

		OUT_PKT4(ring, g->counters[counter_idx].select_reg, 1);
		OUT_RING(ring, g->countables[entry->cid].selector);
	}

	/* reset so the second pass assigns the same slot per entry: */
	memset(counters_per_group, 0, sizeof(counters_per_group));

	/* and snapshot the start values */
	for (unsigned i = 0; i < data->num_query_entries; i++) {
		struct fd_batch_query_entry *entry = &data->query_entries[i];
		const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
		unsigned counter_idx = counters_per_group[entry->gid]++;
		const struct fd_perfcntr_counter *counter = &g->counters[counter_idx];

		OUT_PKT7(ring, CP_REG_TO_MEM, 3);
		OUT_RING(ring, CP_REG_TO_MEM_0_64B |
				CP_REG_TO_MEM_0_REG(counter->counter_reg_lo));
		OUT_RELOCW(ring, query_sample_idx(aq, i, start));
	}
}

static void
perfcntr_pause(struct fd_acc_query *aq, struct fd_batch *batch)
{
	struct fd_batch_query_data *data = aq->query_data;
	struct fd_screen *screen = data->screen;
	struct fd_ringbuffer *ring = batch->draw;

	/* must mirror the slot assignment made in perfcntr_resume() so
	 * the stop snapshot reads the same hw counters:
	 */
	unsigned counters_per_group[screen->num_perfcntr_groups];
	memset(counters_per_group, 0, sizeof(counters_per_group));

	fd_wfi(batch, ring);

	/* TODO do we need to bother to turn anything off? */

	/* snapshot the end values: */
	for (unsigned i = 0; i < data->num_query_entries; i++) {
		struct fd_batch_query_entry *entry = &data->query_entries[i];
		const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
		unsigned counter_idx = counters_per_group[entry->gid]++;
		const struct fd_perfcntr_counter *counter = &g->counters[counter_idx];

		OUT_PKT7(ring, CP_REG_TO_MEM, 3);
		OUT_RING(ring, CP_REG_TO_MEM_0_64B |
				CP_REG_TO_MEM_0_REG(counter->counter_reg_lo));
		OUT_RELOCW(ring, query_sample_idx(aq, i, stop));
	}

	/* and compute the result: */
	for (unsigned i = 0; i < data->num_query_entries; i++) {
		/* result += stop - start: */
		OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
		OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE |
				CP_MEM_TO_MEM_0_NEG_C);
		OUT_RELOCW(ring, query_sample_idx(aq, i, result));      /* dst */
		OUT_RELOC(ring, query_sample_idx(aq, i, result));       /* srcA */
		OUT_RELOC(ring, query_sample_idx(aq, i, stop));         /* srcB */
		OUT_RELOC(ring, query_sample_idx(aq, i, start));        /* srcC */
	}
}

/* Copy each entry's accumulated counter delta out to the gallium
 * batch-query result array:
 */
static void
perfcntr_accumulate_result(struct fd_acc_query *aq, void *buf,
		union pipe_query_result *result)
{
	struct fd_batch_query_data *data = aq->query_data;
	struct fd6_query_sample *sp = buf;

	for (unsigned i = 0; i < data->num_query_entries; i++) {
		result->batch[i].u64 = sp[i].result;
	}
}

/* NOTE: no '.size' here, since the sample buffer size depends on the
 * number of tracked query_types -- fd6_create_batch_query() sets
 * aq->size per-query instead.
 */
static const struct fd_acc_sample_provider perfcntr = {
		.query_type = FD_QUERY_FIRST_PERFCNTR,
		.active = FD_STAGE_DRAW | FD_STAGE_CLEAR,
		.resume = perfcntr_resume,
		.pause = perfcntr_pause,
		.result = perfcntr_accumulate_result,
};
375static struct pipe_query * 376fd6_create_batch_query(struct pipe_context *pctx, 377 unsigned num_queries, unsigned *query_types) 378{ 379 struct fd_context *ctx = fd_context(pctx); 380 struct fd_screen *screen = ctx->screen; 381 struct fd_query *q; 382 struct fd_acc_query *aq; 383 struct fd_batch_query_data *data; 384 385 data = CALLOC_VARIANT_LENGTH_STRUCT(fd_batch_query_data, 386 num_queries * sizeof(data->query_entries[0])); 387 388 data->screen = screen; 389 data->num_query_entries = num_queries; 390 391 /* validate the requested query_types and ensure we don't try 392 * to request more query_types of a given group than we have 393 * counters: 394 */ 395 unsigned counters_per_group[screen->num_perfcntr_groups]; 396 memset(counters_per_group, 0, sizeof(counters_per_group)); 397 398 for (unsigned i = 0; i < num_queries; i++) { 399 unsigned idx = query_types[i] - FD_QUERY_FIRST_PERFCNTR; 400 401 /* verify valid query_type, ie. is it actually a perfcntr? */ 402 if ((query_types[i] < FD_QUERY_FIRST_PERFCNTR) || 403 (idx >= screen->num_perfcntr_queries)) { 404 debug_printf("invalid batch query query_type: %u\n", query_types[i]); 405 goto error; 406 } 407 408 struct fd_batch_query_entry *entry = &data->query_entries[i]; 409 struct pipe_driver_query_info *pq = &screen->perfcntr_queries[idx]; 410 411 entry->gid = pq->group_id; 412 413 /* the perfcntr_queries[] table flattens all the countables 414 * for each group in series, ie: 415 * 416 * (G0,C0), .., (G0,Cn), (G1,C0), .., (G1,Cm), ... 417 * 418 * So to find the countable index just step back through the 419 * table to find the first entry with the same group-id. 
420 */ 421 while (pq > screen->perfcntr_queries) { 422 pq--; 423 if (pq->group_id == entry->gid) 424 entry->cid++; 425 } 426 427 if (counters_per_group[entry->gid] >= 428 screen->perfcntr_groups[entry->gid].num_counters) { 429 debug_printf("too many counters for group %u\n", entry->gid); 430 goto error; 431 } 432 433 counters_per_group[entry->gid]++; 434 } 435 436 q = fd_acc_create_query2(ctx, 0, &perfcntr); 437 aq = fd_acc_query(q); 438 439 /* sample buffer size is based on # of queries: */ 440 aq->size = num_queries * sizeof(struct fd6_query_sample); 441 aq->query_data = data; 442 443 return (struct pipe_query *)q; 444 445error: 446 free(data); 447 return NULL; 448} 449 450void 451fd6_query_context_init(struct pipe_context *pctx) 452{ 453 struct fd_context *ctx = fd_context(pctx); 454 455 ctx->create_query = fd_acc_create_query; 456 ctx->query_set_stage = fd_acc_query_set_stage; 457 458 pctx->create_batch_query = fd6_create_batch_query; 459 460 fd_acc_query_register_provider(pctx, &occlusion_counter); 461 fd_acc_query_register_provider(pctx, &occlusion_predicate); 462 fd_acc_query_register_provider(pctx, &occlusion_predicate_conservative); 463 464 fd_acc_query_register_provider(pctx, &time_elapsed); 465 fd_acc_query_register_provider(pctx, ×tamp); 466} 467