#include "zink_query.h"

#include "zink_context.h"
#include "zink_fence.h"
#include "zink_resource.h"
#include "zink_screen.h"

#include "util/hash_table.h"
#include "util/set.h"
#include "util/u_dump.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"

/* number of query slots allocated per VkQueryPool; 64-bit architectures get
 * larger pools
 */
#if defined(PIPE_ARCH_X86_64) || defined(PIPE_ARCH_PPC_64) || defined(PIPE_ARCH_AARCH64) || defined(PIPE_ARCH_MIPS64)
#define NUM_QUERIES 5000
#else
#define NUM_QUERIES 500
#endif

/* one entry in a query's ring of result buffers: results are copied out of the
 * VkQueryPool into these gallium buffers for later CPU readback
 */
struct zink_query_buffer {
   struct list_head list; /* link in zink_query::buffers */
   unsigned num_results; /* number of results copied into 'buffer' so far */
   struct pipe_resource *buffer;
   /* parallel buffers for queries that also capture per-xfb-stream results */
   struct pipe_resource *xfb_buffers[PIPE_MAX_VERTEX_STREAMS - 1];
};

struct zink_query {
   struct threaded_query base;
   enum pipe_query_type type;

   VkQueryPool query_pool;
   VkQueryPool xfb_query_pool[PIPE_MAX_VERTEX_STREAMS - 1]; //stream 0 is in the base pool
   unsigned curr_query, last_start; /* current slot and first slot of the active range */

   VkQueryType vkqtype;
   unsigned index; /* gallium query index (e.g. xfb stream or statistic index) */
   bool precise;
   bool xfb_running;
   bool xfb_overflow;

   bool active; /* query is considered active by vk */
   bool needs_reset; /* query is considered active by vk and cannot be destroyed */
   bool dead; /* query should be destroyed when its fence finishes */
   bool needs_update; /* query needs to update its qbos */

   struct list_head active_list;

   struct list_head stats_list; /* when active, statistics queries are added to ctx->primitives_generated_queries */
   bool have_gs[NUM_QUERIES]; /* geometry shaders use GEOMETRY_SHADER_PRIMITIVES_BIT */
   bool have_xfb[NUM_QUERIES]; /* xfb was active during this query */

   struct zink_batch_usage *batch_id; //batch that the query was started in

   struct list_head buffers; /* ring of zink_query_buffer */
   union {
      struct zink_query_buffer *curr_qbo;
      struct pipe_fence_handle *fence; //PIPE_QUERY_GPU_FINISHED
   };

   struct zink_resource *predicate; /* GPU buffer used for conditional rendering */
   bool predicate_dirty;
};

static void
update_qbo(struct zink_context *ctx, struct zink_query *q);
static void
reset_pool(struct zink_context *ctx, struct zink_batch *batch, struct zink_query *q);

/* number of 64-bit values each capture of this query type writes */
static inline unsigned
get_num_results(enum pipe_query_type query_type)
{
   switch (query_type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
   case PIPE_QUERY_TIME_ELAPSED:
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
      return 1;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      return 2;
   default:
      debug_printf("unknown query: %s\n",
                   util_str_query_type(query_type, true));
      unreachable("zink: unknown query type");
   }
}

/* translate a gallium pipeline-statistics index to the matching Vulkan
 * pipeline statistic flag bit
 */
static VkQueryPipelineStatisticFlags
pipeline_statistic_convert(enum pipe_statistics_query_index idx)
{
   unsigned map[] = {
      [PIPE_STAT_QUERY_IA_VERTICES] = VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_VERTICES_BIT,
      [PIPE_STAT_QUERY_IA_PRIMITIVES] = VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT,
      [PIPE_STAT_QUERY_VS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_VERTEX_SHADER_INVOCATIONS_BIT,
      [PIPE_STAT_QUERY_GS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT,
      [PIPE_STAT_QUERY_GS_PRIMITIVES] = VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT,
      [PIPE_STAT_QUERY_C_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT,
      [PIPE_STAT_QUERY_C_PRIMITIVES] = VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT,
      [PIPE_STAT_QUERY_PS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT,
      [PIPE_STAT_QUERY_HS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_CONTROL_SHADER_PATCHES_BIT,
      [PIPE_STAT_QUERY_DS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_EVALUATION_SHADER_INVOCATIONS_BIT,
      [PIPE_STAT_QUERY_CS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT
   };
   assert(idx < ARRAY_SIZE(map));
   return map[idx];
}

/* convert a raw GPU timestamp value to nanoseconds in place */
static void
timestamp_to_nanoseconds(struct zink_screen *screen, uint64_t *timestamp)
{
   /* The number of valid bits in a timestamp value is determined by
    * the VkQueueFamilyProperties::timestampValidBits property of the queue on which the timestamp is written.
    * - 17.5. Timestamp Queries
    */
   if (screen->timestamp_valid_bits < 64)
      *timestamp &= (1ull << screen->timestamp_valid_bits) - 1;

   /* The number of nanoseconds it takes for a timestamp value to be incremented by 1
    * can be obtained from VkPhysicalDeviceLimits::timestampPeriod
    * - 17.5. Timestamp Queries
    */
   *timestamp *= screen->info.props.limits.timestampPeriod;
}

/* map a gallium query type to the Vulkan query type; *precise is set for
 * PIPE_QUERY_OCCLUSION_COUNTER, which needs exact sample counts
 */
static VkQueryType
convert_query_type(unsigned query_type, bool *precise)
{
   *precise = false;
   switch (query_type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
      *precise = true;
      FALLTHROUGH;
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      return VK_QUERY_TYPE_OCCLUSION;
   case PIPE_QUERY_TIME_ELAPSED:
   case PIPE_QUERY_TIMESTAMP:
      return VK_QUERY_TYPE_TIMESTAMP;
   case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      return VK_QUERY_TYPE_PIPELINE_STATISTICS;
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      return VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT;
   default:
      debug_printf("unknown query: %s\n",
                   util_str_query_type(query_type, true));
      unreachable("zink: unknown query type");
   }
}

/* true for query types tracked on ctx->primitives_generated_queries while active */
static bool
needs_stats_list(struct zink_query *query)
{
   return query->type == PIPE_QUERY_PRIMITIVES_GENERATED ||
          query->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE ||
          query->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE;
}

static bool
is_time_query(struct zink_query *query)
{
   return query->type == PIPE_QUERY_TIMESTAMP || query->type == PIPE_QUERY_TIME_ELAPSED;
}

static bool
is_so_overflow_query(struct zink_query *query)
{
   return query->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE || query->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE;
}

/* true for query types whose result is a boolean rather than a counter */
static bool
is_bool_query(struct zink_query *query)
{
   return is_so_overflow_query(query) ||
          query->type == PIPE_QUERY_OCCLUSION_PREDICATE ||
          query->type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE ||
          query->type == PIPE_QUERY_GPU_FINISHED;
}

/* ensure the query has a spare result buffer after curr_qbo, allocating one
 * (plus any xfb side buffers the query type needs) if necessary; returns false
 * on allocation failure
 */
static bool
qbo_append(struct pipe_screen *screen, struct zink_query *query)
{
   /* a buffer already queued after the current one means nothing to do */
   if (query->curr_qbo && query->curr_qbo->list.next)
      return true;
   struct zink_query_buffer *qbo = CALLOC_STRUCT(zink_query_buffer);
   if (!qbo)
      return false;
   qbo->buffer = pipe_buffer_create(screen, PIPE_BIND_QUERY_BUFFER,
                                    PIPE_USAGE_STAGING,
                                    /* this is the maximum possible size of the results in a given buffer */
                                    NUM_QUERIES * get_num_results(query->type) * sizeof(uint64_t));
   if (!qbo->buffer)
      goto fail;
   if (query->type == PIPE_QUERY_PRIMITIVES_GENERATED) {
      /* need separate xfb buffer */
      qbo->xfb_buffers[0] = pipe_buffer_create(screen, PIPE_BIND_QUERY_BUFFER,
                                               PIPE_USAGE_STAGING,
                                               /* this is the maximum possible size of the results in a given buffer */
                                               NUM_QUERIES * get_num_results(query->type) * sizeof(uint64_t));
      if (!qbo->xfb_buffers[0])
         goto fail;
   } else if (query->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
      /* need to monitor all xfb streams */
      for (unsigned i = 0; i < ARRAY_SIZE(qbo->xfb_buffers); i++) {
         /* need separate xfb buffer */
         qbo->xfb_buffers[i] = pipe_buffer_create(screen, PIPE_BIND_QUERY_BUFFER,
                                                  PIPE_USAGE_STAGING,
                                                  /* this is the maximum possible size of the results in a given buffer */
                                                  NUM_QUERIES * get_num_results(query->type) * sizeof(uint64_t));
         if (!qbo->xfb_buffers[i])
            goto fail;
      }
   }
   list_addtail(&qbo->list, &query->buffers);

   return true;
fail:
   /* pipe_resource_reference(..., NULL) safely drops possibly-NULL refs */
   pipe_resource_reference(&qbo->buffer, NULL);
   for (unsigned i = 0; i < ARRAY_SIZE(qbo->xfb_buffers); i++)
      pipe_resource_reference(&qbo->xfb_buffers[i], NULL);
   FREE(qbo);
   return false;
}

/* free all GPU and CPU resources owned by the query; the query must no longer
 * be in use by any in-flight batch (asserted via batch_id)
 */
static void
destroy_query(struct zink_screen *screen, struct zink_query *query)
{
   assert(zink_screen_usage_check_completion(screen, query->batch_id));
   if (query->query_pool)
      VKSCR(DestroyQueryPool)(screen->dev, query->query_pool, NULL);
   struct zink_query_buffer *qbo, *next;
   LIST_FOR_EACH_ENTRY_SAFE(qbo, next, &query->buffers, list) {
      pipe_resource_reference(&qbo->buffer, NULL);
      for (unsigned i = 0; i < ARRAY_SIZE(qbo->xfb_buffers); i++)
         pipe_resource_reference(&qbo->xfb_buffers[i], NULL);
      FREE(qbo);
   }
   for (unsigned i = 0; i < ARRAY_SIZE(query->xfb_query_pool); i++) {
      if (query->xfb_query_pool[i])
         VKSCR(DestroyQueryPool)(screen->dev, query->xfb_query_pool[i], NULL);
   }
   pipe_resource_reference((struct pipe_resource**)&query->predicate, NULL);
   FREE(query);
}

/* rewind the query to its first result buffer and drop any accumulated results */
static void
reset_qbo(struct zink_query *q)
{
   q->curr_qbo = list_first_entry(&q->buffers, struct zink_query_buffer, list);
   q->curr_qbo->num_results = 0;
}

/* pipe_context::create_query - allocate a zink query, its VkQueryPool(s), and
 * an initial result buffer; returns NULL on failure
 */
static struct pipe_query *
zink_create_query(struct pipe_context *pctx,
                  unsigned query_type, unsigned index)
{
   struct zink_screen *screen = zink_screen(pctx->screen);
   struct zink_query *query = CALLOC_STRUCT(zink_query);
   VkQueryPoolCreateInfo pool_create = {0};

   if (!query)
      return NULL;
   list_inithead(&query->buffers);

   query->index = index;
   query->type = query_type;
   /* GPU_FINISHED is implemented with a fence, not a VkQueryPool */
   if (query->type == PIPE_QUERY_GPU_FINISHED)
      return (struct pipe_query *)query;
   query->vkqtype = convert_query_type(query_type, &query->precise);
   /* NOTE(review): convert_query_type unreachable()s on unknown types and never
    * returns -1, so this path looks dead; if it were reachable it would leak
    * 'query' — confirm
    */
   if (query->vkqtype == -1)
      return NULL;

   assert(!query->precise || query->vkqtype == VK_QUERY_TYPE_OCCLUSION);

   query->curr_query = 0;

   pool_create.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO;
   pool_create.queryType = query->vkqtype;
   pool_create.queryCount = NUM_QUERIES;
   if (query_type == PIPE_QUERY_PRIMITIVES_GENERATED)
      pool_create.pipelineStatistics = VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT |
                                       VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT;
   else if (query_type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE)
      pool_create.pipelineStatistics = pipeline_statistic_convert(index);

   VkResult status = VKSCR(CreateQueryPool)(screen->dev, &pool_create, NULL, &query->query_pool);
   if (status != VK_SUCCESS)
      goto fail;
   if (query_type == PIPE_QUERY_PRIMITIVES_GENERATED) {
      /* if xfb is active, we need to use an xfb query, otherwise we need pipeline statistics */
      pool_create.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO;
      pool_create.queryType = VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT;
      pool_create.queryCount = NUM_QUERIES;

      status = VKSCR(CreateQueryPool)(screen->dev, &pool_create, NULL, &query->xfb_query_pool[0]);
      if (status != VK_SUCCESS)
         goto fail;
   } else if (query_type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
      /* need to monitor all xfb streams */
      for (unsigned i = 0; i < ARRAY_SIZE(query->xfb_query_pool); i++) {
         status = VKSCR(CreateQueryPool)(screen->dev, &pool_create, NULL, &query->xfb_query_pool[i]);
         if (status != VK_SUCCESS)
            goto fail;
      }
   }
   if (!qbo_append(pctx->screen, query))
      goto fail;
   struct zink_batch *batch = &zink_context(pctx)->batch;
   batch->has_work = true;
   query->needs_reset = true;
   if (query->type == PIPE_QUERY_TIMESTAMP) {
      query->active = true;
      /* defer pool reset until end_query since we're guaranteed to be threadsafe then */
      reset_qbo(query);
   }
   return (struct pipe_query *)query;
fail:
   destroy_query(screen, query);
   return NULL;
}
327 328static void 329zink_destroy_query(struct pipe_context *pctx, 330 struct pipe_query *q) 331{ 332 struct zink_screen *screen = zink_screen(pctx->screen); 333 struct zink_query *query = (struct zink_query *)q; 334 335 /* only destroy if this query isn't active on any batches, 336 * otherwise just mark dead and wait 337 */ 338 if (query->batch_id) { 339 p_atomic_set(&query->dead, true); 340 return; 341 } 342 343 destroy_query(screen, query); 344} 345 346void 347zink_prune_query(struct zink_screen *screen, struct zink_batch_state *bs, struct zink_query *query) 348{ 349 if (!zink_batch_usage_matches(query->batch_id, bs)) 350 return; 351 query->batch_id = NULL; 352 if (p_atomic_read(&query->dead)) 353 destroy_query(screen, query); 354} 355 356static void 357check_query_results(struct zink_query *query, union pipe_query_result *result, 358 int num_results, uint64_t *results, uint64_t *xfb_results) 359{ 360 uint64_t last_val = 0; 361 int result_size = get_num_results(query->type); 362 for (int i = 0; i < num_results * result_size; i += result_size) { 363 switch (query->type) { 364 case PIPE_QUERY_OCCLUSION_PREDICATE: 365 case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: 366 case PIPE_QUERY_GPU_FINISHED: 367 result->b |= results[i] != 0; 368 break; 369 370 case PIPE_QUERY_TIME_ELAPSED: 371 case PIPE_QUERY_TIMESTAMP: 372 /* the application can sum the differences between all N queries to determine the total execution time. 373 * - 17.5. 
Timestamp Queries 374 */ 375 if (query->type != PIPE_QUERY_TIME_ELAPSED || i) 376 result->u64 += results[i] - last_val; 377 last_val = results[i]; 378 break; 379 case PIPE_QUERY_OCCLUSION_COUNTER: 380 result->u64 += results[i]; 381 break; 382 case PIPE_QUERY_PRIMITIVES_GENERATED: 383 if (query->have_xfb[query->last_start + i / 2] || query->index) 384 result->u64 += xfb_results[i + 1]; 385 else 386 /* if a given draw had a geometry shader, we need to use the second result */ 387 result->u64 += results[i + query->have_gs[query->last_start + i / 2]]; 388 break; 389 case PIPE_QUERY_PRIMITIVES_EMITTED: 390 /* A query pool created with this type will capture 2 integers - 391 * numPrimitivesWritten and numPrimitivesNeeded - 392 * for the specified vertex stream output from the last vertex processing stage. 393 * - from VK_EXT_transform_feedback spec 394 */ 395 result->u64 += results[i]; 396 break; 397 case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: 398 case PIPE_QUERY_SO_OVERFLOW_PREDICATE: 399 /* A query pool created with this type will capture 2 integers - 400 * numPrimitivesWritten and numPrimitivesNeeded - 401 * for the specified vertex stream output from the last vertex processing stage. 
402 * - from VK_EXT_transform_feedback spec 403 */ 404 if (query->have_xfb[query->last_start + i / 2]) 405 result->b |= results[i] != results[i + 1]; 406 break; 407 case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE: 408 result->u64 += results[i]; 409 break; 410 411 default: 412 debug_printf("unhandled query type: %s\n", 413 util_str_query_type(query->type, true)); 414 unreachable("unexpected query type"); 415 } 416 } 417} 418 419static bool 420get_query_result(struct pipe_context *pctx, 421 struct pipe_query *q, 422 bool wait, 423 union pipe_query_result *result) 424{ 425 struct zink_screen *screen = zink_screen(pctx->screen); 426 struct zink_query *query = (struct zink_query *)q; 427 unsigned flags = PIPE_MAP_READ; 428 429 if (!wait) 430 flags |= PIPE_MAP_DONTBLOCK; 431 if (query->base.flushed) 432 /* this is not a context-safe operation; ensure map doesn't use slab alloc */ 433 flags |= PIPE_MAP_THREAD_SAFE; 434 435 util_query_clear_result(result, query->type); 436 437 int num_results = query->curr_query - query->last_start; 438 int result_size = get_num_results(query->type) * sizeof(uint64_t); 439 440 struct zink_query_buffer *qbo; 441 struct pipe_transfer *xfer; 442 LIST_FOR_EACH_ENTRY(qbo, &query->buffers, list) { 443 uint64_t *xfb_results = NULL; 444 uint64_t *results; 445 bool is_timestamp = query->type == PIPE_QUERY_TIMESTAMP || query->type == PIPE_QUERY_TIMESTAMP_DISJOINT; 446 if (!qbo->num_results) 447 continue; 448 results = pipe_buffer_map_range(pctx, qbo->buffer, 0, 449 (is_timestamp ? 
1 : qbo->num_results) * result_size, flags, &xfer); 450 if (!results) { 451 if (wait) 452 debug_printf("zink: qbo read failed!"); 453 return false; 454 } 455 struct pipe_transfer *xfb_xfer = NULL; 456 if (query->type == PIPE_QUERY_PRIMITIVES_GENERATED) { 457 xfb_results = pipe_buffer_map_range(pctx, qbo->xfb_buffers[0], 0, 458 qbo->num_results * result_size, flags, &xfb_xfer); 459 if (!xfb_results) { 460 if (wait) 461 debug_printf("zink: xfb qbo read failed!"); 462 pipe_buffer_unmap(pctx, xfer); 463 return false; 464 } 465 } 466 check_query_results(query, result, is_timestamp ? 1 : qbo->num_results, results, xfb_results); 467 pipe_buffer_unmap(pctx, xfer); 468 if (xfb_xfer) 469 pipe_buffer_unmap(pctx, xfb_xfer); 470 if (query->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) { 471 for (unsigned i = 0; i < ARRAY_SIZE(qbo->xfb_buffers) && !result->b; i++) { 472 uint64_t *results = pipe_buffer_map_range(pctx, qbo->xfb_buffers[i], 473 0, 474 qbo->num_results * result_size, flags, &xfer); 475 if (!results) { 476 if (wait) 477 debug_printf("zink: qbo read failed!"); 478 return false; 479 } 480 check_query_results(query, result, num_results, results, xfb_results); 481 pipe_buffer_unmap(pctx, xfer); 482 } 483 /* if overflow is detected we can stop */ 484 if (result->b) 485 break; 486 } 487 } 488 489 if (is_time_query(query)) 490 timestamp_to_nanoseconds(screen, &result->u64); 491 492 return true; 493} 494 495static void 496force_cpu_read(struct zink_context *ctx, struct pipe_query *pquery, enum pipe_query_value_type result_type, struct pipe_resource *pres, unsigned offset) 497{ 498 struct pipe_context *pctx = &ctx->base; 499 unsigned result_size = result_type <= PIPE_QUERY_TYPE_U32 ? 
sizeof(uint32_t) : sizeof(uint64_t); 500 struct zink_query *query = (struct zink_query*)pquery; 501 union pipe_query_result result; 502 503 if (query->needs_update) 504 update_qbo(ctx, query); 505 506 bool success = get_query_result(pctx, pquery, true, &result); 507 if (!success) { 508 debug_printf("zink: getting query result failed\n"); 509 return; 510 } 511 512 if (result_type <= PIPE_QUERY_TYPE_U32) { 513 uint32_t u32; 514 uint32_t limit; 515 if (result_type == PIPE_QUERY_TYPE_I32) 516 limit = INT_MAX; 517 else 518 limit = UINT_MAX; 519 if (is_bool_query(query)) 520 u32 = result.b; 521 else 522 u32 = MIN2(limit, result.u64); 523 pipe_buffer_write(pctx, pres, offset, result_size, &u32); 524 } else { 525 uint64_t u64; 526 if (is_bool_query(query)) 527 u64 = result.b; 528 else 529 u64 = result.u64; 530 pipe_buffer_write(pctx, pres, offset, result_size, &u64); 531 } 532} 533 534static void 535copy_pool_results_to_buffer(struct zink_context *ctx, struct zink_query *query, VkQueryPool pool, 536 unsigned query_id, struct zink_resource *res, unsigned offset, 537 int num_results, VkQueryResultFlags flags) 538{ 539 struct zink_batch *batch = &ctx->batch; 540 unsigned type_size = (flags & VK_QUERY_RESULT_64_BIT) ? 
sizeof(uint64_t) : sizeof(uint32_t); 541 unsigned base_result_size = get_num_results(query->type) * type_size; 542 unsigned result_size = base_result_size * num_results; 543 if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) 544 result_size += type_size; 545 zink_batch_no_rp(ctx); 546 /* if it's a single query that doesn't need special handling, we can copy it and be done */ 547 zink_batch_reference_resource_rw(batch, res, true); 548 zink_resource_buffer_barrier(ctx, res, VK_ACCESS_TRANSFER_WRITE_BIT, 0); 549 util_range_add(&res->base.b, &res->valid_buffer_range, offset, offset + result_size); 550 assert(query_id < NUM_QUERIES); 551 VKCTX(CmdCopyQueryPoolResults)(batch->state->cmdbuf, pool, query_id, num_results, res->obj->buffer, 552 offset, base_result_size, flags); 553} 554 555static void 556copy_results_to_buffer(struct zink_context *ctx, struct zink_query *query, struct zink_resource *res, unsigned offset, int num_results, VkQueryResultFlags flags) 557{ 558 copy_pool_results_to_buffer(ctx, query, query->query_pool, query->last_start, res, offset, num_results, flags); 559} 560 561static void 562reset_pool(struct zink_context *ctx, struct zink_batch *batch, struct zink_query *q) 563{ 564 /* This command must only be called outside of a render pass instance 565 * 566 * - vkCmdResetQueryPool spec 567 */ 568 zink_batch_no_rp(ctx); 569 if (q->needs_update) 570 update_qbo(ctx, q); 571 572 VKCTX(CmdResetQueryPool)(batch->state->cmdbuf, q->query_pool, 0, NUM_QUERIES); 573 if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED) 574 VKCTX(CmdResetQueryPool)(batch->state->cmdbuf, q->xfb_query_pool[0], 0, NUM_QUERIES); 575 else if (q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) { 576 for (unsigned i = 0; i < ARRAY_SIZE(q->xfb_query_pool); i++) 577 VKCTX(CmdResetQueryPool)(batch->state->cmdbuf, q->xfb_query_pool[i], 0, NUM_QUERIES); 578 } 579 memset(q->have_gs, 0, sizeof(q->have_gs)); 580 memset(q->have_xfb, 0, sizeof(q->have_xfb)); 581 q->last_start = q->curr_query = 0; 582 
q->needs_reset = false; 583 /* create new qbo for non-timestamp queries: 584 * timestamp queries should never need more than 2 entries in the qbo 585 */ 586 if (q->type == PIPE_QUERY_TIMESTAMP) 587 return; 588 if (qbo_append(ctx->base.screen, q)) 589 reset_qbo(q); 590 else 591 debug_printf("zink: qbo alloc failed on reset!"); 592} 593 594static inline unsigned 595get_buffer_offset(struct zink_query *q, struct pipe_resource *pres, unsigned query_id) 596{ 597 return (query_id - q->last_start) * get_num_results(q->type) * sizeof(uint64_t); 598} 599 600static void 601update_qbo(struct zink_context *ctx, struct zink_query *q) 602{ 603 struct zink_query_buffer *qbo = q->curr_qbo; 604 unsigned offset = 0; 605 uint32_t query_id = q->curr_query - 1; 606 bool is_timestamp = q->type == PIPE_QUERY_TIMESTAMP || q->type == PIPE_QUERY_TIMESTAMP_DISJOINT; 607 /* timestamp queries just write to offset 0 always */ 608 if (!is_timestamp) 609 offset = get_buffer_offset(q, qbo->buffer, query_id); 610 copy_pool_results_to_buffer(ctx, q, q->query_pool, query_id, zink_resource(qbo->buffer), 611 offset, 612 1, VK_QUERY_RESULT_64_BIT); 613 614 if (q->type == PIPE_QUERY_PRIMITIVES_EMITTED || 615 q->type == PIPE_QUERY_PRIMITIVES_GENERATED || 616 q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE) { 617 copy_pool_results_to_buffer(ctx, q, 618 q->xfb_query_pool[0] ? q->xfb_query_pool[0] : q->query_pool, 619 query_id, 620 zink_resource(qbo->xfb_buffers[0] ? qbo->xfb_buffers[0] : qbo->buffer), 621 get_buffer_offset(q, qbo->xfb_buffers[0] ? 
qbo->xfb_buffers[0] : qbo->buffer, query_id), 622 1, VK_QUERY_RESULT_64_BIT); 623 } 624 625 else if (q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) { 626 for (unsigned i = 0; i < ARRAY_SIZE(q->xfb_query_pool); i++) { 627 copy_pool_results_to_buffer(ctx, q, q->xfb_query_pool[i], query_id, zink_resource(qbo->xfb_buffers[i]), 628 get_buffer_offset(q, qbo->xfb_buffers[i], query_id), 629 1, VK_QUERY_RESULT_64_BIT); 630 } 631 } 632 633 if (!is_timestamp) 634 q->curr_qbo->num_results++; 635 else 636 q->curr_qbo->num_results = 1; 637 q->needs_update = false; 638} 639 640static void 641begin_query(struct zink_context *ctx, struct zink_batch *batch, struct zink_query *q) 642{ 643 VkQueryControlFlags flags = 0; 644 645 q->predicate_dirty = true; 646 if (q->needs_reset) 647 reset_pool(ctx, batch, q); 648 assert(q->curr_query < NUM_QUERIES); 649 q->active = true; 650 batch->has_work = true; 651 if (q->type == PIPE_QUERY_TIME_ELAPSED) { 652 VKCTX(CmdWriteTimestamp)(batch->state->cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, q->query_pool, q->curr_query); 653 q->curr_query++; 654 update_qbo(ctx, q); 655 zink_batch_usage_set(&q->batch_id, batch->state); 656 _mesa_set_add(batch->state->active_queries, q); 657 } 658 /* ignore the rest of begin_query for timestamps */ 659 if (is_time_query(q)) 660 return; 661 if (q->precise) 662 flags |= VK_QUERY_CONTROL_PRECISE_BIT; 663 if (q->type == PIPE_QUERY_PRIMITIVES_EMITTED || 664 q->type == PIPE_QUERY_PRIMITIVES_GENERATED || 665 q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE) { 666 VKCTX(CmdBeginQueryIndexedEXT)(batch->state->cmdbuf, 667 q->xfb_query_pool[0] ? 
q->xfb_query_pool[0] : q->query_pool, 668 q->curr_query, 669 flags, 670 q->index); 671 q->xfb_running = true; 672 } else if (q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) { 673 VKCTX(CmdBeginQueryIndexedEXT)(batch->state->cmdbuf, 674 q->query_pool, 675 q->curr_query, 676 flags, 677 0); 678 for (unsigned i = 0; i < ARRAY_SIZE(q->xfb_query_pool); i++) 679 VKCTX(CmdBeginQueryIndexedEXT)(batch->state->cmdbuf, 680 q->xfb_query_pool[i], 681 q->curr_query, 682 flags, 683 i + 1); 684 q->xfb_running = true; 685 } 686 if (q->vkqtype != VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT) 687 VKCTX(CmdBeginQuery)(batch->state->cmdbuf, q->query_pool, q->curr_query, flags); 688 if (needs_stats_list(q)) 689 list_addtail(&q->stats_list, &ctx->primitives_generated_queries); 690 zink_batch_usage_set(&q->batch_id, batch->state); 691 _mesa_set_add(batch->state->active_queries, q); 692} 693 694static bool 695zink_begin_query(struct pipe_context *pctx, 696 struct pipe_query *q) 697{ 698 struct zink_query *query = (struct zink_query *)q; 699 struct zink_context *ctx = zink_context(pctx); 700 struct zink_batch *batch = &ctx->batch; 701 702 query->last_start = query->curr_query; 703 /* drop all past results */ 704 reset_qbo(query); 705 706 begin_query(ctx, batch, query); 707 708 return true; 709} 710 711static void 712update_query_id(struct zink_context *ctx, struct zink_query *q) 713{ 714 if (++q->curr_query == NUM_QUERIES) { 715 /* always reset on start; this ensures we can actually submit the batch that the current query is on */ 716 q->needs_reset = true; 717 } 718 ctx->batch.has_work = true; 719 720 if (ctx->batch.in_rp) 721 q->needs_update = true; 722 else 723 update_qbo(ctx, q); 724} 725 726static void 727end_query(struct zink_context *ctx, struct zink_batch *batch, struct zink_query *q) 728{ 729 ASSERTED struct zink_query_buffer *qbo = q->curr_qbo; 730 assert(qbo); 731 assert(!is_time_query(q)); 732 q->active = false; 733 if (q->type == PIPE_QUERY_PRIMITIVES_EMITTED || 734 q->type == 
PIPE_QUERY_PRIMITIVES_GENERATED ||
       q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE) {
      /* stream-indexed end; stream 0 lives in the base pool when no xfb pool was allocated */
      VKCTX(CmdEndQueryIndexedEXT)(batch->state->cmdbuf,
                                   q->xfb_query_pool[0] ? q->xfb_query_pool[0] : q->query_pool,
                                   q->curr_query, q->index);
   }

   else if (q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
      /* "any stream" overflow: stream 0 ends on the base pool, streams 1..N on the xfb pools */
      VKCTX(CmdEndQueryIndexedEXT)(batch->state->cmdbuf, q->query_pool, q->curr_query, 0);
      for (unsigned i = 0; i < ARRAY_SIZE(q->xfb_query_pool); i++) {
         VKCTX(CmdEndQueryIndexedEXT)(batch->state->cmdbuf, q->xfb_query_pool[i], q->curr_query, i + 1);
      }
   }
   /* non-xfb, non-timestamp queries end with a plain vkCmdEndQuery */
   if (q->vkqtype != VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT && !is_time_query(q))
      VKCTX(CmdEndQuery)(batch->state->cmdbuf, q->query_pool, q->curr_query);

   /* the query is no longer active, so stop tracking it for gs/xfb state updates */
   if (needs_stats_list(q))
      list_delinit(&q->stats_list);

   update_query_id(ctx, q);
}

/* pipe_context::end_query hook.
 * GPU_FINISHED queries have no vk query object: they just record a deferred
 * flush and stash the fence for later polling. Timestamp queries are never
 * "active" in the vk sense and write a bottom-of-pipe timestamp directly;
 * everything else goes through end_query() above if it was actually begun.
 */
static bool
zink_end_query(struct pipe_context *pctx,
               struct pipe_query *q)
{
   struct zink_context *ctx = zink_context(pctx);
   struct zink_query *query = (struct zink_query *)q;
   struct zink_batch *batch = &ctx->batch;

   if (query->type == PIPE_QUERY_GPU_FINISHED) {
      pctx->flush(pctx, &query->fence, PIPE_FLUSH_DEFERRED);
      return true;
   }

   /* FIXME: this can be called from a thread, but it needs to write to the cmdbuf */
   threaded_context_unwrap_sync(pctx);

   if (needs_stats_list(query))
      list_delinit(&query->stats_list);
   if (is_time_query(query)) {
      if (query->needs_reset)
         reset_pool(ctx, batch, query);
      VKCTX(CmdWriteTimestamp)(batch->state->cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
                               query->query_pool, query->curr_query);
      /* remember which batch holds the result so readers know what to flush/wait on */
      zink_batch_usage_set(&query->batch_id, batch->state);
      _mesa_set_add(batch->state->active_queries, query);
      update_query_id(ctx, query);
   } else if (query->active)
      end_query(ctx, batch, query);

   return true;
}

/* pipe_context::get_query_result hook.
 * Ensures the batch containing the query has been flushed (and optionally
 * waited on) before delegating to get_query_result() for the actual readback.
 */
static bool
zink_get_query_result(struct pipe_context *pctx,
                      struct pipe_query *q,
                      bool wait,
                      union pipe_query_result *result)
{
   struct zink_query *query = (void*)q;
   struct zink_context *ctx = zink_context(pctx);

   if (query->type == PIPE_QUERY_GPU_FINISHED) {
      struct pipe_screen *screen = pctx->screen;

      /* result is just "has the deferred flush's fence signaled yet" */
      result->b = screen->fence_finish(screen, query->base.flushed ? NULL : pctx,
                                       query->fence, wait ? PIPE_TIMEOUT_INFINITE : 0);
      return result->b;
   }

   if (query->needs_update)
      update_qbo(ctx, query);

   if (zink_batch_usage_is_unflushed(query->batch_id)) {
      if (!threaded_query(q)->flushed)
         pctx->flush(pctx, NULL, 0);
      if (!wait)
         return false;
   } else if (!threaded_query(q)->flushed &&
              /* timeline drivers can wait during buffer map */
              !zink_screen(pctx->screen)->info.have_KHR_timeline_semaphore)
      zink_batch_usage_check_completion(ctx, query->batch_id);

   return get_query_result(pctx, q, wait, result);
}

/* Suspend all queries active on this batch (e.g. at batch flush).
 * Suspended queries are parked on ctx->suspended_queries so
 * zink_resume_queries() can restart them on the next batch.
 */
void
zink_suspend_queries(struct zink_context *ctx, struct zink_batch *batch)
{
   set_foreach(batch->state->active_queries, entry) {
      struct zink_query *query = (void*)entry->key;
      /* if a query isn't active here then we don't need to reactivate it on the next batch */
      if (query->active && !is_time_query(query)) {
         end_query(ctx, batch, query);
         /* the fence is going to steal the set off the batch, so we have to copy
          * the active queries onto a list
          */
         list_addtail(&query->active_list, &ctx->suspended_queries);
      }
      if (query->needs_update)
         update_qbo(ctx, query);
      /* recycle the pool before curr_query can run past NUM_QUERIES */
      if (query->last_start && query->curr_query > NUM_QUERIES / 2)
         reset_pool(ctx, batch, query);
   }
}

/* Restart every query that zink_suspend_queries() parked on the context. */
void
zink_resume_queries(struct zink_context *ctx, struct zink_batch *batch)
{
   struct zink_query *query, *next;
   LIST_FOR_EACH_ENTRY_SAFE(query, next, &ctx->suspended_queries, active_list) {
      begin_query(ctx, batch, query);
      list_delinit(&query->active_list);
   }
}

void
zink_query_update_gs_states(struct zink_context *ctx)
{
   /* record, for each active primitives-generated query's current slot, whether a
    * geometry shader and/or xfb is bound — needed later to interpret the results
    */
   struct zink_query *query;
   LIST_FOR_EACH_ENTRY(query, &ctx->primitives_generated_queries, stats_list) {
      assert(query->curr_query < ARRAY_SIZE(query->have_gs));
      assert(query->active);
      query->have_gs[query->curr_query] = !!ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
      query->have_xfb[query->curr_query] = !!ctx->num_so_targets;
   }
}

/* pipe_context::set_active_query_state hook: disabling suspends all active
 * queries; re-enabling resumes them.
 */
static void
zink_set_active_query_state(struct pipe_context *pctx, bool enable)
{
   struct zink_context *ctx = zink_context(pctx);
   ctx->queries_disabled = !enable;

   struct zink_batch *batch = &ctx->batch;
   if (ctx->queries_disabled)
      zink_suspend_queries(ctx, batch);
   else
      zink_resume_queries(ctx, batch);
}

/* Begin GPU-side conditional rendering from the current render condition's
 * predicate buffer (no-op without VK_EXT_conditional_rendering).
 */
void
zink_start_conditional_render(struct zink_context *ctx)
{
   if (unlikely(!zink_screen(ctx->base.screen)->info.have_EXT_conditional_rendering))
      return;
   struct zink_batch *batch = &ctx->batch;
   VkConditionalRenderingFlagsEXT begin_flags = 0;
   if (ctx->render_condition.inverted)
      begin_flags = VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT;
   VkConditionalRenderingBeginInfoEXT begin_info = {0};
   begin_info.sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT;
   begin_info.buffer = ctx->render_condition.query->predicate->obj->buffer;
   begin_info.flags = begin_flags;
   VKCTX(CmdBeginConditionalRenderingEXT)(batch->state->cmdbuf, &begin_info);
   /* track the predicate buffer on the batch while the GPU reads it */
   zink_batch_reference_resource_rw(batch, ctx->render_condition.query->predicate, false);
}

/* End GPU-side conditional rendering; conditional clears are applied first. */
void
zink_stop_conditional_render(struct zink_context *ctx)
{
   struct zink_batch *batch = &ctx->batch;
   zink_clear_apply_conditionals(ctx);
   if (unlikely(!zink_screen(ctx->base.screen)->info.have_EXT_conditional_rendering))
      return;
   VKCTX(CmdEndConditionalRenderingEXT)(batch->state->cmdbuf);
}

/* CPU-side evaluation of the render condition: waits for the query result and
 * returns true if rendering should proceed. Used when the GPU path is
 * unavailable or unwanted.
 */
bool
zink_check_conditional_render(struct zink_context *ctx)
{
   if (!ctx->render_condition_active)
      return true;
   assert(ctx->render_condition.query);

   union pipe_query_result result;
   zink_get_query_result(&ctx->base, (struct pipe_query*)ctx->render_condition.query, true, &result);
   return is_bool_query(ctx->render_condition.query) ?
          ctx->render_condition.inverted != result.b :
          ctx->render_condition.inverted != !!result.u64;
}

/* pipe_context::render_condition hook.
 * A NULL query clears the condition; otherwise the query's results are copied
 * into a small predicate buffer that conditional rendering reads from.
 */
static void
zink_render_condition(struct pipe_context *pctx,
                      struct pipe_query *pquery,
                      bool condition,
                      enum pipe_render_cond_flag mode)
{
   struct zink_context *ctx = zink_context(pctx);
   struct zink_query *query = (struct zink_query *)pquery;
   zink_batch_no_rp(ctx);
   VkQueryResultFlagBits flags = 0;

   if (query == NULL) {
      /* force conditional clears if they exist */
      if (ctx->clears_enabled && !ctx->batch.in_rp)
         zink_batch_rp(ctx);
      if (ctx->batch.in_rp)
         zink_stop_conditional_render(ctx);
      ctx->render_condition_active = false;
      ctx->render_condition.query = NULL;
      return;
   }

   if (!query->predicate) {
      struct pipe_resource *pres;

      /* need to create a vulkan buffer to copy the data into */
      pres = pipe_buffer_create(pctx->screen, PIPE_BIND_QUERY_BUFFER, PIPE_USAGE_DEFAULT, sizeof(uint64_t));
      if (!pres)
         return;

      query->predicate = zink_resource(pres);
   }
   if (query->predicate_dirty) {
      struct zink_resource *res = query->predicate;

      /* WAIT modes map to VK_QUERY_RESULT_WAIT_BIT; by-region is treated the same */
      if (mode == PIPE_RENDER_COND_WAIT || mode == PIPE_RENDER_COND_BY_REGION_WAIT)
         flags |= VK_QUERY_RESULT_WAIT_BIT;

      flags |= VK_QUERY_RESULT_64_BIT;
      int num_results = query->curr_query - query->last_start;
      if (query->type != PIPE_QUERY_PRIMITIVES_GENERATED &&
          !is_so_overflow_query(query)) {
         copy_results_to_buffer(ctx, query, res, 0, num_results, flags);
      } else {
         /* these need special handling */
         force_cpu_read(ctx, pquery, PIPE_QUERY_TYPE_U32, &res->base.b, 0);
      }
      query->predicate_dirty
= false;
   }
   ctx->render_condition.inverted = condition;
   ctx->render_condition_active = true;
   ctx->render_condition.query = query;
   if (ctx->batch.in_rp)
      zink_start_conditional_render(ctx);
}

/* pipe_context::get_query_result_resource hook: write a query result (or, for
 * index == -1, its availability) into a buffer at the given offset, on the GPU
 * where possible and via force_cpu_read() otherwise.
 */
static void
zink_get_query_result_resource(struct pipe_context *pctx,
                               struct pipe_query *pquery,
                               bool wait,
                               enum pipe_query_value_type result_type,
                               int index,
                               struct pipe_resource *pres,
                               unsigned offset)
{
   struct zink_context *ctx = zink_context(pctx);
   struct zink_screen *screen = zink_screen(pctx->screen);
   struct zink_query *query = (struct zink_query*)pquery;
   struct zink_resource *res = zink_resource(pres);
   unsigned result_size = result_type <= PIPE_QUERY_TYPE_U32 ? sizeof(uint32_t) : sizeof(uint64_t);
   VkQueryResultFlagBits size_flags = result_type <= PIPE_QUERY_TYPE_U32 ? 0 : VK_QUERY_RESULT_64_BIT;
   unsigned num_queries = query->curr_query - query->last_start;
   unsigned query_id = query->last_start;

   if (index == -1) {
      /* VK_QUERY_RESULT_WITH_AVAILABILITY_BIT will ALWAYS write some kind of result data
       * in addition to the availability result, which is a problem if we're just trying to get availability data
       *
       * if we know that there's no valid buffer data in the preceding buffer range, then we can just
       * stomp on it with a glorious queued buffer copy instead of forcing a stall to manually write to the
       * buffer
       */

      /* timestamps can't use PARTIAL_BIT */
      VkQueryResultFlags flag = is_time_query(query) ? 0 : VK_QUERY_RESULT_PARTIAL_BIT;
      unsigned src_offset = result_size * get_num_results(query->type);
      if (zink_batch_usage_check_completion(ctx, query->batch_id)) {
         /* batch finished: try a direct CPU readback and buffer write */
         uint64_t u64[4] = {0};
         if (VKCTX(GetQueryPoolResults)(screen->dev, query->query_pool, query_id, 1, sizeof(u64), u64,
                                        0, size_flags | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT | flag) == VK_SUCCESS) {
            /* availability word sits after the result data, i.e. at src_offset */
            pipe_buffer_write(pctx, pres, offset, result_size, (unsigned char*)u64 + src_offset);
            return;
         }
      }
      /* otherwise queue a copy through a staging buffer to avoid stalling */
      struct pipe_resource *staging = pipe_buffer_create(pctx->screen, 0, PIPE_USAGE_STAGING, src_offset + result_size);
      copy_results_to_buffer(ctx, query, zink_resource(staging), 0, 1, size_flags | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT | flag);
      zink_copy_buffer(ctx, res, zink_resource(staging), offset, result_size * get_num_results(query->type), result_size);
      pipe_resource_reference(&staging, NULL);
      return;
   }

   if (!is_time_query(query) && !is_bool_query(query)) {
      if (num_queries == 1 && query->type != PIPE_QUERY_PRIMITIVES_GENERATED &&
          query->type != PIPE_QUERY_PRIMITIVES_EMITTED &&
          !is_bool_query(query)) {
         if (size_flags == VK_QUERY_RESULT_64_BIT) {
            if (query->needs_update)
               update_qbo(ctx, query);
            /* internal qbo always writes 64bit value so we can just direct copy */
            zink_copy_buffer(ctx, res, zink_resource(query->curr_qbo->buffer), offset,
                             get_buffer_offset(query, query->curr_qbo->buffer, query->last_start),
                             result_size);
         } else
            /* have to do a new copy for 32bit */
            copy_results_to_buffer(ctx, query, res, offset, 1, size_flags);
         return;
      }
   }

   /* TODO: use CS to aggregate results */

   /* unfortunately, there's no way to accumulate results from multiple queries on the gpu without either
    * clobbering all but the last result or writing the results sequentially, so we have to manually write the result
    */
   force_cpu_read(ctx, pquery, result_type, pres, offset);
}

/* pipe_context::get_timestamp hook: sample the current device timestamp
 * (requires VK_EXT_calibrated_timestamps) and convert it to nanoseconds.
 */
static uint64_t
zink_get_timestamp(struct pipe_context *pctx)
{
   struct zink_screen *screen = zink_screen(pctx->screen);
   uint64_t timestamp, deviation;
   assert(screen->info.have_EXT_calibrated_timestamps);
   VkCalibratedTimestampInfoEXT cti = {0};
   cti.sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT;
   cti.timeDomain = VK_TIME_DOMAIN_DEVICE_EXT;
   VKSCR(GetCalibratedTimestampsEXT)(screen->dev, 1, &cti, &timestamp, &deviation);
   timestamp_to_nanoseconds(screen, &timestamp);
   return timestamp;
}

/* Install all query-related pipe_context hooks and init the per-context
 * query tracking lists.
 */
void
zink_context_query_init(struct pipe_context *pctx)
{
   struct zink_context *ctx = zink_context(pctx);
   list_inithead(&ctx->suspended_queries);
   list_inithead(&ctx->primitives_generated_queries);

   pctx->create_query = zink_create_query;
   pctx->destroy_query = zink_destroy_query;
   pctx->begin_query = zink_begin_query;
   pctx->end_query = zink_end_query;
   pctx->get_query_result = zink_get_query_result;
   pctx->get_query_result_resource = zink_get_query_result_resource;
   pctx->set_active_query_state = zink_set_active_query_state;
   pctx->render_condition = zink_render_condition;
   pctx->get_timestamp = zink_get_timestamp;
}