17ec681f3Smrg/* 27ec681f3Smrg * Copyright 2018 Advanced Micro Devices, Inc. 37ec681f3Smrg * All Rights Reserved. 47ec681f3Smrg * 57ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a 67ec681f3Smrg * copy of this software and associated documentation files (the "Software"), 77ec681f3Smrg * to deal in the Software without restriction, including without limitation 87ec681f3Smrg * on the rights to use, copy, modify, merge, publish, distribute, sub 97ec681f3Smrg * license, and/or sell copies of the Software, and to permit persons to whom 107ec681f3Smrg * the Software is furnished to do so, subject to the following conditions: 117ec681f3Smrg * 127ec681f3Smrg * The above copyright notice and this permission notice (including the next 137ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the 147ec681f3Smrg * Software. 157ec681f3Smrg * 167ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 177ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 187ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 197ec681f3Smrg * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 207ec681f3Smrg * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 217ec681f3Smrg * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 227ec681f3Smrg * USE OR OTHER DEALINGS IN THE SOFTWARE. 237ec681f3Smrg */ 247ec681f3Smrg 257ec681f3Smrg#include "si_pipe.h" 267ec681f3Smrg#include "si_query.h" 277ec681f3Smrg#include "sid.h" 287ec681f3Smrg#include "util/u_memory.h" 297ec681f3Smrg#include "util/u_suballoc.h" 307ec681f3Smrg 317ec681f3Smrg#include <stddef.h> 327ec681f3Smrg 337ec681f3Smrgstatic void emit_shader_query(struct si_context *sctx) 347ec681f3Smrg{ 357ec681f3Smrg assert(!list_is_empty(&sctx->shader_query_buffers)); 367ec681f3Smrg 377ec681f3Smrg struct gfx10_sh_query_buffer *qbuf = 387ec681f3Smrg list_last_entry(&sctx->shader_query_buffers, struct gfx10_sh_query_buffer, list); 397ec681f3Smrg qbuf->head += sizeof(struct gfx10_sh_query_buffer_mem); 407ec681f3Smrg} 417ec681f3Smrg 427ec681f3Smrgstatic void gfx10_release_query_buffers(struct si_context *sctx, 437ec681f3Smrg struct gfx10_sh_query_buffer *first, 447ec681f3Smrg struct gfx10_sh_query_buffer *last) 457ec681f3Smrg{ 467ec681f3Smrg while (first) { 477ec681f3Smrg struct gfx10_sh_query_buffer *qbuf = first; 487ec681f3Smrg if (first != last) 497ec681f3Smrg first = LIST_ENTRY(struct gfx10_sh_query_buffer, qbuf->list.next, list); 507ec681f3Smrg else 517ec681f3Smrg first = NULL; 527ec681f3Smrg 537ec681f3Smrg qbuf->refcount--; 547ec681f3Smrg if (qbuf->refcount) 557ec681f3Smrg continue; 567ec681f3Smrg 577ec681f3Smrg if (qbuf->list.next == &sctx->shader_query_buffers) 587ec681f3Smrg continue; /* keep the most recent buffer; it may not be full yet */ 597ec681f3Smrg if (qbuf->list.prev == &sctx->shader_query_buffers) 607ec681f3Smrg continue; /* keep the oldest buffer for recycling */ 617ec681f3Smrg 627ec681f3Smrg list_del(&qbuf->list); 637ec681f3Smrg si_resource_reference(&qbuf->buf, NULL); 647ec681f3Smrg FREE(qbuf); 657ec681f3Smrg } 667ec681f3Smrg} 677ec681f3Smrg 687ec681f3Smrgstatic bool gfx10_alloc_query_buffer(struct si_context *sctx) 697ec681f3Smrg{ 707ec681f3Smrg if (si_is_atom_dirty(sctx, &sctx->atoms.s.shader_query)) 717ec681f3Smrg return true; 727ec681f3Smrg 737ec681f3Smrg struct gfx10_sh_query_buffer *qbuf = NULL; 747ec681f3Smrg 757ec681f3Smrg if (!list_is_empty(&sctx->shader_query_buffers)) { 767ec681f3Smrg qbuf = list_last_entry(&sctx->shader_query_buffers, struct gfx10_sh_query_buffer, list); 777ec681f3Smrg if (qbuf->head + sizeof(struct gfx10_sh_query_buffer_mem) <= qbuf->buf->b.b.width0) 787ec681f3Smrg goto success; 797ec681f3Smrg 807ec681f3Smrg qbuf = list_first_entry(&sctx->shader_query_buffers, struct gfx10_sh_query_buffer, list); 817ec681f3Smrg if (!qbuf->refcount && 827ec681f3Smrg !si_cs_is_buffer_referenced(sctx, qbuf->buf->buf, RADEON_USAGE_READWRITE) && 837ec681f3Smrg sctx->ws->buffer_wait(sctx->ws, qbuf->buf->buf, 0, RADEON_USAGE_READWRITE)) { 847ec681f3Smrg /* Can immediately re-use the oldest buffer */ 857ec681f3Smrg list_del(&qbuf->list); 867ec681f3Smrg } else { 877ec681f3Smrg qbuf = NULL; 887ec681f3Smrg } 897ec681f3Smrg } 907ec681f3Smrg 917ec681f3Smrg if (!qbuf) { 927ec681f3Smrg qbuf = CALLOC_STRUCT(gfx10_sh_query_buffer); 937ec681f3Smrg if (unlikely(!qbuf)) 947ec681f3Smrg return false; 957ec681f3Smrg 967ec681f3Smrg struct si_screen *screen = sctx->screen; 977ec681f3Smrg unsigned buf_size = 987ec681f3Smrg MAX2(sizeof(struct gfx10_sh_query_buffer_mem), screen->info.min_alloc_size); 997ec681f3Smrg qbuf->buf = si_resource(pipe_buffer_create(&screen->b, 0, PIPE_USAGE_STAGING, buf_size)); 1007ec681f3Smrg if (unlikely(!qbuf->buf)) { 1017ec681f3Smrg FREE(qbuf); 1027ec681f3Smrg return false; 1037ec681f3Smrg } 1047ec681f3Smrg } 1057ec681f3Smrg 1067ec681f3Smrg /* The buffer is currently unused by the GPU. Initialize it. 1077ec681f3Smrg * 1087ec681f3Smrg * We need to set the high bit of all the primitive counters for 1097ec681f3Smrg * compatibility with the SET_PREDICATION packet. 1107ec681f3Smrg */ 1117ec681f3Smrg uint64_t *results = sctx->ws->buffer_map(sctx->ws, qbuf->buf->buf, NULL, 1127ec681f3Smrg PIPE_MAP_WRITE | PIPE_MAP_UNSYNCHRONIZED); 1137ec681f3Smrg assert(results); 1147ec681f3Smrg 1157ec681f3Smrg for (unsigned i = 0, e = qbuf->buf->b.b.width0 / sizeof(struct gfx10_sh_query_buffer_mem); i < e; 1167ec681f3Smrg ++i) { 1177ec681f3Smrg for (unsigned j = 0; j < 16; ++j) 1187ec681f3Smrg results[32 * i + j] = (uint64_t)1 << 63; 1197ec681f3Smrg results[32 * i + 16] = 0; 1207ec681f3Smrg } 1217ec681f3Smrg 1227ec681f3Smrg list_addtail(&qbuf->list, &sctx->shader_query_buffers); 1237ec681f3Smrg qbuf->head = 0; 1247ec681f3Smrg qbuf->refcount = sctx->num_active_shader_queries; 1257ec681f3Smrg 1267ec681f3Smrgsuccess:; 1277ec681f3Smrg struct pipe_shader_buffer sbuf; 1287ec681f3Smrg sbuf.buffer = &qbuf->buf->b.b; 1297ec681f3Smrg sbuf.buffer_offset = qbuf->head; 1307ec681f3Smrg sbuf.buffer_size = sizeof(struct gfx10_sh_query_buffer_mem); 1317ec681f3Smrg si_set_internal_shader_buffer(sctx, GFX10_GS_QUERY_BUF, &sbuf); 1327ec681f3Smrg sctx->current_vs_state |= S_VS_STATE_STREAMOUT_QUERY_ENABLED(1); 1337ec681f3Smrg 1347ec681f3Smrg si_mark_atom_dirty(sctx, &sctx->atoms.s.shader_query); 1357ec681f3Smrg return true; 1367ec681f3Smrg} 1377ec681f3Smrg 1387ec681f3Smrgstatic void gfx10_sh_query_destroy(struct si_context *sctx, struct si_query *rquery) 1397ec681f3Smrg{ 1407ec681f3Smrg struct gfx10_sh_query *query = (struct gfx10_sh_query *)rquery; 1417ec681f3Smrg gfx10_release_query_buffers(sctx, query->first, query->last); 1427ec681f3Smrg FREE(query); 1437ec681f3Smrg} 1447ec681f3Smrg 1457ec681f3Smrgstatic bool gfx10_sh_query_begin(struct si_context *sctx, struct si_query *rquery) 1467ec681f3Smrg{ 1477ec681f3Smrg struct gfx10_sh_query *query = (struct gfx10_sh_query *)rquery; 1487ec681f3Smrg 1497ec681f3Smrg gfx10_release_query_buffers(sctx, query->first, query->last); 1507ec681f3Smrg query->first = query->last = NULL; 1517ec681f3Smrg 1527ec681f3Smrg if (unlikely(!gfx10_alloc_query_buffer(sctx))) 1537ec681f3Smrg return false; 1547ec681f3Smrg 1557ec681f3Smrg query->first = list_last_entry(&sctx->shader_query_buffers, struct gfx10_sh_query_buffer, list); 1567ec681f3Smrg query->first_begin = query->first->head; 1577ec681f3Smrg 1587ec681f3Smrg sctx->num_active_shader_queries++; 1597ec681f3Smrg query->first->refcount++; 1607ec681f3Smrg 1617ec681f3Smrg return true; 1627ec681f3Smrg} 1637ec681f3Smrg 1647ec681f3Smrgstatic bool gfx10_sh_query_end(struct si_context *sctx, struct si_query *rquery) 1657ec681f3Smrg{ 1667ec681f3Smrg struct gfx10_sh_query *query = (struct gfx10_sh_query *)rquery; 1677ec681f3Smrg 1687ec681f3Smrg if (unlikely(!query->first)) 1697ec681f3Smrg return false; /* earlier out of memory error */ 1707ec681f3Smrg 1717ec681f3Smrg query->last = list_last_entry(&sctx->shader_query_buffers, struct gfx10_sh_query_buffer, list); 1727ec681f3Smrg query->last_end = query->last->head; 1737ec681f3Smrg 1747ec681f3Smrg /* Signal the fence of the previous chunk */ 1757ec681f3Smrg if (query->last_end != 0) { 1767ec681f3Smrg uint64_t fence_va = query->last->buf->gpu_address; 1777ec681f3Smrg fence_va += query->last_end - sizeof(struct gfx10_sh_query_buffer_mem); 1787ec681f3Smrg fence_va += offsetof(struct gfx10_sh_query_buffer_mem, fence); 1797ec681f3Smrg si_cp_release_mem(sctx, &sctx->gfx_cs, V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, 1807ec681f3Smrg EOP_INT_SEL_NONE, EOP_DATA_SEL_VALUE_32BIT, query->last->buf, fence_va, 1817ec681f3Smrg 0xffffffff, PIPE_QUERY_GPU_FINISHED); 1827ec681f3Smrg } 1837ec681f3Smrg 1847ec681f3Smrg sctx->num_active_shader_queries--; 1857ec681f3Smrg 1867ec681f3Smrg if (sctx->num_active_shader_queries <= 0 || !si_is_atom_dirty(sctx, &sctx->atoms.s.shader_query)) { 1877ec681f3Smrg si_set_internal_shader_buffer(sctx, GFX10_GS_QUERY_BUF, NULL); 1887ec681f3Smrg sctx->current_vs_state &= C_VS_STATE_STREAMOUT_QUERY_ENABLED; 1897ec681f3Smrg 1907ec681f3Smrg /* If a query_begin is followed by a query_end without a draw 1917ec681f3Smrg * in-between, we need to clear the atom to ensure that the 1927ec681f3Smrg * next query_begin will re-initialize the shader buffer. */ 1937ec681f3Smrg si_set_atom_dirty(sctx, &sctx->atoms.s.shader_query, false); 1947ec681f3Smrg } 1957ec681f3Smrg 1967ec681f3Smrg return true; 1977ec681f3Smrg} 1987ec681f3Smrg 1997ec681f3Smrgstatic void gfx10_sh_query_add_result(struct gfx10_sh_query *query, 2007ec681f3Smrg struct gfx10_sh_query_buffer_mem *qmem, 2017ec681f3Smrg union pipe_query_result *result) 2027ec681f3Smrg{ 2037ec681f3Smrg static const uint64_t mask = ((uint64_t)1 << 63) - 1; 2047ec681f3Smrg 2057ec681f3Smrg switch (query->b.type) { 2067ec681f3Smrg case PIPE_QUERY_PRIMITIVES_EMITTED: 2077ec681f3Smrg result->u64 += qmem->stream[query->stream].emitted_primitives & mask; 2087ec681f3Smrg break; 2097ec681f3Smrg case PIPE_QUERY_PRIMITIVES_GENERATED: 2107ec681f3Smrg result->u64 += qmem->stream[query->stream].generated_primitives & mask; 2117ec681f3Smrg break; 2127ec681f3Smrg case PIPE_QUERY_SO_STATISTICS: 2137ec681f3Smrg result->so_statistics.num_primitives_written += 2147ec681f3Smrg qmem->stream[query->stream].emitted_primitives & mask; 2157ec681f3Smrg result->so_statistics.primitives_storage_needed += 2167ec681f3Smrg qmem->stream[query->stream].generated_primitives & mask; 2177ec681f3Smrg break; 2187ec681f3Smrg case PIPE_QUERY_SO_OVERFLOW_PREDICATE: 2197ec681f3Smrg result->b |= qmem->stream[query->stream].emitted_primitives != 2207ec681f3Smrg qmem->stream[query->stream].generated_primitives; 2217ec681f3Smrg break; 2227ec681f3Smrg case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: 2237ec681f3Smrg for (unsigned stream = 0; stream < SI_MAX_STREAMS; ++stream) { 2247ec681f3Smrg result->b |= qmem->stream[stream].emitted_primitives != 2257ec681f3Smrg qmem->stream[stream].generated_primitives; 2267ec681f3Smrg } 2277ec681f3Smrg break; 2287ec681f3Smrg default: 2297ec681f3Smrg assert(0); 2307ec681f3Smrg } 2317ec681f3Smrg} 2327ec681f3Smrg 2337ec681f3Smrgstatic bool gfx10_sh_query_get_result(struct si_context *sctx, struct si_query *rquery, bool wait, 2347ec681f3Smrg union pipe_query_result *result) 2357ec681f3Smrg{ 2367ec681f3Smrg struct gfx10_sh_query *query = (struct gfx10_sh_query *)rquery; 2377ec681f3Smrg 2387ec681f3Smrg util_query_clear_result(result, query->b.type); 2397ec681f3Smrg 2407ec681f3Smrg if (unlikely(!query->first)) 2417ec681f3Smrg return false; /* earlier out of memory error */ 2427ec681f3Smrg assert(query->last); 2437ec681f3Smrg 2447ec681f3Smrg for (struct gfx10_sh_query_buffer *qbuf = query->last;; 2457ec681f3Smrg qbuf = LIST_ENTRY(struct gfx10_sh_query_buffer, qbuf->list.prev, list)) { 2467ec681f3Smrg unsigned usage = PIPE_MAP_READ | (wait ? 0 : PIPE_MAP_DONTBLOCK); 2477ec681f3Smrg void *map; 2487ec681f3Smrg 2497ec681f3Smrg if (rquery->b.flushed) 2507ec681f3Smrg map = sctx->ws->buffer_map(sctx->ws, qbuf->buf->buf, NULL, usage); 2517ec681f3Smrg else 2527ec681f3Smrg map = si_buffer_map(sctx, qbuf->buf, usage); 2537ec681f3Smrg 2547ec681f3Smrg if (!map) 2557ec681f3Smrg return false; 2567ec681f3Smrg 2577ec681f3Smrg unsigned results_begin = 0; 2587ec681f3Smrg unsigned results_end = qbuf->head; 2597ec681f3Smrg if (qbuf == query->first) 2607ec681f3Smrg results_begin = query->first_begin; 2617ec681f3Smrg if (qbuf == query->last) 2627ec681f3Smrg results_end = query->last_end; 2637ec681f3Smrg 2647ec681f3Smrg while (results_begin != results_end) { 2657ec681f3Smrg struct gfx10_sh_query_buffer_mem *qmem = map + results_begin; 2667ec681f3Smrg results_begin += sizeof(*qmem); 2677ec681f3Smrg 2687ec681f3Smrg gfx10_sh_query_add_result(query, qmem, result); 2697ec681f3Smrg } 2707ec681f3Smrg 2717ec681f3Smrg if (qbuf == query->first) 2727ec681f3Smrg break; 2737ec681f3Smrg } 2747ec681f3Smrg 2757ec681f3Smrg return true; 2767ec681f3Smrg} 2777ec681f3Smrg 2787ec681f3Smrgstatic void gfx10_sh_query_get_result_resource(struct si_context *sctx, struct si_query *rquery, 2797ec681f3Smrg bool wait, enum pipe_query_value_type result_type, 2807ec681f3Smrg int index, struct pipe_resource *resource, 2817ec681f3Smrg unsigned offset) 2827ec681f3Smrg{ 2837ec681f3Smrg struct gfx10_sh_query *query = (struct gfx10_sh_query *)rquery; 2847ec681f3Smrg struct si_qbo_state saved_state = {}; 2857ec681f3Smrg struct pipe_resource *tmp_buffer = NULL; 2867ec681f3Smrg unsigned tmp_buffer_offset = 0; 2877ec681f3Smrg 2887ec681f3Smrg if (!sctx->sh_query_result_shader) { 2897ec681f3Smrg sctx->sh_query_result_shader = gfx10_create_sh_query_result_cs(sctx); 2907ec681f3Smrg if (!sctx->sh_query_result_shader) 2917ec681f3Smrg return; 2927ec681f3Smrg } 2937ec681f3Smrg 2947ec681f3Smrg if (query->first != query->last) { 2957ec681f3Smrg u_suballocator_alloc(&sctx->allocator_zeroed_memory, 16, 16, &tmp_buffer_offset, &tmp_buffer); 2967ec681f3Smrg if (!tmp_buffer) 2977ec681f3Smrg return; 2987ec681f3Smrg } 2997ec681f3Smrg 3007ec681f3Smrg si_save_qbo_state(sctx, &saved_state); 3017ec681f3Smrg 3027ec681f3Smrg /* Pre-fill the constants configuring the shader behavior. */ 3037ec681f3Smrg struct { 3047ec681f3Smrg uint32_t config; 3057ec681f3Smrg uint32_t offset; 3067ec681f3Smrg uint32_t chain; 3077ec681f3Smrg uint32_t result_count; 3087ec681f3Smrg } consts; 3097ec681f3Smrg struct pipe_constant_buffer constant_buffer = {}; 3107ec681f3Smrg 3117ec681f3Smrg if (index >= 0) { 3127ec681f3Smrg switch (query->b.type) { 3137ec681f3Smrg case PIPE_QUERY_PRIMITIVES_GENERATED: 3147ec681f3Smrg consts.offset = 4 * sizeof(uint64_t) * query->stream + 2 * sizeof(uint64_t); 3157ec681f3Smrg consts.config = 0; 3167ec681f3Smrg break; 3177ec681f3Smrg case PIPE_QUERY_PRIMITIVES_EMITTED: 3187ec681f3Smrg consts.offset = 4 * sizeof(uint64_t) * query->stream + 3 * sizeof(uint64_t); 3197ec681f3Smrg consts.config = 0; 3207ec681f3Smrg break; 3217ec681f3Smrg case PIPE_QUERY_SO_STATISTICS: 3227ec681f3Smrg consts.offset = sizeof(uint32_t) * (4 * index + query->stream); 3237ec681f3Smrg consts.config = 0; 3247ec681f3Smrg break; 3257ec681f3Smrg case PIPE_QUERY_SO_OVERFLOW_PREDICATE: 3267ec681f3Smrg consts.offset = 4 * sizeof(uint64_t) * query->stream; 3277ec681f3Smrg consts.config = 2; 3287ec681f3Smrg break; 3297ec681f3Smrg case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: 3307ec681f3Smrg consts.offset = 0; 3317ec681f3Smrg consts.config = 3; 3327ec681f3Smrg break; 3337ec681f3Smrg default: 3347ec681f3Smrg unreachable("bad query type"); 3357ec681f3Smrg } 3367ec681f3Smrg } else { 3377ec681f3Smrg /* Check result availability. */ 3387ec681f3Smrg consts.offset = 0; 3397ec681f3Smrg consts.config = 1; 3407ec681f3Smrg } 3417ec681f3Smrg 3427ec681f3Smrg if (result_type == PIPE_QUERY_TYPE_I64 || result_type == PIPE_QUERY_TYPE_U64) 3437ec681f3Smrg consts.config |= 8; 3447ec681f3Smrg 3457ec681f3Smrg constant_buffer.buffer_size = sizeof(consts); 3467ec681f3Smrg constant_buffer.user_buffer = &consts; 3477ec681f3Smrg 3487ec681f3Smrg /* Pre-fill the SSBOs and grid. */ 3497ec681f3Smrg struct pipe_shader_buffer ssbo[3]; 3507ec681f3Smrg struct pipe_grid_info grid = {}; 3517ec681f3Smrg 3527ec681f3Smrg ssbo[1].buffer = tmp_buffer; 3537ec681f3Smrg ssbo[1].buffer_offset = tmp_buffer_offset; 3547ec681f3Smrg ssbo[1].buffer_size = 16; 3557ec681f3Smrg 3567ec681f3Smrg ssbo[2] = ssbo[1]; 3577ec681f3Smrg 3587ec681f3Smrg grid.block[0] = 1; 3597ec681f3Smrg grid.block[1] = 1; 3607ec681f3Smrg grid.block[2] = 1; 3617ec681f3Smrg grid.grid[0] = 1; 3627ec681f3Smrg grid.grid[1] = 1; 3637ec681f3Smrg grid.grid[2] = 1; 3647ec681f3Smrg 3657ec681f3Smrg struct gfx10_sh_query_buffer *qbuf = query->first; 3667ec681f3Smrg for (;;) { 3677ec681f3Smrg unsigned begin = qbuf == query->first ? query->first_begin : 0; 3687ec681f3Smrg unsigned end = qbuf == query->last ? query->last_end : qbuf->buf->b.b.width0; 3697ec681f3Smrg if (!end) 3707ec681f3Smrg continue; 3717ec681f3Smrg 3727ec681f3Smrg ssbo[0].buffer = &qbuf->buf->b.b; 3737ec681f3Smrg ssbo[0].buffer_offset = begin; 3747ec681f3Smrg ssbo[0].buffer_size = end - begin; 3757ec681f3Smrg 3767ec681f3Smrg consts.result_count = (end - begin) / sizeof(struct gfx10_sh_query_buffer_mem); 3777ec681f3Smrg consts.chain = 0; 3787ec681f3Smrg if (qbuf != query->first) 3797ec681f3Smrg consts.chain |= 1; 3807ec681f3Smrg if (qbuf != query->last) 3817ec681f3Smrg consts.chain |= 2; 3827ec681f3Smrg 3837ec681f3Smrg if (qbuf == query->last) { 3847ec681f3Smrg ssbo[2].buffer = resource; 3857ec681f3Smrg ssbo[2].buffer_offset = offset; 3867ec681f3Smrg ssbo[2].buffer_size = 8; 3877ec681f3Smrg } 3887ec681f3Smrg 3897ec681f3Smrg sctx->b.set_constant_buffer(&sctx->b, PIPE_SHADER_COMPUTE, 0, false, &constant_buffer); 3907ec681f3Smrg 3917ec681f3Smrg if (wait) { 3927ec681f3Smrg uint64_t va; 3937ec681f3Smrg 3947ec681f3Smrg /* Wait for result availability. Wait only for readiness 3957ec681f3Smrg * of the last entry, since the fence writes should be 3967ec681f3Smrg * serialized in the CP. 3977ec681f3Smrg */ 3987ec681f3Smrg va = qbuf->buf->gpu_address; 3997ec681f3Smrg va += end - sizeof(struct gfx10_sh_query_buffer_mem); 4007ec681f3Smrg va += offsetof(struct gfx10_sh_query_buffer_mem, fence); 4017ec681f3Smrg 4027ec681f3Smrg si_cp_wait_mem(sctx, &sctx->gfx_cs, va, 0x00000001, 0x00000001, 0); 4037ec681f3Smrg } 4047ec681f3Smrg 4057ec681f3Smrg si_launch_grid_internal_ssbos(sctx, &grid, sctx->sh_query_result_shader, 4067ec681f3Smrg SI_OP_SYNC_PS_BEFORE | SI_OP_SYNC_AFTER, SI_COHERENCY_SHADER, 4077ec681f3Smrg 3, ssbo, 0x6); 4087ec681f3Smrg 4097ec681f3Smrg if (qbuf == query->last) 4107ec681f3Smrg break; 4117ec681f3Smrg qbuf = LIST_ENTRY(struct gfx10_sh_query_buffer, qbuf->list.next, list); 4127ec681f3Smrg } 4137ec681f3Smrg 4147ec681f3Smrg si_restore_qbo_state(sctx, &saved_state); 4157ec681f3Smrg pipe_resource_reference(&tmp_buffer, NULL); 4167ec681f3Smrg} 4177ec681f3Smrg 4187ec681f3Smrgstatic const struct si_query_ops gfx10_sh_query_ops = { 4197ec681f3Smrg .destroy = gfx10_sh_query_destroy, 4207ec681f3Smrg .begin = gfx10_sh_query_begin, 4217ec681f3Smrg .end = gfx10_sh_query_end, 4227ec681f3Smrg .get_result = gfx10_sh_query_get_result, 4237ec681f3Smrg .get_result_resource = gfx10_sh_query_get_result_resource, 4247ec681f3Smrg}; 4257ec681f3Smrg 4267ec681f3Smrgstruct pipe_query *gfx10_sh_query_create(struct si_screen *screen, enum pipe_query_type query_type, 4277ec681f3Smrg unsigned index) 4287ec681f3Smrg{ 4297ec681f3Smrg struct gfx10_sh_query *query = CALLOC_STRUCT(gfx10_sh_query); 4307ec681f3Smrg if (unlikely(!query)) 4317ec681f3Smrg return NULL; 4327ec681f3Smrg 4337ec681f3Smrg query->b.ops = &gfx10_sh_query_ops; 4347ec681f3Smrg query->b.type = query_type; 4357ec681f3Smrg query->stream = index; 4367ec681f3Smrg 4377ec681f3Smrg return (struct pipe_query *)query; 4387ec681f3Smrg} 4397ec681f3Smrg 4407ec681f3Smrgvoid gfx10_init_query(struct si_context *sctx) 4417ec681f3Smrg{ 4427ec681f3Smrg list_inithead(&sctx->shader_query_buffers); 4437ec681f3Smrg sctx->atoms.s.shader_query.emit = emit_shader_query; 4447ec681f3Smrg} 4457ec681f3Smrg 4467ec681f3Smrgvoid gfx10_destroy_query(struct si_context *sctx) 4477ec681f3Smrg{ 4487ec681f3Smrg while (!list_is_empty(&sctx->shader_query_buffers)) { 4497ec681f3Smrg struct gfx10_sh_query_buffer *qbuf = 4507ec681f3Smrg list_first_entry(&sctx->shader_query_buffers, struct gfx10_sh_query_buffer, list); 4517ec681f3Smrg list_del(&qbuf->list); 4527ec681f3Smrg 4537ec681f3Smrg assert(!qbuf->refcount); 4547ec681f3Smrg si_resource_reference(&qbuf->buf, NULL); 4557ec681f3Smrg FREE(qbuf); 4567ec681f3Smrg } 4577ec681f3Smrg} 458