/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * @file crocus_query.c
 *
 * ============================= GENXML CODE =============================
 *              [This file is compiled once per generation.]
 * =======================================================================
 *
 * Query object support.  This allows measuring various simple statistics
 * via counters on the GPU.  We use GenX code for MI_MATH calculations.
327ec681f3Smrg */ 337ec681f3Smrg 347ec681f3Smrg#include <stdio.h> 357ec681f3Smrg#include <errno.h> 367ec681f3Smrg#include "perf/intel_perf.h" 377ec681f3Smrg#include "pipe/p_defines.h" 387ec681f3Smrg#include "pipe/p_state.h" 397ec681f3Smrg#include "pipe/p_context.h" 407ec681f3Smrg#include "pipe/p_screen.h" 417ec681f3Smrg#include "util/u_inlines.h" 427ec681f3Smrg#include "util/u_upload_mgr.h" 437ec681f3Smrg#include "crocus_context.h" 447ec681f3Smrg#include "crocus_defines.h" 457ec681f3Smrg#include "crocus_fence.h" 467ec681f3Smrg#include "crocus_monitor.h" 477ec681f3Smrg#include "crocus_resource.h" 487ec681f3Smrg#include "crocus_screen.h" 497ec681f3Smrg 507ec681f3Smrg#include "crocus_genx_macros.h" 517ec681f3Smrg 527ec681f3Smrg#if GFX_VER == 6 537ec681f3Smrg// TOOD: Add these to genxml? 547ec681f3Smrg#define SO_PRIM_STORAGE_NEEDED(n) (0x2280) 557ec681f3Smrg#define SO_NUM_PRIMS_WRITTEN(n) (0x2288) 567ec681f3Smrg 577ec681f3Smrg// TODO: remove HS/DS/CS 587ec681f3Smrg#define GFX6_IA_VERTICES_COUNT_num 0x2310 597ec681f3Smrg#define GFX6_IA_PRIMITIVES_COUNT_num 0x2318 607ec681f3Smrg#define GFX6_VS_INVOCATION_COUNT_num 0x2320 617ec681f3Smrg#define GFX6_HS_INVOCATION_COUNT_num 0x2300 627ec681f3Smrg#define GFX6_DS_INVOCATION_COUNT_num 0x2308 637ec681f3Smrg#define GFX6_GS_INVOCATION_COUNT_num 0x2328 647ec681f3Smrg#define GFX6_GS_PRIMITIVES_COUNT_num 0x2330 657ec681f3Smrg#define GFX6_CL_INVOCATION_COUNT_num 0x2338 667ec681f3Smrg#define GFX6_CL_PRIMITIVES_COUNT_num 0x2340 677ec681f3Smrg#define GFX6_PS_INVOCATION_COUNT_num 0x2348 687ec681f3Smrg#define GFX6_CS_INVOCATION_COUNT_num 0x2290 697ec681f3Smrg#define GFX6_PS_DEPTH_COUNT_num 0x2350 707ec681f3Smrg 717ec681f3Smrg#elif GFX_VER >= 7 727ec681f3Smrg#define SO_PRIM_STORAGE_NEEDED(n) (GENX(SO_PRIM_STORAGE_NEEDED0_num) + (n) * 8) 737ec681f3Smrg#define SO_NUM_PRIMS_WRITTEN(n) (GENX(SO_NUM_PRIMS_WRITTEN0_num) + (n) * 8) 747ec681f3Smrg#endif 757ec681f3Smrg 767ec681f3Smrgstruct crocus_query { 777ec681f3Smrg struct threaded_query b; 
787ec681f3Smrg 797ec681f3Smrg enum pipe_query_type type; 807ec681f3Smrg int index; 817ec681f3Smrg 827ec681f3Smrg bool ready; 837ec681f3Smrg 847ec681f3Smrg bool stalled; 857ec681f3Smrg 867ec681f3Smrg uint64_t result; 877ec681f3Smrg 887ec681f3Smrg struct crocus_state_ref query_state_ref; 897ec681f3Smrg struct crocus_query_snapshots *map; 907ec681f3Smrg struct crocus_syncobj *syncobj; 917ec681f3Smrg 927ec681f3Smrg int batch_idx; 937ec681f3Smrg 947ec681f3Smrg struct crocus_monitor_object *monitor; 957ec681f3Smrg 967ec681f3Smrg /* Fence for PIPE_QUERY_GPU_FINISHED. */ 977ec681f3Smrg struct pipe_fence_handle *fence; 987ec681f3Smrg}; 997ec681f3Smrg 1007ec681f3Smrgstruct crocus_query_snapshots { 1017ec681f3Smrg /** crocus_render_condition's saved MI_PREDICATE_RESULT value. */ 1027ec681f3Smrg uint64_t predicate_result; 1037ec681f3Smrg 1047ec681f3Smrg /** Have the start/end snapshots landed? */ 1057ec681f3Smrg uint64_t snapshots_landed; 1067ec681f3Smrg 1077ec681f3Smrg /** Starting and ending counter snapshots */ 1087ec681f3Smrg uint64_t start; 1097ec681f3Smrg uint64_t end; 1107ec681f3Smrg}; 1117ec681f3Smrg 1127ec681f3Smrgstruct crocus_query_so_overflow { 1137ec681f3Smrg uint64_t predicate_result; 1147ec681f3Smrg uint64_t snapshots_landed; 1157ec681f3Smrg 1167ec681f3Smrg struct { 1177ec681f3Smrg uint64_t prim_storage_needed[2]; 1187ec681f3Smrg uint64_t num_prims[2]; 1197ec681f3Smrg } stream[4]; 1207ec681f3Smrg}; 1217ec681f3Smrg 1227ec681f3Smrg#if GFX_VERx10 >= 75 1237ec681f3Smrgstatic struct mi_value 1247ec681f3Smrgquery_mem64(struct crocus_query *q, uint32_t offset) 1257ec681f3Smrg{ 1267ec681f3Smrg return mi_mem64(rw_bo(crocus_resource_bo(q->query_state_ref.res), 1277ec681f3Smrg q->query_state_ref.offset + offset)); 1287ec681f3Smrg} 1297ec681f3Smrg#endif 1307ec681f3Smrg 1317ec681f3Smrg/** 1327ec681f3Smrg * Is this type of query written by PIPE_CONTROL? 
1337ec681f3Smrg */ 1347ec681f3Smrgstatic bool 1357ec681f3Smrgcrocus_is_query_pipelined(struct crocus_query *q) 1367ec681f3Smrg{ 1377ec681f3Smrg switch (q->type) { 1387ec681f3Smrg case PIPE_QUERY_OCCLUSION_COUNTER: 1397ec681f3Smrg case PIPE_QUERY_OCCLUSION_PREDICATE: 1407ec681f3Smrg case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: 1417ec681f3Smrg case PIPE_QUERY_TIMESTAMP: 1427ec681f3Smrg case PIPE_QUERY_TIMESTAMP_DISJOINT: 1437ec681f3Smrg case PIPE_QUERY_TIME_ELAPSED: 1447ec681f3Smrg return true; 1457ec681f3Smrg 1467ec681f3Smrg default: 1477ec681f3Smrg return false; 1487ec681f3Smrg } 1497ec681f3Smrg} 1507ec681f3Smrg 1517ec681f3Smrgstatic void 1527ec681f3Smrgmark_available(struct crocus_context *ice, struct crocus_query *q) 1537ec681f3Smrg{ 1547ec681f3Smrg#if GFX_VERx10 >= 75 1557ec681f3Smrg struct crocus_batch *batch = &ice->batches[q->batch_idx]; 1567ec681f3Smrg struct crocus_screen *screen = batch->screen; 1577ec681f3Smrg unsigned flags = PIPE_CONTROL_WRITE_IMMEDIATE; 1587ec681f3Smrg unsigned offset = offsetof(struct crocus_query_snapshots, snapshots_landed); 1597ec681f3Smrg struct crocus_bo *bo = crocus_resource_bo(q->query_state_ref.res); 1607ec681f3Smrg offset += q->query_state_ref.offset; 1617ec681f3Smrg 1627ec681f3Smrg if (!crocus_is_query_pipelined(q)) { 1637ec681f3Smrg screen->vtbl.store_data_imm64(batch, bo, offset, true); 1647ec681f3Smrg } else { 1657ec681f3Smrg /* Order available *after* the query results. */ 1667ec681f3Smrg flags |= PIPE_CONTROL_FLUSH_ENABLE; 1677ec681f3Smrg crocus_emit_pipe_control_write(batch, "query: mark available", 1687ec681f3Smrg flags, bo, offset, true); 1697ec681f3Smrg } 1707ec681f3Smrg#endif 1717ec681f3Smrg} 1727ec681f3Smrg 1737ec681f3Smrg/** 1747ec681f3Smrg * Write PS_DEPTH_COUNT to q->(dest) via a PIPE_CONTROL. 
1757ec681f3Smrg */ 1767ec681f3Smrgstatic void 1777ec681f3Smrgcrocus_pipelined_write(struct crocus_batch *batch, 1787ec681f3Smrg struct crocus_query *q, 1797ec681f3Smrg enum pipe_control_flags flags, 1807ec681f3Smrg unsigned offset) 1817ec681f3Smrg{ 1827ec681f3Smrg struct crocus_bo *bo = crocus_resource_bo(q->query_state_ref.res); 1837ec681f3Smrg 1847ec681f3Smrg crocus_emit_pipe_control_write(batch, "query: pipelined snapshot write", 1857ec681f3Smrg flags, 1867ec681f3Smrg bo, offset, 0ull); 1877ec681f3Smrg} 1887ec681f3Smrg 1897ec681f3Smrgstatic void 1907ec681f3Smrgwrite_value(struct crocus_context *ice, struct crocus_query *q, unsigned offset) 1917ec681f3Smrg{ 1927ec681f3Smrg struct crocus_batch *batch = &ice->batches[q->batch_idx]; 1937ec681f3Smrg#if GFX_VER >= 6 1947ec681f3Smrg struct crocus_screen *screen = batch->screen; 1957ec681f3Smrg struct crocus_bo *bo = crocus_resource_bo(q->query_state_ref.res); 1967ec681f3Smrg#endif 1977ec681f3Smrg 1987ec681f3Smrg if (!crocus_is_query_pipelined(q)) { 1997ec681f3Smrg crocus_emit_pipe_control_flush(batch, 2007ec681f3Smrg "query: non-pipelined snapshot write", 2017ec681f3Smrg PIPE_CONTROL_CS_STALL | 2027ec681f3Smrg PIPE_CONTROL_STALL_AT_SCOREBOARD); 2037ec681f3Smrg q->stalled = true; 2047ec681f3Smrg } 2057ec681f3Smrg 2067ec681f3Smrg switch (q->type) { 2077ec681f3Smrg case PIPE_QUERY_OCCLUSION_COUNTER: 2087ec681f3Smrg case PIPE_QUERY_OCCLUSION_PREDICATE: 2097ec681f3Smrg case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: 2107ec681f3Smrg crocus_pipelined_write(&ice->batches[CROCUS_BATCH_RENDER], q, 2117ec681f3Smrg PIPE_CONTROL_WRITE_DEPTH_COUNT | 2127ec681f3Smrg PIPE_CONTROL_DEPTH_STALL, 2137ec681f3Smrg offset); 2147ec681f3Smrg break; 2157ec681f3Smrg case PIPE_QUERY_TIME_ELAPSED: 2167ec681f3Smrg case PIPE_QUERY_TIMESTAMP: 2177ec681f3Smrg case PIPE_QUERY_TIMESTAMP_DISJOINT: 2187ec681f3Smrg crocus_pipelined_write(&ice->batches[CROCUS_BATCH_RENDER], q, 2197ec681f3Smrg PIPE_CONTROL_WRITE_TIMESTAMP, 2207ec681f3Smrg offset); 
2217ec681f3Smrg break; 2227ec681f3Smrg case PIPE_QUERY_PRIMITIVES_GENERATED: 2237ec681f3Smrg#if GFX_VER >= 6 2247ec681f3Smrg screen->vtbl.store_register_mem64(batch, 2257ec681f3Smrg q->index == 0 ? 2267ec681f3Smrg GENX(CL_INVOCATION_COUNT_num) : 2277ec681f3Smrg SO_PRIM_STORAGE_NEEDED(q->index), 2287ec681f3Smrg bo, offset, false); 2297ec681f3Smrg#endif 2307ec681f3Smrg break; 2317ec681f3Smrg case PIPE_QUERY_PRIMITIVES_EMITTED: 2327ec681f3Smrg#if GFX_VER >= 6 2337ec681f3Smrg screen->vtbl.store_register_mem64(batch, 2347ec681f3Smrg SO_NUM_PRIMS_WRITTEN(q->index), 2357ec681f3Smrg bo, offset, false); 2367ec681f3Smrg#endif 2377ec681f3Smrg break; 2387ec681f3Smrg case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE: { 2397ec681f3Smrg#if GFX_VER >= 6 2407ec681f3Smrg static const uint32_t index_to_reg[] = { 2417ec681f3Smrg GENX(IA_VERTICES_COUNT_num), 2427ec681f3Smrg GENX(IA_PRIMITIVES_COUNT_num), 2437ec681f3Smrg GENX(VS_INVOCATION_COUNT_num), 2447ec681f3Smrg GENX(GS_INVOCATION_COUNT_num), 2457ec681f3Smrg GENX(GS_PRIMITIVES_COUNT_num), 2467ec681f3Smrg GENX(CL_INVOCATION_COUNT_num), 2477ec681f3Smrg GENX(CL_PRIMITIVES_COUNT_num), 2487ec681f3Smrg GENX(PS_INVOCATION_COUNT_num), 2497ec681f3Smrg GENX(HS_INVOCATION_COUNT_num), 2507ec681f3Smrg GENX(DS_INVOCATION_COUNT_num), 2517ec681f3Smrg GENX(CS_INVOCATION_COUNT_num), 2527ec681f3Smrg }; 2537ec681f3Smrg uint32_t reg = index_to_reg[q->index]; 2547ec681f3Smrg 2557ec681f3Smrg#if GFX_VER == 6 2567ec681f3Smrg /* Gfx6 GS code counts full primitives, that is, it won't count individual 2577ec681f3Smrg * triangles in a triangle strip. Use CL_INVOCATION_COUNT for that. 
2587ec681f3Smrg */ 2597ec681f3Smrg if (q->index == PIPE_STAT_QUERY_GS_PRIMITIVES) 2607ec681f3Smrg reg = GENX(CL_INVOCATION_COUNT_num); 2617ec681f3Smrg#endif 2627ec681f3Smrg 2637ec681f3Smrg screen->vtbl.store_register_mem64(batch, reg, bo, offset, false); 2647ec681f3Smrg#endif 2657ec681f3Smrg break; 2667ec681f3Smrg } 2677ec681f3Smrg default: 2687ec681f3Smrg assert(false); 2697ec681f3Smrg } 2707ec681f3Smrg} 2717ec681f3Smrg 2727ec681f3Smrg#if GFX_VER >= 6 2737ec681f3Smrgstatic void 2747ec681f3Smrgwrite_overflow_values(struct crocus_context *ice, struct crocus_query *q, bool end) 2757ec681f3Smrg{ 2767ec681f3Smrg struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER]; 2777ec681f3Smrg struct crocus_screen *screen = batch->screen; 2787ec681f3Smrg uint32_t count = q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ? 1 : 4; 2797ec681f3Smrg struct crocus_bo *bo = crocus_resource_bo(q->query_state_ref.res); 2807ec681f3Smrg uint32_t offset = q->query_state_ref.offset; 2817ec681f3Smrg crocus_emit_pipe_control_flush(batch, 2827ec681f3Smrg "query: write SO overflow snapshots", 2837ec681f3Smrg PIPE_CONTROL_CS_STALL | 2847ec681f3Smrg PIPE_CONTROL_STALL_AT_SCOREBOARD); 2857ec681f3Smrg for (uint32_t i = 0; i < count; i++) { 2867ec681f3Smrg int s = q->index + i; 2877ec681f3Smrg int g_idx = offset + offsetof(struct crocus_query_so_overflow, 2887ec681f3Smrg stream[s].num_prims[end]); 2897ec681f3Smrg int w_idx = offset + offsetof(struct crocus_query_so_overflow, 2907ec681f3Smrg stream[s].prim_storage_needed[end]); 2917ec681f3Smrg screen->vtbl.store_register_mem64(batch, SO_NUM_PRIMS_WRITTEN(s), 2927ec681f3Smrg bo, g_idx, false); 2937ec681f3Smrg screen->vtbl.store_register_mem64(batch, SO_PRIM_STORAGE_NEEDED(s), 2947ec681f3Smrg bo, w_idx, false); 2957ec681f3Smrg } 2967ec681f3Smrg} 2977ec681f3Smrg#endif 2987ec681f3Smrgstatic uint64_t 2997ec681f3Smrgcrocus_raw_timestamp_delta(uint64_t time0, uint64_t time1) 3007ec681f3Smrg{ 3017ec681f3Smrg if (time0 > time1) { 3027ec681f3Smrg return (1ULL 
<< TIMESTAMP_BITS) + time1 - time0; 3037ec681f3Smrg } else { 3047ec681f3Smrg return time1 - time0; 3057ec681f3Smrg } 3067ec681f3Smrg} 3077ec681f3Smrg 3087ec681f3Smrgstatic bool 3097ec681f3Smrgstream_overflowed(struct crocus_query_so_overflow *so, int s) 3107ec681f3Smrg{ 3117ec681f3Smrg return (so->stream[s].prim_storage_needed[1] - 3127ec681f3Smrg so->stream[s].prim_storage_needed[0]) != 3137ec681f3Smrg (so->stream[s].num_prims[1] - so->stream[s].num_prims[0]); 3147ec681f3Smrg} 3157ec681f3Smrg 3167ec681f3Smrgstatic void 3177ec681f3Smrgcalculate_result_on_cpu(const struct intel_device_info *devinfo, 3187ec681f3Smrg struct crocus_query *q) 3197ec681f3Smrg{ 3207ec681f3Smrg switch (q->type) { 3217ec681f3Smrg case PIPE_QUERY_OCCLUSION_PREDICATE: 3227ec681f3Smrg case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: 3237ec681f3Smrg q->result = q->map->end != q->map->start; 3247ec681f3Smrg break; 3257ec681f3Smrg case PIPE_QUERY_TIMESTAMP: 3267ec681f3Smrg case PIPE_QUERY_TIMESTAMP_DISJOINT: 3277ec681f3Smrg /* The timestamp is the single starting snapshot. 
*/ 3287ec681f3Smrg q->result = intel_device_info_timebase_scale(devinfo, q->map->start); 3297ec681f3Smrg q->result &= (1ull << TIMESTAMP_BITS) - 1; 3307ec681f3Smrg break; 3317ec681f3Smrg case PIPE_QUERY_TIME_ELAPSED: 3327ec681f3Smrg q->result = crocus_raw_timestamp_delta(q->map->start, q->map->end); 3337ec681f3Smrg q->result = intel_device_info_timebase_scale(devinfo, q->result); 3347ec681f3Smrg q->result &= (1ull << TIMESTAMP_BITS) - 1; 3357ec681f3Smrg break; 3367ec681f3Smrg case PIPE_QUERY_SO_OVERFLOW_PREDICATE: 3377ec681f3Smrg q->result = stream_overflowed((void *) q->map, q->index); 3387ec681f3Smrg break; 3397ec681f3Smrg case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: 3407ec681f3Smrg q->result = false; 3417ec681f3Smrg for (int i = 0; i < MAX_VERTEX_STREAMS; i++) 3427ec681f3Smrg q->result |= stream_overflowed((void *) q->map, i); 3437ec681f3Smrg break; 3447ec681f3Smrg case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE: 3457ec681f3Smrg q->result = q->map->end - q->map->start; 3467ec681f3Smrg 3477ec681f3Smrg /* WaDividePSInvocationCountBy4:HSW,BDW */ 3487ec681f3Smrg if (GFX_VERx10 >= 75 && q->index == PIPE_STAT_QUERY_PS_INVOCATIONS) 3497ec681f3Smrg q->result /= 4; 3507ec681f3Smrg break; 3517ec681f3Smrg case PIPE_QUERY_OCCLUSION_COUNTER: 3527ec681f3Smrg case PIPE_QUERY_PRIMITIVES_GENERATED: 3537ec681f3Smrg case PIPE_QUERY_PRIMITIVES_EMITTED: 3547ec681f3Smrg default: 3557ec681f3Smrg q->result = q->map->end - q->map->start; 3567ec681f3Smrg break; 3577ec681f3Smrg } 3587ec681f3Smrg 3597ec681f3Smrg q->ready = true; 3607ec681f3Smrg} 3617ec681f3Smrg 3627ec681f3Smrg#if GFX_VERx10 >= 75 3637ec681f3Smrg/** 3647ec681f3Smrg * Calculate the streamout overflow for stream \p idx: 3657ec681f3Smrg * 3667ec681f3Smrg * (num_prims[1] - num_prims[0]) - (storage_needed[1] - storage_needed[0]) 3677ec681f3Smrg */ 3687ec681f3Smrgstatic struct mi_value 3697ec681f3Smrgcalc_overflow_for_stream(struct mi_builder *b, 3707ec681f3Smrg struct crocus_query *q, 3717ec681f3Smrg int idx) 3727ec681f3Smrg{ 
3737ec681f3Smrg#define C(counter, i) query_mem64(q, \ 3747ec681f3Smrg offsetof(struct crocus_query_so_overflow, stream[idx].counter[i])) 3757ec681f3Smrg 3767ec681f3Smrg return mi_isub(b, mi_isub(b, C(num_prims, 1), C(num_prims, 0)), 3777ec681f3Smrg mi_isub(b, C(prim_storage_needed, 1), 3787ec681f3Smrg C(prim_storage_needed, 0))); 3797ec681f3Smrg#undef C 3807ec681f3Smrg} 3817ec681f3Smrg 3827ec681f3Smrg/** 3837ec681f3Smrg * Calculate whether any stream has overflowed. 3847ec681f3Smrg */ 3857ec681f3Smrgstatic struct mi_value 3867ec681f3Smrgcalc_overflow_any_stream(struct mi_builder *b, struct crocus_query *q) 3877ec681f3Smrg{ 3887ec681f3Smrg struct mi_value stream_result[MAX_VERTEX_STREAMS]; 3897ec681f3Smrg for (int i = 0; i < MAX_VERTEX_STREAMS; i++) 3907ec681f3Smrg stream_result[i] = calc_overflow_for_stream(b, q, i); 3917ec681f3Smrg 3927ec681f3Smrg struct mi_value result = stream_result[0]; 3937ec681f3Smrg for (int i = 1; i < MAX_VERTEX_STREAMS; i++) 3947ec681f3Smrg result = mi_ior(b, result, stream_result[i]); 3957ec681f3Smrg 3967ec681f3Smrg return result; 3977ec681f3Smrg} 3987ec681f3Smrg 3997ec681f3Smrg 4007ec681f3Smrgstatic bool 4017ec681f3Smrgquery_is_boolean(enum pipe_query_type type) 4027ec681f3Smrg{ 4037ec681f3Smrg switch (type) { 4047ec681f3Smrg case PIPE_QUERY_OCCLUSION_PREDICATE: 4057ec681f3Smrg case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: 4067ec681f3Smrg case PIPE_QUERY_SO_OVERFLOW_PREDICATE: 4077ec681f3Smrg case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: 4087ec681f3Smrg return true; 4097ec681f3Smrg default: 4107ec681f3Smrg return false; 4117ec681f3Smrg } 4127ec681f3Smrg} 4137ec681f3Smrg 4147ec681f3Smrg/** 4157ec681f3Smrg * Calculate the result using MI_MATH. 
4167ec681f3Smrg */ 4177ec681f3Smrgstatic struct mi_value 4187ec681f3Smrgcalculate_result_on_gpu(const struct intel_device_info *devinfo, 4197ec681f3Smrg struct mi_builder *b, 4207ec681f3Smrg struct crocus_query *q) 4217ec681f3Smrg{ 4227ec681f3Smrg struct mi_value result; 4237ec681f3Smrg struct mi_value start_val = 4247ec681f3Smrg query_mem64(q, offsetof(struct crocus_query_snapshots, start)); 4257ec681f3Smrg struct mi_value end_val = 4267ec681f3Smrg query_mem64(q, offsetof(struct crocus_query_snapshots, end)); 4277ec681f3Smrg 4287ec681f3Smrg switch (q->type) { 4297ec681f3Smrg case PIPE_QUERY_SO_OVERFLOW_PREDICATE: 4307ec681f3Smrg result = calc_overflow_for_stream(b, q, q->index); 4317ec681f3Smrg break; 4327ec681f3Smrg case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: 4337ec681f3Smrg result = calc_overflow_any_stream(b, q); 4347ec681f3Smrg break; 4357ec681f3Smrg case PIPE_QUERY_TIMESTAMP: { 4367ec681f3Smrg /* TODO: This discards any fractional bits of the timebase scale. 4377ec681f3Smrg * We would need to do a bit of fixed point math on the CS ALU, or 4387ec681f3Smrg * launch an actual shader to calculate this with full precision. 4397ec681f3Smrg */ 4407ec681f3Smrg uint32_t scale = 1000000000ull / devinfo->timestamp_frequency; 4417ec681f3Smrg result = mi_iand(b, mi_imm((1ull << 36) - 1), 4427ec681f3Smrg mi_imul_imm(b, start_val, scale)); 4437ec681f3Smrg break; 4447ec681f3Smrg } 4457ec681f3Smrg case PIPE_QUERY_TIME_ELAPSED: { 4467ec681f3Smrg /* TODO: This discards fractional bits (see above). 
*/ 4477ec681f3Smrg uint32_t scale = 1000000000ull / devinfo->timestamp_frequency; 4487ec681f3Smrg result = mi_imul_imm(b, mi_isub(b, end_val, start_val), scale); 4497ec681f3Smrg break; 4507ec681f3Smrg } 4517ec681f3Smrg default: 4527ec681f3Smrg result = mi_isub(b, end_val, start_val); 4537ec681f3Smrg break; 4547ec681f3Smrg } 4557ec681f3Smrg /* WaDividePSInvocationCountBy4:HSW,BDW */ 4567ec681f3Smrg if (GFX_VERx10 >= 75 && 4577ec681f3Smrg q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE && 4587ec681f3Smrg q->index == PIPE_STAT_QUERY_PS_INVOCATIONS) 4597ec681f3Smrg result = mi_ushr32_imm(b, result, 2); 4607ec681f3Smrg 4617ec681f3Smrg if (query_is_boolean(q->type)) 4627ec681f3Smrg result = mi_iand(b, mi_nz(b, result), mi_imm(1)); 4637ec681f3Smrg 4647ec681f3Smrg return result; 4657ec681f3Smrg} 4667ec681f3Smrg#endif 4677ec681f3Smrg 4687ec681f3Smrgstatic struct pipe_query * 4697ec681f3Smrgcrocus_create_query(struct pipe_context *ctx, 4707ec681f3Smrg unsigned query_type, 4717ec681f3Smrg unsigned index) 4727ec681f3Smrg{ 4737ec681f3Smrg struct crocus_query *q = calloc(1, sizeof(struct crocus_query)); 4747ec681f3Smrg 4757ec681f3Smrg q->type = query_type; 4767ec681f3Smrg q->index = index; 4777ec681f3Smrg q->monitor = NULL; 4787ec681f3Smrg 4797ec681f3Smrg if (q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE && 4807ec681f3Smrg q->index == PIPE_STAT_QUERY_CS_INVOCATIONS) 4817ec681f3Smrg q->batch_idx = CROCUS_BATCH_COMPUTE; 4827ec681f3Smrg else 4837ec681f3Smrg q->batch_idx = CROCUS_BATCH_RENDER; 4847ec681f3Smrg return (struct pipe_query *) q; 4857ec681f3Smrg} 4867ec681f3Smrg 4877ec681f3Smrgstatic struct pipe_query * 4887ec681f3Smrgcrocus_create_batch_query(struct pipe_context *ctx, 4897ec681f3Smrg unsigned num_queries, 4907ec681f3Smrg unsigned *query_types) 4917ec681f3Smrg{ 4927ec681f3Smrg struct crocus_context *ice = (void *) ctx; 4937ec681f3Smrg struct crocus_query *q = calloc(1, sizeof(struct crocus_query)); 4947ec681f3Smrg if (unlikely(!q)) 4957ec681f3Smrg return NULL; 
4967ec681f3Smrg q->type = PIPE_QUERY_DRIVER_SPECIFIC; 4977ec681f3Smrg q->index = -1; 4987ec681f3Smrg q->monitor = crocus_create_monitor_object(ice, num_queries, query_types); 4997ec681f3Smrg if (unlikely(!q->monitor)) { 5007ec681f3Smrg free(q); 5017ec681f3Smrg return NULL; 5027ec681f3Smrg } 5037ec681f3Smrg 5047ec681f3Smrg return (struct pipe_query *) q; 5057ec681f3Smrg} 5067ec681f3Smrg 5077ec681f3Smrgstatic void 5087ec681f3Smrgcrocus_destroy_query(struct pipe_context *ctx, struct pipe_query *p_query) 5097ec681f3Smrg{ 5107ec681f3Smrg struct crocus_query *query = (void *) p_query; 5117ec681f3Smrg struct crocus_screen *screen = (void *) ctx->screen; 5127ec681f3Smrg if (query->monitor) { 5137ec681f3Smrg crocus_destroy_monitor_object(ctx, query->monitor); 5147ec681f3Smrg query->monitor = NULL; 5157ec681f3Smrg } else { 5167ec681f3Smrg crocus_syncobj_reference(screen, &query->syncobj, NULL); 5177ec681f3Smrg screen->base.fence_reference(ctx->screen, &query->fence, NULL); 5187ec681f3Smrg } 5197ec681f3Smrg free(query); 5207ec681f3Smrg} 5217ec681f3Smrg 5227ec681f3Smrg 5237ec681f3Smrgstatic bool 5247ec681f3Smrgcrocus_begin_query(struct pipe_context *ctx, struct pipe_query *query) 5257ec681f3Smrg{ 5267ec681f3Smrg struct crocus_context *ice = (void *) ctx; 5277ec681f3Smrg struct crocus_query *q = (void *) query; 5287ec681f3Smrg 5297ec681f3Smrg if (q->monitor) 5307ec681f3Smrg return crocus_begin_monitor(ctx, q->monitor); 5317ec681f3Smrg 5327ec681f3Smrg void *ptr = NULL; 5337ec681f3Smrg uint32_t size; 5347ec681f3Smrg 5357ec681f3Smrg if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE || 5367ec681f3Smrg q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) 5377ec681f3Smrg size = sizeof(struct crocus_query_so_overflow); 5387ec681f3Smrg else 5397ec681f3Smrg size = sizeof(struct crocus_query_snapshots); 5407ec681f3Smrg 5417ec681f3Smrg u_upload_alloc(ice->query_buffer_uploader, 0, 5427ec681f3Smrg size, size, &q->query_state_ref.offset, 5437ec681f3Smrg &q->query_state_ref.res, &ptr); 
5447ec681f3Smrg 5457ec681f3Smrg if (!crocus_resource_bo(q->query_state_ref.res)) 5467ec681f3Smrg return false; 5477ec681f3Smrg 5487ec681f3Smrg q->map = ptr; 5497ec681f3Smrg if (!q->map) 5507ec681f3Smrg return false; 5517ec681f3Smrg 5527ec681f3Smrg q->result = 0ull; 5537ec681f3Smrg q->ready = false; 5547ec681f3Smrg WRITE_ONCE(q->map->snapshots_landed, false); 5557ec681f3Smrg 5567ec681f3Smrg if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->index == 0) { 5577ec681f3Smrg ice->state.prims_generated_query_active = true; 5587ec681f3Smrg ice->state.dirty |= CROCUS_DIRTY_STREAMOUT | CROCUS_DIRTY_CLIP; 5597ec681f3Smrg } 5607ec681f3Smrg 5617ec681f3Smrg#if GFX_VER <= 5 5627ec681f3Smrg if (q->type == PIPE_QUERY_OCCLUSION_COUNTER || 5637ec681f3Smrg q->type == PIPE_QUERY_OCCLUSION_PREDICATE) { 5647ec681f3Smrg ice->state.stats_wm++; 5657ec681f3Smrg ice->state.dirty |= CROCUS_DIRTY_WM | CROCUS_DIRTY_COLOR_CALC_STATE; 5667ec681f3Smrg } 5677ec681f3Smrg#endif 5687ec681f3Smrg#if GFX_VER >= 6 5697ec681f3Smrg if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE || 5707ec681f3Smrg q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) 5717ec681f3Smrg write_overflow_values(ice, q, false); 5727ec681f3Smrg else 5737ec681f3Smrg#endif 5747ec681f3Smrg write_value(ice, q, 5757ec681f3Smrg q->query_state_ref.offset + 5767ec681f3Smrg offsetof(struct crocus_query_snapshots, start)); 5777ec681f3Smrg 5787ec681f3Smrg return true; 5797ec681f3Smrg} 5807ec681f3Smrg 5817ec681f3Smrgstatic bool 5827ec681f3Smrgcrocus_end_query(struct pipe_context *ctx, struct pipe_query *query) 5837ec681f3Smrg{ 5847ec681f3Smrg struct crocus_context *ice = (void *) ctx; 5857ec681f3Smrg struct crocus_query *q = (void *) query; 5867ec681f3Smrg 5877ec681f3Smrg if (q->monitor) 5887ec681f3Smrg return crocus_end_monitor(ctx, q->monitor); 5897ec681f3Smrg 5907ec681f3Smrg if (q->type == PIPE_QUERY_GPU_FINISHED) { 5917ec681f3Smrg ctx->flush(ctx, &q->fence, PIPE_FLUSH_DEFERRED); 5927ec681f3Smrg return true; 5937ec681f3Smrg } 5947ec681f3Smrg 
5957ec681f3Smrg struct crocus_batch *batch = &ice->batches[q->batch_idx]; 5967ec681f3Smrg 5977ec681f3Smrg if (q->type == PIPE_QUERY_TIMESTAMP) { 5987ec681f3Smrg crocus_begin_query(ctx, query); 5997ec681f3Smrg crocus_batch_reference_signal_syncobj(batch, &q->syncobj); 6007ec681f3Smrg mark_available(ice, q); 6017ec681f3Smrg return true; 6027ec681f3Smrg } 6037ec681f3Smrg 6047ec681f3Smrg#if GFX_VER <= 5 6057ec681f3Smrg if (q->type == PIPE_QUERY_OCCLUSION_COUNTER || 6067ec681f3Smrg q->type == PIPE_QUERY_OCCLUSION_PREDICATE) { 6077ec681f3Smrg ice->state.stats_wm--; 6087ec681f3Smrg ice->state.dirty |= CROCUS_DIRTY_WM | CROCUS_DIRTY_COLOR_CALC_STATE; 6097ec681f3Smrg } 6107ec681f3Smrg#endif 6117ec681f3Smrg if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->index == 0) { 6127ec681f3Smrg ice->state.prims_generated_query_active = false; 6137ec681f3Smrg ice->state.dirty |= CROCUS_DIRTY_STREAMOUT | CROCUS_DIRTY_CLIP; 6147ec681f3Smrg } 6157ec681f3Smrg 6167ec681f3Smrg#if GFX_VER >= 6 6177ec681f3Smrg if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE || 6187ec681f3Smrg q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) 6197ec681f3Smrg write_overflow_values(ice, q, true); 6207ec681f3Smrg else 6217ec681f3Smrg#endif 6227ec681f3Smrg write_value(ice, q, 6237ec681f3Smrg q->query_state_ref.offset + 6247ec681f3Smrg offsetof(struct crocus_query_snapshots, end)); 6257ec681f3Smrg 6267ec681f3Smrg crocus_batch_reference_signal_syncobj(batch, &q->syncobj); 6277ec681f3Smrg mark_available(ice, q); 6287ec681f3Smrg 6297ec681f3Smrg return true; 6307ec681f3Smrg} 6317ec681f3Smrg 6327ec681f3Smrg/** 6337ec681f3Smrg * See if the snapshots have landed for a query, and if so, compute the 6347ec681f3Smrg * result and mark it ready. Does not flush (unlike crocus_get_query_result). 
6357ec681f3Smrg */ 6367ec681f3Smrgstatic void 6377ec681f3Smrgcrocus_check_query_no_flush(struct crocus_context *ice, struct crocus_query *q) 6387ec681f3Smrg{ 6397ec681f3Smrg struct crocus_screen *screen = (void *) ice->ctx.screen; 6407ec681f3Smrg const struct intel_device_info *devinfo = &screen->devinfo; 6417ec681f3Smrg 6427ec681f3Smrg if (!q->ready && READ_ONCE(q->map->snapshots_landed)) { 6437ec681f3Smrg calculate_result_on_cpu(devinfo, q); 6447ec681f3Smrg } 6457ec681f3Smrg} 6467ec681f3Smrg 6477ec681f3Smrgstatic bool 6487ec681f3Smrgcrocus_get_query_result(struct pipe_context *ctx, 6497ec681f3Smrg struct pipe_query *query, 6507ec681f3Smrg bool wait, 6517ec681f3Smrg union pipe_query_result *result) 6527ec681f3Smrg{ 6537ec681f3Smrg struct crocus_context *ice = (void *) ctx; 6547ec681f3Smrg struct crocus_query *q = (void *) query; 6557ec681f3Smrg 6567ec681f3Smrg if (q->monitor) 6577ec681f3Smrg return crocus_get_monitor_result(ctx, q->monitor, wait, result->batch); 6587ec681f3Smrg 6597ec681f3Smrg struct crocus_screen *screen = (void *) ctx->screen; 6607ec681f3Smrg const struct intel_device_info *devinfo = &screen->devinfo; 6617ec681f3Smrg 6627ec681f3Smrg if (unlikely(screen->devinfo.no_hw)) { 6637ec681f3Smrg result->u64 = 0; 6647ec681f3Smrg return true; 6657ec681f3Smrg } 6667ec681f3Smrg 6677ec681f3Smrg if (!q->ready) { 6687ec681f3Smrg struct crocus_batch *batch = &ice->batches[q->batch_idx]; 6697ec681f3Smrg if (q->syncobj == crocus_batch_get_signal_syncobj(batch)) 6707ec681f3Smrg crocus_batch_flush(batch); 6717ec681f3Smrg 6727ec681f3Smrg#if GFX_VERx10 >= 75 6737ec681f3Smrg while (!READ_ONCE(q->map->snapshots_landed)) { 6747ec681f3Smrg if (wait) 6757ec681f3Smrg crocus_wait_syncobj(ctx->screen, q->syncobj, INT64_MAX); 6767ec681f3Smrg else 6777ec681f3Smrg return false; 6787ec681f3Smrg } 6797ec681f3Smrg assert(READ_ONCE(q->map->snapshots_landed)); 6807ec681f3Smrg#else 6817ec681f3Smrg if (crocus_wait_syncobj(ctx->screen, q->syncobj, wait ? 
INT64_MAX : 0)) { 6827ec681f3Smrg /* if we've waited and timedout, just set the query to ready to avoid infinite loop */ 6837ec681f3Smrg if (wait) 6847ec681f3Smrg q->ready = true; 6857ec681f3Smrg return false; 6867ec681f3Smrg } 6877ec681f3Smrg#endif 6887ec681f3Smrg calculate_result_on_cpu(devinfo, q); 6897ec681f3Smrg } 6907ec681f3Smrg 6917ec681f3Smrg assert(q->ready); 6927ec681f3Smrg 6937ec681f3Smrg result->u64 = q->result; 6947ec681f3Smrg 6957ec681f3Smrg return true; 6967ec681f3Smrg} 6977ec681f3Smrg 6987ec681f3Smrg#if GFX_VER >= 7 6997ec681f3Smrgstatic void 7007ec681f3Smrgcrocus_get_query_result_resource(struct pipe_context *ctx, 7017ec681f3Smrg struct pipe_query *query, 7027ec681f3Smrg bool wait, 7037ec681f3Smrg enum pipe_query_value_type result_type, 7047ec681f3Smrg int index, 7057ec681f3Smrg struct pipe_resource *p_res, 7067ec681f3Smrg unsigned offset) 7077ec681f3Smrg{ 7087ec681f3Smrg struct crocus_context *ice = (void *) ctx; 7097ec681f3Smrg struct crocus_query *q = (void *) query; 7107ec681f3Smrg struct crocus_batch *batch = &ice->batches[q->batch_idx]; 7117ec681f3Smrg struct crocus_screen *screen = batch->screen; 7127ec681f3Smrg const struct intel_device_info *devinfo = &batch->screen->devinfo; 7137ec681f3Smrg struct crocus_resource *res = (void *) p_res; 7147ec681f3Smrg struct crocus_bo *query_bo = crocus_resource_bo(q->query_state_ref.res); 7157ec681f3Smrg struct crocus_bo *dst_bo = crocus_resource_bo(p_res); 7167ec681f3Smrg unsigned snapshots_landed_offset = 7177ec681f3Smrg offsetof(struct crocus_query_snapshots, snapshots_landed); 7187ec681f3Smrg 7197ec681f3Smrg res->bind_history |= PIPE_BIND_QUERY_BUFFER; 7207ec681f3Smrg 7217ec681f3Smrg if (index == -1) { 7227ec681f3Smrg /* They're asking for the availability of the result. If we still 7237ec681f3Smrg * have commands queued up which produce the result, submit them 7247ec681f3Smrg * now so that progress happens. Either way, copy the snapshots 7257ec681f3Smrg * landed field to the destination resource. 
7267ec681f3Smrg */ 7277ec681f3Smrg if (q->syncobj == crocus_batch_get_signal_syncobj(batch)) 7287ec681f3Smrg crocus_batch_flush(batch); 7297ec681f3Smrg 7307ec681f3Smrg screen->vtbl.copy_mem_mem(batch, dst_bo, offset, 7317ec681f3Smrg query_bo, snapshots_landed_offset, 7327ec681f3Smrg result_type <= PIPE_QUERY_TYPE_U32 ? 4 : 8); 7337ec681f3Smrg return; 7347ec681f3Smrg } 7357ec681f3Smrg 7367ec681f3Smrg if (!q->ready && READ_ONCE(q->map->snapshots_landed)) { 7377ec681f3Smrg /* The final snapshots happen to have landed, so let's just compute 7387ec681f3Smrg * the result on the CPU now... 7397ec681f3Smrg */ 7407ec681f3Smrg calculate_result_on_cpu(devinfo, q); 7417ec681f3Smrg } 7427ec681f3Smrg 7437ec681f3Smrg if (q->ready) { 7447ec681f3Smrg /* We happen to have the result on the CPU, so just copy it. */ 7457ec681f3Smrg if (result_type <= PIPE_QUERY_TYPE_U32) { 7467ec681f3Smrg screen->vtbl.store_data_imm32(batch, dst_bo, offset, q->result); 7477ec681f3Smrg } else { 7487ec681f3Smrg screen->vtbl.store_data_imm64(batch, dst_bo, offset, q->result); 7497ec681f3Smrg } 7507ec681f3Smrg 7517ec681f3Smrg /* Make sure the result lands before they use bind the QBO elsewhere 7527ec681f3Smrg * and use the result. 7537ec681f3Smrg */ 7547ec681f3Smrg // XXX: Why? i965 doesn't do this. 7557ec681f3Smrg crocus_emit_pipe_control_flush(batch, 7567ec681f3Smrg "query: unknown QBO flushing hack", 7577ec681f3Smrg PIPE_CONTROL_CS_STALL); 7587ec681f3Smrg return; 7597ec681f3Smrg } 7607ec681f3Smrg 7617ec681f3Smrg#if GFX_VERx10 >= 75 7627ec681f3Smrg bool predicated = !wait && !q->stalled; 7637ec681f3Smrg 7647ec681f3Smrg struct mi_builder b; 7657ec681f3Smrg mi_builder_init(&b, &batch->screen->devinfo, batch); 7667ec681f3Smrg 7677ec681f3Smrg struct mi_value result = calculate_result_on_gpu(devinfo, &b, q); 7687ec681f3Smrg struct mi_value dst = 7697ec681f3Smrg result_type <= PIPE_QUERY_TYPE_U32 ? 
mi_mem32(rw_bo(dst_bo, offset)) 7707ec681f3Smrg : mi_mem64(rw_bo(dst_bo, offset)); 7717ec681f3Smrg 7727ec681f3Smrg if (predicated) { 7737ec681f3Smrg mi_store(&b, mi_reg32(MI_PREDICATE_RESULT), 7747ec681f3Smrg mi_mem64(ro_bo(query_bo, snapshots_landed_offset))); 7757ec681f3Smrg mi_store_if(&b, dst, result); 7767ec681f3Smrg } else { 7777ec681f3Smrg mi_store(&b, dst, result); 7787ec681f3Smrg } 7797ec681f3Smrg#endif 7807ec681f3Smrg} 7817ec681f3Smrg#endif 7827ec681f3Smrg 7837ec681f3Smrgstatic void 7847ec681f3Smrgcrocus_set_active_query_state(struct pipe_context *ctx, bool enable) 7857ec681f3Smrg{ 7867ec681f3Smrg struct crocus_context *ice = (void *) ctx; 7877ec681f3Smrg 7887ec681f3Smrg if (ice->state.statistics_counters_enabled == enable) 7897ec681f3Smrg return; 7907ec681f3Smrg 7917ec681f3Smrg // XXX: most packets aren't paying attention to this yet, because it'd 7927ec681f3Smrg // have to be done dynamically at draw time, which is a pain 7937ec681f3Smrg ice->state.statistics_counters_enabled = enable; 7947ec681f3Smrg ice->state.dirty |= CROCUS_DIRTY_CLIP | 7957ec681f3Smrg CROCUS_DIRTY_RASTER | 7967ec681f3Smrg CROCUS_DIRTY_STREAMOUT | 7977ec681f3Smrg CROCUS_DIRTY_WM; 7987ec681f3Smrg ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_GS | 7997ec681f3Smrg CROCUS_STAGE_DIRTY_TCS | 8007ec681f3Smrg CROCUS_STAGE_DIRTY_TES | 8017ec681f3Smrg CROCUS_STAGE_DIRTY_VS; 8027ec681f3Smrg} 8037ec681f3Smrg 8047ec681f3Smrgstatic void 8057ec681f3Smrgset_predicate_enable(struct crocus_context *ice, bool value) 8067ec681f3Smrg{ 8077ec681f3Smrg if (value) 8087ec681f3Smrg ice->state.predicate = CROCUS_PREDICATE_STATE_RENDER; 8097ec681f3Smrg else 8107ec681f3Smrg ice->state.predicate = CROCUS_PREDICATE_STATE_DONT_RENDER; 8117ec681f3Smrg} 8127ec681f3Smrg 8137ec681f3Smrg#if GFX_VER >= 7 8147ec681f3Smrgstatic void 8157ec681f3Smrgset_predicate_for_result(struct crocus_context *ice, 8167ec681f3Smrg struct crocus_query *q, 8177ec681f3Smrg bool inverted) 8187ec681f3Smrg{ 8197ec681f3Smrg struct crocus_batch 
*batch = &ice->batches[CROCUS_BATCH_RENDER]; 8207ec681f3Smrg struct crocus_bo *bo = crocus_resource_bo(q->query_state_ref.res); 8217ec681f3Smrg 8227ec681f3Smrg#if GFX_VERx10 < 75 8237ec681f3Smrg /* IVB doesn't have enough MI for this */ 8247ec681f3Smrg if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE || 8257ec681f3Smrg q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) { 8267ec681f3Smrg ice->state.predicate = CROCUS_PREDICATE_STATE_STALL_FOR_QUERY; 8277ec681f3Smrg return; 8287ec681f3Smrg } 8297ec681f3Smrg#endif 8307ec681f3Smrg 8317ec681f3Smrg /* The CPU doesn't have the query result yet; use hardware predication */ 8327ec681f3Smrg ice->state.predicate = CROCUS_PREDICATE_STATE_USE_BIT; 8337ec681f3Smrg 8347ec681f3Smrg /* Ensure the memory is coherent for MI_LOAD_REGISTER_* commands. */ 8357ec681f3Smrg crocus_emit_pipe_control_flush(batch, 8367ec681f3Smrg "conditional rendering: set predicate", 8377ec681f3Smrg PIPE_CONTROL_FLUSH_ENABLE); 8387ec681f3Smrg q->stalled = true; 8397ec681f3Smrg 8407ec681f3Smrg#if GFX_VERx10 < 75 8417ec681f3Smrg struct crocus_screen *screen = batch->screen; 8427ec681f3Smrg screen->vtbl.load_register_mem64(batch, MI_PREDICATE_SRC0, bo, 8437ec681f3Smrg q->query_state_ref.offset + offsetof(struct crocus_query_snapshots, start)); 8447ec681f3Smrg screen->vtbl.load_register_mem64(batch, MI_PREDICATE_SRC1, bo, 8457ec681f3Smrg q->query_state_ref.offset + offsetof(struct crocus_query_snapshots, end)); 8467ec681f3Smrg 8477ec681f3Smrg uint32_t mi_predicate = MI_PREDICATE | MI_PREDICATE_COMBINEOP_SET | 8487ec681f3Smrg MI_PREDICATE_COMPAREOP_SRCS_EQUAL; 8497ec681f3Smrg if (inverted) 8507ec681f3Smrg mi_predicate |= MI_PREDICATE_LOADOP_LOAD; 8517ec681f3Smrg else 8527ec681f3Smrg mi_predicate |= MI_PREDICATE_LOADOP_LOADINV; 8537ec681f3Smrg crocus_batch_emit(batch, &mi_predicate, sizeof(uint32_t)); 8547ec681f3Smrg#else 8557ec681f3Smrg struct mi_builder b; 8567ec681f3Smrg mi_builder_init(&b, &batch->screen->devinfo, batch); 8577ec681f3Smrg 8587ec681f3Smrg struct 
mi_value result; 8597ec681f3Smrg 8607ec681f3Smrg switch (q->type) { 8617ec681f3Smrg case PIPE_QUERY_SO_OVERFLOW_PREDICATE: 8627ec681f3Smrg result = calc_overflow_for_stream(&b, q, q->index); 8637ec681f3Smrg break; 8647ec681f3Smrg case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: 8657ec681f3Smrg result = calc_overflow_any_stream(&b, q); 8667ec681f3Smrg break; 8677ec681f3Smrg default: { 8687ec681f3Smrg /* PIPE_QUERY_OCCLUSION_* */ 8697ec681f3Smrg struct mi_value start = 8707ec681f3Smrg query_mem64(q, offsetof(struct crocus_query_snapshots, start)); 8717ec681f3Smrg struct mi_value end = 8727ec681f3Smrg query_mem64(q, offsetof(struct crocus_query_snapshots, end)); 8737ec681f3Smrg result = mi_isub(&b, end, start); 8747ec681f3Smrg break; 8757ec681f3Smrg } 8767ec681f3Smrg } 8777ec681f3Smrg 8787ec681f3Smrg result = inverted ? mi_z(&b, result) : mi_nz(&b, result); 8797ec681f3Smrg result = mi_iand(&b, result, mi_imm(1)); 8807ec681f3Smrg 8817ec681f3Smrg /* We immediately set the predicate on the render batch, as all the 8827ec681f3Smrg * counters come from 3D operations. However, we may need to predicate 8837ec681f3Smrg * a compute dispatch, which executes in a different GEM context and has 8847ec681f3Smrg * a different MI_PREDICATE_RESULT register. So, we save the result to 8857ec681f3Smrg * memory and reload it in crocus_launch_grid. 
8867ec681f3Smrg */ 8877ec681f3Smrg mi_value_ref(&b, result); 8887ec681f3Smrg 8897ec681f3Smrg mi_store(&b, mi_reg64(MI_PREDICATE_SRC0), result); 8907ec681f3Smrg mi_store(&b, mi_reg64(MI_PREDICATE_SRC1), mi_imm(0)); 8917ec681f3Smrg 8927ec681f3Smrg unsigned mi_predicate = MI_PREDICATE | MI_PREDICATE_LOADOP_LOADINV | 8937ec681f3Smrg MI_PREDICATE_COMBINEOP_SET | 8947ec681f3Smrg MI_PREDICATE_COMPAREOP_SRCS_EQUAL; 8957ec681f3Smrg 8967ec681f3Smrg crocus_batch_emit(batch, &mi_predicate, sizeof(uint32_t)); 8977ec681f3Smrg mi_store(&b, query_mem64(q, offsetof(struct crocus_query_snapshots, 8987ec681f3Smrg predicate_result)), result); 8997ec681f3Smrg#endif 9007ec681f3Smrg ice->state.compute_predicate = bo; 9017ec681f3Smrg} 9027ec681f3Smrg#endif 9037ec681f3Smrg 9047ec681f3Smrgstatic void 9057ec681f3Smrgcrocus_render_condition(struct pipe_context *ctx, 9067ec681f3Smrg struct pipe_query *query, 9077ec681f3Smrg bool condition, 9087ec681f3Smrg enum pipe_render_cond_flag mode) 9097ec681f3Smrg{ 9107ec681f3Smrg struct crocus_context *ice = (void *) ctx; 9117ec681f3Smrg struct crocus_query *q = (void *) query; 9127ec681f3Smrg 9137ec681f3Smrg /* The old condition isn't relevant; we'll update it if necessary */ 9147ec681f3Smrg ice->state.compute_predicate = NULL; 9157ec681f3Smrg ice->condition.query = q; 9167ec681f3Smrg ice->condition.condition = condition; 9177ec681f3Smrg ice->condition.mode = mode; 9187ec681f3Smrg 9197ec681f3Smrg if (!q) { 9207ec681f3Smrg ice->state.predicate = CROCUS_PREDICATE_STATE_RENDER; 9217ec681f3Smrg return; 9227ec681f3Smrg } 9237ec681f3Smrg 9247ec681f3Smrg crocus_check_query_no_flush(ice, q); 9257ec681f3Smrg 9267ec681f3Smrg if (q->result || q->ready) { 9277ec681f3Smrg set_predicate_enable(ice, (q->result != 0) ^ condition); 9287ec681f3Smrg } else { 9297ec681f3Smrg if (mode == PIPE_RENDER_COND_NO_WAIT || 9307ec681f3Smrg mode == PIPE_RENDER_COND_BY_REGION_NO_WAIT) { 9317ec681f3Smrg perf_debug(&ice->dbg, "Conditional rendering demoted from " 9327ec681f3Smrg "\"no 
wait\" to \"wait\"."); 9337ec681f3Smrg } 9347ec681f3Smrg#if GFX_VER >= 7 9357ec681f3Smrg set_predicate_for_result(ice, q, condition); 9367ec681f3Smrg#else 9377ec681f3Smrg ice->state.predicate = CROCUS_PREDICATE_STATE_STALL_FOR_QUERY; 9387ec681f3Smrg#endif 9397ec681f3Smrg } 9407ec681f3Smrg} 9417ec681f3Smrg 9427ec681f3Smrgstatic void 9437ec681f3Smrgcrocus_resolve_conditional_render(struct crocus_context *ice) 9447ec681f3Smrg{ 9457ec681f3Smrg struct pipe_context *ctx = (void *) ice; 9467ec681f3Smrg struct crocus_query *q = ice->condition.query; 9477ec681f3Smrg struct pipe_query *query = (void *) q; 9487ec681f3Smrg union pipe_query_result result; 9497ec681f3Smrg 9507ec681f3Smrg if (ice->state.predicate != CROCUS_PREDICATE_STATE_USE_BIT) 9517ec681f3Smrg return; 9527ec681f3Smrg 9537ec681f3Smrg assert(q); 9547ec681f3Smrg 9557ec681f3Smrg crocus_get_query_result(ctx, query, true, &result); 9567ec681f3Smrg set_predicate_enable(ice, (q->result != 0) ^ ice->condition.condition); 9577ec681f3Smrg} 9587ec681f3Smrg 9597ec681f3Smrg#if GFX_VER >= 7 9607ec681f3Smrgstatic void 9617ec681f3Smrgcrocus_emit_compute_predicate(struct crocus_batch *batch) 9627ec681f3Smrg{ 9637ec681f3Smrg struct crocus_context *ice = batch->ice; 9647ec681f3Smrg struct crocus_screen *screen = batch->screen; 9657ec681f3Smrg screen->vtbl.load_register_mem32(batch, MI_PREDICATE_SRC0, 9667ec681f3Smrg ice->state.compute_predicate, 0); 9677ec681f3Smrg screen->vtbl.load_register_imm32(batch, MI_PREDICATE_SRC1, 0); 9687ec681f3Smrg unsigned mi_predicate = MI_PREDICATE | MI_PREDICATE_LOADOP_LOADINV | 9697ec681f3Smrg MI_PREDICATE_COMBINEOP_SET | 9707ec681f3Smrg MI_PREDICATE_COMPAREOP_SRCS_EQUAL; 9717ec681f3Smrg 9727ec681f3Smrg crocus_batch_emit(batch, &mi_predicate, sizeof(uint32_t)); 9737ec681f3Smrg} 9747ec681f3Smrg#endif 9757ec681f3Smrg 9767ec681f3Smrgvoid 9777ec681f3SmrggenX(crocus_init_screen_query)(struct crocus_screen *screen) 9787ec681f3Smrg{ 9797ec681f3Smrg screen->vtbl.resolve_conditional_render = 
crocus_resolve_conditional_render; 9807ec681f3Smrg#if GFX_VER >= 7 9817ec681f3Smrg screen->vtbl.emit_compute_predicate = crocus_emit_compute_predicate; 9827ec681f3Smrg#endif 9837ec681f3Smrg} 9847ec681f3Smrg 9857ec681f3Smrgvoid 9867ec681f3SmrggenX(crocus_init_query)(struct crocus_context *ice) 9877ec681f3Smrg{ 9887ec681f3Smrg struct pipe_context *ctx = &ice->ctx; 9897ec681f3Smrg 9907ec681f3Smrg ctx->create_query = crocus_create_query; 9917ec681f3Smrg ctx->create_batch_query = crocus_create_batch_query; 9927ec681f3Smrg ctx->destroy_query = crocus_destroy_query; 9937ec681f3Smrg ctx->begin_query = crocus_begin_query; 9947ec681f3Smrg ctx->end_query = crocus_end_query; 9957ec681f3Smrg ctx->get_query_result = crocus_get_query_result; 9967ec681f3Smrg#if GFX_VER >= 7 9977ec681f3Smrg ctx->get_query_result_resource = crocus_get_query_result_resource; 9987ec681f3Smrg#endif 9997ec681f3Smrg ctx->set_active_query_state = crocus_set_active_query_state; 10007ec681f3Smrg ctx->render_condition = crocus_render_condition; 10017ec681f3Smrg 10027ec681f3Smrg} 1003