17e995a2eSmrg/* 27e995a2eSmrg * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> 37e995a2eSmrg * Copyright 2014 Marek Olšák <marek.olsak@amd.com> 47e995a2eSmrg * 57e995a2eSmrg * Permission is hereby granted, free of charge, to any person obtaining a 67e995a2eSmrg * copy of this software and associated documentation files (the "Software"), 77e995a2eSmrg * to deal in the Software without restriction, including without limitation 87e995a2eSmrg * on the rights to use, copy, modify, merge, publish, distribute, sub 97e995a2eSmrg * license, and/or sell copies of the Software, and to permit persons to whom 107e995a2eSmrg * the Software is furnished to do so, subject to the following conditions: 117e995a2eSmrg * 127e995a2eSmrg * The above copyright notice and this permission notice (including the next 137e995a2eSmrg * paragraph) shall be included in all copies or substantial portions of the 147e995a2eSmrg * Software. 157e995a2eSmrg * 167e995a2eSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 177e995a2eSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 187e995a2eSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 197e995a2eSmrg * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 207e995a2eSmrg * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 217e995a2eSmrg * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 227e995a2eSmrg * USE OR OTHER DEALINGS IN THE SOFTWARE. 
237e995a2eSmrg */ 247e995a2eSmrg 257e995a2eSmrg#include "r600_query.h" 267e995a2eSmrg#include "r600_pipe.h" 277e995a2eSmrg#include "r600_cs.h" 287e995a2eSmrg#include "util/u_memory.h" 297e995a2eSmrg#include "util/u_upload_mgr.h" 307e995a2eSmrg#include "util/os_time.h" 317e995a2eSmrg#include "tgsi/tgsi_text.h" 327e995a2eSmrg 337e995a2eSmrg#define R600_MAX_STREAMS 4 347e995a2eSmrg 357e995a2eSmrgstruct r600_hw_query_params { 367e995a2eSmrg unsigned start_offset; 377e995a2eSmrg unsigned end_offset; 387e995a2eSmrg unsigned fence_offset; 397e995a2eSmrg unsigned pair_stride; 407e995a2eSmrg unsigned pair_count; 417e995a2eSmrg}; 427e995a2eSmrg 437e995a2eSmrg/* Queries without buffer handling or suspend/resume. */ 447e995a2eSmrgstruct r600_query_sw { 457e995a2eSmrg struct r600_query b; 467e995a2eSmrg 477e995a2eSmrg uint64_t begin_result; 487e995a2eSmrg uint64_t end_result; 497e995a2eSmrg 507e995a2eSmrg uint64_t begin_time; 517e995a2eSmrg uint64_t end_time; 527e995a2eSmrg 537e995a2eSmrg /* Fence for GPU_FINISHED. 
*/ 547e995a2eSmrg struct pipe_fence_handle *fence; 557e995a2eSmrg}; 567e995a2eSmrg 577e995a2eSmrgstatic void r600_query_sw_destroy(struct r600_common_screen *rscreen, 587e995a2eSmrg struct r600_query *rquery) 597e995a2eSmrg{ 607e995a2eSmrg struct r600_query_sw *query = (struct r600_query_sw *)rquery; 617e995a2eSmrg 627e995a2eSmrg rscreen->b.fence_reference(&rscreen->b, &query->fence, NULL); 637e995a2eSmrg FREE(query); 647e995a2eSmrg} 657e995a2eSmrg 667e995a2eSmrgstatic enum radeon_value_id winsys_id_from_type(unsigned type) 677e995a2eSmrg{ 687e995a2eSmrg switch (type) { 697e995a2eSmrg case R600_QUERY_REQUESTED_VRAM: return RADEON_REQUESTED_VRAM_MEMORY; 707e995a2eSmrg case R600_QUERY_REQUESTED_GTT: return RADEON_REQUESTED_GTT_MEMORY; 717e995a2eSmrg case R600_QUERY_MAPPED_VRAM: return RADEON_MAPPED_VRAM; 727e995a2eSmrg case R600_QUERY_MAPPED_GTT: return RADEON_MAPPED_GTT; 737e995a2eSmrg case R600_QUERY_BUFFER_WAIT_TIME: return RADEON_BUFFER_WAIT_TIME_NS; 747e995a2eSmrg case R600_QUERY_NUM_MAPPED_BUFFERS: return RADEON_NUM_MAPPED_BUFFERS; 757e995a2eSmrg case R600_QUERY_NUM_GFX_IBS: return RADEON_NUM_GFX_IBS; 767e995a2eSmrg case R600_QUERY_NUM_SDMA_IBS: return RADEON_NUM_SDMA_IBS; 777e995a2eSmrg case R600_QUERY_GFX_BO_LIST_SIZE: return RADEON_GFX_BO_LIST_COUNTER; 787e995a2eSmrg case R600_QUERY_NUM_BYTES_MOVED: return RADEON_NUM_BYTES_MOVED; 797e995a2eSmrg case R600_QUERY_NUM_EVICTIONS: return RADEON_NUM_EVICTIONS; 807e995a2eSmrg case R600_QUERY_NUM_VRAM_CPU_PAGE_FAULTS: return RADEON_NUM_VRAM_CPU_PAGE_FAULTS; 817e995a2eSmrg case R600_QUERY_VRAM_USAGE: return RADEON_VRAM_USAGE; 827e995a2eSmrg case R600_QUERY_VRAM_VIS_USAGE: return RADEON_VRAM_VIS_USAGE; 837e995a2eSmrg case R600_QUERY_GTT_USAGE: return RADEON_GTT_USAGE; 847e995a2eSmrg case R600_QUERY_GPU_TEMPERATURE: return RADEON_GPU_TEMPERATURE; 857e995a2eSmrg case R600_QUERY_CURRENT_GPU_SCLK: return RADEON_CURRENT_SCLK; 867e995a2eSmrg case R600_QUERY_CURRENT_GPU_MCLK: return RADEON_CURRENT_MCLK; 877e995a2eSmrg case 
R600_QUERY_CS_THREAD_BUSY: return RADEON_CS_THREAD_TIME; 887e995a2eSmrg default: unreachable("query type does not correspond to winsys id"); 897e995a2eSmrg } 907e995a2eSmrg} 917e995a2eSmrg 927e995a2eSmrgstatic bool r600_query_sw_begin(struct r600_common_context *rctx, 937e995a2eSmrg struct r600_query *rquery) 947e995a2eSmrg{ 957e995a2eSmrg struct r600_query_sw *query = (struct r600_query_sw *)rquery; 967e995a2eSmrg enum radeon_value_id ws_id; 977e995a2eSmrg 987e995a2eSmrg switch(query->b.type) { 997e995a2eSmrg case PIPE_QUERY_TIMESTAMP_DISJOINT: 1007e995a2eSmrg case PIPE_QUERY_GPU_FINISHED: 1017e995a2eSmrg break; 1027e995a2eSmrg case R600_QUERY_DRAW_CALLS: 1037e995a2eSmrg query->begin_result = rctx->num_draw_calls; 1047e995a2eSmrg break; 1057e995a2eSmrg case R600_QUERY_DECOMPRESS_CALLS: 1067e995a2eSmrg query->begin_result = rctx->num_decompress_calls; 1077e995a2eSmrg break; 1087e995a2eSmrg case R600_QUERY_MRT_DRAW_CALLS: 1097e995a2eSmrg query->begin_result = rctx->num_mrt_draw_calls; 1107e995a2eSmrg break; 1117e995a2eSmrg case R600_QUERY_PRIM_RESTART_CALLS: 1127e995a2eSmrg query->begin_result = rctx->num_prim_restart_calls; 1137e995a2eSmrg break; 1147e995a2eSmrg case R600_QUERY_SPILL_DRAW_CALLS: 1157e995a2eSmrg query->begin_result = rctx->num_spill_draw_calls; 1167e995a2eSmrg break; 1177e995a2eSmrg case R600_QUERY_COMPUTE_CALLS: 1187e995a2eSmrg query->begin_result = rctx->num_compute_calls; 1197e995a2eSmrg break; 1207e995a2eSmrg case R600_QUERY_SPILL_COMPUTE_CALLS: 1217e995a2eSmrg query->begin_result = rctx->num_spill_compute_calls; 1227e995a2eSmrg break; 1237e995a2eSmrg case R600_QUERY_DMA_CALLS: 1247e995a2eSmrg query->begin_result = rctx->num_dma_calls; 1257e995a2eSmrg break; 1267e995a2eSmrg case R600_QUERY_CP_DMA_CALLS: 1277e995a2eSmrg query->begin_result = rctx->num_cp_dma_calls; 1287e995a2eSmrg break; 1297e995a2eSmrg case R600_QUERY_NUM_VS_FLUSHES: 1307e995a2eSmrg query->begin_result = rctx->num_vs_flushes; 1317e995a2eSmrg break; 1327e995a2eSmrg case 
R600_QUERY_NUM_PS_FLUSHES: 1337e995a2eSmrg query->begin_result = rctx->num_ps_flushes; 1347e995a2eSmrg break; 1357e995a2eSmrg case R600_QUERY_NUM_CS_FLUSHES: 1367e995a2eSmrg query->begin_result = rctx->num_cs_flushes; 1377e995a2eSmrg break; 1387e995a2eSmrg case R600_QUERY_NUM_CB_CACHE_FLUSHES: 1397e995a2eSmrg query->begin_result = rctx->num_cb_cache_flushes; 1407e995a2eSmrg break; 1417e995a2eSmrg case R600_QUERY_NUM_DB_CACHE_FLUSHES: 1427e995a2eSmrg query->begin_result = rctx->num_db_cache_flushes; 1437e995a2eSmrg break; 1447e995a2eSmrg case R600_QUERY_NUM_RESIDENT_HANDLES: 1457e995a2eSmrg query->begin_result = rctx->num_resident_handles; 1467e995a2eSmrg break; 1477e995a2eSmrg case R600_QUERY_TC_OFFLOADED_SLOTS: 1487e995a2eSmrg query->begin_result = rctx->tc ? rctx->tc->num_offloaded_slots : 0; 1497e995a2eSmrg break; 1507e995a2eSmrg case R600_QUERY_TC_DIRECT_SLOTS: 1517e995a2eSmrg query->begin_result = rctx->tc ? rctx->tc->num_direct_slots : 0; 1527e995a2eSmrg break; 1537e995a2eSmrg case R600_QUERY_TC_NUM_SYNCS: 1547e995a2eSmrg query->begin_result = rctx->tc ? 
rctx->tc->num_syncs : 0; 1557e995a2eSmrg break; 1567e995a2eSmrg case R600_QUERY_REQUESTED_VRAM: 1577e995a2eSmrg case R600_QUERY_REQUESTED_GTT: 1587e995a2eSmrg case R600_QUERY_MAPPED_VRAM: 1597e995a2eSmrg case R600_QUERY_MAPPED_GTT: 1607e995a2eSmrg case R600_QUERY_VRAM_USAGE: 1617e995a2eSmrg case R600_QUERY_VRAM_VIS_USAGE: 1627e995a2eSmrg case R600_QUERY_GTT_USAGE: 1637e995a2eSmrg case R600_QUERY_GPU_TEMPERATURE: 1647e995a2eSmrg case R600_QUERY_CURRENT_GPU_SCLK: 1657e995a2eSmrg case R600_QUERY_CURRENT_GPU_MCLK: 1667e995a2eSmrg case R600_QUERY_NUM_MAPPED_BUFFERS: 1677e995a2eSmrg query->begin_result = 0; 1687e995a2eSmrg break; 1697e995a2eSmrg case R600_QUERY_BUFFER_WAIT_TIME: 1707e995a2eSmrg case R600_QUERY_NUM_GFX_IBS: 1717e995a2eSmrg case R600_QUERY_NUM_SDMA_IBS: 1727e995a2eSmrg case R600_QUERY_NUM_BYTES_MOVED: 1737e995a2eSmrg case R600_QUERY_NUM_EVICTIONS: 1747e995a2eSmrg case R600_QUERY_NUM_VRAM_CPU_PAGE_FAULTS: { 1757e995a2eSmrg enum radeon_value_id ws_id = winsys_id_from_type(query->b.type); 1767e995a2eSmrg query->begin_result = rctx->ws->query_value(rctx->ws, ws_id); 1777e995a2eSmrg break; 1787e995a2eSmrg } 1797e995a2eSmrg case R600_QUERY_GFX_BO_LIST_SIZE: 1807e995a2eSmrg ws_id = winsys_id_from_type(query->b.type); 1817e995a2eSmrg query->begin_result = rctx->ws->query_value(rctx->ws, ws_id); 1827e995a2eSmrg query->begin_time = rctx->ws->query_value(rctx->ws, 1837e995a2eSmrg RADEON_NUM_GFX_IBS); 1847e995a2eSmrg break; 1857e995a2eSmrg case R600_QUERY_CS_THREAD_BUSY: 1867e995a2eSmrg ws_id = winsys_id_from_type(query->b.type); 1877e995a2eSmrg query->begin_result = rctx->ws->query_value(rctx->ws, ws_id); 1887e995a2eSmrg query->begin_time = os_time_get_nano(); 1897e995a2eSmrg break; 1907e995a2eSmrg case R600_QUERY_GALLIUM_THREAD_BUSY: 1917e995a2eSmrg query->begin_result = 1927e995a2eSmrg rctx->tc ? 
util_queue_get_thread_time_nano(&rctx->tc->queue, 0) : 0; 1937e995a2eSmrg query->begin_time = os_time_get_nano(); 1947e995a2eSmrg break; 1957e995a2eSmrg case R600_QUERY_GPU_LOAD: 1967e995a2eSmrg case R600_QUERY_GPU_SHADERS_BUSY: 1977e995a2eSmrg case R600_QUERY_GPU_TA_BUSY: 1987e995a2eSmrg case R600_QUERY_GPU_GDS_BUSY: 1997e995a2eSmrg case R600_QUERY_GPU_VGT_BUSY: 2007e995a2eSmrg case R600_QUERY_GPU_IA_BUSY: 2017e995a2eSmrg case R600_QUERY_GPU_SX_BUSY: 2027e995a2eSmrg case R600_QUERY_GPU_WD_BUSY: 2037e995a2eSmrg case R600_QUERY_GPU_BCI_BUSY: 2047e995a2eSmrg case R600_QUERY_GPU_SC_BUSY: 2057e995a2eSmrg case R600_QUERY_GPU_PA_BUSY: 2067e995a2eSmrg case R600_QUERY_GPU_DB_BUSY: 2077e995a2eSmrg case R600_QUERY_GPU_CP_BUSY: 2087e995a2eSmrg case R600_QUERY_GPU_CB_BUSY: 2097e995a2eSmrg case R600_QUERY_GPU_SDMA_BUSY: 2107e995a2eSmrg case R600_QUERY_GPU_PFP_BUSY: 2117e995a2eSmrg case R600_QUERY_GPU_MEQ_BUSY: 2127e995a2eSmrg case R600_QUERY_GPU_ME_BUSY: 2137e995a2eSmrg case R600_QUERY_GPU_SURF_SYNC_BUSY: 2147e995a2eSmrg case R600_QUERY_GPU_CP_DMA_BUSY: 2157e995a2eSmrg case R600_QUERY_GPU_SCRATCH_RAM_BUSY: 2167e995a2eSmrg query->begin_result = r600_begin_counter(rctx->screen, 2177e995a2eSmrg query->b.type); 2187e995a2eSmrg break; 2197e995a2eSmrg case R600_QUERY_NUM_COMPILATIONS: 2207e995a2eSmrg query->begin_result = p_atomic_read(&rctx->screen->num_compilations); 2217e995a2eSmrg break; 2227e995a2eSmrg case R600_QUERY_NUM_SHADERS_CREATED: 2237e995a2eSmrg query->begin_result = p_atomic_read(&rctx->screen->num_shaders_created); 2247e995a2eSmrg break; 2257e995a2eSmrg case R600_QUERY_NUM_SHADER_CACHE_HITS: 2267e995a2eSmrg query->begin_result = 2277e995a2eSmrg p_atomic_read(&rctx->screen->num_shader_cache_hits); 2287e995a2eSmrg break; 2297e995a2eSmrg case R600_QUERY_GPIN_ASIC_ID: 2307e995a2eSmrg case R600_QUERY_GPIN_NUM_SIMD: 2317e995a2eSmrg case R600_QUERY_GPIN_NUM_RB: 2327e995a2eSmrg case R600_QUERY_GPIN_NUM_SPI: 2337e995a2eSmrg case R600_QUERY_GPIN_NUM_SE: 2347e995a2eSmrg break; 
2357e995a2eSmrg default: 2367e995a2eSmrg unreachable("r600_query_sw_begin: bad query type"); 2377e995a2eSmrg } 2387e995a2eSmrg 2397e995a2eSmrg return true; 2407e995a2eSmrg} 2417e995a2eSmrg 2427e995a2eSmrgstatic bool r600_query_sw_end(struct r600_common_context *rctx, 2437e995a2eSmrg struct r600_query *rquery) 2447e995a2eSmrg{ 2457e995a2eSmrg struct r600_query_sw *query = (struct r600_query_sw *)rquery; 2467e995a2eSmrg enum radeon_value_id ws_id; 2477e995a2eSmrg 2487e995a2eSmrg switch(query->b.type) { 2497e995a2eSmrg case PIPE_QUERY_TIMESTAMP_DISJOINT: 2507e995a2eSmrg break; 2517e995a2eSmrg case PIPE_QUERY_GPU_FINISHED: 2527e995a2eSmrg rctx->b.flush(&rctx->b, &query->fence, PIPE_FLUSH_DEFERRED); 2537e995a2eSmrg break; 2547e995a2eSmrg case R600_QUERY_DRAW_CALLS: 2557e995a2eSmrg query->end_result = rctx->num_draw_calls; 2567e995a2eSmrg break; 2577e995a2eSmrg case R600_QUERY_DECOMPRESS_CALLS: 2587e995a2eSmrg query->end_result = rctx->num_decompress_calls; 2597e995a2eSmrg break; 2607e995a2eSmrg case R600_QUERY_MRT_DRAW_CALLS: 2617e995a2eSmrg query->end_result = rctx->num_mrt_draw_calls; 2627e995a2eSmrg break; 2637e995a2eSmrg case R600_QUERY_PRIM_RESTART_CALLS: 2647e995a2eSmrg query->end_result = rctx->num_prim_restart_calls; 2657e995a2eSmrg break; 2667e995a2eSmrg case R600_QUERY_SPILL_DRAW_CALLS: 2677e995a2eSmrg query->end_result = rctx->num_spill_draw_calls; 2687e995a2eSmrg break; 2697e995a2eSmrg case R600_QUERY_COMPUTE_CALLS: 2707e995a2eSmrg query->end_result = rctx->num_compute_calls; 2717e995a2eSmrg break; 2727e995a2eSmrg case R600_QUERY_SPILL_COMPUTE_CALLS: 2737e995a2eSmrg query->end_result = rctx->num_spill_compute_calls; 2747e995a2eSmrg break; 2757e995a2eSmrg case R600_QUERY_DMA_CALLS: 2767e995a2eSmrg query->end_result = rctx->num_dma_calls; 2777e995a2eSmrg break; 2787e995a2eSmrg case R600_QUERY_CP_DMA_CALLS: 2797e995a2eSmrg query->end_result = rctx->num_cp_dma_calls; 2807e995a2eSmrg break; 2817e995a2eSmrg case R600_QUERY_NUM_VS_FLUSHES: 2827e995a2eSmrg 
query->end_result = rctx->num_vs_flushes; 2837e995a2eSmrg break; 2847e995a2eSmrg case R600_QUERY_NUM_PS_FLUSHES: 2857e995a2eSmrg query->end_result = rctx->num_ps_flushes; 2867e995a2eSmrg break; 2877e995a2eSmrg case R600_QUERY_NUM_CS_FLUSHES: 2887e995a2eSmrg query->end_result = rctx->num_cs_flushes; 2897e995a2eSmrg break; 2907e995a2eSmrg case R600_QUERY_NUM_CB_CACHE_FLUSHES: 2917e995a2eSmrg query->end_result = rctx->num_cb_cache_flushes; 2927e995a2eSmrg break; 2937e995a2eSmrg case R600_QUERY_NUM_DB_CACHE_FLUSHES: 2947e995a2eSmrg query->end_result = rctx->num_db_cache_flushes; 2957e995a2eSmrg break; 2967e995a2eSmrg case R600_QUERY_NUM_RESIDENT_HANDLES: 2977e995a2eSmrg query->end_result = rctx->num_resident_handles; 2987e995a2eSmrg break; 2997e995a2eSmrg case R600_QUERY_TC_OFFLOADED_SLOTS: 3007e995a2eSmrg query->end_result = rctx->tc ? rctx->tc->num_offloaded_slots : 0; 3017e995a2eSmrg break; 3027e995a2eSmrg case R600_QUERY_TC_DIRECT_SLOTS: 3037e995a2eSmrg query->end_result = rctx->tc ? rctx->tc->num_direct_slots : 0; 3047e995a2eSmrg break; 3057e995a2eSmrg case R600_QUERY_TC_NUM_SYNCS: 3067e995a2eSmrg query->end_result = rctx->tc ? 
rctx->tc->num_syncs : 0; 3077e995a2eSmrg break; 3087e995a2eSmrg case R600_QUERY_REQUESTED_VRAM: 3097e995a2eSmrg case R600_QUERY_REQUESTED_GTT: 3107e995a2eSmrg case R600_QUERY_MAPPED_VRAM: 3117e995a2eSmrg case R600_QUERY_MAPPED_GTT: 3127e995a2eSmrg case R600_QUERY_VRAM_USAGE: 3137e995a2eSmrg case R600_QUERY_VRAM_VIS_USAGE: 3147e995a2eSmrg case R600_QUERY_GTT_USAGE: 3157e995a2eSmrg case R600_QUERY_GPU_TEMPERATURE: 3167e995a2eSmrg case R600_QUERY_CURRENT_GPU_SCLK: 3177e995a2eSmrg case R600_QUERY_CURRENT_GPU_MCLK: 3187e995a2eSmrg case R600_QUERY_BUFFER_WAIT_TIME: 3197e995a2eSmrg case R600_QUERY_NUM_MAPPED_BUFFERS: 3207e995a2eSmrg case R600_QUERY_NUM_GFX_IBS: 3217e995a2eSmrg case R600_QUERY_NUM_SDMA_IBS: 3227e995a2eSmrg case R600_QUERY_NUM_BYTES_MOVED: 3237e995a2eSmrg case R600_QUERY_NUM_EVICTIONS: 3247e995a2eSmrg case R600_QUERY_NUM_VRAM_CPU_PAGE_FAULTS: { 3257e995a2eSmrg enum radeon_value_id ws_id = winsys_id_from_type(query->b.type); 3267e995a2eSmrg query->end_result = rctx->ws->query_value(rctx->ws, ws_id); 3277e995a2eSmrg break; 3287e995a2eSmrg } 3297e995a2eSmrg case R600_QUERY_GFX_BO_LIST_SIZE: 3307e995a2eSmrg ws_id = winsys_id_from_type(query->b.type); 3317e995a2eSmrg query->end_result = rctx->ws->query_value(rctx->ws, ws_id); 3327e995a2eSmrg query->end_time = rctx->ws->query_value(rctx->ws, 3337e995a2eSmrg RADEON_NUM_GFX_IBS); 3347e995a2eSmrg break; 3357e995a2eSmrg case R600_QUERY_CS_THREAD_BUSY: 3367e995a2eSmrg ws_id = winsys_id_from_type(query->b.type); 3377e995a2eSmrg query->end_result = rctx->ws->query_value(rctx->ws, ws_id); 3387e995a2eSmrg query->end_time = os_time_get_nano(); 3397e995a2eSmrg break; 3407e995a2eSmrg case R600_QUERY_GALLIUM_THREAD_BUSY: 3417e995a2eSmrg query->end_result = 3427e995a2eSmrg rctx->tc ? 
util_queue_get_thread_time_nano(&rctx->tc->queue, 0) : 0; 3437e995a2eSmrg query->end_time = os_time_get_nano(); 3447e995a2eSmrg break; 3457e995a2eSmrg case R600_QUERY_GPU_LOAD: 3467e995a2eSmrg case R600_QUERY_GPU_SHADERS_BUSY: 3477e995a2eSmrg case R600_QUERY_GPU_TA_BUSY: 3487e995a2eSmrg case R600_QUERY_GPU_GDS_BUSY: 3497e995a2eSmrg case R600_QUERY_GPU_VGT_BUSY: 3507e995a2eSmrg case R600_QUERY_GPU_IA_BUSY: 3517e995a2eSmrg case R600_QUERY_GPU_SX_BUSY: 3527e995a2eSmrg case R600_QUERY_GPU_WD_BUSY: 3537e995a2eSmrg case R600_QUERY_GPU_BCI_BUSY: 3547e995a2eSmrg case R600_QUERY_GPU_SC_BUSY: 3557e995a2eSmrg case R600_QUERY_GPU_PA_BUSY: 3567e995a2eSmrg case R600_QUERY_GPU_DB_BUSY: 3577e995a2eSmrg case R600_QUERY_GPU_CP_BUSY: 3587e995a2eSmrg case R600_QUERY_GPU_CB_BUSY: 3597e995a2eSmrg case R600_QUERY_GPU_SDMA_BUSY: 3607e995a2eSmrg case R600_QUERY_GPU_PFP_BUSY: 3617e995a2eSmrg case R600_QUERY_GPU_MEQ_BUSY: 3627e995a2eSmrg case R600_QUERY_GPU_ME_BUSY: 3637e995a2eSmrg case R600_QUERY_GPU_SURF_SYNC_BUSY: 3647e995a2eSmrg case R600_QUERY_GPU_CP_DMA_BUSY: 3657e995a2eSmrg case R600_QUERY_GPU_SCRATCH_RAM_BUSY: 3667e995a2eSmrg query->end_result = r600_end_counter(rctx->screen, 3677e995a2eSmrg query->b.type, 3687e995a2eSmrg query->begin_result); 3697e995a2eSmrg query->begin_result = 0; 3707e995a2eSmrg break; 3717e995a2eSmrg case R600_QUERY_NUM_COMPILATIONS: 3727e995a2eSmrg query->end_result = p_atomic_read(&rctx->screen->num_compilations); 3737e995a2eSmrg break; 3747e995a2eSmrg case R600_QUERY_NUM_SHADERS_CREATED: 3757e995a2eSmrg query->end_result = p_atomic_read(&rctx->screen->num_shaders_created); 3767e995a2eSmrg break; 3777e995a2eSmrg case R600_QUERY_NUM_SHADER_CACHE_HITS: 3787e995a2eSmrg query->end_result = 3797e995a2eSmrg p_atomic_read(&rctx->screen->num_shader_cache_hits); 3807e995a2eSmrg break; 3817e995a2eSmrg case R600_QUERY_GPIN_ASIC_ID: 3827e995a2eSmrg case R600_QUERY_GPIN_NUM_SIMD: 3837e995a2eSmrg case R600_QUERY_GPIN_NUM_RB: 3847e995a2eSmrg case R600_QUERY_GPIN_NUM_SPI: 
3857e995a2eSmrg case R600_QUERY_GPIN_NUM_SE: 3867e995a2eSmrg break; 3877e995a2eSmrg default: 3887e995a2eSmrg unreachable("r600_query_sw_end: bad query type"); 3897e995a2eSmrg } 3907e995a2eSmrg 3917e995a2eSmrg return true; 3927e995a2eSmrg} 3937e995a2eSmrg 3947e995a2eSmrgstatic bool r600_query_sw_get_result(struct r600_common_context *rctx, 3957e995a2eSmrg struct r600_query *rquery, 3967e995a2eSmrg bool wait, 3977e995a2eSmrg union pipe_query_result *result) 3987e995a2eSmrg{ 3997e995a2eSmrg struct r600_query_sw *query = (struct r600_query_sw *)rquery; 4007e995a2eSmrg 4017e995a2eSmrg switch (query->b.type) { 4027e995a2eSmrg case PIPE_QUERY_TIMESTAMP_DISJOINT: 4037e995a2eSmrg /* Convert from cycles per millisecond to cycles per second (Hz). */ 4047e995a2eSmrg result->timestamp_disjoint.frequency = 4057e995a2eSmrg (uint64_t)rctx->screen->info.clock_crystal_freq * 1000; 4067e995a2eSmrg result->timestamp_disjoint.disjoint = false; 4077e995a2eSmrg return true; 4087e995a2eSmrg case PIPE_QUERY_GPU_FINISHED: { 4097e995a2eSmrg struct pipe_screen *screen = rctx->b.screen; 4107e995a2eSmrg struct pipe_context *ctx = rquery->b.flushed ? NULL : &rctx->b; 4117e995a2eSmrg 4127e995a2eSmrg result->b = screen->fence_finish(screen, ctx, query->fence, 4137e995a2eSmrg wait ? 
PIPE_TIMEOUT_INFINITE : 0); 4147e995a2eSmrg return result->b; 4157e995a2eSmrg } 4167e995a2eSmrg 4177e995a2eSmrg case R600_QUERY_GFX_BO_LIST_SIZE: 4187e995a2eSmrg result->u64 = (query->end_result - query->begin_result) / 4197e995a2eSmrg (query->end_time - query->begin_time); 4207e995a2eSmrg return true; 4217e995a2eSmrg case R600_QUERY_CS_THREAD_BUSY: 4227e995a2eSmrg case R600_QUERY_GALLIUM_THREAD_BUSY: 4237e995a2eSmrg result->u64 = (query->end_result - query->begin_result) * 100 / 4247e995a2eSmrg (query->end_time - query->begin_time); 4257e995a2eSmrg return true; 4267e995a2eSmrg case R600_QUERY_GPIN_ASIC_ID: 4277e995a2eSmrg result->u32 = 0; 4287e995a2eSmrg return true; 4297e995a2eSmrg case R600_QUERY_GPIN_NUM_SIMD: 4307e995a2eSmrg result->u32 = rctx->screen->info.num_good_compute_units; 4317e995a2eSmrg return true; 4327e995a2eSmrg case R600_QUERY_GPIN_NUM_RB: 4331463c08dSmrg result->u32 = rctx->screen->info.max_render_backends; 4347e995a2eSmrg return true; 4357e995a2eSmrg case R600_QUERY_GPIN_NUM_SPI: 4367e995a2eSmrg result->u32 = 1; /* all supported chips have one SPI per SE */ 4377e995a2eSmrg return true; 4387e995a2eSmrg case R600_QUERY_GPIN_NUM_SE: 4397e995a2eSmrg result->u32 = rctx->screen->info.max_se; 4407e995a2eSmrg return true; 4417e995a2eSmrg } 4427e995a2eSmrg 4437e995a2eSmrg result->u64 = query->end_result - query->begin_result; 4447e995a2eSmrg 4457e995a2eSmrg switch (query->b.type) { 4467e995a2eSmrg case R600_QUERY_BUFFER_WAIT_TIME: 4477e995a2eSmrg case R600_QUERY_GPU_TEMPERATURE: 4487e995a2eSmrg result->u64 /= 1000; 4497e995a2eSmrg break; 4507e995a2eSmrg case R600_QUERY_CURRENT_GPU_SCLK: 4517e995a2eSmrg case R600_QUERY_CURRENT_GPU_MCLK: 4527e995a2eSmrg result->u64 *= 1000000; 4537e995a2eSmrg break; 4547e995a2eSmrg } 4557e995a2eSmrg 4567e995a2eSmrg return true; 4577e995a2eSmrg} 4587e995a2eSmrg 4597e995a2eSmrg 4607e995a2eSmrgstatic struct r600_query_ops sw_query_ops = { 4617e995a2eSmrg .destroy = r600_query_sw_destroy, 4627e995a2eSmrg .begin = 
r600_query_sw_begin, 4637e995a2eSmrg .end = r600_query_sw_end, 4647e995a2eSmrg .get_result = r600_query_sw_get_result, 4657e995a2eSmrg .get_result_resource = NULL 4667e995a2eSmrg}; 4677e995a2eSmrg 4687e995a2eSmrgstatic struct pipe_query *r600_query_sw_create(unsigned query_type) 4697e995a2eSmrg{ 4707e995a2eSmrg struct r600_query_sw *query; 4717e995a2eSmrg 4727e995a2eSmrg query = CALLOC_STRUCT(r600_query_sw); 4737e995a2eSmrg if (!query) 4747e995a2eSmrg return NULL; 4757e995a2eSmrg 4767e995a2eSmrg query->b.type = query_type; 4777e995a2eSmrg query->b.ops = &sw_query_ops; 4787e995a2eSmrg 4797e995a2eSmrg return (struct pipe_query *)query; 4807e995a2eSmrg} 4817e995a2eSmrg 4827e995a2eSmrgvoid r600_query_hw_destroy(struct r600_common_screen *rscreen, 4837e995a2eSmrg struct r600_query *rquery) 4847e995a2eSmrg{ 4857e995a2eSmrg struct r600_query_hw *query = (struct r600_query_hw *)rquery; 4867e995a2eSmrg struct r600_query_buffer *prev = query->buffer.previous; 4877e995a2eSmrg 4887e995a2eSmrg /* Release all query buffers. */ 4897e995a2eSmrg while (prev) { 4907e995a2eSmrg struct r600_query_buffer *qbuf = prev; 4917e995a2eSmrg prev = prev->previous; 4927e995a2eSmrg r600_resource_reference(&qbuf->buf, NULL); 4937e995a2eSmrg FREE(qbuf); 4947e995a2eSmrg } 4957e995a2eSmrg 4967e995a2eSmrg r600_resource_reference(&query->buffer.buf, NULL); 4977e995a2eSmrg FREE(rquery); 4987e995a2eSmrg} 4997e995a2eSmrg 5007e995a2eSmrgstatic struct r600_resource *r600_new_query_buffer(struct r600_common_screen *rscreen, 5017e995a2eSmrg struct r600_query_hw *query) 5027e995a2eSmrg{ 5037e995a2eSmrg unsigned buf_size = MAX2(query->result_size, 5047e995a2eSmrg rscreen->info.min_alloc_size); 5057e995a2eSmrg 5067e995a2eSmrg /* Queries are normally read by the CPU after 5077e995a2eSmrg * being written by the gpu, hence staging is probably a good 5087e995a2eSmrg * usage pattern. 
5097e995a2eSmrg */ 5107e995a2eSmrg struct r600_resource *buf = (struct r600_resource*) 5117e995a2eSmrg pipe_buffer_create(&rscreen->b, 0, 5127e995a2eSmrg PIPE_USAGE_STAGING, buf_size); 5137e995a2eSmrg if (!buf) 5147e995a2eSmrg return NULL; 5157e995a2eSmrg 5167e995a2eSmrg if (!query->ops->prepare_buffer(rscreen, query, buf)) { 5177e995a2eSmrg r600_resource_reference(&buf, NULL); 5187e995a2eSmrg return NULL; 5197e995a2eSmrg } 5207e995a2eSmrg 5217e995a2eSmrg return buf; 5227e995a2eSmrg} 5237e995a2eSmrg 5247e995a2eSmrgstatic bool r600_query_hw_prepare_buffer(struct r600_common_screen *rscreen, 5257e995a2eSmrg struct r600_query_hw *query, 5267e995a2eSmrg struct r600_resource *buffer) 5277e995a2eSmrg{ 5287e995a2eSmrg /* Callers ensure that the buffer is currently unused by the GPU. */ 5291463c08dSmrg uint32_t *results = rscreen->ws->buffer_map(rscreen->ws, buffer->buf, NULL, 5301463c08dSmrg PIPE_MAP_WRITE | 5311463c08dSmrg PIPE_MAP_UNSYNCHRONIZED); 5327e995a2eSmrg if (!results) 5337e995a2eSmrg return false; 5347e995a2eSmrg 5357e995a2eSmrg memset(results, 0, buffer->b.b.width0); 5367e995a2eSmrg 5377e995a2eSmrg if (query->b.type == PIPE_QUERY_OCCLUSION_COUNTER || 5387e995a2eSmrg query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE || 5397e995a2eSmrg query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE) { 5401463c08dSmrg unsigned max_rbs = rscreen->info.max_render_backends; 5417e995a2eSmrg unsigned enabled_rb_mask = rscreen->info.enabled_rb_mask; 5427e995a2eSmrg unsigned num_results; 5437e995a2eSmrg unsigned i, j; 5447e995a2eSmrg 5457e995a2eSmrg /* Set top bits for unused backends. 
*/ 5467e995a2eSmrg num_results = buffer->b.b.width0 / query->result_size; 5477e995a2eSmrg for (j = 0; j < num_results; j++) { 5487e995a2eSmrg for (i = 0; i < max_rbs; i++) { 5497e995a2eSmrg if (!(enabled_rb_mask & (1<<i))) { 5507e995a2eSmrg results[(i * 4)+1] = 0x80000000; 5517e995a2eSmrg results[(i * 4)+3] = 0x80000000; 5527e995a2eSmrg } 5537e995a2eSmrg } 5547e995a2eSmrg results += 4 * max_rbs; 5557e995a2eSmrg } 5567e995a2eSmrg } 5577e995a2eSmrg 5587e995a2eSmrg return true; 5597e995a2eSmrg} 5607e995a2eSmrg 5617e995a2eSmrgstatic void r600_query_hw_get_result_resource(struct r600_common_context *rctx, 5627e995a2eSmrg struct r600_query *rquery, 5637e995a2eSmrg bool wait, 5647e995a2eSmrg enum pipe_query_value_type result_type, 5657e995a2eSmrg int index, 5667e995a2eSmrg struct pipe_resource *resource, 5677e995a2eSmrg unsigned offset); 5687e995a2eSmrg 5697e995a2eSmrgstatic struct r600_query_ops query_hw_ops = { 5707e995a2eSmrg .destroy = r600_query_hw_destroy, 5717e995a2eSmrg .begin = r600_query_hw_begin, 5727e995a2eSmrg .end = r600_query_hw_end, 5737e995a2eSmrg .get_result = r600_query_hw_get_result, 5747e995a2eSmrg .get_result_resource = r600_query_hw_get_result_resource, 5757e995a2eSmrg}; 5767e995a2eSmrg 5777e995a2eSmrgstatic void r600_query_hw_do_emit_start(struct r600_common_context *ctx, 5787e995a2eSmrg struct r600_query_hw *query, 5797e995a2eSmrg struct r600_resource *buffer, 5807e995a2eSmrg uint64_t va); 5817e995a2eSmrgstatic void r600_query_hw_do_emit_stop(struct r600_common_context *ctx, 5827e995a2eSmrg struct r600_query_hw *query, 5837e995a2eSmrg struct r600_resource *buffer, 5847e995a2eSmrg uint64_t va); 5857e995a2eSmrgstatic void r600_query_hw_add_result(struct r600_common_screen *rscreen, 5867e995a2eSmrg struct r600_query_hw *, void *buffer, 5877e995a2eSmrg union pipe_query_result *result); 5887e995a2eSmrgstatic void r600_query_hw_clear_result(struct r600_query_hw *, 5897e995a2eSmrg union pipe_query_result *); 5907e995a2eSmrg 5917e995a2eSmrgstatic struct 
r600_query_hw_ops query_hw_default_hw_ops = { 5927e995a2eSmrg .prepare_buffer = r600_query_hw_prepare_buffer, 5937e995a2eSmrg .emit_start = r600_query_hw_do_emit_start, 5947e995a2eSmrg .emit_stop = r600_query_hw_do_emit_stop, 5957e995a2eSmrg .clear_result = r600_query_hw_clear_result, 5967e995a2eSmrg .add_result = r600_query_hw_add_result, 5977e995a2eSmrg}; 5987e995a2eSmrg 5997e995a2eSmrgbool r600_query_hw_init(struct r600_common_screen *rscreen, 6007e995a2eSmrg struct r600_query_hw *query) 6017e995a2eSmrg{ 6027e995a2eSmrg query->buffer.buf = r600_new_query_buffer(rscreen, query); 6037e995a2eSmrg if (!query->buffer.buf) 6047e995a2eSmrg return false; 6057e995a2eSmrg 6067e995a2eSmrg return true; 6077e995a2eSmrg} 6087e995a2eSmrg 6097e995a2eSmrgstatic struct pipe_query *r600_query_hw_create(struct r600_common_screen *rscreen, 6107e995a2eSmrg unsigned query_type, 6117e995a2eSmrg unsigned index) 6127e995a2eSmrg{ 6137e995a2eSmrg struct r600_query_hw *query = CALLOC_STRUCT(r600_query_hw); 6147e995a2eSmrg if (!query) 6157e995a2eSmrg return NULL; 6167e995a2eSmrg 6177e995a2eSmrg query->b.type = query_type; 6187e995a2eSmrg query->b.ops = &query_hw_ops; 6197e995a2eSmrg query->ops = &query_hw_default_hw_ops; 6207e995a2eSmrg 6217e995a2eSmrg switch (query_type) { 6227e995a2eSmrg case PIPE_QUERY_OCCLUSION_COUNTER: 6237e995a2eSmrg case PIPE_QUERY_OCCLUSION_PREDICATE: 6247e995a2eSmrg case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: 6251463c08dSmrg query->result_size = 16 * rscreen->info.max_render_backends; 6267e995a2eSmrg query->result_size += 16; /* for the fence + alignment */ 6277e995a2eSmrg query->num_cs_dw_begin = 6; 6287e995a2eSmrg query->num_cs_dw_end = 6 + r600_gfx_write_fence_dwords(rscreen); 6297e995a2eSmrg break; 6307e995a2eSmrg case PIPE_QUERY_TIME_ELAPSED: 6317e995a2eSmrg query->result_size = 24; 6327e995a2eSmrg query->num_cs_dw_begin = 8; 6337e995a2eSmrg query->num_cs_dw_end = 8 + r600_gfx_write_fence_dwords(rscreen); 6347e995a2eSmrg break; 6357e995a2eSmrg case 
PIPE_QUERY_TIMESTAMP: 6367e995a2eSmrg query->result_size = 16; 6377e995a2eSmrg query->num_cs_dw_end = 8 + r600_gfx_write_fence_dwords(rscreen); 6387e995a2eSmrg query->flags = R600_QUERY_HW_FLAG_NO_START; 6397e995a2eSmrg break; 6407e995a2eSmrg case PIPE_QUERY_PRIMITIVES_EMITTED: 6417e995a2eSmrg case PIPE_QUERY_PRIMITIVES_GENERATED: 6427e995a2eSmrg case PIPE_QUERY_SO_STATISTICS: 6437e995a2eSmrg case PIPE_QUERY_SO_OVERFLOW_PREDICATE: 6447e995a2eSmrg /* NumPrimitivesWritten, PrimitiveStorageNeeded. */ 6457e995a2eSmrg query->result_size = 32; 6467e995a2eSmrg query->num_cs_dw_begin = 6; 6477e995a2eSmrg query->num_cs_dw_end = 6; 6487e995a2eSmrg query->stream = index; 6497e995a2eSmrg break; 6507e995a2eSmrg case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: 6517e995a2eSmrg /* NumPrimitivesWritten, PrimitiveStorageNeeded. */ 6527e995a2eSmrg query->result_size = 32 * R600_MAX_STREAMS; 6537e995a2eSmrg query->num_cs_dw_begin = 6 * R600_MAX_STREAMS; 6547e995a2eSmrg query->num_cs_dw_end = 6 * R600_MAX_STREAMS; 6557e995a2eSmrg break; 6567e995a2eSmrg case PIPE_QUERY_PIPELINE_STATISTICS: 6577e995a2eSmrg /* 11 values on EG, 8 on R600. */ 6587e995a2eSmrg query->result_size = (rscreen->chip_class >= EVERGREEN ? 
11 : 8) * 16; 6597e995a2eSmrg query->result_size += 8; /* for the fence + alignment */ 6607e995a2eSmrg query->num_cs_dw_begin = 6; 6617e995a2eSmrg query->num_cs_dw_end = 6 + r600_gfx_write_fence_dwords(rscreen); 6627e995a2eSmrg break; 6637e995a2eSmrg default: 6647e995a2eSmrg assert(0); 6657e995a2eSmrg FREE(query); 6667e995a2eSmrg return NULL; 6677e995a2eSmrg } 6687e995a2eSmrg 6697e995a2eSmrg if (!r600_query_hw_init(rscreen, query)) { 6707e995a2eSmrg FREE(query); 6717e995a2eSmrg return NULL; 6727e995a2eSmrg } 6737e995a2eSmrg 6747e995a2eSmrg return (struct pipe_query *)query; 6757e995a2eSmrg} 6767e995a2eSmrg 6777e995a2eSmrgstatic void r600_update_occlusion_query_state(struct r600_common_context *rctx, 6787e995a2eSmrg unsigned type, int diff) 6797e995a2eSmrg{ 6807e995a2eSmrg if (type == PIPE_QUERY_OCCLUSION_COUNTER || 6817e995a2eSmrg type == PIPE_QUERY_OCCLUSION_PREDICATE || 6827e995a2eSmrg type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE) { 6837e995a2eSmrg bool old_enable = rctx->num_occlusion_queries != 0; 6847e995a2eSmrg bool old_perfect_enable = 6857e995a2eSmrg rctx->num_perfect_occlusion_queries != 0; 6867e995a2eSmrg bool enable, perfect_enable; 6877e995a2eSmrg 6887e995a2eSmrg rctx->num_occlusion_queries += diff; 6897e995a2eSmrg assert(rctx->num_occlusion_queries >= 0); 6907e995a2eSmrg 6917e995a2eSmrg if (type != PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE) { 6927e995a2eSmrg rctx->num_perfect_occlusion_queries += diff; 6937e995a2eSmrg assert(rctx->num_perfect_occlusion_queries >= 0); 6947e995a2eSmrg } 6957e995a2eSmrg 6967e995a2eSmrg enable = rctx->num_occlusion_queries != 0; 6977e995a2eSmrg perfect_enable = rctx->num_perfect_occlusion_queries != 0; 6987e995a2eSmrg 6997e995a2eSmrg if (enable != old_enable || perfect_enable != old_perfect_enable) { 7007e995a2eSmrg struct r600_context *ctx = (struct r600_context*)rctx; 7017e995a2eSmrg r600_mark_atom_dirty(ctx, &ctx->db_misc_state.atom); 7027e995a2eSmrg } 7037e995a2eSmrg } 7047e995a2eSmrg} 7057e995a2eSmrg 
7067e995a2eSmrgstatic unsigned event_type_for_stream(unsigned stream) 7077e995a2eSmrg{ 7087e995a2eSmrg switch (stream) { 7097e995a2eSmrg default: 7107e995a2eSmrg case 0: return EVENT_TYPE_SAMPLE_STREAMOUTSTATS; 7117e995a2eSmrg case 1: return EVENT_TYPE_SAMPLE_STREAMOUTSTATS1; 7127e995a2eSmrg case 2: return EVENT_TYPE_SAMPLE_STREAMOUTSTATS2; 7137e995a2eSmrg case 3: return EVENT_TYPE_SAMPLE_STREAMOUTSTATS3; 7147e995a2eSmrg } 7157e995a2eSmrg} 7167e995a2eSmrg 7177e995a2eSmrgstatic void emit_sample_streamout(struct radeon_cmdbuf *cs, uint64_t va, 7187e995a2eSmrg unsigned stream) 7197e995a2eSmrg{ 7207e995a2eSmrg radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); 7217e995a2eSmrg radeon_emit(cs, EVENT_TYPE(event_type_for_stream(stream)) | EVENT_INDEX(3)); 7227e995a2eSmrg radeon_emit(cs, va); 7237e995a2eSmrg radeon_emit(cs, va >> 32); 7247e995a2eSmrg} 7257e995a2eSmrg 7267e995a2eSmrgstatic void r600_query_hw_do_emit_start(struct r600_common_context *ctx, 7277e995a2eSmrg struct r600_query_hw *query, 7287e995a2eSmrg struct r600_resource *buffer, 7297e995a2eSmrg uint64_t va) 7307e995a2eSmrg{ 7311463c08dSmrg struct radeon_cmdbuf *cs = &ctx->gfx.cs; 7327e995a2eSmrg 7337e995a2eSmrg switch (query->b.type) { 7347e995a2eSmrg case PIPE_QUERY_OCCLUSION_COUNTER: 7357e995a2eSmrg case PIPE_QUERY_OCCLUSION_PREDICATE: 7367e995a2eSmrg case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: 7377e995a2eSmrg radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); 7387e995a2eSmrg radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1)); 7397e995a2eSmrg radeon_emit(cs, va); 7407e995a2eSmrg radeon_emit(cs, va >> 32); 7417e995a2eSmrg break; 7427e995a2eSmrg case PIPE_QUERY_PRIMITIVES_EMITTED: 7437e995a2eSmrg case PIPE_QUERY_PRIMITIVES_GENERATED: 7447e995a2eSmrg case PIPE_QUERY_SO_STATISTICS: 7457e995a2eSmrg case PIPE_QUERY_SO_OVERFLOW_PREDICATE: 7467e995a2eSmrg emit_sample_streamout(cs, va, query->stream); 7477e995a2eSmrg break; 7487e995a2eSmrg case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: 7497e995a2eSmrg for 
(unsigned stream = 0; stream < R600_MAX_STREAMS; ++stream) 7507e995a2eSmrg emit_sample_streamout(cs, va + 32 * stream, stream); 7517e995a2eSmrg break; 7527e995a2eSmrg case PIPE_QUERY_TIME_ELAPSED: 7537e995a2eSmrg /* Write the timestamp after the last draw is done. 7547e995a2eSmrg * (bottom-of-pipe) 7557e995a2eSmrg */ 7567e995a2eSmrg r600_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS, 7577e995a2eSmrg 0, EOP_DATA_SEL_TIMESTAMP, 7587e995a2eSmrg NULL, va, 0, query->b.type); 7597e995a2eSmrg break; 7607e995a2eSmrg case PIPE_QUERY_PIPELINE_STATISTICS: 7617e995a2eSmrg radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); 7627e995a2eSmrg radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2)); 7637e995a2eSmrg radeon_emit(cs, va); 7647e995a2eSmrg radeon_emit(cs, va >> 32); 7657e995a2eSmrg break; 7667e995a2eSmrg default: 7677e995a2eSmrg assert(0); 7687e995a2eSmrg } 7697e995a2eSmrg r600_emit_reloc(ctx, &ctx->gfx, query->buffer.buf, RADEON_USAGE_WRITE, 7707e995a2eSmrg RADEON_PRIO_QUERY); 7717e995a2eSmrg} 7727e995a2eSmrg 7737e995a2eSmrgstatic void r600_query_hw_emit_start(struct r600_common_context *ctx, 7747e995a2eSmrg struct r600_query_hw *query) 7757e995a2eSmrg{ 7767e995a2eSmrg uint64_t va; 7777e995a2eSmrg 7787e995a2eSmrg if (!query->buffer.buf) 7797e995a2eSmrg return; // previous buffer allocation failure 7807e995a2eSmrg 7817e995a2eSmrg r600_update_occlusion_query_state(ctx, query->b.type, 1); 7827e995a2eSmrg r600_update_prims_generated_query_state(ctx, query->b.type, 1); 7837e995a2eSmrg 7847e995a2eSmrg ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw_begin + query->num_cs_dw_end, 7857e995a2eSmrg true); 7867e995a2eSmrg 7877e995a2eSmrg /* Get a new query buffer if needed. 
*/ 7887e995a2eSmrg if (query->buffer.results_end + query->result_size > query->buffer.buf->b.b.width0) { 7897e995a2eSmrg struct r600_query_buffer *qbuf = MALLOC_STRUCT(r600_query_buffer); 7907e995a2eSmrg *qbuf = query->buffer; 7917e995a2eSmrg query->buffer.results_end = 0; 7927e995a2eSmrg query->buffer.previous = qbuf; 7937e995a2eSmrg query->buffer.buf = r600_new_query_buffer(ctx->screen, query); 7947e995a2eSmrg if (!query->buffer.buf) 7957e995a2eSmrg return; 7967e995a2eSmrg } 7977e995a2eSmrg 7987e995a2eSmrg /* emit begin query */ 7997e995a2eSmrg va = query->buffer.buf->gpu_address + query->buffer.results_end; 8007e995a2eSmrg 8017e995a2eSmrg query->ops->emit_start(ctx, query, query->buffer.buf, va); 8027e995a2eSmrg 8037e995a2eSmrg ctx->num_cs_dw_queries_suspend += query->num_cs_dw_end; 8047e995a2eSmrg} 8057e995a2eSmrg 8067e995a2eSmrgstatic void r600_query_hw_do_emit_stop(struct r600_common_context *ctx, 8077e995a2eSmrg struct r600_query_hw *query, 8087e995a2eSmrg struct r600_resource *buffer, 8097e995a2eSmrg uint64_t va) 8107e995a2eSmrg{ 8111463c08dSmrg struct radeon_cmdbuf *cs = &ctx->gfx.cs; 8127e995a2eSmrg uint64_t fence_va = 0; 8137e995a2eSmrg 8147e995a2eSmrg switch (query->b.type) { 8157e995a2eSmrg case PIPE_QUERY_OCCLUSION_COUNTER: 8167e995a2eSmrg case PIPE_QUERY_OCCLUSION_PREDICATE: 8177e995a2eSmrg case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: 8187e995a2eSmrg va += 8; 8197e995a2eSmrg radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); 8207e995a2eSmrg radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1)); 8217e995a2eSmrg radeon_emit(cs, va); 8227e995a2eSmrg radeon_emit(cs, va >> 32); 8237e995a2eSmrg 8241463c08dSmrg fence_va = va + ctx->screen->info.max_render_backends * 16 - 8; 8257e995a2eSmrg break; 8267e995a2eSmrg case PIPE_QUERY_PRIMITIVES_EMITTED: 8277e995a2eSmrg case PIPE_QUERY_PRIMITIVES_GENERATED: 8287e995a2eSmrg case PIPE_QUERY_SO_STATISTICS: 8297e995a2eSmrg case PIPE_QUERY_SO_OVERFLOW_PREDICATE: 8307e995a2eSmrg va += 16; 8317e995a2eSmrg 
emit_sample_streamout(cs, va, query->stream); 8327e995a2eSmrg break; 8337e995a2eSmrg case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: 8347e995a2eSmrg va += 16; 8357e995a2eSmrg for (unsigned stream = 0; stream < R600_MAX_STREAMS; ++stream) 8367e995a2eSmrg emit_sample_streamout(cs, va + 32 * stream, stream); 8377e995a2eSmrg break; 8387e995a2eSmrg case PIPE_QUERY_TIME_ELAPSED: 8397e995a2eSmrg va += 8; 8401463c08dSmrg FALLTHROUGH; 8417e995a2eSmrg case PIPE_QUERY_TIMESTAMP: 8427e995a2eSmrg r600_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS, 8437e995a2eSmrg 0, EOP_DATA_SEL_TIMESTAMP, NULL, va, 8447e995a2eSmrg 0, query->b.type); 8457e995a2eSmrg fence_va = va + 8; 8467e995a2eSmrg break; 8477e995a2eSmrg case PIPE_QUERY_PIPELINE_STATISTICS: { 8487e995a2eSmrg unsigned sample_size = (query->result_size - 8) / 2; 8497e995a2eSmrg 8507e995a2eSmrg va += sample_size; 8517e995a2eSmrg radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); 8527e995a2eSmrg radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2)); 8537e995a2eSmrg radeon_emit(cs, va); 8547e995a2eSmrg radeon_emit(cs, va >> 32); 8557e995a2eSmrg 8567e995a2eSmrg fence_va = va + sample_size; 8577e995a2eSmrg break; 8587e995a2eSmrg } 8597e995a2eSmrg default: 8607e995a2eSmrg assert(0); 8617e995a2eSmrg } 8627e995a2eSmrg r600_emit_reloc(ctx, &ctx->gfx, query->buffer.buf, RADEON_USAGE_WRITE, 8637e995a2eSmrg RADEON_PRIO_QUERY); 8647e995a2eSmrg 8657e995a2eSmrg if (fence_va) 8667e995a2eSmrg r600_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS, 0, 8677e995a2eSmrg EOP_DATA_SEL_VALUE_32BIT, 8687e995a2eSmrg query->buffer.buf, fence_va, 0x80000000, 8697e995a2eSmrg query->b.type); 8707e995a2eSmrg} 8717e995a2eSmrg 8727e995a2eSmrgstatic void r600_query_hw_emit_stop(struct r600_common_context *ctx, 8737e995a2eSmrg struct r600_query_hw *query) 8747e995a2eSmrg{ 8757e995a2eSmrg uint64_t va; 8767e995a2eSmrg 8777e995a2eSmrg if (!query->buffer.buf) 8787e995a2eSmrg return; // previous buffer allocation failure 8797e995a2eSmrg 
8807e995a2eSmrg /* The queries which need begin already called this in begin_query. */ 8817e995a2eSmrg if (query->flags & R600_QUERY_HW_FLAG_NO_START) { 8827e995a2eSmrg ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw_end, false); 8837e995a2eSmrg } 8847e995a2eSmrg 8857e995a2eSmrg /* emit end query */ 8867e995a2eSmrg va = query->buffer.buf->gpu_address + query->buffer.results_end; 8877e995a2eSmrg 8887e995a2eSmrg query->ops->emit_stop(ctx, query, query->buffer.buf, va); 8897e995a2eSmrg 8907e995a2eSmrg query->buffer.results_end += query->result_size; 8917e995a2eSmrg 8927e995a2eSmrg if (!(query->flags & R600_QUERY_HW_FLAG_NO_START)) 8937e995a2eSmrg ctx->num_cs_dw_queries_suspend -= query->num_cs_dw_end; 8947e995a2eSmrg 8957e995a2eSmrg r600_update_occlusion_query_state(ctx, query->b.type, -1); 8967e995a2eSmrg r600_update_prims_generated_query_state(ctx, query->b.type, -1); 8977e995a2eSmrg} 8987e995a2eSmrg 8997e995a2eSmrgstatic void emit_set_predicate(struct r600_common_context *ctx, 9007e995a2eSmrg struct r600_resource *buf, uint64_t va, 9017e995a2eSmrg uint32_t op) 9027e995a2eSmrg{ 9031463c08dSmrg struct radeon_cmdbuf *cs = &ctx->gfx.cs; 9047e995a2eSmrg 9057e995a2eSmrg radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 1, 0)); 9067e995a2eSmrg radeon_emit(cs, va); 9077e995a2eSmrg radeon_emit(cs, op | ((va >> 32) & 0xFF)); 9087e995a2eSmrg r600_emit_reloc(ctx, &ctx->gfx, buf, RADEON_USAGE_READ, 9097e995a2eSmrg RADEON_PRIO_QUERY); 9107e995a2eSmrg} 9117e995a2eSmrg 9127e995a2eSmrgstatic void r600_emit_query_predication(struct r600_common_context *ctx, 9137e995a2eSmrg struct r600_atom *atom) 9147e995a2eSmrg{ 9157e995a2eSmrg struct r600_query_hw *query = (struct r600_query_hw *)ctx->render_cond; 9167e995a2eSmrg struct r600_query_buffer *qbuf; 9177e995a2eSmrg uint32_t op; 9187e995a2eSmrg bool flag_wait, invert; 9197e995a2eSmrg 9207e995a2eSmrg if (!query) 9217e995a2eSmrg return; 9227e995a2eSmrg 9237e995a2eSmrg invert = ctx->render_cond_invert; 9247e995a2eSmrg flag_wait = 
ctx->render_cond_mode == PIPE_RENDER_COND_WAIT || 9257e995a2eSmrg ctx->render_cond_mode == PIPE_RENDER_COND_BY_REGION_WAIT; 9267e995a2eSmrg 9277e995a2eSmrg switch (query->b.type) { 9287e995a2eSmrg case PIPE_QUERY_OCCLUSION_COUNTER: 9297e995a2eSmrg case PIPE_QUERY_OCCLUSION_PREDICATE: 9307e995a2eSmrg case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: 9317e995a2eSmrg op = PRED_OP(PREDICATION_OP_ZPASS); 9327e995a2eSmrg break; 9337e995a2eSmrg case PIPE_QUERY_SO_OVERFLOW_PREDICATE: 9347e995a2eSmrg case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: 9357e995a2eSmrg op = PRED_OP(PREDICATION_OP_PRIMCOUNT); 9367e995a2eSmrg invert = !invert; 9377e995a2eSmrg break; 9387e995a2eSmrg default: 9397e995a2eSmrg assert(0); 9407e995a2eSmrg return; 9417e995a2eSmrg } 9427e995a2eSmrg 9437e995a2eSmrg /* if true then invert, see GL_ARB_conditional_render_inverted */ 9447e995a2eSmrg if (invert) 9457e995a2eSmrg op |= PREDICATION_DRAW_NOT_VISIBLE; /* Draw if not visible or overflow */ 9467e995a2eSmrg else 9477e995a2eSmrg op |= PREDICATION_DRAW_VISIBLE; /* Draw if visible or no overflow */ 9487e995a2eSmrg 9497e995a2eSmrg op |= flag_wait ? 
PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW; 9507e995a2eSmrg 9517e995a2eSmrg /* emit predicate packets for all data blocks */ 9527e995a2eSmrg for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) { 9537e995a2eSmrg unsigned results_base = 0; 9547e995a2eSmrg uint64_t va_base = qbuf->buf->gpu_address; 9557e995a2eSmrg 9567e995a2eSmrg while (results_base < qbuf->results_end) { 9577e995a2eSmrg uint64_t va = va_base + results_base; 9587e995a2eSmrg 9597e995a2eSmrg if (query->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) { 9607e995a2eSmrg for (unsigned stream = 0; stream < R600_MAX_STREAMS; ++stream) { 9617e995a2eSmrg emit_set_predicate(ctx, qbuf->buf, va + 32 * stream, op); 9627e995a2eSmrg 9637e995a2eSmrg /* set CONTINUE bit for all packets except the first */ 9647e995a2eSmrg op |= PREDICATION_CONTINUE; 9657e995a2eSmrg } 9667e995a2eSmrg } else { 9677e995a2eSmrg emit_set_predicate(ctx, qbuf->buf, va, op); 9687e995a2eSmrg op |= PREDICATION_CONTINUE; 9697e995a2eSmrg } 9707e995a2eSmrg 9717e995a2eSmrg results_base += query->result_size; 9727e995a2eSmrg } 9737e995a2eSmrg } 9747e995a2eSmrg} 9757e995a2eSmrg 9767e995a2eSmrgstatic struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned query_type, unsigned index) 9777e995a2eSmrg{ 9787e995a2eSmrg struct r600_common_screen *rscreen = 9797e995a2eSmrg (struct r600_common_screen *)ctx->screen; 9807e995a2eSmrg 9817e995a2eSmrg if (query_type == PIPE_QUERY_TIMESTAMP_DISJOINT || 9827e995a2eSmrg query_type == PIPE_QUERY_GPU_FINISHED || 9837e995a2eSmrg query_type >= PIPE_QUERY_DRIVER_SPECIFIC) 9847e995a2eSmrg return r600_query_sw_create(query_type); 9857e995a2eSmrg 9867e995a2eSmrg return r600_query_hw_create(rscreen, query_type, index); 9877e995a2eSmrg} 9887e995a2eSmrg 9897e995a2eSmrgstatic void r600_destroy_query(struct pipe_context *ctx, struct pipe_query *query) 9907e995a2eSmrg{ 9917e995a2eSmrg struct r600_common_context *rctx = (struct r600_common_context *)ctx; 9927e995a2eSmrg struct r600_query *rquery = 
(struct r600_query *)query; 9937e995a2eSmrg 9947e995a2eSmrg rquery->ops->destroy(rctx->screen, rquery); 9957e995a2eSmrg} 9967e995a2eSmrg 9971463c08dSmrgstatic bool r600_begin_query(struct pipe_context *ctx, 9981463c08dSmrg struct pipe_query *query) 9997e995a2eSmrg{ 10007e995a2eSmrg struct r600_common_context *rctx = (struct r600_common_context *)ctx; 10017e995a2eSmrg struct r600_query *rquery = (struct r600_query *)query; 10027e995a2eSmrg 10037e995a2eSmrg return rquery->ops->begin(rctx, rquery); 10047e995a2eSmrg} 10057e995a2eSmrg 10067e995a2eSmrgvoid r600_query_hw_reset_buffers(struct r600_common_context *rctx, 10077e995a2eSmrg struct r600_query_hw *query) 10087e995a2eSmrg{ 10097e995a2eSmrg struct r600_query_buffer *prev = query->buffer.previous; 10107e995a2eSmrg 10117e995a2eSmrg /* Discard the old query buffers. */ 10127e995a2eSmrg while (prev) { 10137e995a2eSmrg struct r600_query_buffer *qbuf = prev; 10147e995a2eSmrg prev = prev->previous; 10157e995a2eSmrg r600_resource_reference(&qbuf->buf, NULL); 10167e995a2eSmrg FREE(qbuf); 10177e995a2eSmrg } 10187e995a2eSmrg 10197e995a2eSmrg query->buffer.results_end = 0; 10207e995a2eSmrg query->buffer.previous = NULL; 10217e995a2eSmrg 10227e995a2eSmrg /* Obtain a new buffer if the current one can't be mapped without a stall. 
*/ 10237e995a2eSmrg if (r600_rings_is_buffer_referenced(rctx, query->buffer.buf->buf, RADEON_USAGE_READWRITE) || 10241463c08dSmrg !rctx->ws->buffer_wait(rctx->ws, query->buffer.buf->buf, 0, RADEON_USAGE_READWRITE)) { 10257e995a2eSmrg r600_resource_reference(&query->buffer.buf, NULL); 10267e995a2eSmrg query->buffer.buf = r600_new_query_buffer(rctx->screen, query); 10277e995a2eSmrg } else { 10287e995a2eSmrg if (!query->ops->prepare_buffer(rctx->screen, query, query->buffer.buf)) 10297e995a2eSmrg r600_resource_reference(&query->buffer.buf, NULL); 10307e995a2eSmrg } 10317e995a2eSmrg} 10327e995a2eSmrg 10337e995a2eSmrgbool r600_query_hw_begin(struct r600_common_context *rctx, 10347e995a2eSmrg struct r600_query *rquery) 10357e995a2eSmrg{ 10367e995a2eSmrg struct r600_query_hw *query = (struct r600_query_hw *)rquery; 10377e995a2eSmrg 10387e995a2eSmrg if (query->flags & R600_QUERY_HW_FLAG_NO_START) { 10397e995a2eSmrg assert(0); 10407e995a2eSmrg return false; 10417e995a2eSmrg } 10427e995a2eSmrg 10437e995a2eSmrg if (!(query->flags & R600_QUERY_HW_FLAG_BEGIN_RESUMES)) 10447e995a2eSmrg r600_query_hw_reset_buffers(rctx, query); 10457e995a2eSmrg 10467e995a2eSmrg r600_query_hw_emit_start(rctx, query); 10477e995a2eSmrg if (!query->buffer.buf) 10487e995a2eSmrg return false; 10497e995a2eSmrg 10501463c08dSmrg list_addtail(&query->list, &rctx->active_queries); 10517e995a2eSmrg return true; 10527e995a2eSmrg} 10537e995a2eSmrg 10547e995a2eSmrgstatic bool r600_end_query(struct pipe_context *ctx, struct pipe_query *query) 10557e995a2eSmrg{ 10567e995a2eSmrg struct r600_common_context *rctx = (struct r600_common_context *)ctx; 10577e995a2eSmrg struct r600_query *rquery = (struct r600_query *)query; 10587e995a2eSmrg 10597e995a2eSmrg return rquery->ops->end(rctx, rquery); 10607e995a2eSmrg} 10617e995a2eSmrg 10627e995a2eSmrgbool r600_query_hw_end(struct r600_common_context *rctx, 10637e995a2eSmrg struct r600_query *rquery) 10647e995a2eSmrg{ 10657e995a2eSmrg struct r600_query_hw *query = (struct 
r600_query_hw *)rquery; 10667e995a2eSmrg 10677e995a2eSmrg if (query->flags & R600_QUERY_HW_FLAG_NO_START) 10687e995a2eSmrg r600_query_hw_reset_buffers(rctx, query); 10697e995a2eSmrg 10707e995a2eSmrg r600_query_hw_emit_stop(rctx, query); 10717e995a2eSmrg 10727e995a2eSmrg if (!(query->flags & R600_QUERY_HW_FLAG_NO_START)) 10731463c08dSmrg list_delinit(&query->list); 10747e995a2eSmrg 10757e995a2eSmrg if (!query->buffer.buf) 10767e995a2eSmrg return false; 10777e995a2eSmrg 10787e995a2eSmrg return true; 10797e995a2eSmrg} 10807e995a2eSmrg 10817e995a2eSmrgstatic void r600_get_hw_query_params(struct r600_common_context *rctx, 10827e995a2eSmrg struct r600_query_hw *rquery, int index, 10837e995a2eSmrg struct r600_hw_query_params *params) 10847e995a2eSmrg{ 10851463c08dSmrg unsigned max_rbs = rctx->screen->info.max_render_backends; 10867e995a2eSmrg 10877e995a2eSmrg params->pair_stride = 0; 10887e995a2eSmrg params->pair_count = 1; 10897e995a2eSmrg 10907e995a2eSmrg switch (rquery->b.type) { 10917e995a2eSmrg case PIPE_QUERY_OCCLUSION_COUNTER: 10927e995a2eSmrg case PIPE_QUERY_OCCLUSION_PREDICATE: 10937e995a2eSmrg case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: 10947e995a2eSmrg params->start_offset = 0; 10957e995a2eSmrg params->end_offset = 8; 10967e995a2eSmrg params->fence_offset = max_rbs * 16; 10977e995a2eSmrg params->pair_stride = 16; 10987e995a2eSmrg params->pair_count = max_rbs; 10997e995a2eSmrg break; 11007e995a2eSmrg case PIPE_QUERY_TIME_ELAPSED: 11017e995a2eSmrg params->start_offset = 0; 11027e995a2eSmrg params->end_offset = 8; 11037e995a2eSmrg params->fence_offset = 16; 11047e995a2eSmrg break; 11057e995a2eSmrg case PIPE_QUERY_TIMESTAMP: 11067e995a2eSmrg params->start_offset = 0; 11077e995a2eSmrg params->end_offset = 0; 11087e995a2eSmrg params->fence_offset = 8; 11097e995a2eSmrg break; 11107e995a2eSmrg case PIPE_QUERY_PRIMITIVES_EMITTED: 11117e995a2eSmrg params->start_offset = 8; 11127e995a2eSmrg params->end_offset = 24; 11137e995a2eSmrg params->fence_offset = 
params->end_offset + 4; 11147e995a2eSmrg break; 11157e995a2eSmrg case PIPE_QUERY_PRIMITIVES_GENERATED: 11167e995a2eSmrg params->start_offset = 0; 11177e995a2eSmrg params->end_offset = 16; 11187e995a2eSmrg params->fence_offset = params->end_offset + 4; 11197e995a2eSmrg break; 11207e995a2eSmrg case PIPE_QUERY_SO_STATISTICS: 11217e995a2eSmrg params->start_offset = 8 - index * 8; 11227e995a2eSmrg params->end_offset = 24 - index * 8; 11237e995a2eSmrg params->fence_offset = params->end_offset + 4; 11247e995a2eSmrg break; 11257e995a2eSmrg case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: 11267e995a2eSmrg params->pair_count = R600_MAX_STREAMS; 11277e995a2eSmrg params->pair_stride = 32; 11281463c08dSmrg FALLTHROUGH; 11297e995a2eSmrg case PIPE_QUERY_SO_OVERFLOW_PREDICATE: 11307e995a2eSmrg params->start_offset = 0; 11317e995a2eSmrg params->end_offset = 16; 11327e995a2eSmrg 11337e995a2eSmrg /* We can re-use the high dword of the last 64-bit value as a 11347e995a2eSmrg * fence: it is initialized as 0, and the high bit is set by 11357e995a2eSmrg * the write of the streamout stats event. 
11367e995a2eSmrg */ 11377e995a2eSmrg params->fence_offset = rquery->result_size - 4; 11387e995a2eSmrg break; 11397e995a2eSmrg case PIPE_QUERY_PIPELINE_STATISTICS: 11407e995a2eSmrg { 11417e995a2eSmrg /* Offsets apply to EG+ */ 11427e995a2eSmrg static const unsigned offsets[] = {56, 48, 24, 32, 40, 16, 8, 0, 64, 72, 80}; 11437e995a2eSmrg params->start_offset = offsets[index]; 11447e995a2eSmrg params->end_offset = 88 + offsets[index]; 11457e995a2eSmrg params->fence_offset = 2 * 88; 11467e995a2eSmrg break; 11477e995a2eSmrg } 11487e995a2eSmrg default: 11497e995a2eSmrg unreachable("r600_get_hw_query_params unsupported"); 11507e995a2eSmrg } 11517e995a2eSmrg} 11527e995a2eSmrg 11537e995a2eSmrgstatic unsigned r600_query_read_result(void *map, unsigned start_index, unsigned end_index, 11547e995a2eSmrg bool test_status_bit) 11557e995a2eSmrg{ 11567e995a2eSmrg uint32_t *current_result = (uint32_t*)map; 11577e995a2eSmrg uint64_t start, end; 11587e995a2eSmrg 11597e995a2eSmrg start = (uint64_t)current_result[start_index] | 11607e995a2eSmrg (uint64_t)current_result[start_index+1] << 32; 11617e995a2eSmrg end = (uint64_t)current_result[end_index] | 11627e995a2eSmrg (uint64_t)current_result[end_index+1] << 32; 11637e995a2eSmrg 11647e995a2eSmrg if (!test_status_bit || 11657e995a2eSmrg ((start & 0x8000000000000000UL) && (end & 0x8000000000000000UL))) { 11667e995a2eSmrg return end - start; 11677e995a2eSmrg } 11687e995a2eSmrg return 0; 11697e995a2eSmrg} 11707e995a2eSmrg 11717e995a2eSmrgstatic void r600_query_hw_add_result(struct r600_common_screen *rscreen, 11727e995a2eSmrg struct r600_query_hw *query, 11737e995a2eSmrg void *buffer, 11747e995a2eSmrg union pipe_query_result *result) 11757e995a2eSmrg{ 11761463c08dSmrg unsigned max_rbs = rscreen->info.max_render_backends; 11777e995a2eSmrg 11787e995a2eSmrg switch (query->b.type) { 11797e995a2eSmrg case PIPE_QUERY_OCCLUSION_COUNTER: { 11807e995a2eSmrg for (unsigned i = 0; i < max_rbs; ++i) { 11817e995a2eSmrg unsigned results_base = i * 16; 
11827e995a2eSmrg result->u64 += 11837e995a2eSmrg r600_query_read_result(buffer + results_base, 0, 2, true); 11847e995a2eSmrg } 11857e995a2eSmrg break; 11867e995a2eSmrg } 11877e995a2eSmrg case PIPE_QUERY_OCCLUSION_PREDICATE: 11887e995a2eSmrg case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: { 11897e995a2eSmrg for (unsigned i = 0; i < max_rbs; ++i) { 11907e995a2eSmrg unsigned results_base = i * 16; 11917e995a2eSmrg result->b = result->b || 11927e995a2eSmrg r600_query_read_result(buffer + results_base, 0, 2, true) != 0; 11937e995a2eSmrg } 11947e995a2eSmrg break; 11957e995a2eSmrg } 11967e995a2eSmrg case PIPE_QUERY_TIME_ELAPSED: 11977e995a2eSmrg result->u64 += r600_query_read_result(buffer, 0, 2, false); 11987e995a2eSmrg break; 11997e995a2eSmrg case PIPE_QUERY_TIMESTAMP: 12007e995a2eSmrg result->u64 = *(uint64_t*)buffer; 12017e995a2eSmrg break; 12027e995a2eSmrg case PIPE_QUERY_PRIMITIVES_EMITTED: 12037e995a2eSmrg /* SAMPLE_STREAMOUTSTATS stores this structure: 12047e995a2eSmrg * { 12057e995a2eSmrg * u64 NumPrimitivesWritten; 12067e995a2eSmrg * u64 PrimitiveStorageNeeded; 12077e995a2eSmrg * } 12087e995a2eSmrg * We only need NumPrimitivesWritten here. */ 12097e995a2eSmrg result->u64 += r600_query_read_result(buffer, 2, 6, true); 12107e995a2eSmrg break; 12117e995a2eSmrg case PIPE_QUERY_PRIMITIVES_GENERATED: 12127e995a2eSmrg /* Here we read PrimitiveStorageNeeded. 
*/ 12137e995a2eSmrg result->u64 += r600_query_read_result(buffer, 0, 4, true); 12147e995a2eSmrg break; 12157e995a2eSmrg case PIPE_QUERY_SO_STATISTICS: 12167e995a2eSmrg result->so_statistics.num_primitives_written += 12177e995a2eSmrg r600_query_read_result(buffer, 2, 6, true); 12187e995a2eSmrg result->so_statistics.primitives_storage_needed += 12197e995a2eSmrg r600_query_read_result(buffer, 0, 4, true); 12207e995a2eSmrg break; 12217e995a2eSmrg case PIPE_QUERY_SO_OVERFLOW_PREDICATE: 12227e995a2eSmrg result->b = result->b || 12237e995a2eSmrg r600_query_read_result(buffer, 2, 6, true) != 12247e995a2eSmrg r600_query_read_result(buffer, 0, 4, true); 12257e995a2eSmrg break; 12267e995a2eSmrg case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: 12277e995a2eSmrg for (unsigned stream = 0; stream < R600_MAX_STREAMS; ++stream) { 12287e995a2eSmrg result->b = result->b || 12297e995a2eSmrg r600_query_read_result(buffer, 2, 6, true) != 12307e995a2eSmrg r600_query_read_result(buffer, 0, 4, true); 12317e995a2eSmrg buffer = (char *)buffer + 32; 12327e995a2eSmrg } 12337e995a2eSmrg break; 12347e995a2eSmrg case PIPE_QUERY_PIPELINE_STATISTICS: 12357e995a2eSmrg if (rscreen->chip_class >= EVERGREEN) { 12367e995a2eSmrg result->pipeline_statistics.ps_invocations += 12377e995a2eSmrg r600_query_read_result(buffer, 0, 22, false); 12387e995a2eSmrg result->pipeline_statistics.c_primitives += 12397e995a2eSmrg r600_query_read_result(buffer, 2, 24, false); 12407e995a2eSmrg result->pipeline_statistics.c_invocations += 12417e995a2eSmrg r600_query_read_result(buffer, 4, 26, false); 12427e995a2eSmrg result->pipeline_statistics.vs_invocations += 12437e995a2eSmrg r600_query_read_result(buffer, 6, 28, false); 12447e995a2eSmrg result->pipeline_statistics.gs_invocations += 12457e995a2eSmrg r600_query_read_result(buffer, 8, 30, false); 12467e995a2eSmrg result->pipeline_statistics.gs_primitives += 12477e995a2eSmrg r600_query_read_result(buffer, 10, 32, false); 12487e995a2eSmrg result->pipeline_statistics.ia_primitives += 
12497e995a2eSmrg r600_query_read_result(buffer, 12, 34, false); 12507e995a2eSmrg result->pipeline_statistics.ia_vertices += 12517e995a2eSmrg r600_query_read_result(buffer, 14, 36, false); 12527e995a2eSmrg result->pipeline_statistics.hs_invocations += 12537e995a2eSmrg r600_query_read_result(buffer, 16, 38, false); 12547e995a2eSmrg result->pipeline_statistics.ds_invocations += 12557e995a2eSmrg r600_query_read_result(buffer, 18, 40, false); 12567e995a2eSmrg result->pipeline_statistics.cs_invocations += 12577e995a2eSmrg r600_query_read_result(buffer, 20, 42, false); 12587e995a2eSmrg } else { 12597e995a2eSmrg result->pipeline_statistics.ps_invocations += 12607e995a2eSmrg r600_query_read_result(buffer, 0, 16, false); 12617e995a2eSmrg result->pipeline_statistics.c_primitives += 12627e995a2eSmrg r600_query_read_result(buffer, 2, 18, false); 12637e995a2eSmrg result->pipeline_statistics.c_invocations += 12647e995a2eSmrg r600_query_read_result(buffer, 4, 20, false); 12657e995a2eSmrg result->pipeline_statistics.vs_invocations += 12667e995a2eSmrg r600_query_read_result(buffer, 6, 22, false); 12677e995a2eSmrg result->pipeline_statistics.gs_invocations += 12687e995a2eSmrg r600_query_read_result(buffer, 8, 24, false); 12697e995a2eSmrg result->pipeline_statistics.gs_primitives += 12707e995a2eSmrg r600_query_read_result(buffer, 10, 26, false); 12717e995a2eSmrg result->pipeline_statistics.ia_primitives += 12727e995a2eSmrg r600_query_read_result(buffer, 12, 28, false); 12737e995a2eSmrg result->pipeline_statistics.ia_vertices += 12747e995a2eSmrg r600_query_read_result(buffer, 14, 30, false); 12757e995a2eSmrg } 12767e995a2eSmrg#if 0 /* for testing */ 12777e995a2eSmrg printf("Pipeline stats: IA verts=%llu, IA prims=%llu, VS=%llu, HS=%llu, " 12787e995a2eSmrg "DS=%llu, GS=%llu, GS prims=%llu, Clipper=%llu, " 12797e995a2eSmrg "Clipper prims=%llu, PS=%llu, CS=%llu\n", 12807e995a2eSmrg result->pipeline_statistics.ia_vertices, 12817e995a2eSmrg result->pipeline_statistics.ia_primitives, 
12827e995a2eSmrg result->pipeline_statistics.vs_invocations, 12837e995a2eSmrg result->pipeline_statistics.hs_invocations, 12847e995a2eSmrg result->pipeline_statistics.ds_invocations, 12857e995a2eSmrg result->pipeline_statistics.gs_invocations, 12867e995a2eSmrg result->pipeline_statistics.gs_primitives, 12877e995a2eSmrg result->pipeline_statistics.c_invocations, 12887e995a2eSmrg result->pipeline_statistics.c_primitives, 12897e995a2eSmrg result->pipeline_statistics.ps_invocations, 12907e995a2eSmrg result->pipeline_statistics.cs_invocations); 12917e995a2eSmrg#endif 12927e995a2eSmrg break; 12937e995a2eSmrg default: 12947e995a2eSmrg assert(0); 12957e995a2eSmrg } 12967e995a2eSmrg} 12977e995a2eSmrg 12981463c08dSmrgstatic bool r600_get_query_result(struct pipe_context *ctx, 12991463c08dSmrg struct pipe_query *query, bool wait, 13001463c08dSmrg union pipe_query_result *result) 13017e995a2eSmrg{ 13027e995a2eSmrg struct r600_common_context *rctx = (struct r600_common_context *)ctx; 13037e995a2eSmrg struct r600_query *rquery = (struct r600_query *)query; 13047e995a2eSmrg 13057e995a2eSmrg return rquery->ops->get_result(rctx, rquery, wait, result); 13067e995a2eSmrg} 13077e995a2eSmrg 13087e995a2eSmrgstatic void r600_get_query_result_resource(struct pipe_context *ctx, 13097e995a2eSmrg struct pipe_query *query, 13101463c08dSmrg bool wait, 13117e995a2eSmrg enum pipe_query_value_type result_type, 13127e995a2eSmrg int index, 13137e995a2eSmrg struct pipe_resource *resource, 13147e995a2eSmrg unsigned offset) 13157e995a2eSmrg{ 13167e995a2eSmrg struct r600_common_context *rctx = (struct r600_common_context *)ctx; 13177e995a2eSmrg struct r600_query *rquery = (struct r600_query *)query; 13187e995a2eSmrg 13197e995a2eSmrg rquery->ops->get_result_resource(rctx, rquery, wait, result_type, index, 13207e995a2eSmrg resource, offset); 13217e995a2eSmrg} 13227e995a2eSmrg 13237e995a2eSmrgstatic void r600_query_hw_clear_result(struct r600_query_hw *query, 13247e995a2eSmrg union pipe_query_result 
*result) 13257e995a2eSmrg{ 13267e995a2eSmrg util_query_clear_result(result, query->b.type); 13277e995a2eSmrg} 13287e995a2eSmrg 13297e995a2eSmrgbool r600_query_hw_get_result(struct r600_common_context *rctx, 13307e995a2eSmrg struct r600_query *rquery, 13317e995a2eSmrg bool wait, union pipe_query_result *result) 13327e995a2eSmrg{ 13337e995a2eSmrg struct r600_common_screen *rscreen = rctx->screen; 13347e995a2eSmrg struct r600_query_hw *query = (struct r600_query_hw *)rquery; 13357e995a2eSmrg struct r600_query_buffer *qbuf; 13367e995a2eSmrg 13377e995a2eSmrg query->ops->clear_result(query, result); 13387e995a2eSmrg 13397e995a2eSmrg for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) { 13401463c08dSmrg unsigned usage = PIPE_MAP_READ | 13411463c08dSmrg (wait ? 0 : PIPE_MAP_DONTBLOCK); 13427e995a2eSmrg unsigned results_base = 0; 13437e995a2eSmrg void *map; 13447e995a2eSmrg 13457e995a2eSmrg if (rquery->b.flushed) 13461463c08dSmrg map = rctx->ws->buffer_map(rctx->ws, qbuf->buf->buf, NULL, usage); 13477e995a2eSmrg else 13487e995a2eSmrg map = r600_buffer_map_sync_with_rings(rctx, qbuf->buf, usage); 13497e995a2eSmrg 13507e995a2eSmrg if (!map) 13517e995a2eSmrg return false; 13527e995a2eSmrg 13537e995a2eSmrg while (results_base != qbuf->results_end) { 13547e995a2eSmrg query->ops->add_result(rscreen, query, map + results_base, 13557e995a2eSmrg result); 13567e995a2eSmrg results_base += query->result_size; 13577e995a2eSmrg } 13587e995a2eSmrg } 13597e995a2eSmrg 13607e995a2eSmrg /* Convert the time to expected units. */ 13617e995a2eSmrg if (rquery->type == PIPE_QUERY_TIME_ELAPSED || 13627e995a2eSmrg rquery->type == PIPE_QUERY_TIMESTAMP) { 13637e995a2eSmrg result->u64 = (1000000 * result->u64) / rscreen->info.clock_crystal_freq; 13647e995a2eSmrg } 13657e995a2eSmrg return true; 13667e995a2eSmrg} 13677e995a2eSmrg 13687e995a2eSmrg/* Create the compute shader that is used to collect the results. 
/* Create the compute shader that is used to collect the results.
 *
 * One compute grid with a single thread is launched for every query result
 * buffer. The thread (optionally) reads a previous summary buffer, then
 * accumulates data from the query result buffer, and writes the result either
 * to a summary buffer to be consumed by the next grid invocation or to the
 * user-supplied buffer.
 *
 * Data layout:
 *
 * CONST
 *  0.x = end_offset
 *  0.y = result_stride
 *  0.z = result_count
 *  0.w = bit field:
 *          1: read previously accumulated values
 *          2: write accumulated values for chaining
 *          4: write result available
 *          8: convert result to boolean (0/1)
 *         16: only read one dword and use that as result
 *         32: apply timestamp conversion
 *         64: store full 64 bits result
 *        128: store signed 32 bits result
 *        256: SO_OVERFLOW mode: take the difference of two successive half-pairs
 *  1.x = fence_offset
 *  1.y = pair_stride
 *  1.z = pair_count
 *  1.w = result_offset
 *  2.x = buffer0 offset
 *
 * BUFFER[0] = query result buffer
 * BUFFER[1] = previous summary buffer
 * BUFFER[2] = next summary buffer or user-supplied buffer
 */
static void r600_create_query_result_shader(struct r600_common_context *rctx)
{
	/* TEMP[0].xy = accumulated result so far
	 * TEMP[0].z = result not available
	 *
	 * TEMP[1].x = current result index
	 * TEMP[1].y = current pair index
	 */
	static const char text_tmpl[] =
		"COMP\n"
		"PROPERTY CS_FIXED_BLOCK_WIDTH 1\n"
		"PROPERTY CS_FIXED_BLOCK_HEIGHT 1\n"
		"PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"
		"DCL BUFFER[0]\n"
		"DCL BUFFER[1]\n"
		"DCL BUFFER[2]\n"
		"DCL CONST[0][0..2]\n"
		"DCL TEMP[0..5]\n"
		"IMM[0] UINT32 {0, 31, 2147483647, 4294967295}\n"
		"IMM[1] UINT32 {1, 2, 4, 8}\n"
		"IMM[2] UINT32 {16, 32, 64, 128}\n"
		"IMM[3] UINT32 {1000000, 0, %u, 0}\n" /* for timestamp conversion */
		"IMM[4] UINT32 {256, 0, 0, 0}\n"

		/* Config bit 16: "read one dword" mode (timestamps). */
		"AND TEMP[5], CONST[0][0].wwww, IMM[2].xxxx\n"
		"UIF TEMP[5]\n"
			/* Check result availability. */
			"UADD TEMP[1].x, CONST[0][1].xxxx, CONST[0][2].xxxx\n"
			"LOAD TEMP[1].x, BUFFER[0], TEMP[1].xxxx\n"
			"ISHR TEMP[0].z, TEMP[1].xxxx, IMM[0].yyyy\n"
			"MOV TEMP[1], TEMP[0].zzzz\n"
			"NOT TEMP[0].z, TEMP[0].zzzz\n"

			/* Load result if available. */
			"UIF TEMP[1]\n"
				"UADD TEMP[0].x, IMM[0].xxxx, CONST[0][2].xxxx\n"
				"LOAD TEMP[0].xy, BUFFER[0], TEMP[0].xxxx\n"
			"ENDIF\n"
		"ELSE\n"
			/* Load previously accumulated result if requested. */
			"MOV TEMP[0], IMM[0].xxxx\n"
			"AND TEMP[4], CONST[0][0].wwww, IMM[1].xxxx\n"
			"UIF TEMP[4]\n"
				"LOAD TEMP[0].xyz, BUFFER[1], IMM[0].xxxx\n"
			"ENDIF\n"

			"MOV TEMP[1].x, IMM[0].xxxx\n"
			"BGNLOOP\n"
				/* Break if accumulated result so far is not available. */
				"UIF TEMP[0].zzzz\n"
					"BRK\n"
				"ENDIF\n"

				/* Break if result_index >= result_count. */
				"USGE TEMP[5], TEMP[1].xxxx, CONST[0][0].zzzz\n"
				"UIF TEMP[5]\n"
					"BRK\n"
				"ENDIF\n"

				/* Load fence and check result availability */
				"UMAD TEMP[5].x, TEMP[1].xxxx, CONST[0][0].yyyy, CONST[0][1].xxxx\n"
				"UADD TEMP[5].x, TEMP[5].xxxx, CONST[0][2].xxxx\n"
				"LOAD TEMP[5].x, BUFFER[0], TEMP[5].xxxx\n"
				"ISHR TEMP[0].z, TEMP[5].xxxx, IMM[0].yyyy\n"
				"NOT TEMP[0].z, TEMP[0].zzzz\n"
				"UIF TEMP[0].zzzz\n"
					"BRK\n"
				"ENDIF\n"

				"MOV TEMP[1].y, IMM[0].xxxx\n"
				"BGNLOOP\n"
					/* Load start and end. */
					"UMUL TEMP[5].x, TEMP[1].xxxx, CONST[0][0].yyyy\n"
					"UMAD TEMP[5].x, TEMP[1].yyyy, CONST[0][1].yyyy, TEMP[5].xxxx\n"
					"UADD TEMP[5].x, TEMP[5].xxxx, CONST[0][2].xxxx\n"
					"LOAD TEMP[2].xy, BUFFER[0], TEMP[5].xxxx\n"

					"UADD TEMP[5].y, TEMP[5].xxxx, CONST[0][0].xxxx\n"
					"LOAD TEMP[3].xy, BUFFER[0], TEMP[5].yyyy\n"

					/* 64-bit difference: end - start. */
					"U64ADD TEMP[4].xy, TEMP[3], -TEMP[2]\n"

					/* Config bit 256: SO_OVERFLOW mode. */
					"AND TEMP[5].z, CONST[0][0].wwww, IMM[4].xxxx\n"
					"UIF TEMP[5].zzzz\n"
						/* Load second start/end half-pair and
						 * take the difference
						 */
						"UADD TEMP[5].xy, TEMP[5], IMM[1].wwww\n"
						"LOAD TEMP[2].xy, BUFFER[0], TEMP[5].xxxx\n"
						"LOAD TEMP[3].xy, BUFFER[0], TEMP[5].yyyy\n"

						"U64ADD TEMP[3].xy, TEMP[3], -TEMP[2]\n"
						"U64ADD TEMP[4].xy, TEMP[4], -TEMP[3]\n"
					"ENDIF\n"

					"U64ADD TEMP[0].xy, TEMP[0], TEMP[4]\n"

					/* Increment pair index */
					"UADD TEMP[1].y, TEMP[1].yyyy, IMM[1].xxxx\n"
					"USGE TEMP[5], TEMP[1].yyyy, CONST[0][1].zzzz\n"
					"UIF TEMP[5]\n"
						"BRK\n"
					"ENDIF\n"
				"ENDLOOP\n"

				/* Increment result index */
				"UADD TEMP[1].x, TEMP[1].xxxx, IMM[1].xxxx\n"
			"ENDLOOP\n"
		"ENDIF\n"

		/* Config bit 2: write accumulated values for chaining. */
		"AND TEMP[4], CONST[0][0].wwww, IMM[1].yyyy\n"
		"UIF TEMP[4]\n"
			/* Store accumulated data for chaining. */
			"STORE BUFFER[2].xyz, CONST[0][1].wwww, TEMP[0]\n"
		"ELSE\n"
			/* Config bit 4: write result availability only. */
			"AND TEMP[4], CONST[0][0].wwww, IMM[1].zzzz\n"
			"UIF TEMP[4]\n"
				/* Store result availability. */
				"NOT TEMP[0].z, TEMP[0]\n"
				"AND TEMP[0].z, TEMP[0].zzzz, IMM[1].xxxx\n"
				"STORE BUFFER[2].x, CONST[0][1].wwww, TEMP[0].zzzz\n"

				/* Config bit 64: also clear the upper dword. */
				"AND TEMP[4], CONST[0][0].wwww, IMM[2].zzzz\n"
				"UIF TEMP[4]\n"
					"STORE BUFFER[2].y, CONST[0][1].wwww, IMM[0].xxxx\n"
				"ENDIF\n"
			"ELSE\n"
				/* Store result if it is available. */
				"NOT TEMP[4], TEMP[0].zzzz\n"
				"UIF TEMP[4]\n"
					/* Apply timestamp conversion */
					"AND TEMP[4], CONST[0][0].wwww, IMM[2].yyyy\n"
					"UIF TEMP[4]\n"
						"U64MUL TEMP[0].xy, TEMP[0], IMM[3].xyxy\n"
						"U64DIV TEMP[0].xy, TEMP[0], IMM[3].zwzw\n"
					"ENDIF\n"

					/* Convert to boolean */
					"AND TEMP[4], CONST[0][0].wwww, IMM[1].wwww\n"
					"UIF TEMP[4]\n"
						"U64SNE TEMP[0].x, TEMP[0].xyxy, IMM[4].zwzw\n"
						"AND TEMP[0].x, TEMP[0].xxxx, IMM[1].xxxx\n"
						"MOV TEMP[0].y, IMM[0].xxxx\n"
					"ENDIF\n"

					/* Config bit 64: store the full 64-bit result. */
					"AND TEMP[4], CONST[0][0].wwww, IMM[2].zzzz\n"
					"UIF TEMP[4]\n"
						"STORE BUFFER[2].xy, CONST[0][1].wwww, TEMP[0].xyxy\n"
					"ELSE\n"
						/* Clamping */
						"UIF TEMP[0].yyyy\n"
							"MOV TEMP[0].x, IMM[0].wwww\n"
						"ENDIF\n"

						/* Config bit 128: clamp to signed 32-bit max. */
						"AND TEMP[4], CONST[0][0].wwww, IMM[2].wwww\n"
						"UIF TEMP[4]\n"
							"UMIN TEMP[0].x, TEMP[0].xxxx, IMM[0].zzzz\n"
						"ENDIF\n"

						"STORE BUFFER[2].x, CONST[0][1].wwww, TEMP[0].xxxx\n"
					"ENDIF\n"
				"ENDIF\n"
			"ENDIF\n"
		"ENDIF\n"

		"END\n";

	char text[sizeof(text_tmpl) + 32];
	struct tgsi_token tokens[1024];
	struct pipe_compute_state state = {};

	/* Hard code the frequency into the shader so that the backend can
	 * use the full range of optimizations for divide-by-constant.
	 */
	snprintf(text, sizeof(text), text_tmpl,
		 rctx->screen->info.clock_crystal_freq);

	if (!tgsi_text_translate(text, tokens, ARRAY_SIZE(tokens))) {
		assert(false);
		return;
	}

	state.ir_type = PIPE_SHADER_IR_TGSI;
	state.prog = tokens;

	rctx->query_result_shader = rctx->b.create_compute_state(&rctx->b, &state);
}

/* Undo the compute state changes made by r600_query_hw_get_result_resource:
 * rebind the saved compute shader, compute constant buffer 0 and shader
 * buffers 0..2, then drop the buffer references held in *st. */
static void r600_restore_qbo_state(struct r600_common_context *rctx,
				   struct r600_qbo_state *st)
{
	rctx->b.bind_compute_state(&rctx->b, st->saved_compute);
	rctx->b.set_constant_buffer(&rctx->b, PIPE_SHADER_COMPUTE, 0, true, &st->saved_const0);
	rctx->b.set_shader_buffers(&rctx->b, PIPE_SHADER_COMPUTE, 0, 3, st->saved_ssbo, ~0);
	for (unsigned i = 0; i < 3; ++i)
		pipe_resource_reference(&st->saved_ssbo[i].buffer, NULL);
}
/* Write the result of a HW query into a buffer object on the GPU, without a
 * CPU round trip, by dispatching the query-result compute shader once per
 * query result buffer in the chain.
 *
 * \param wait        if true, wait on the last result fence before launching
 *                    the resolve grid
 * \param result_type controls 32 vs 64 bit and signedness of the stored value
 * \param index       result component; negative means "result availability"
 * \param resource/offset  destination buffer and byte offset
 *
 * The consts struct mirrors the CONST[0][0..2] layout documented above
 * r600_create_query_result_shader; config is the bit field described there. */
static void r600_query_hw_get_result_resource(struct r600_common_context *rctx,
					      struct r600_query *rquery,
					      bool wait,
					      enum pipe_query_value_type result_type,
					      int index,
					      struct pipe_resource *resource,
					      unsigned offset)
{
	struct r600_query_hw *query = (struct r600_query_hw *)rquery;
	struct r600_query_buffer *qbuf;
	struct r600_query_buffer *qbuf_prev;
	struct pipe_resource *tmp_buffer = NULL;
	unsigned tmp_buffer_offset = 0;
	struct r600_qbo_state saved_state = {};
	struct pipe_grid_info grid = {};
	struct pipe_constant_buffer constant_buffer = {};
	struct pipe_shader_buffer ssbo[3];
	struct r600_hw_query_params params;
	struct {
		uint32_t end_offset;
		uint32_t result_stride;
		uint32_t result_count;
		uint32_t config;
		uint32_t fence_offset;
		uint32_t pair_stride;
		uint32_t pair_count;
		uint32_t buffer_offset;
		uint32_t buffer0_offset;
	} consts;

	/* Lazily create the resolve shader; bail out if that fails. */
	if (!rctx->query_result_shader) {
		r600_create_query_result_shader(rctx);
		if (!rctx->query_result_shader)
			return;
	}

	/* Chained query buffers need a zeroed scratch summary buffer to pass
	 * intermediate results between grid launches. */
	if (query->buffer.previous) {
		u_suballocator_alloc(&rctx->allocator_zeroed_memory, 16, 256,
				     &tmp_buffer_offset, &tmp_buffer);
		if (!tmp_buffer)
			return;
	}

	rctx->save_qbo_state(&rctx->b, &saved_state);

	r600_get_hw_query_params(rctx, query, index >= 0 ? index : 0, &params);
	consts.end_offset = params.end_offset - params.start_offset;
	consts.fence_offset = params.fence_offset - params.start_offset;
	consts.result_stride = query->result_size;
	consts.pair_stride = params.pair_stride;
	consts.pair_count = params.pair_count;

	constant_buffer.buffer_size = sizeof(consts);
	constant_buffer.user_buffer = &consts;

	/* BUFFER[1] = previous summary, BUFFER[2] = next summary (replaced by
	 * the user buffer for the last link in the chain below). */
	ssbo[1].buffer = tmp_buffer;
	ssbo[1].buffer_offset = tmp_buffer_offset;
	ssbo[1].buffer_size = 16;

	ssbo[2] = ssbo[1];

	rctx->b.bind_compute_state(&rctx->b, rctx->query_result_shader);

	/* One thread resolves the whole buffer. */
	grid.block[0] = 1;
	grid.block[1] = 1;
	grid.block[2] = 1;
	grid.grid[0] = 1;
	grid.grid[1] = 1;
	grid.grid[2] = 1;

	/* Build the config bit field (see the shader comment for meanings). */
	consts.config = 0;
	if (index < 0)
		consts.config |= 4;
	if (query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE ||
	    query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE)
		consts.config |= 8;
	else if (query->b.type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
		 query->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
		consts.config |= 8 | 256;
	else if (query->b.type == PIPE_QUERY_TIMESTAMP ||
		 query->b.type == PIPE_QUERY_TIME_ELAPSED)
		consts.config |= 32;

	switch (result_type) {
	case PIPE_QUERY_TYPE_U64:
	case PIPE_QUERY_TYPE_I64:
		consts.config |= 64;
		break;
	case PIPE_QUERY_TYPE_I32:
		consts.config |= 128;
		break;
	case PIPE_QUERY_TYPE_U32:
		break;
	}

	rctx->flags |= rctx->screen->barrier_flags.cp_to_L2;

	/* Walk the buffer chain from newest to oldest, launching one resolve
	 * grid per buffer; each launch chains through the summary buffer. */
	for (qbuf = &query->buffer; qbuf; qbuf = qbuf_prev) {
		if (query->b.type != PIPE_QUERY_TIMESTAMP) {
			qbuf_prev = qbuf->previous;
			consts.result_count = qbuf->results_end / query->result_size;
			consts.config &= ~3;
			if (qbuf != &query->buffer)
				consts.config |= 1;
			if (qbuf->previous)
				consts.config |= 2;
		} else {
			/* Only read the last timestamp. */
			qbuf_prev = NULL;
			consts.result_count = 0;
			consts.config |= 16;
			params.start_offset += qbuf->results_end - query->result_size;
		}

		/* SSBO bindings are 256-byte aligned; the sub-256 remainder is
		 * passed to the shader via buffer0_offset/buffer_offset. */
		ssbo[0].buffer = &qbuf->buf->b.b;
		ssbo[0].buffer_offset = params.start_offset & ~0xff;
		ssbo[0].buffer_size = qbuf->results_end - ssbo[0].buffer_offset;
		consts.buffer0_offset = (params.start_offset & 0xff);
		if (!qbuf->previous) {
			/* Oldest buffer: write to the user-supplied buffer. */
			ssbo[2].buffer = resource;
			ssbo[2].buffer_offset = offset & ~0xff;
			ssbo[2].buffer_size = offset + 8;
			consts.buffer_offset = (offset & 0xff);
		} else
			consts.buffer_offset = 0;

		rctx->b.set_constant_buffer(&rctx->b, PIPE_SHADER_COMPUTE, 0, false, &constant_buffer);

		rctx->b.set_shader_buffers(&rctx->b, PIPE_SHADER_COMPUTE, 0, 3, ssbo, ~0);

		if (wait && qbuf == &query->buffer) {
			uint64_t va;

			/* Wait for result availability. Wait only for readiness
			 * of the last entry, since the fence writes should be
			 * serialized in the CP.
			 */
			va = qbuf->buf->gpu_address + qbuf->results_end - query->result_size;
			va += params.fence_offset;

			r600_gfx_wait_fence(rctx, qbuf->buf, va, 0x80000000, 0x80000000);
		}

		rctx->b.launch_grid(&rctx->b, &grid);
		rctx->flags |= rctx->screen->barrier_flags.compute_to_L2;
	}

	r600_restore_qbo_state(rctx, &saved_state);
	pipe_resource_reference(&tmp_buffer, NULL);
}
/* pipe_context::render_condition hook: record the predication query and mode,
 * size the SET_PREDICATION atom, and mark it dirty (or clean when query is
 * NULL, i.e. conditional rendering is being disabled). */
static void r600_render_condition(struct pipe_context *ctx,
				  struct pipe_query *query,
				  bool condition,
				  enum pipe_render_cond_flag mode)
{
	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
	struct r600_query_hw *rquery = (struct r600_query_hw *)query;
	struct r600_query_buffer *qbuf;
	struct r600_atom *atom = &rctx->render_cond_atom;

	/* Compute the size of SET_PREDICATION packets. */
	atom->num_dw = 0;
	if (query) {
		/* 5 dwords per result in each buffer of the chain. */
		for (qbuf = &rquery->buffer; qbuf; qbuf = qbuf->previous)
			atom->num_dw += (qbuf->results_end / rquery->result_size) * 5;

		/* SO_OVERFLOW_ANY emits one packet sequence per stream. */
		if (rquery->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
			atom->num_dw *= R600_MAX_STREAMS;
	}

	rctx->render_cond = query;
	rctx->render_cond_invert = condition;
	rctx->render_cond_mode = mode;

	rctx->set_atom_dirty(rctx, atom, query != NULL);
}

/* Emit the "stop" packets for every active query, e.g. around a flush.
 * After all stops are emitted the suspend dword counter must be back at 0. */
void r600_suspend_queries(struct r600_common_context *ctx)
{
	struct r600_query_hw *query;

	LIST_FOR_EACH_ENTRY(query, &ctx->active_queries, list) {
		r600_query_hw_emit_stop(ctx, query);
	}
	assert(ctx->num_cs_dw_queries_suspend == 0);
}

/* Upper bound of command-stream dwords needed to resume every query in
 * query_list, so the resume sequence is never interrupted by a CS flush. */
static unsigned r600_queries_num_cs_dw_for_resuming(struct r600_common_context *ctx,
						    struct list_head *query_list)
{
	struct r600_query_hw *query;
	unsigned num_dw = 0;

	LIST_FOR_EACH_ENTRY(query, query_list, list) {
		/* begin + end */
		num_dw += query->num_cs_dw_begin + query->num_cs_dw_end;

		/* Workaround for the fact that
		 * num_cs_dw_nontimer_queries_suspend is incremented for every
		 * resumed query, which raises the bar in need_cs_space for
		 * queries about to be resumed.
		 */
		num_dw += query->num_cs_dw_end;
	}
	/* primitives generated query */
	num_dw += ctx->streamout.enable_atom.num_dw;
	/* guess for ZPASS enable or PERFECT_ZPASS_COUNT enable updates */
	num_dw += 13;

	return num_dw;
}
/* Re-emit the "start" packets for every active query after a suspend.
 * Reserves CS space up front so the whole resume sequence fits in one IB. */
void r600_resume_queries(struct r600_common_context *ctx)
{
	struct r600_query_hw *query;
	unsigned num_cs_dw = r600_queries_num_cs_dw_for_resuming(ctx, &ctx->active_queries);

	assert(ctx->num_cs_dw_queries_suspend == 0);

	/* Check CS space here. Resuming must not be interrupted by flushes. */
	ctx->need_gfx_cs_space(&ctx->b, num_cs_dw, true);

	LIST_FOR_EACH_ENTRY(query, &ctx->active_queries, list) {
		r600_query_hw_emit_start(ctx, query);
	}
}
/* Fix radeon_info::enabled_rb_mask for R600, R700, EVERGREEN, NI.
 *
 * Prefer decoding the kernel-provided backend map; if that is unusable
 * (older kernels), fall back to issuing a ZPASS_DONE event and inferring
 * which render backends actually wrote results. */
void r600_query_fix_enabled_rb_mask(struct r600_common_screen *rscreen)
{
	struct r600_common_context *ctx =
		(struct r600_common_context*)rscreen->aux_context;
	struct radeon_cmdbuf *cs = &ctx->gfx.cs;
	struct r600_resource *buffer;
	uint32_t *results;
	unsigned i, mask = 0;
	unsigned max_rbs;

	if (ctx->family == CHIP_JUNIPER) {
		/*
		 * Fix for predication lockups - the chip can only ever have
		 * 4 RBs, however it looks like the predication logic assumes
		 * there's 8, trying to read results from query buffers never
		 * written to. By increasing this number we'll write the
		 * status bit for these as per the normal disabled rb logic.
		 */
		ctx->screen->info.max_render_backends = 8;
	}
	max_rbs = ctx->screen->info.max_render_backends;

	assert(rscreen->chip_class <= CAYMAN);

	/*
	 * if backend_map query is supported by the kernel.
	 * Note the kernel drm driver for a long time never filled in the
	 * associated data on eg/cm, only on r600/r700, hence ignore the valid
	 * bit there if the map is zero.
	 * (Albeit some chips with just one active rb can have a valid 0 map.)
	 */
	if (rscreen->info.r600_gb_backend_map_valid &&
	    (ctx->chip_class < EVERGREEN || rscreen->info.r600_gb_backend_map != 0)) {
		unsigned num_tile_pipes = rscreen->info.num_tile_pipes;
		unsigned backend_map = rscreen->info.r600_gb_backend_map;
		unsigned item_width, item_mask;

		/* The map packs one RB index per tile pipe: 3 bits on
		 * evergreen+, 2 bits on r600/r700. */
		if (ctx->chip_class >= EVERGREEN) {
			item_width = 4;
			item_mask = 0x7;
		} else {
			item_width = 2;
			item_mask = 0x3;
		}

		while (num_tile_pipes--) {
			i = backend_map & item_mask;
			mask |= (1<<i);
			backend_map >>= item_width;
		}
		if (mask != 0) {
			rscreen->info.enabled_rb_mask = mask;
			return;
		}
	}

	/* otherwise backup path for older kernels */

	/* create buffer for event data */
	buffer = (struct r600_resource*)
		pipe_buffer_create(ctx->b.screen, 0,
				   PIPE_USAGE_STAGING, max_rbs * 16);
	if (!buffer)
		return;

	/* initialize buffer with zeroes */
	results = r600_buffer_map_sync_with_rings(ctx, buffer, PIPE_MAP_WRITE);
	if (results) {
		memset(results, 0, max_rbs * 4 * 4);

		/* emit EVENT_WRITE for ZPASS_DONE */
		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
		radeon_emit(cs, buffer->gpu_address);
		radeon_emit(cs, buffer->gpu_address >> 32);

		r600_emit_reloc(ctx, &ctx->gfx, buffer,
				RADEON_USAGE_WRITE, RADEON_PRIO_QUERY);

		/* analyze results */
		results = r600_buffer_map_sync_with_rings(ctx, buffer, PIPE_MAP_READ);
		if (results) {
			for(i = 0; i < max_rbs; i++) {
				/* at least highest bit will be set if backend is used */
				if (results[i*4 + 1])
					mask |= (1<<i);
			}
		}
	}

	r600_resource_reference(&buffer, NULL);

	/* Only override the kernel-reported mask when we learned something. */
	if (mask) {
		if (rscreen->debug_flags & DBG_INFO &&
		    mask != rscreen->info.enabled_rb_mask) {
			printf("enabled_rb_mask (fixed) = 0x%x\n", mask);
		}
		rscreen->info.enabled_rb_mask = mask;
	}
}
/* Helper macros to build pipe_driver_query_info entries.
 * XFULL: full entry with explicit group; X: group-less entry;
 * XG: entry in a named R600_QUERY_GROUP_* group. */
#define XFULL(name_, query_type_, type_, result_type_, group_id_) \
	{ \
		.name = name_, \
		.query_type = R600_QUERY_##query_type_, \
		.type = PIPE_DRIVER_QUERY_TYPE_##type_, \
		.result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_##result_type_, \
		.group_id = group_id_ \
	}

#define X(name_, query_type_, type_, result_type_) \
	XFULL(name_, query_type_, type_, result_type_, ~(unsigned)0)

#define XG(group_, name_, query_type_, type_, result_type_) \
	XFULL(name_, query_type_, type_, result_type_, R600_QUERY_GROUP_##group_)

/* Table of software driver queries exposed through get_driver_query_info.
 * NOTE: the tail of this list is gated on the DRM version by
 * r600_get_num_queries, so ordering here is significant. */
static const struct pipe_driver_query_info r600_driver_query_list[] = {
	X("num-compilations",		NUM_COMPILATIONS,	UINT64, CUMULATIVE),
	X("num-shaders-created",	NUM_SHADERS_CREATED,	UINT64, CUMULATIVE),
	X("num-shader-cache-hits",	NUM_SHADER_CACHE_HITS,	UINT64, CUMULATIVE),
	X("draw-calls",			DRAW_CALLS,		UINT64, AVERAGE),
	X("decompress-calls",		DECOMPRESS_CALLS,	UINT64, AVERAGE),
	X("MRT-draw-calls",		MRT_DRAW_CALLS,		UINT64, AVERAGE),
	X("prim-restart-calls",		PRIM_RESTART_CALLS,	UINT64, AVERAGE),
	X("spill-draw-calls",		SPILL_DRAW_CALLS,	UINT64, AVERAGE),
	X("compute-calls",		COMPUTE_CALLS,		UINT64, AVERAGE),
	X("spill-compute-calls",	SPILL_COMPUTE_CALLS,	UINT64, AVERAGE),
	X("dma-calls",			DMA_CALLS,		UINT64, AVERAGE),
	X("cp-dma-calls",		CP_DMA_CALLS,		UINT64, AVERAGE),
	X("num-vs-flushes",		NUM_VS_FLUSHES,		UINT64, AVERAGE),
	X("num-ps-flushes",		NUM_PS_FLUSHES,		UINT64, AVERAGE),
	X("num-cs-flushes",		NUM_CS_FLUSHES,		UINT64, AVERAGE),
	X("num-CB-cache-flushes",	NUM_CB_CACHE_FLUSHES,	UINT64, AVERAGE),
	X("num-DB-cache-flushes",	NUM_DB_CACHE_FLUSHES,	UINT64, AVERAGE),
	X("num-resident-handles",	NUM_RESIDENT_HANDLES,	UINT64, AVERAGE),
	X("tc-offloaded-slots",		TC_OFFLOADED_SLOTS,	UINT64, AVERAGE),
	X("tc-direct-slots",		TC_DIRECT_SLOTS,	UINT64, AVERAGE),
	X("tc-num-syncs",		TC_NUM_SYNCS,		UINT64, AVERAGE),
	X("CS-thread-busy",		CS_THREAD_BUSY,		UINT64, AVERAGE),
	X("gallium-thread-busy",	GALLIUM_THREAD_BUSY,	UINT64, AVERAGE),
	X("requested-VRAM",		REQUESTED_VRAM,		BYTES, AVERAGE),
	X("requested-GTT",		REQUESTED_GTT,		BYTES, AVERAGE),
	X("mapped-VRAM",		MAPPED_VRAM,		BYTES, AVERAGE),
	X("mapped-GTT",			MAPPED_GTT,		BYTES, AVERAGE),
	X("buffer-wait-time",		BUFFER_WAIT_TIME,	MICROSECONDS, CUMULATIVE),
	X("num-mapped-buffers",		NUM_MAPPED_BUFFERS,	UINT64, AVERAGE),
	X("num-GFX-IBs",		NUM_GFX_IBS,		UINT64, AVERAGE),
	X("num-SDMA-IBs",		NUM_SDMA_IBS,		UINT64, AVERAGE),
	X("GFX-BO-list-size",		GFX_BO_LIST_SIZE,	UINT64, AVERAGE),
	X("num-bytes-moved",		NUM_BYTES_MOVED,	BYTES, CUMULATIVE),
	X("num-evictions",		NUM_EVICTIONS,		UINT64, CUMULATIVE),
	X("VRAM-CPU-page-faults",	NUM_VRAM_CPU_PAGE_FAULTS, UINT64, CUMULATIVE),
	X("VRAM-usage",			VRAM_USAGE,		BYTES, AVERAGE),
	X("VRAM-vis-usage",		VRAM_VIS_USAGE,		BYTES, AVERAGE),
	X("GTT-usage",			GTT_USAGE,		BYTES, AVERAGE),

	/* GPIN queries are for the benefit of old versions of GPUPerfStudio,
	 * which use it as a fallback path to detect the GPU type.
	 *
	 * Note: The names of these queries are significant for GPUPerfStudio
	 * (and possibly their order as well). */
	XG(GPIN, "GPIN_000",		GPIN_ASIC_ID,		UINT, AVERAGE),
	XG(GPIN, "GPIN_001",		GPIN_NUM_SIMD,		UINT, AVERAGE),
	XG(GPIN, "GPIN_002",		GPIN_NUM_RB,		UINT, AVERAGE),
	XG(GPIN, "GPIN_003",		GPIN_NUM_SPI,		UINT, AVERAGE),
	XG(GPIN, "GPIN_004",		GPIN_NUM_SE,		UINT, AVERAGE),

	X("temperature",		GPU_TEMPERATURE,	UINT64, AVERAGE),
	X("shader-clock",		CURRENT_GPU_SCLK,	HZ, AVERAGE),
	X("memory-clock",		CURRENT_GPU_MCLK,	HZ, AVERAGE),

	/* The following queries must be at the end of the list because their
	 * availability is adjusted dynamically based on the DRM version. */
	X("GPU-load",			GPU_LOAD,		UINT64, AVERAGE),
	X("GPU-shaders-busy",		GPU_SHADERS_BUSY,	UINT64, AVERAGE),
	X("GPU-ta-busy",		GPU_TA_BUSY,		UINT64, AVERAGE),
	X("GPU-gds-busy",		GPU_GDS_BUSY,		UINT64, AVERAGE),
	X("GPU-vgt-busy",		GPU_VGT_BUSY,		UINT64, AVERAGE),
	X("GPU-ia-busy",		GPU_IA_BUSY,		UINT64, AVERAGE),
	X("GPU-sx-busy",		GPU_SX_BUSY,		UINT64, AVERAGE),
	X("GPU-wd-busy",		GPU_WD_BUSY,		UINT64, AVERAGE),
	X("GPU-bci-busy",		GPU_BCI_BUSY,		UINT64, AVERAGE),
	X("GPU-sc-busy",		GPU_SC_BUSY,		UINT64, AVERAGE),
	X("GPU-pa-busy",		GPU_PA_BUSY,		UINT64, AVERAGE),
	X("GPU-db-busy",		GPU_DB_BUSY,		UINT64, AVERAGE),
	X("GPU-cp-busy",		GPU_CP_BUSY,		UINT64, AVERAGE),
	X("GPU-cb-busy",		GPU_CB_BUSY,		UINT64, AVERAGE),
	X("GPU-sdma-busy",		GPU_SDMA_BUSY,		UINT64, AVERAGE),
	X("GPU-pfp-busy",		GPU_PFP_BUSY,		UINT64, AVERAGE),
	X("GPU-meq-busy",		GPU_MEQ_BUSY,		UINT64, AVERAGE),
	X("GPU-me-busy",		GPU_ME_BUSY,		UINT64, AVERAGE),
	X("GPU-surf-sync-busy",		GPU_SURF_SYNC_BUSY,	UINT64, AVERAGE),
	X("GPU-cp-dma-busy",		GPU_CP_DMA_BUSY,	UINT64, AVERAGE),
	X("GPU-scratch-ram-busy",	GPU_SCRATCH_RAM_BUSY,	UINT64, AVERAGE),
};

#undef X
#undef XG
#undef XFULL
/* Number of entries of r600_driver_query_list usable with this kernel.
 * Older DRM (< 2.42) lacks the trailing queries, so 25 entries are hidden
 * (presumably the DRM-gated tail of the list — TODO confirm the count
 * stays in sync when the table changes). */
static unsigned r600_get_num_queries(struct r600_common_screen *rscreen)
{
	if (rscreen->info.drm_minor >= 42)
		return ARRAY_SIZE(r600_driver_query_list);
	else
		return ARRAY_SIZE(r600_driver_query_list) - 25;
}

/* pipe_screen::get_driver_query_info hook.
 * With info == NULL, returns the total query count (software + perfcounter).
 * Indices past the software list are forwarded to the perfcounter code.
 * For selected queries a meaningful max_value is filled in, and group ids
 * of grouped software queries are shifted past the perfcounter groups. */
static int r600_get_driver_query_info(struct pipe_screen *screen,
				      unsigned index,
				      struct pipe_driver_query_info *info)
{
	struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
	unsigned num_queries = r600_get_num_queries(rscreen);

	if (!info) {
		unsigned num_perfcounters =
			r600_get_perfcounter_info(rscreen, 0, NULL);

		return num_queries + num_perfcounters;
	}

	if (index >= num_queries)
		return r600_get_perfcounter_info(rscreen, index - num_queries, info);

	*info = r600_driver_query_list[index];

	switch (info->query_type) {
	case R600_QUERY_REQUESTED_VRAM:
	case R600_QUERY_VRAM_USAGE:
	case R600_QUERY_MAPPED_VRAM:
		info->max_value.u64 = rscreen->info.vram_size;
		break;
	case R600_QUERY_REQUESTED_GTT:
	case R600_QUERY_GTT_USAGE:
	case R600_QUERY_MAPPED_GTT:
		info->max_value.u64 = rscreen->info.gart_size;
		break;
	case R600_QUERY_GPU_TEMPERATURE:
		/* degrees Celsius cap for HUD scaling */
		info->max_value.u64 = 125;
		break;
	case R600_QUERY_VRAM_VIS_USAGE:
		info->max_value.u64 = rscreen->info.vram_vis_size;
		break;
	}

	/* Perfcounter groups precede software groups in the group index
	 * space (see r600_get_driver_query_group_info). */
	if (info->group_id != ~(unsigned)0 && rscreen->perfcounters)
		info->group_id += rscreen->perfcounters->num_groups;

	return 1;
}
R600_QUERY_GPU_TEMPERATURE: 20697e995a2eSmrg info->max_value.u64 = 125; 20707e995a2eSmrg break; 20717e995a2eSmrg case R600_QUERY_VRAM_VIS_USAGE: 20727e995a2eSmrg info->max_value.u64 = rscreen->info.vram_vis_size; 20737e995a2eSmrg break; 20747e995a2eSmrg } 20757e995a2eSmrg 20767e995a2eSmrg if (info->group_id != ~(unsigned)0 && rscreen->perfcounters) 20777e995a2eSmrg info->group_id += rscreen->perfcounters->num_groups; 20787e995a2eSmrg 20797e995a2eSmrg return 1; 20807e995a2eSmrg} 20817e995a2eSmrg 20827e995a2eSmrg/* Note: Unfortunately, GPUPerfStudio hardcodes the order of hardware 20837e995a2eSmrg * performance counter groups, so be careful when changing this and related 20847e995a2eSmrg * functions. 20857e995a2eSmrg */ 20867e995a2eSmrgstatic int r600_get_driver_query_group_info(struct pipe_screen *screen, 20877e995a2eSmrg unsigned index, 20887e995a2eSmrg struct pipe_driver_query_group_info *info) 20897e995a2eSmrg{ 20907e995a2eSmrg struct r600_common_screen *rscreen = (struct r600_common_screen *)screen; 20917e995a2eSmrg unsigned num_pc_groups = 0; 20927e995a2eSmrg 20937e995a2eSmrg if (rscreen->perfcounters) 20947e995a2eSmrg num_pc_groups = rscreen->perfcounters->num_groups; 20957e995a2eSmrg 20967e995a2eSmrg if (!info) 20977e995a2eSmrg return num_pc_groups + R600_NUM_SW_QUERY_GROUPS; 20987e995a2eSmrg 20997e995a2eSmrg if (index < num_pc_groups) 21007e995a2eSmrg return r600_get_perfcounter_group_info(rscreen, index, info); 21017e995a2eSmrg 21027e995a2eSmrg index -= num_pc_groups; 21037e995a2eSmrg if (index >= R600_NUM_SW_QUERY_GROUPS) 21047e995a2eSmrg return 0; 21057e995a2eSmrg 21067e995a2eSmrg info->name = "GPIN"; 21077e995a2eSmrg info->max_active_queries = 5; 21087e995a2eSmrg info->num_queries = 5; 21097e995a2eSmrg return 1; 21107e995a2eSmrg} 21117e995a2eSmrg 21127e995a2eSmrgvoid r600_query_init(struct r600_common_context *rctx) 21137e995a2eSmrg{ 21147e995a2eSmrg rctx->b.create_query = r600_create_query; 21157e995a2eSmrg rctx->b.create_batch_query = 
/* Install the per-context query entry points on rctx->b and initialize the
 * active-query list. render_condition is only hooked up when the chip
 * reports at least one render backend. */
void r600_query_init(struct r600_common_context *rctx)
{
	rctx->b.create_query = r600_create_query;
	rctx->b.create_batch_query = r600_create_batch_query;
	rctx->b.destroy_query = r600_destroy_query;
	rctx->b.begin_query = r600_begin_query;
	rctx->b.end_query = r600_end_query;
	rctx->b.get_query_result = r600_get_query_result;
	rctx->b.get_query_result_resource = r600_get_query_result_resource;
	rctx->render_cond_atom.emit = r600_emit_query_predication;

	if (((struct r600_common_screen*)rctx->b.screen)->info.max_render_backends > 0)
		rctx->b.render_condition = r600_render_condition;

	list_inithead(&rctx->active_queries);
}

/* Install the per-screen driver-query entry points. */
void r600_init_screen_query_functions(struct r600_common_screen *rscreen)
{
	rscreen->b.get_driver_query_info = r600_get_driver_query_info;
	rscreen->b.get_driver_query_group_info = r600_get_driver_query_group_info;
}