1848b8605Smrg/* 2848b8605Smrg * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> 3b8e80941Smrg * Copyright 2018 Advanced Micro Devices, Inc. 4b8e80941Smrg * All Rights Reserved. 5848b8605Smrg * 6848b8605Smrg * Permission is hereby granted, free of charge, to any person obtaining a 7848b8605Smrg * copy of this software and associated documentation files (the "Software"), 8848b8605Smrg * to deal in the Software without restriction, including without limitation 9848b8605Smrg * on the rights to use, copy, modify, merge, publish, distribute, sub 10848b8605Smrg * license, and/or sell copies of the Software, and to permit persons to whom 11848b8605Smrg * the Software is furnished to do so, subject to the following conditions: 12848b8605Smrg * 13848b8605Smrg * The above copyright notice and this permission notice (including the next 14848b8605Smrg * paragraph) shall be included in all copies or substantial portions of the 15848b8605Smrg * Software. 16848b8605Smrg * 17848b8605Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18848b8605Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19848b8605Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 20848b8605Smrg * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 21848b8605Smrg * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 22848b8605Smrg * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 23848b8605Smrg * USE OR OTHER DEALINGS IN THE SOFTWARE. 24848b8605Smrg */ 25848b8605Smrg 26848b8605Smrg#include "si_pipe.h" 27848b8605Smrg#include "si_public.h" 28b8e80941Smrg#include "si_shader_internal.h" 29b8e80941Smrg#include "si_compute.h" 30848b8605Smrg#include "sid.h" 31848b8605Smrg 32b8e80941Smrg#include "ac_llvm_util.h" 33848b8605Smrg#include "radeon/radeon_uvd.h" 34b8e80941Smrg#include "gallivm/lp_bld_misc.h" 35b8e80941Smrg#include "util/disk_cache.h" 36b8e80941Smrg#include "util/u_log.h" 37848b8605Smrg#include "util/u_memory.h" 38b8e80941Smrg#include "util/u_suballoc.h" 39b8e80941Smrg#include "util/u_tests.h" 40b8e80941Smrg#include "util/u_upload_mgr.h" 41b8e80941Smrg#include "util/xmlconfig.h" 42848b8605Smrg#include "vl/vl_decoder.h" 43b8e80941Smrg#include "driver_ddebug/dd_util.h" 44b8e80941Smrg 45b8e80941Smrgstatic const struct debug_named_value debug_options[] = { 46b8e80941Smrg /* Shader logging options: */ 47b8e80941Smrg { "vs", DBG(VS), "Print vertex shaders" }, 48b8e80941Smrg { "ps", DBG(PS), "Print pixel shaders" }, 49b8e80941Smrg { "gs", DBG(GS), "Print geometry shaders" }, 50b8e80941Smrg { "tcs", DBG(TCS), "Print tessellation control shaders" }, 51b8e80941Smrg { "tes", DBG(TES), "Print tessellation evaluation shaders" }, 52b8e80941Smrg { "cs", DBG(CS), "Print compute shaders" }, 53b8e80941Smrg { "noir", DBG(NO_IR), "Don't print the LLVM IR"}, 54b8e80941Smrg { "notgsi", DBG(NO_TGSI), "Don't print the TGSI"}, 55b8e80941Smrg { "noasm", DBG(NO_ASM), "Don't print disassembled shaders"}, 56b8e80941Smrg { "preoptir", DBG(PREOPT_IR), "Print the LLVM IR before initial optimizations" }, 57b8e80941Smrg 58b8e80941Smrg /* Shader compiler options the shader cache should be aware of: */ 59b8e80941Smrg { "unsafemath", DBG(UNSAFE_MATH), "Enable unsafe math shader optimizations" }, 60b8e80941Smrg { "sisched", DBG(SI_SCHED), "Enable LLVM SI Machine Instruction Scheduler." }, 61b8e80941Smrg { "gisel", DBG(GISEL), "Enable LLVM global instruction selector." }, 62b8e80941Smrg 63b8e80941Smrg /* Shader compiler options (with no effect on the shader cache): */ 64b8e80941Smrg { "checkir", DBG(CHECK_IR), "Enable additional sanity checks on shader IR" }, 65b8e80941Smrg { "mono", DBG(MONOLITHIC_SHADERS), "Use old-style monolithic shaders compiled on demand" }, 66b8e80941Smrg { "nooptvariant", DBG(NO_OPT_VARIANT), "Disable compiling optimized shader variants." }, 67b8e80941Smrg 68b8e80941Smrg /* Information logging options: */ 69b8e80941Smrg { "info", DBG(INFO), "Print driver information" }, 70b8e80941Smrg { "tex", DBG(TEX), "Print texture info" }, 71b8e80941Smrg { "compute", DBG(COMPUTE), "Print compute info" }, 72b8e80941Smrg { "vm", DBG(VM), "Print virtual addresses when creating resources" }, 73b8e80941Smrg 74b8e80941Smrg /* Driver options: */ 75b8e80941Smrg { "forcedma", DBG(FORCE_DMA), "Use asynchronous DMA for all operations when possible." }, 76b8e80941Smrg { "nodma", DBG(NO_ASYNC_DMA), "Disable asynchronous DMA" }, 77b8e80941Smrg { "nowc", DBG(NO_WC), "Disable GTT write combining" }, 78b8e80941Smrg { "check_vm", DBG(CHECK_VM), "Check VM faults and dump debug info." }, 79b8e80941Smrg { "reserve_vmid", DBG(RESERVE_VMID), "Force VMID reservation per context." }, 80b8e80941Smrg { "zerovram", DBG(ZERO_VRAM), "Clear VRAM allocations." }, 81b8e80941Smrg 82b8e80941Smrg /* 3D engine options: */ 83b8e80941Smrg { "switch_on_eop", DBG(SWITCH_ON_EOP), "Program WD/IA to switch on end-of-packet." }, 84b8e80941Smrg { "nooutoforder", DBG(NO_OUT_OF_ORDER), "Disable out-of-order rasterization" }, 85b8e80941Smrg { "nodpbb", DBG(NO_DPBB), "Disable DPBB." }, 86b8e80941Smrg { "nodfsm", DBG(NO_DFSM), "Disable DFSM." }, 87b8e80941Smrg { "dpbb", DBG(DPBB), "Enable DPBB." }, 88b8e80941Smrg { "dfsm", DBG(DFSM), "Enable DFSM." }, 89b8e80941Smrg { "nohyperz", DBG(NO_HYPERZ), "Disable Hyper-Z" }, 90b8e80941Smrg { "norbplus", DBG(NO_RB_PLUS), "Disable RB+." }, 91b8e80941Smrg { "no2d", DBG(NO_2D_TILING), "Disable 2D tiling" }, 92b8e80941Smrg { "notiling", DBG(NO_TILING), "Disable tiling" }, 93b8e80941Smrg { "nodcc", DBG(NO_DCC), "Disable DCC." }, 94b8e80941Smrg { "nodccclear", DBG(NO_DCC_CLEAR), "Disable DCC fast clear." }, 95b8e80941Smrg { "nodccfb", DBG(NO_DCC_FB), "Disable separate DCC on the main framebuffer" }, 96b8e80941Smrg { "nodccmsaa", DBG(NO_DCC_MSAA), "Disable DCC for MSAA" }, 97b8e80941Smrg { "nofmask", DBG(NO_FMASK), "Disable MSAA compression" }, 98b8e80941Smrg 99b8e80941Smrg /* Tests: */ 100b8e80941Smrg { "testdma", DBG(TEST_DMA), "Invoke SDMA tests and exit." }, 101b8e80941Smrg { "testvmfaultcp", DBG(TEST_VMFAULT_CP), "Invoke a CP VM fault test and exit." }, 102b8e80941Smrg { "testvmfaultsdma", DBG(TEST_VMFAULT_SDMA), "Invoke a SDMA VM fault test and exit." }, 103b8e80941Smrg { "testvmfaultshader", DBG(TEST_VMFAULT_SHADER), "Invoke a shader VM fault test and exit." }, 104b8e80941Smrg { "testdmaperf", DBG(TEST_DMA_PERF), "Test DMA performance" }, 105b8e80941Smrg { "testgds", DBG(TEST_GDS), "Test GDS." }, 106b8e80941Smrg { "testgdsmm", DBG(TEST_GDS_MM), "Test GDS memory management." }, 107b8e80941Smrg { "testgdsoamm", DBG(TEST_GDS_OA_MM), "Test GDS OA memory management." }, 108b8e80941Smrg 109b8e80941Smrg DEBUG_NAMED_VALUE_END /* must be last */ 110b8e80941Smrg}; 111b8e80941Smrg 112b8e80941Smrgstatic void si_init_compiler(struct si_screen *sscreen, 113b8e80941Smrg struct ac_llvm_compiler *compiler) 114b8e80941Smrg{ 115b8e80941Smrg /* Only create the less-optimizing version of the compiler on APUs 116b8e80941Smrg * predating Ryzen (Raven). */ 117b8e80941Smrg bool create_low_opt_compiler = !sscreen->info.has_dedicated_vram && 118b8e80941Smrg sscreen->info.chip_class <= VI; 119b8e80941Smrg 120b8e80941Smrg enum ac_target_machine_options tm_options = 121b8e80941Smrg (sscreen->debug_flags & DBG(SI_SCHED) ? AC_TM_SISCHED : 0) | 122b8e80941Smrg (sscreen->debug_flags & DBG(GISEL) ? AC_TM_ENABLE_GLOBAL_ISEL : 0) | 123ac028361Smrg (sscreen->info.chip_class >= VI ? AC_TM_FORCE_ENABLE_XNACK : 0) | 124ac028361Smrg (sscreen->info.chip_class < VI ? AC_TM_FORCE_DISABLE_XNACK : 0) | 125b8e80941Smrg (!sscreen->llvm_has_working_vgpr_indexing ? AC_TM_PROMOTE_ALLOCA_TO_SCRATCH : 0) | 126b8e80941Smrg (sscreen->debug_flags & DBG(CHECK_IR) ? AC_TM_CHECK_IR : 0) | 127b8e80941Smrg (create_low_opt_compiler ? AC_TM_CREATE_LOW_OPT : 0); 128b8e80941Smrg 129b8e80941Smrg ac_init_llvm_once(); 130b8e80941Smrg ac_init_llvm_compiler(compiler, sscreen->info.family, tm_options); 131b8e80941Smrg compiler->passes = ac_create_llvm_passes(compiler->tm); 132b8e80941Smrg 133b8e80941Smrg if (compiler->low_opt_tm) 134b8e80941Smrg compiler->low_opt_passes = ac_create_llvm_passes(compiler->low_opt_tm); 135b8e80941Smrg} 136b8e80941Smrg 137b8e80941Smrgstatic void si_destroy_compiler(struct ac_llvm_compiler *compiler) 138b8e80941Smrg{ 139b8e80941Smrg ac_destroy_llvm_passes(compiler->passes); 140b8e80941Smrg ac_destroy_llvm_passes(compiler->low_opt_passes); 141b8e80941Smrg ac_destroy_llvm_compiler(compiler); 142b8e80941Smrg} 143848b8605Smrg 144848b8605Smrg/* 145848b8605Smrg * pipe_context 146848b8605Smrg */ 147848b8605Smrgstatic void si_destroy_context(struct pipe_context *context) 148848b8605Smrg{ 149848b8605Smrg struct si_context *sctx = (struct si_context *)context; 150b8e80941Smrg int i; 151b8e80941Smrg 152b8e80941Smrg util_queue_finish(&sctx->screen->shader_compiler_queue); 153b8e80941Smrg util_queue_finish(&sctx->screen->shader_compiler_queue_low_priority); 154b8e80941Smrg 155b8e80941Smrg /* Unreference the framebuffer normally to disable related logic 156b8e80941Smrg * properly. 157b8e80941Smrg */ 158b8e80941Smrg struct pipe_framebuffer_state fb = {}; 159b8e80941Smrg if (context->set_framebuffer_state) 160b8e80941Smrg context->set_framebuffer_state(context, &fb); 161848b8605Smrg 162848b8605Smrg si_release_all_descriptors(sctx); 163848b8605Smrg 164b8e80941Smrg pipe_resource_reference(&sctx->esgs_ring, NULL); 165b8e80941Smrg pipe_resource_reference(&sctx->gsvs_ring, NULL); 166b8e80941Smrg pipe_resource_reference(&sctx->tess_rings, NULL); 167848b8605Smrg pipe_resource_reference(&sctx->null_const_buf.buffer, NULL); 168b8e80941Smrg pipe_resource_reference(&sctx->sample_pos_buffer, NULL); 169b8e80941Smrg si_resource_reference(&sctx->border_color_buffer, NULL); 170b8e80941Smrg free(sctx->border_color_table); 171b8e80941Smrg si_resource_reference(&sctx->scratch_buffer, NULL); 172b8e80941Smrg si_resource_reference(&sctx->compute_scratch_buffer, NULL); 173b8e80941Smrg si_resource_reference(&sctx->wait_mem_scratch, NULL); 174b8e80941Smrg 175b8e80941Smrg si_pm4_free_state(sctx, sctx->init_config, ~0); 176b8e80941Smrg if (sctx->init_config_gs_rings) 177b8e80941Smrg si_pm4_free_state(sctx, sctx->init_config_gs_rings, ~0); 178b8e80941Smrg for (i = 0; i < ARRAY_SIZE(sctx->vgt_shader_config); i++) 179b8e80941Smrg si_pm4_delete_state(sctx, vgt_shader_config, sctx->vgt_shader_config[i]); 180b8e80941Smrg 181b8e80941Smrg if (sctx->fixed_func_tcs_shader.cso) 182b8e80941Smrg sctx->b.delete_tcs_state(&sctx->b, sctx->fixed_func_tcs_shader.cso); 183b8e80941Smrg if (sctx->custom_dsa_flush) 184b8e80941Smrg sctx->b.delete_depth_stencil_alpha_state(&sctx->b, sctx->custom_dsa_flush); 185b8e80941Smrg if (sctx->custom_blend_resolve) 186b8e80941Smrg sctx->b.delete_blend_state(&sctx->b, sctx->custom_blend_resolve); 187b8e80941Smrg if (sctx->custom_blend_fmask_decompress) 188b8e80941Smrg sctx->b.delete_blend_state(&sctx->b, sctx->custom_blend_fmask_decompress); 189b8e80941Smrg if (sctx->custom_blend_eliminate_fastclear) 190b8e80941Smrg sctx->b.delete_blend_state(&sctx->b, sctx->custom_blend_eliminate_fastclear); 191b8e80941Smrg if (sctx->custom_blend_dcc_decompress) 192b8e80941Smrg sctx->b.delete_blend_state(&sctx->b, sctx->custom_blend_dcc_decompress); 193b8e80941Smrg if (sctx->vs_blit_pos) 194b8e80941Smrg sctx->b.delete_vs_state(&sctx->b, sctx->vs_blit_pos); 195b8e80941Smrg if (sctx->vs_blit_pos_layered) 196b8e80941Smrg sctx->b.delete_vs_state(&sctx->b, sctx->vs_blit_pos_layered); 197b8e80941Smrg if (sctx->vs_blit_color) 198b8e80941Smrg sctx->b.delete_vs_state(&sctx->b, sctx->vs_blit_color); 199b8e80941Smrg if (sctx->vs_blit_color_layered) 200b8e80941Smrg sctx->b.delete_vs_state(&sctx->b, sctx->vs_blit_color_layered); 201b8e80941Smrg if (sctx->vs_blit_texcoord) 202b8e80941Smrg sctx->b.delete_vs_state(&sctx->b, sctx->vs_blit_texcoord); 203b8e80941Smrg if (sctx->cs_clear_buffer) 204b8e80941Smrg sctx->b.delete_compute_state(&sctx->b, sctx->cs_clear_buffer); 205b8e80941Smrg if (sctx->cs_copy_buffer) 206b8e80941Smrg sctx->b.delete_compute_state(&sctx->b, sctx->cs_copy_buffer); 207b8e80941Smrg if (sctx->cs_copy_image) 208b8e80941Smrg sctx->b.delete_compute_state(&sctx->b, sctx->cs_copy_image); 209b8e80941Smrg if (sctx->cs_copy_image_1d_array) 210b8e80941Smrg sctx->b.delete_compute_state(&sctx->b, sctx->cs_copy_image_1d_array); 211b8e80941Smrg if (sctx->cs_clear_render_target) 212b8e80941Smrg sctx->b.delete_compute_state(&sctx->b, sctx->cs_clear_render_target); 213b8e80941Smrg if (sctx->cs_clear_render_target_1d_array) 214b8e80941Smrg sctx->b.delete_compute_state(&sctx->b, sctx->cs_clear_render_target_1d_array); 215b8e80941Smrg if (sctx->cs_dcc_retile) 216b8e80941Smrg sctx->b.delete_compute_state(&sctx->b, sctx->cs_dcc_retile); 217b8e80941Smrg 218b8e80941Smrg if (sctx->blitter) 219b8e80941Smrg util_blitter_destroy(sctx->blitter); 220b8e80941Smrg 221b8e80941Smrg /* Release DCC stats. */ 222b8e80941Smrg for (int i = 0; i < ARRAY_SIZE(sctx->dcc_stats); i++) { 223b8e80941Smrg assert(!sctx->dcc_stats[i].query_active); 224b8e80941Smrg 225b8e80941Smrg for (int j = 0; j < ARRAY_SIZE(sctx->dcc_stats[i].ps_stats); j++) 226b8e80941Smrg if (sctx->dcc_stats[i].ps_stats[j]) 227b8e80941Smrg sctx->b.destroy_query(&sctx->b, 228b8e80941Smrg sctx->dcc_stats[i].ps_stats[j]); 229b8e80941Smrg 230b8e80941Smrg si_texture_reference(&sctx->dcc_stats[i].tex, NULL); 231b8e80941Smrg } 232848b8605Smrg 233b8e80941Smrg if (sctx->query_result_shader) 234b8e80941Smrg sctx->b.delete_compute_state(&sctx->b, sctx->query_result_shader); 235848b8605Smrg 236b8e80941Smrg if (sctx->gfx_cs) 237b8e80941Smrg sctx->ws->cs_destroy(sctx->gfx_cs); 238b8e80941Smrg if (sctx->dma_cs) 239b8e80941Smrg sctx->ws->cs_destroy(sctx->dma_cs); 240b8e80941Smrg if (sctx->ctx) 241b8e80941Smrg sctx->ws->ctx_destroy(sctx->ctx); 242b8e80941Smrg 243b8e80941Smrg if (sctx->b.stream_uploader) 244b8e80941Smrg u_upload_destroy(sctx->b.stream_uploader); 245b8e80941Smrg if (sctx->b.const_uploader) 246b8e80941Smrg u_upload_destroy(sctx->b.const_uploader); 247b8e80941Smrg if (sctx->cached_gtt_allocator) 248b8e80941Smrg u_upload_destroy(sctx->cached_gtt_allocator); 249b8e80941Smrg 250b8e80941Smrg slab_destroy_child(&sctx->pool_transfers); 251b8e80941Smrg slab_destroy_child(&sctx->pool_transfers_unsync); 252b8e80941Smrg 253b8e80941Smrg if (sctx->allocator_zeroed_memory) 254b8e80941Smrg u_suballocator_destroy(sctx->allocator_zeroed_memory); 255b8e80941Smrg 256b8e80941Smrg sctx->ws->fence_reference(&sctx->last_gfx_fence, NULL); 257b8e80941Smrg sctx->ws->fence_reference(&sctx->last_sdma_fence, NULL); 258b8e80941Smrg si_resource_reference(&sctx->eop_bug_scratch, NULL); 259848b8605Smrg 260b8e80941Smrg si_destroy_compiler(&sctx->compiler); 261848b8605Smrg 262b8e80941Smrg si_saved_cs_reference(&sctx->current_saved_cs, NULL); 263848b8605Smrg 264b8e80941Smrg _mesa_hash_table_destroy(sctx->tex_handles, NULL); 265b8e80941Smrg _mesa_hash_table_destroy(sctx->img_handles, NULL); 266b8e80941Smrg 267b8e80941Smrg util_dynarray_fini(&sctx->resident_tex_handles); 268b8e80941Smrg util_dynarray_fini(&sctx->resident_img_handles); 269b8e80941Smrg util_dynarray_fini(&sctx->resident_tex_needs_color_decompress); 270b8e80941Smrg util_dynarray_fini(&sctx->resident_img_needs_color_decompress); 271b8e80941Smrg util_dynarray_fini(&sctx->resident_tex_needs_depth_decompress); 272b8e80941Smrg si_unref_sdma_uploads(sctx); 273848b8605Smrg FREE(sctx); 274848b8605Smrg} 275848b8605Smrg 276b8e80941Smrgstatic enum pipe_reset_status si_get_reset_status(struct pipe_context *ctx) 277b8e80941Smrg{ 278b8e80941Smrg struct si_context *sctx = (struct si_context *)ctx; 279b8e80941Smrg 280b8e80941Smrg if (sctx->screen->info.has_gpu_reset_status_query) 281b8e80941Smrg return sctx->ws->ctx_query_reset_status(sctx->ctx); 282b8e80941Smrg 283b8e80941Smrg if (sctx->screen->info.has_gpu_reset_counter_query) { 284b8e80941Smrg unsigned latest = sctx->ws->query_value(sctx->ws, 285b8e80941Smrg RADEON_GPU_RESET_COUNTER); 286b8e80941Smrg 287b8e80941Smrg if (sctx->gpu_reset_counter == latest) 288b8e80941Smrg return PIPE_NO_RESET; 289b8e80941Smrg 290b8e80941Smrg sctx->gpu_reset_counter = latest; 291b8e80941Smrg return PIPE_UNKNOWN_CONTEXT_RESET; 292b8e80941Smrg } 293b8e80941Smrg 294b8e80941Smrg return PIPE_NO_RESET; 295b8e80941Smrg} 296b8e80941Smrg 297b8e80941Smrgstatic void si_set_device_reset_callback(struct pipe_context *ctx, 298b8e80941Smrg const struct pipe_device_reset_callback *cb) 299b8e80941Smrg{ 300b8e80941Smrg struct si_context *sctx = (struct si_context *)ctx; 301b8e80941Smrg 302b8e80941Smrg if (cb) 303b8e80941Smrg sctx->device_reset_callback = *cb; 304b8e80941Smrg else 305b8e80941Smrg memset(&sctx->device_reset_callback, 0, 306b8e80941Smrg sizeof(sctx->device_reset_callback)); 307b8e80941Smrg} 308b8e80941Smrg 309b8e80941Smrgbool si_check_device_reset(struct si_context *sctx) 310b8e80941Smrg{ 311b8e80941Smrg enum pipe_reset_status status; 312b8e80941Smrg 313b8e80941Smrg if (!sctx->device_reset_callback.reset) 314b8e80941Smrg return false; 315b8e80941Smrg 316b8e80941Smrg if (!sctx->b.get_device_reset_status) 317b8e80941Smrg return false; 318b8e80941Smrg 319b8e80941Smrg status = sctx->b.get_device_reset_status(&sctx->b); 320b8e80941Smrg if (status == PIPE_NO_RESET) 321b8e80941Smrg return false; 322b8e80941Smrg 323b8e80941Smrg sctx->device_reset_callback.reset(sctx->device_reset_callback.data, status); 324b8e80941Smrg return true; 325b8e80941Smrg} 326b8e80941Smrg 327b8e80941Smrg/* Apitrace profiling: 328b8e80941Smrg * 1) qapitrace : Tools -> Profile: Measure CPU & GPU times 329b8e80941Smrg * 2) In the middle panel, zoom in (mouse wheel) on some bad draw call 330b8e80941Smrg * and remember its number. 331b8e80941Smrg * 3) In Mesa, enable queries and performance counters around that draw 332b8e80941Smrg * call and print the results. 333b8e80941Smrg * 4) glretrace --benchmark --markers .. 334b8e80941Smrg */ 335b8e80941Smrgstatic void si_emit_string_marker(struct pipe_context *ctx, 336b8e80941Smrg const char *string, int len) 337b8e80941Smrg{ 338b8e80941Smrg struct si_context *sctx = (struct si_context *)ctx; 339b8e80941Smrg 340b8e80941Smrg dd_parse_apitrace_marker(string, len, &sctx->apitrace_call_number); 341b8e80941Smrg 342b8e80941Smrg if (sctx->log) 343b8e80941Smrg u_log_printf(sctx->log, "\nString marker: %*s\n", len, string); 344b8e80941Smrg} 345b8e80941Smrg 346b8e80941Smrgstatic void si_set_debug_callback(struct pipe_context *ctx, 347b8e80941Smrg const struct pipe_debug_callback *cb) 348b8e80941Smrg{ 349b8e80941Smrg struct si_context *sctx = (struct si_context *)ctx; 350b8e80941Smrg struct si_screen *screen = sctx->screen; 351b8e80941Smrg 352b8e80941Smrg util_queue_finish(&screen->shader_compiler_queue); 353b8e80941Smrg util_queue_finish(&screen->shader_compiler_queue_low_priority); 354b8e80941Smrg 355b8e80941Smrg if (cb) 356b8e80941Smrg sctx->debug = *cb; 357b8e80941Smrg else 358b8e80941Smrg memset(&sctx->debug, 0, sizeof(sctx->debug)); 359b8e80941Smrg} 360b8e80941Smrg 361b8e80941Smrgstatic void si_set_log_context(struct pipe_context *ctx, 362b8e80941Smrg struct u_log_context *log) 363b8e80941Smrg{ 364b8e80941Smrg struct si_context *sctx = (struct si_context *)ctx; 365b8e80941Smrg sctx->log = log; 366b8e80941Smrg 367b8e80941Smrg if (log) 368b8e80941Smrg u_log_add_auto_logger(log, si_auto_log_cs, sctx); 369b8e80941Smrg} 370b8e80941Smrg 371b8e80941Smrgstatic void si_set_context_param(struct pipe_context *ctx, 372b8e80941Smrg enum pipe_context_param param, 373b8e80941Smrg unsigned value) 374b8e80941Smrg{ 375b8e80941Smrg struct radeon_winsys *ws = ((struct si_context *)ctx)->ws; 376b8e80941Smrg 377b8e80941Smrg switch (param) { 378b8e80941Smrg case PIPE_CONTEXT_PARAM_PIN_THREADS_TO_L3_CACHE: 379b8e80941Smrg ws->pin_threads_to_L3_cache(ws, value); 380b8e80941Smrg break; 381b8e80941Smrg default:; 382b8e80941Smrg } 383b8e80941Smrg} 384b8e80941Smrg 385b8e80941Smrgstatic struct pipe_context *si_create_context(struct pipe_screen *screen, 386b8e80941Smrg unsigned flags) 387848b8605Smrg{ 388848b8605Smrg struct si_context *sctx = CALLOC_STRUCT(si_context); 389848b8605Smrg struct si_screen* sscreen = (struct si_screen *)screen; 390b8e80941Smrg struct radeon_winsys *ws = sscreen->ws; 391848b8605Smrg int shader, i; 392b8e80941Smrg bool stop_exec_on_failure = (flags & PIPE_CONTEXT_LOSE_CONTEXT_ON_RESET) != 0; 393848b8605Smrg 394b8e80941Smrg if (!sctx) 395848b8605Smrg return NULL; 396848b8605Smrg 397b8e80941Smrg sctx->has_graphics = sscreen->info.chip_class == SI || 398b8e80941Smrg !(flags & PIPE_CONTEXT_COMPUTE_ONLY); 399b8e80941Smrg 400b8e80941Smrg if (flags & PIPE_CONTEXT_DEBUG) 401b8e80941Smrg sscreen->record_llvm_ir = true; /* racy but not critical */ 402b8e80941Smrg 403b8e80941Smrg sctx->b.screen = screen; /* this must be set first */ 404b8e80941Smrg sctx->b.priv = NULL; 405b8e80941Smrg sctx->b.destroy = si_destroy_context; 406848b8605Smrg sctx->screen = sscreen; /* Easy accessing of screen/winsys. */ 407b8e80941Smrg sctx->is_debug = (flags & PIPE_CONTEXT_DEBUG) != 0; 408b8e80941Smrg 409b8e80941Smrg slab_create_child(&sctx->pool_transfers, &sscreen->pool_transfers); 410b8e80941Smrg slab_create_child(&sctx->pool_transfers_unsync, &sscreen->pool_transfers); 411b8e80941Smrg 412b8e80941Smrg sctx->ws = sscreen->ws; 413b8e80941Smrg sctx->family = sscreen->info.family; 414b8e80941Smrg sctx->chip_class = sscreen->info.chip_class; 415b8e80941Smrg 416b8e80941Smrg if (sscreen->info.has_gpu_reset_counter_query) { 417b8e80941Smrg sctx->gpu_reset_counter = 418b8e80941Smrg sctx->ws->query_value(sctx->ws, RADEON_GPU_RESET_COUNTER); 419b8e80941Smrg } 420b8e80941Smrg 421b8e80941Smrg 422b8e80941Smrg if (sctx->chip_class == CIK || 423b8e80941Smrg sctx->chip_class == VI || 424b8e80941Smrg sctx->chip_class == GFX9) { 425b8e80941Smrg sctx->eop_bug_scratch = si_resource( 426b8e80941Smrg pipe_buffer_create(&sscreen->b, 0, PIPE_USAGE_DEFAULT, 427b8e80941Smrg 16 * sscreen->info.num_render_backends)); 428b8e80941Smrg if (!sctx->eop_bug_scratch) 429b8e80941Smrg goto fail; 430b8e80941Smrg } 431b8e80941Smrg 432b8e80941Smrg /* Initialize context allocators. */ 433b8e80941Smrg sctx->allocator_zeroed_memory = 434b8e80941Smrg u_suballocator_create(&sctx->b, 128 * 1024, 435b8e80941Smrg 0, PIPE_USAGE_DEFAULT, 436b8e80941Smrg SI_RESOURCE_FLAG_UNMAPPABLE | 437b8e80941Smrg SI_RESOURCE_FLAG_CLEAR, false); 438b8e80941Smrg if (!sctx->allocator_zeroed_memory) 439b8e80941Smrg goto fail; 440b8e80941Smrg 441b8e80941Smrg sctx->b.stream_uploader = u_upload_create(&sctx->b, 1024 * 1024, 442b8e80941Smrg 0, PIPE_USAGE_STREAM, 443b8e80941Smrg SI_RESOURCE_FLAG_READ_ONLY); 444b8e80941Smrg if (!sctx->b.stream_uploader) 445b8e80941Smrg goto fail; 446b8e80941Smrg 447b8e80941Smrg sctx->cached_gtt_allocator = u_upload_create(&sctx->b, 16 * 1024, 448b8e80941Smrg 0, PIPE_USAGE_STAGING, 0); 449b8e80941Smrg if (!sctx->cached_gtt_allocator) 450b8e80941Smrg goto fail; 451b8e80941Smrg 452b8e80941Smrg sctx->ctx = sctx->ws->ctx_create(sctx->ws); 453b8e80941Smrg if (!sctx->ctx) 454b8e80941Smrg goto fail; 455b8e80941Smrg 456b8e80941Smrg if (sscreen->info.num_sdma_rings && !(sscreen->debug_flags & DBG(NO_ASYNC_DMA))) { 457b8e80941Smrg sctx->dma_cs = sctx->ws->cs_create(sctx->ctx, RING_DMA, 458b8e80941Smrg (void*)si_flush_dma_cs, 459b8e80941Smrg sctx, stop_exec_on_failure); 460b8e80941Smrg } 461b8e80941Smrg 462b8e80941Smrg bool use_sdma_upload = sscreen->info.has_dedicated_vram && sctx->dma_cs; 463b8e80941Smrg sctx->b.const_uploader = u_upload_create(&sctx->b, 256 * 1024, 464b8e80941Smrg 0, PIPE_USAGE_DEFAULT, 465b8e80941Smrg SI_RESOURCE_FLAG_32BIT | 466b8e80941Smrg (use_sdma_upload ? 467b8e80941Smrg SI_RESOURCE_FLAG_UPLOAD_FLUSH_EXPLICIT_VIA_SDMA : 0)); 468b8e80941Smrg if (!sctx->b.const_uploader) 469b8e80941Smrg goto fail; 470b8e80941Smrg 471b8e80941Smrg if (use_sdma_upload) 472b8e80941Smrg u_upload_enable_flush_explicit(sctx->b.const_uploader); 473b8e80941Smrg 474b8e80941Smrg sctx->gfx_cs = ws->cs_create(sctx->ctx, 475b8e80941Smrg sctx->has_graphics ? RING_GFX : RING_COMPUTE, 476b8e80941Smrg (void*)si_flush_gfx_cs, sctx, stop_exec_on_failure); 477b8e80941Smrg 478b8e80941Smrg /* Border colors. */ 479b8e80941Smrg sctx->border_color_table = malloc(SI_MAX_BORDER_COLORS * 480b8e80941Smrg sizeof(*sctx->border_color_table)); 481b8e80941Smrg if (!sctx->border_color_table) 482b8e80941Smrg goto fail; 483b8e80941Smrg 484b8e80941Smrg sctx->border_color_buffer = si_resource( 485b8e80941Smrg pipe_buffer_create(screen, 0, PIPE_USAGE_DEFAULT, 486b8e80941Smrg SI_MAX_BORDER_COLORS * 487b8e80941Smrg sizeof(*sctx->border_color_table))); 488b8e80941Smrg if (!sctx->border_color_buffer) 489b8e80941Smrg goto fail; 490848b8605Smrg 491b8e80941Smrg sctx->border_color_map = 492b8e80941Smrg ws->buffer_map(sctx->border_color_buffer->buf, 493b8e80941Smrg NULL, PIPE_TRANSFER_WRITE); 494b8e80941Smrg if (!sctx->border_color_map) 495848b8605Smrg goto fail; 496848b8605Smrg 497b8e80941Smrg /* Initialize context functions used by graphics and compute. */ 498b8e80941Smrg sctx->b.emit_string_marker = si_emit_string_marker; 499b8e80941Smrg sctx->b.set_debug_callback = si_set_debug_callback; 500b8e80941Smrg sctx->b.set_log_context = si_set_log_context; 501b8e80941Smrg sctx->b.set_context_param = si_set_context_param; 502b8e80941Smrg sctx->b.get_device_reset_status = si_get_reset_status; 503b8e80941Smrg sctx->b.set_device_reset_callback = si_set_device_reset_callback; 504b8e80941Smrg 505b8e80941Smrg si_init_all_descriptors(sctx); 506b8e80941Smrg si_init_buffer_functions(sctx); 507b8e80941Smrg si_init_clear_functions(sctx); 508848b8605Smrg si_init_blit_functions(sctx); 509848b8605Smrg si_init_compute_functions(sctx); 510b8e80941Smrg si_init_compute_blit_functions(sctx); 511b8e80941Smrg si_init_debug_functions(sctx); 512b8e80941Smrg si_init_fence_functions(sctx); 513b8e80941Smrg si_init_state_compute_functions(sctx); 514b8e80941Smrg 515b8e80941Smrg /* Initialize graphics-only context functions. */ 516b8e80941Smrg if (sctx->has_graphics) { 517b8e80941Smrg si_init_context_texture_functions(sctx); 518b8e80941Smrg si_init_query_functions(sctx); 519b8e80941Smrg si_init_msaa_functions(sctx); 520b8e80941Smrg si_init_shader_functions(sctx); 521b8e80941Smrg si_init_state_functions(sctx); 522b8e80941Smrg si_init_streamout_functions(sctx); 523b8e80941Smrg si_init_viewport_functions(sctx); 524848b8605Smrg 525b8e80941Smrg sctx->blitter = util_blitter_create(&sctx->b); 526b8e80941Smrg if (sctx->blitter == NULL) 527b8e80941Smrg goto fail; 528b8e80941Smrg sctx->blitter->skip_viewport_restore = true; 529b8e80941Smrg 530b8e80941Smrg si_init_draw_functions(sctx); 531848b8605Smrg } 532848b8605Smrg 533b8e80941Smrg /* Initialize SDMA functions. */ 534b8e80941Smrg if (sctx->chip_class >= CIK) 535b8e80941Smrg cik_init_sdma_functions(sctx); 536b8e80941Smrg else 537b8e80941Smrg si_init_dma_functions(sctx); 538848b8605Smrg 539b8e80941Smrg if (sscreen->debug_flags & DBG(FORCE_DMA)) 540b8e80941Smrg sctx->b.resource_copy_region = sctx->dma_copy; 541848b8605Smrg 542b8e80941Smrg sctx->sample_mask = 0xffff; 543848b8605Smrg 544b8e80941Smrg /* Initialize multimedia functions. */ 545b8e80941Smrg if (sscreen->info.has_hw_decode) { 546b8e80941Smrg sctx->b.create_video_codec = si_uvd_create_decoder; 547b8e80941Smrg sctx->b.create_video_buffer = si_video_buffer_create; 548b8e80941Smrg } else { 549b8e80941Smrg sctx->b.create_video_codec = vl_create_decoder; 550b8e80941Smrg sctx->b.create_video_buffer = vl_video_buffer_create; 551b8e80941Smrg } 552848b8605Smrg 553b8e80941Smrg if (sctx->chip_class >= GFX9) { 554b8e80941Smrg sctx->wait_mem_scratch = si_resource( 555b8e80941Smrg pipe_buffer_create(screen, 0, PIPE_USAGE_DEFAULT, 4)); 556b8e80941Smrg if (!sctx->wait_mem_scratch) 557b8e80941Smrg goto fail; 558848b8605Smrg 559b8e80941Smrg /* Initialize the memory. */ 560b8e80941Smrg si_cp_write_data(sctx, sctx->wait_mem_scratch, 0, 4, 561b8e80941Smrg V_370_MEM, V_370_ME, &sctx->wait_mem_number); 562848b8605Smrg } 563848b8605Smrg 564b8e80941Smrg /* CIK cannot unbind a constant buffer (S_BUFFER_LOAD doesn't skip loads 565b8e80941Smrg * if NUM_RECORDS == 0). We need to use a dummy buffer instead. */ 566b8e80941Smrg if (sctx->chip_class == CIK) { 567b8e80941Smrg sctx->null_const_buf.buffer = 568b8e80941Smrg pipe_aligned_buffer_create(screen, 569b8e80941Smrg SI_RESOURCE_FLAG_32BIT, 570b8e80941Smrg PIPE_USAGE_DEFAULT, 16, 571b8e80941Smrg sctx->screen->info.tcc_cache_line_size); 572b8e80941Smrg if (!sctx->null_const_buf.buffer) 573b8e80941Smrg goto fail; 574848b8605Smrg sctx->null_const_buf.buffer_size = sctx->null_const_buf.buffer->width0; 575848b8605Smrg 576b8e80941Smrg unsigned start_shader = sctx->has_graphics ? 0 : PIPE_SHADER_COMPUTE; 577b8e80941Smrg for (shader = start_shader; shader < SI_NUM_SHADERS; shader++) { 578848b8605Smrg for (i = 0; i < SI_NUM_CONST_BUFFERS; i++) { 579b8e80941Smrg sctx->b.set_constant_buffer(&sctx->b, shader, i, 580848b8605Smrg &sctx->null_const_buf); 581848b8605Smrg } 582848b8605Smrg } 583848b8605Smrg 584b8e80941Smrg si_set_rw_buffer(sctx, SI_HS_CONST_DEFAULT_TESS_LEVELS, 585b8e80941Smrg &sctx->null_const_buf); 586b8e80941Smrg si_set_rw_buffer(sctx, SI_VS_CONST_INSTANCE_DIVISORS, 587b8e80941Smrg &sctx->null_const_buf); 588b8e80941Smrg si_set_rw_buffer(sctx, SI_VS_CONST_CLIP_PLANES, 589b8e80941Smrg &sctx->null_const_buf); 590b8e80941Smrg si_set_rw_buffer(sctx, SI_PS_CONST_POLY_STIPPLE, 591b8e80941Smrg &sctx->null_const_buf); 592b8e80941Smrg si_set_rw_buffer(sctx, SI_PS_CONST_SAMPLE_POSITIONS, 593b8e80941Smrg &sctx->null_const_buf); 594848b8605Smrg } 595848b8605Smrg 596b8e80941Smrg uint64_t max_threads_per_block; 597b8e80941Smrg screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI, 598b8e80941Smrg PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK, 599b8e80941Smrg &max_threads_per_block); 600b8e80941Smrg 601b8e80941Smrg /* The maximum number of scratch waves. Scratch space isn't divided 602b8e80941Smrg * evenly between CUs. The number is only a function of the number of CUs. 603b8e80941Smrg * We can decrease the constant to decrease the scratch buffer size. 604b8e80941Smrg * 605b8e80941Smrg * sctx->scratch_waves must be >= the maximum posible size of 606b8e80941Smrg * 1 threadgroup, so that the hw doesn't hang from being unable 607b8e80941Smrg * to start any. 608b8e80941Smrg * 609b8e80941Smrg * The recommended value is 4 per CU at most. Higher numbers don't 610b8e80941Smrg * bring much benefit, but they still occupy chip resources (think 611b8e80941Smrg * async compute). I've seen ~2% performance difference between 4 and 32. 612b8e80941Smrg */ 613b8e80941Smrg sctx->scratch_waves = MAX2(32 * sscreen->info.num_good_compute_units, 614b8e80941Smrg max_threads_per_block / 64); 615b8e80941Smrg 616b8e80941Smrg si_init_compiler(sscreen, &sctx->compiler); 617b8e80941Smrg 618b8e80941Smrg /* Bindless handles. */ 619b8e80941Smrg sctx->tex_handles = _mesa_hash_table_create(NULL, _mesa_hash_pointer, 620b8e80941Smrg _mesa_key_pointer_equal); 621b8e80941Smrg sctx->img_handles = _mesa_hash_table_create(NULL, _mesa_hash_pointer, 622b8e80941Smrg _mesa_key_pointer_equal); 623b8e80941Smrg 624b8e80941Smrg util_dynarray_init(&sctx->resident_tex_handles, NULL); 625b8e80941Smrg util_dynarray_init(&sctx->resident_img_handles, NULL); 626b8e80941Smrg util_dynarray_init(&sctx->resident_tex_needs_color_decompress, NULL); 627b8e80941Smrg util_dynarray_init(&sctx->resident_img_needs_color_decompress, NULL); 628b8e80941Smrg util_dynarray_init(&sctx->resident_tex_needs_depth_decompress, NULL); 629b8e80941Smrg 630b8e80941Smrg sctx->sample_pos_buffer = 631b8e80941Smrg pipe_buffer_create(sctx->b.screen, 0, PIPE_USAGE_DEFAULT, 632b8e80941Smrg sizeof(sctx->sample_positions)); 633b8e80941Smrg pipe_buffer_write(&sctx->b, sctx->sample_pos_buffer, 0, 634b8e80941Smrg sizeof(sctx->sample_positions), &sctx->sample_positions); 635b8e80941Smrg 636b8e80941Smrg /* this must be last */ 637b8e80941Smrg si_begin_new_gfx_cs(sctx); 638b8e80941Smrg 639b8e80941Smrg if (sctx->chip_class == CIK) { 640b8e80941Smrg /* Clear the NULL constant buffer, because loads should return zeros. 641b8e80941Smrg * Note that this forces CP DMA to be used, because clover deadlocks 642b8e80941Smrg * for some reason when the compute codepath is used. 643b8e80941Smrg */ 644b8e80941Smrg uint32_t clear_value = 0; 645b8e80941Smrg si_clear_buffer(sctx, sctx->null_const_buf.buffer, 0, 646b8e80941Smrg sctx->null_const_buf.buffer->width0, 647b8e80941Smrg &clear_value, 4, SI_COHERENCY_SHADER, true); 648b8e80941Smrg } 649b8e80941Smrg return &sctx->b; 650848b8605Smrgfail: 651b8e80941Smrg fprintf(stderr, "radeonsi: Failed to create a context.\n"); 652b8e80941Smrg si_destroy_context(&sctx->b); 653848b8605Smrg return NULL; 654848b8605Smrg} 655848b8605Smrg 656b8e80941Smrgstatic struct pipe_context *si_pipe_create_context(struct pipe_screen *screen, 657b8e80941Smrg void *priv, unsigned flags) 658b8e80941Smrg{ 659b8e80941Smrg struct si_screen *sscreen = (struct si_screen *)screen; 660b8e80941Smrg struct pipe_context *ctx; 661b8e80941Smrg 662b8e80941Smrg if (sscreen->debug_flags & DBG(CHECK_VM)) 663b8e80941Smrg flags |= PIPE_CONTEXT_DEBUG; 664b8e80941Smrg 665b8e80941Smrg ctx = si_create_context(screen, flags); 666b8e80941Smrg 667b8e80941Smrg if (!(flags & PIPE_CONTEXT_PREFER_THREADED)) 668b8e80941Smrg return ctx; 669b8e80941Smrg 670b8e80941Smrg /* Clover (compute-only) is unsupported. */ 671b8e80941Smrg if (flags & PIPE_CONTEXT_COMPUTE_ONLY) 672b8e80941Smrg return ctx; 673b8e80941Smrg 674b8e80941Smrg /* When shaders are logged to stderr, asynchronous compilation is 675b8e80941Smrg * disabled too. */ 676b8e80941Smrg if (sscreen->debug_flags & DBG_ALL_SHADERS) 677b8e80941Smrg return ctx; 678b8e80941Smrg 679b8e80941Smrg /* Use asynchronous flushes only on amdgpu, since the radeon 680b8e80941Smrg * implementation for fence_server_sync is incomplete. */ 681b8e80941Smrg return threaded_context_create(ctx, &sscreen->pool_transfers, 682b8e80941Smrg si_replace_buffer_storage, 683b8e80941Smrg sscreen->info.drm_major >= 3 ? si_create_fence : NULL, 684b8e80941Smrg &((struct si_context*)ctx)->tc); 685b8e80941Smrg} 686b8e80941Smrg 687848b8605Smrg/* 688848b8605Smrg * pipe_screen 689848b8605Smrg */ 690b8e80941Smrgstatic void si_destroy_screen(struct pipe_screen* pscreen) 691848b8605Smrg{ 692848b8605Smrg struct si_screen *sscreen = (struct si_screen *)pscreen; 693b8e80941Smrg struct si_shader_part *parts[] = { 694b8e80941Smrg sscreen->vs_prologs, 695b8e80941Smrg sscreen->tcs_epilogs, 696b8e80941Smrg sscreen->gs_prologs, 697b8e80941Smrg sscreen->ps_prologs, 698b8e80941Smrg sscreen->ps_epilogs 699b8e80941Smrg }; 700b8e80941Smrg unsigned i; 701b8e80941Smrg 702b8e80941Smrg if (!sscreen->ws->unref(sscreen->ws)) 703b8e80941Smrg return; 704848b8605Smrg 705b8e80941Smrg mtx_destroy(&sscreen->aux_context_lock); 706b8e80941Smrg 707b8e80941Smrg struct u_log_context *aux_log = ((struct si_context *)sscreen->aux_context)->log; 708b8e80941Smrg if (aux_log) { 709b8e80941Smrg sscreen->aux_context->set_log_context(sscreen->aux_context, NULL); 710b8e80941Smrg u_log_context_destroy(aux_log); 711b8e80941Smrg FREE(aux_log); 712b8e80941Smrg } 713b8e80941Smrg 714b8e80941Smrg sscreen->aux_context->destroy(sscreen->aux_context); 715b8e80941Smrg 716b8e80941Smrg util_queue_destroy(&sscreen->shader_compiler_queue); 717b8e80941Smrg util_queue_destroy(&sscreen->shader_compiler_queue_low_priority); 718b8e80941Smrg 719b8e80941Smrg for (i = 0; i < ARRAY_SIZE(sscreen->compiler); i++) 720b8e80941Smrg si_destroy_compiler(&sscreen->compiler[i]); 721b8e80941Smrg 722b8e80941Smrg for (i = 0; i < ARRAY_SIZE(sscreen->compiler_lowp); i++) 723b8e80941Smrg si_destroy_compiler(&sscreen->compiler_lowp[i]); 724b8e80941Smrg 725b8e80941Smrg /* Free shader parts. */ 726b8e80941Smrg for (i = 0; i < ARRAY_SIZE(parts); i++) { 727b8e80941Smrg while (parts[i]) { 728b8e80941Smrg struct si_shader_part *part = parts[i]; 729b8e80941Smrg 730b8e80941Smrg parts[i] = part->next; 731b8e80941Smrg ac_shader_binary_clean(&part->binary); 732b8e80941Smrg FREE(part); 733b8e80941Smrg } 734b8e80941Smrg } 735b8e80941Smrg mtx_destroy(&sscreen->shader_parts_mutex); 736b8e80941Smrg si_destroy_shader_cache(sscreen); 737b8e80941Smrg 738b8e80941Smrg si_destroy_perfcounters(sscreen); 739b8e80941Smrg si_gpu_load_kill_thread(sscreen); 740b8e80941Smrg 741b8e80941Smrg mtx_destroy(&sscreen->gpu_load_mutex); 742b8e80941Smrg 743b8e80941Smrg slab_destroy_parent(&sscreen->pool_transfers); 744b8e80941Smrg 745b8e80941Smrg disk_cache_destroy(sscreen->disk_shader_cache); 746b8e80941Smrg sscreen->ws->destroy(sscreen->ws); 747b8e80941Smrg FREE(sscreen); 748848b8605Smrg} 749848b8605Smrg 750b8e80941Smrgstatic void si_init_gs_info(struct si_screen *sscreen) 751848b8605Smrg{ 752b8e80941Smrg sscreen->gs_table_depth = ac_get_gs_table_depth(sscreen->info.chip_class, 753b8e80941Smrg sscreen->info.family); 754b8e80941Smrg} 755b8e80941Smrg 756b8e80941Smrgstatic void si_test_vmfault(struct si_screen *sscreen) 757b8e80941Smrg{ 758b8e80941Smrg struct pipe_context *ctx = sscreen->aux_context; 759b8e80941Smrg struct si_context *sctx = (struct si_context *)ctx; 760b8e80941Smrg struct pipe_resource *buf = 761b8e80941Smrg pipe_buffer_create_const0(&sscreen->b, 0, PIPE_USAGE_DEFAULT, 64); 762b8e80941Smrg 763b8e80941Smrg if (!buf) { 764b8e80941Smrg puts("Buffer allocation failed."); 765b8e80941Smrg exit(1); 766848b8605Smrg } 767848b8605Smrg 768b8e80941Smrg si_resource(buf)->gpu_address = 0; /* cause a VM fault */ 769b8e80941Smrg 770b8e80941Smrg if (sscreen->debug_flags & DBG(TEST_VMFAULT_CP)) { 771b8e80941Smrg si_cp_dma_copy_buffer(sctx, buf, buf, 0, 4, 4, 0, 772b8e80941Smrg SI_COHERENCY_NONE, L2_BYPASS); 773b8e80941Smrg ctx->flush(ctx, NULL, 0); 774b8e80941Smrg puts("VM fault test: CP - done."); 775b8e80941Smrg } 776b8e80941Smrg if (sscreen->debug_flags & DBG(TEST_VMFAULT_SDMA)) { 777b8e80941Smrg si_sdma_clear_buffer(sctx, buf, 0, 4, 0); 778b8e80941Smrg ctx->flush(ctx, NULL, 0); 779b8e80941Smrg puts("VM fault test: SDMA - done."); 780b8e80941Smrg } 781b8e80941Smrg if (sscreen->debug_flags & DBG(TEST_VMFAULT_SHADER)) { 782b8e80941Smrg util_test_constant_buffer(ctx, buf); 783b8e80941Smrg puts("VM fault test: Shader - done."); 784b8e80941Smrg } 785b8e80941Smrg exit(0); 786848b8605Smrg} 787848b8605Smrg 788b8e80941Smrgstatic void si_test_gds_memory_management(struct si_context *sctx, 789b8e80941Smrg unsigned alloc_size, unsigned alignment, 790b8e80941Smrg enum radeon_bo_domain domain) 791848b8605Smrg{ 792b8e80941Smrg struct radeon_winsys *ws = sctx->ws; 793b8e80941Smrg struct radeon_cmdbuf *cs[8]; 794b8e80941Smrg struct pb_buffer *gds_bo[ARRAY_SIZE(cs)]; 795b8e80941Smrg 796b8e80941Smrg for (unsigned i = 0; i < ARRAY_SIZE(cs); i++) { 797b8e80941Smrg cs[i] = ws->cs_create(sctx->ctx, RING_COMPUTE, 798b8e80941Smrg NULL, NULL, false); 799b8e80941Smrg gds_bo[i] = ws->buffer_create(ws, alloc_size, alignment, domain, 0); 800b8e80941Smrg assert(gds_bo[i]); 801b8e80941Smrg } 802848b8605Smrg 803b8e80941Smrg for (unsigned iterations = 0; iterations < 20000; iterations++) { 804b8e80941Smrg for (unsigned i = 0; i < ARRAY_SIZE(cs); i++) { 805b8e80941Smrg /* This clears GDS with CP DMA. 806b8e80941Smrg * 807b8e80941Smrg * We don't care if GDS is present. Just add some packet 808b8e80941Smrg * to make the GPU busy for a moment. 809b8e80941Smrg */ 810b8e80941Smrg si_cp_dma_clear_buffer(sctx, cs[i], NULL, 0, alloc_size, 0, 811b8e80941Smrg SI_CPDMA_SKIP_BO_LIST_UPDATE | 812b8e80941Smrg SI_CPDMA_SKIP_CHECK_CS_SPACE | 813b8e80941Smrg SI_CPDMA_SKIP_GFX_SYNC, 0, 0); 814b8e80941Smrg 815b8e80941Smrg ws->cs_add_buffer(cs[i], gds_bo[i], domain, 816b8e80941Smrg RADEON_USAGE_READWRITE, 0); 817b8e80941Smrg ws->cs_flush(cs[i], PIPE_FLUSH_ASYNC, NULL); 818b8e80941Smrg } 819b8e80941Smrg } 820b8e80941Smrg exit(0); 821b8e80941Smrg} 822b8e80941Smrg 823b8e80941Smrgstatic void si_disk_cache_create(struct si_screen *sscreen) 824b8e80941Smrg{ 825b8e80941Smrg /* Don't use the cache if shader dumping is enabled. */ 826b8e80941Smrg if (sscreen->debug_flags & DBG_ALL_SHADERS) 827848b8605Smrg return; 828848b8605Smrg 829b8e80941Smrg struct mesa_sha1 ctx; 830b8e80941Smrg unsigned char sha1[20]; 831b8e80941Smrg char cache_id[20 * 2 + 1]; 832b8e80941Smrg 833b8e80941Smrg _mesa_sha1_init(&ctx); 834b8e80941Smrg 835b8e80941Smrg if (!disk_cache_get_function_identifier(si_disk_cache_create, &ctx) || 836b8e80941Smrg !disk_cache_get_function_identifier(LLVMInitializeAMDGPUTargetInfo, 837b8e80941Smrg &ctx)) 838848b8605Smrg return; 839848b8605Smrg 840b8e80941Smrg _mesa_sha1_final(&ctx, sha1); 841b8e80941Smrg disk_cache_format_hex_id(cache_id, sha1, 20 * 2); 842848b8605Smrg 843b8e80941Smrg /* These flags affect shader compilation. */ 844b8e80941Smrg #define ALL_FLAGS (DBG(FS_CORRECT_DERIVS_AFTER_KILL) | \ 845b8e80941Smrg DBG(SI_SCHED) | \ 846b8e80941Smrg DBG(GISEL) | \ 847b8e80941Smrg DBG(UNSAFE_MATH)) 848b8e80941Smrg uint64_t shader_debug_flags = sscreen->debug_flags & 849b8e80941Smrg ALL_FLAGS; 850848b8605Smrg 851b8e80941Smrg /* Add the high bits of 32-bit addresses, which affects 852b8e80941Smrg * how 32-bit addresses are expanded to 64 bits. 853b8e80941Smrg */ 854b8e80941Smrg STATIC_ASSERT(ALL_FLAGS <= UINT_MAX); 855b8e80941Smrg assert((int16_t)sscreen->info.address32_hi == (int32_t)sscreen->info.address32_hi); 856b8e80941Smrg shader_debug_flags |= (uint64_t)(sscreen->info.address32_hi & 0xffff) << 32; 857b8e80941Smrg 858b8e80941Smrg if (sscreen->options.enable_nir) 859b8e80941Smrg shader_debug_flags |= 1ull << 48; 860b8e80941Smrg 861b8e80941Smrg sscreen->disk_shader_cache = 862b8e80941Smrg disk_cache_create(sscreen->info.name, 863b8e80941Smrg cache_id, 864b8e80941Smrg shader_debug_flags); 865b8e80941Smrg} 866b8e80941Smrg 867b8e80941Smrgstatic void si_set_max_shader_compiler_threads(struct pipe_screen *screen, 868b8e80941Smrg unsigned max_threads) 869848b8605Smrg{ 870b8e80941Smrg struct si_screen *sscreen = (struct si_screen *)screen; 871848b8605Smrg 872b8e80941Smrg /* This function doesn't allow a greater number of threads than 873b8e80941Smrg * the queue had at its creation. */ 874b8e80941Smrg util_queue_adjust_num_threads(&sscreen->shader_compiler_queue, 875b8e80941Smrg max_threads); 876b8e80941Smrg /* Don't change the number of threads on the low priority queue. */ 877b8e80941Smrg} 878848b8605Smrg 879b8e80941Smrgstatic bool si_is_parallel_shader_compilation_finished(struct pipe_screen *screen, 880b8e80941Smrg void *shader, 881b8e80941Smrg unsigned shader_type) 882b8e80941Smrg{ 883b8e80941Smrg if (shader_type == PIPE_SHADER_COMPUTE) { 884b8e80941Smrg struct si_compute *cs = (struct si_compute*)shader; 885848b8605Smrg 886b8e80941Smrg return util_queue_fence_is_signalled(&cs->ready); 887848b8605Smrg } 888b8e80941Smrg struct si_shader_selector *sel = (struct si_shader_selector *)shader; 889b8e80941Smrg 890b8e80941Smrg return util_queue_fence_is_signalled(&sel->ready); 891848b8605Smrg} 892848b8605Smrg 893b8e80941Smrgstruct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws, 894b8e80941Smrg const struct pipe_screen_config *config) 895848b8605Smrg{ 896848b8605Smrg struct si_screen *sscreen = CALLOC_STRUCT(si_screen); 897b8e80941Smrg unsigned hw_threads, num_comp_hi_threads, num_comp_lo_threads, i; 898b8e80941Smrg 899b8e80941Smrg if (!sscreen) { 900848b8605Smrg return NULL; 901848b8605Smrg } 902848b8605Smrg 903b8e80941Smrg sscreen->ws = ws; 904b8e80941Smrg ws->query_info(ws, &sscreen->info); 905b8e80941Smrg 906b8e80941Smrg if (sscreen->info.chip_class >= GFX9) { 907b8e80941Smrg sscreen->se_tile_repeat = 32 * sscreen->info.max_se; 908b8e80941Smrg } else { 909b8e80941Smrg ac_get_raster_config(&sscreen->info, 910b8e80941Smrg &sscreen->pa_sc_raster_config, 911b8e80941Smrg &sscreen->pa_sc_raster_config_1, 912b8e80941Smrg &sscreen->se_tile_repeat); 913b8e80941Smrg } 914b8e80941Smrg 915b8e80941Smrg sscreen->debug_flags = debug_get_flags_option("R600_DEBUG", 916b8e80941Smrg debug_options, 0); 917b8e80941Smrg sscreen->debug_flags |= debug_get_flags_option("AMD_DEBUG", 918b8e80941Smrg debug_options, 0); 919b8e80941Smrg 920848b8605Smrg /* Set functions first. */ 921b8e80941Smrg sscreen->b.context_create = si_pipe_create_context; 922b8e80941Smrg sscreen->b.destroy = si_destroy_screen; 923b8e80941Smrg sscreen->b.set_max_shader_compiler_threads = 924b8e80941Smrg si_set_max_shader_compiler_threads; 925b8e80941Smrg sscreen->b.is_parallel_shader_compilation_finished = 926b8e80941Smrg si_is_parallel_shader_compilation_finished; 927b8e80941Smrg 928b8e80941Smrg si_init_screen_get_functions(sscreen); 929b8e80941Smrg si_init_screen_buffer_functions(sscreen); 930b8e80941Smrg si_init_screen_fence_functions(sscreen); 931b8e80941Smrg si_init_screen_state_functions(sscreen); 932b8e80941Smrg si_init_screen_texture_functions(sscreen); 933b8e80941Smrg si_init_screen_query_functions(sscreen); 934b8e80941Smrg 935b8e80941Smrg /* Set these flags in debug_flags early, so that the shader cache takes 936b8e80941Smrg * them into account. 937b8e80941Smrg */ 938b8e80941Smrg if (driQueryOptionb(config->options, 939b8e80941Smrg "glsl_correct_derivatives_after_discard")) 940b8e80941Smrg sscreen->debug_flags |= DBG(FS_CORRECT_DERIVS_AFTER_KILL); 941b8e80941Smrg if (driQueryOptionb(config->options, "radeonsi_enable_sisched")) 942b8e80941Smrg sscreen->debug_flags |= DBG(SI_SCHED); 943b8e80941Smrg 944b8e80941Smrg if (sscreen->debug_flags & DBG(INFO)) 945b8e80941Smrg ac_print_gpu_info(&sscreen->info); 946b8e80941Smrg 947b8e80941Smrg slab_create_parent(&sscreen->pool_transfers, 948b8e80941Smrg sizeof(struct si_transfer), 64); 949b8e80941Smrg 950b8e80941Smrg sscreen->force_aniso = MIN2(16, debug_get_num_option("R600_TEX_ANISO", -1)); 951b8e80941Smrg if (sscreen->force_aniso >= 0) { 952b8e80941Smrg printf("radeonsi: Forcing anisotropy filter to %ix\n", 953b8e80941Smrg /* round down to a power of two */ 954b8e80941Smrg 1 << util_logbase2(sscreen->force_aniso)); 955b8e80941Smrg } 956b8e80941Smrg 957b8e80941Smrg (void) mtx_init(&sscreen->aux_context_lock, mtx_plain); 958b8e80941Smrg (void) mtx_init(&sscreen->gpu_load_mutex, mtx_plain); 959b8e80941Smrg 960b8e80941Smrg si_init_gs_info(sscreen); 961b8e80941Smrg if (!si_init_shader_cache(sscreen)) { 962848b8605Smrg FREE(sscreen); 963848b8605Smrg return NULL; 964848b8605Smrg } 965848b8605Smrg 966b8e80941Smrg si_disk_cache_create(sscreen); 967b8e80941Smrg 968b8e80941Smrg /* Determine the number of shader compiler threads. */ 969b8e80941Smrg hw_threads = sysconf(_SC_NPROCESSORS_ONLN); 970b8e80941Smrg 971b8e80941Smrg if (hw_threads >= 12) { 972b8e80941Smrg num_comp_hi_threads = hw_threads * 3 / 4; 973b8e80941Smrg num_comp_lo_threads = hw_threads / 3; 974b8e80941Smrg } else if (hw_threads >= 6) { 975b8e80941Smrg num_comp_hi_threads = hw_threads - 2; 976b8e80941Smrg num_comp_lo_threads = hw_threads / 2; 977b8e80941Smrg } else if (hw_threads >= 2) { 978b8e80941Smrg num_comp_hi_threads = hw_threads - 1; 979b8e80941Smrg num_comp_lo_threads = hw_threads / 2; 980b8e80941Smrg } else { 981b8e80941Smrg num_comp_hi_threads = 1; 982b8e80941Smrg num_comp_lo_threads = 1; 983b8e80941Smrg } 984b8e80941Smrg 985b8e80941Smrg num_comp_hi_threads = MIN2(num_comp_hi_threads, 986b8e80941Smrg ARRAY_SIZE(sscreen->compiler)); 987b8e80941Smrg num_comp_lo_threads = MIN2(num_comp_lo_threads, 988b8e80941Smrg ARRAY_SIZE(sscreen->compiler_lowp)); 989b8e80941Smrg 990b8e80941Smrg if (!util_queue_init(&sscreen->shader_compiler_queue, "sh", 991b8e80941Smrg 64, num_comp_hi_threads, 992b8e80941Smrg UTIL_QUEUE_INIT_RESIZE_IF_FULL | 993b8e80941Smrg UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY)) { 994b8e80941Smrg si_destroy_shader_cache(sscreen); 995b8e80941Smrg FREE(sscreen); 996b8e80941Smrg return NULL; 997b8e80941Smrg } 998b8e80941Smrg 999b8e80941Smrg if (!util_queue_init(&sscreen->shader_compiler_queue_low_priority, 1000b8e80941Smrg "shlo", 1001b8e80941Smrg 64, num_comp_lo_threads, 1002b8e80941Smrg UTIL_QUEUE_INIT_RESIZE_IF_FULL | 1003b8e80941Smrg UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY | 1004b8e80941Smrg UTIL_QUEUE_INIT_USE_MINIMUM_PRIORITY)) { 1005b8e80941Smrg si_destroy_shader_cache(sscreen); 1006b8e80941Smrg FREE(sscreen); 1007b8e80941Smrg return NULL; 1008b8e80941Smrg } 1009b8e80941Smrg 1010b8e80941Smrg if (!debug_get_bool_option("RADEON_DISABLE_PERFCOUNTERS", false)) 1011b8e80941Smrg si_init_perfcounters(sscreen); 1012b8e80941Smrg 1013b8e80941Smrg /* Determine tessellation ring info. */ 1014b8e80941Smrg bool double_offchip_buffers = sscreen->info.chip_class >= CIK && 1015b8e80941Smrg sscreen->info.family != CHIP_CARRIZO && 1016b8e80941Smrg sscreen->info.family != CHIP_STONEY; 1017b8e80941Smrg /* This must be one less than the maximum number due to a hw limitation. 1018b8e80941Smrg * Various hardware bugs in SI, CIK, and GFX9 need this. 1019b8e80941Smrg */ 1020b8e80941Smrg unsigned max_offchip_buffers_per_se; 1021b8e80941Smrg 1022b8e80941Smrg /* Only certain chips can use the maximum value. */ 1023b8e80941Smrg if (sscreen->info.family == CHIP_VEGA12 || 1024b8e80941Smrg sscreen->info.family == CHIP_VEGA20) 1025b8e80941Smrg max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64; 1026b8e80941Smrg else 1027b8e80941Smrg max_offchip_buffers_per_se = double_offchip_buffers ? 127 : 63; 1028b8e80941Smrg 1029b8e80941Smrg unsigned max_offchip_buffers = max_offchip_buffers_per_se * 1030b8e80941Smrg sscreen->info.max_se; 1031b8e80941Smrg unsigned offchip_granularity; 1032b8e80941Smrg 1033b8e80941Smrg /* Hawaii has a bug with offchip buffers > 256 that can be worked 1034b8e80941Smrg * around by setting 4K granularity. 1035b8e80941Smrg */ 1036b8e80941Smrg if (sscreen->info.family == CHIP_HAWAII) { 1037b8e80941Smrg sscreen->tess_offchip_block_dw_size = 4096; 1038b8e80941Smrg offchip_granularity = V_03093C_X_4K_DWORDS; 1039b8e80941Smrg } else { 1040b8e80941Smrg sscreen->tess_offchip_block_dw_size = 8192; 1041b8e80941Smrg offchip_granularity = V_03093C_X_8K_DWORDS; 1042b8e80941Smrg } 1043b8e80941Smrg 1044b8e80941Smrg sscreen->tess_factor_ring_size = 32768 * sscreen->info.max_se; 1045b8e80941Smrg assert(((sscreen->tess_factor_ring_size / 4) & C_030938_SIZE) == 0); 1046b8e80941Smrg sscreen->tess_offchip_ring_size = max_offchip_buffers * 1047b8e80941Smrg sscreen->tess_offchip_block_dw_size * 4; 1048b8e80941Smrg 1049b8e80941Smrg if (sscreen->info.chip_class >= CIK) { 1050b8e80941Smrg if (sscreen->info.chip_class >= VI) 1051b8e80941Smrg --max_offchip_buffers; 1052b8e80941Smrg sscreen->vgt_hs_offchip_param = 1053b8e80941Smrg S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) | 1054b8e80941Smrg S_03093C_OFFCHIP_GRANULARITY(offchip_granularity); 1055b8e80941Smrg } else { 1056b8e80941Smrg assert(offchip_granularity == V_03093C_X_8K_DWORDS); 1057b8e80941Smrg sscreen->vgt_hs_offchip_param = 1058b8e80941Smrg S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers); 1059b8e80941Smrg } 1060b8e80941Smrg 1061b8e80941Smrg /* The mere presense of CLEAR_STATE in the IB causes random GPU hangs 1062b8e80941Smrg * on SI. Some CLEAR_STATE cause asic hang on radeon kernel, etc. 1063b8e80941Smrg * SPI_VS_OUT_CONFIG. So only enable CI CLEAR_STATE on amdgpu kernel.*/ 1064b8e80941Smrg sscreen->has_clear_state = sscreen->info.chip_class >= CIK && 1065b8e80941Smrg sscreen->info.drm_major == 3; 1066b8e80941Smrg 1067b8e80941Smrg sscreen->has_distributed_tess = 1068b8e80941Smrg sscreen->info.chip_class >= VI && 1069b8e80941Smrg sscreen->info.max_se >= 2; 1070b8e80941Smrg 1071b8e80941Smrg sscreen->has_draw_indirect_multi = 1072b8e80941Smrg (sscreen->info.family >= CHIP_POLARIS10) || 1073b8e80941Smrg (sscreen->info.chip_class == VI && 1074b8e80941Smrg sscreen->info.pfp_fw_version >= 121 && 1075b8e80941Smrg sscreen->info.me_fw_version >= 87) || 1076b8e80941Smrg (sscreen->info.chip_class == CIK && 1077b8e80941Smrg sscreen->info.pfp_fw_version >= 211 && 1078b8e80941Smrg sscreen->info.me_fw_version >= 173) || 1079b8e80941Smrg (sscreen->info.chip_class == SI && 1080b8e80941Smrg sscreen->info.pfp_fw_version >= 79 && 1081b8e80941Smrg sscreen->info.me_fw_version >= 142); 1082b8e80941Smrg 1083b8e80941Smrg sscreen->has_out_of_order_rast = sscreen->info.chip_class >= VI && 1084b8e80941Smrg sscreen->info.max_se >= 2 && 1085b8e80941Smrg !(sscreen->debug_flags & DBG(NO_OUT_OF_ORDER)); 1086b8e80941Smrg sscreen->assume_no_z_fights = 1087b8e80941Smrg driQueryOptionb(config->options, "radeonsi_assume_no_z_fights"); 1088b8e80941Smrg sscreen->commutative_blend_add = 1089b8e80941Smrg driQueryOptionb(config->options, "radeonsi_commutative_blend_add"); 1090b8e80941Smrg 1091b8e80941Smrg { 1092b8e80941Smrg#define OPT_BOOL(name, dflt, description) \ 1093b8e80941Smrg sscreen->options.name = \ 1094b8e80941Smrg driQueryOptionb(config->options, "radeonsi_"#name); 1095b8e80941Smrg#include "si_debug_options.h" 1096b8e80941Smrg } 1097b8e80941Smrg 1098b8e80941Smrg sscreen->has_gfx9_scissor_bug = sscreen->info.family == CHIP_VEGA10 || 1099b8e80941Smrg sscreen->info.family == CHIP_RAVEN; 1100b8e80941Smrg sscreen->has_msaa_sample_loc_bug = (sscreen->info.family >= CHIP_POLARIS10 && 1101b8e80941Smrg sscreen->info.family <= CHIP_POLARIS12) || 1102b8e80941Smrg sscreen->info.family == CHIP_VEGA10 || 1103b8e80941Smrg sscreen->info.family == CHIP_RAVEN; 1104b8e80941Smrg sscreen->has_ls_vgpr_init_bug = sscreen->info.family == CHIP_VEGA10 || 1105b8e80941Smrg sscreen->info.family == CHIP_RAVEN; 1106b8e80941Smrg sscreen->has_dcc_constant_encode = sscreen->info.family == CHIP_RAVEN2; 1107b8e80941Smrg 1108b8e80941Smrg /* Only enable primitive binning on APUs by default. */ 1109b8e80941Smrg sscreen->dpbb_allowed = sscreen->info.family == CHIP_RAVEN || 1110b8e80941Smrg sscreen->info.family == CHIP_RAVEN2; 1111b8e80941Smrg 1112b8e80941Smrg sscreen->dfsm_allowed = sscreen->info.family == CHIP_RAVEN || 1113b8e80941Smrg sscreen->info.family == CHIP_RAVEN2; 1114b8e80941Smrg 1115b8e80941Smrg /* Process DPBB enable flags. */ 1116b8e80941Smrg if (sscreen->debug_flags & DBG(DPBB)) { 1117b8e80941Smrg sscreen->dpbb_allowed = true; 1118b8e80941Smrg if (sscreen->debug_flags & DBG(DFSM)) 1119b8e80941Smrg sscreen->dfsm_allowed = true; 1120b8e80941Smrg } 1121b8e80941Smrg 1122b8e80941Smrg /* Process DPBB disable flags. */ 1123b8e80941Smrg if (sscreen->debug_flags & DBG(NO_DPBB)) { 1124b8e80941Smrg sscreen->dpbb_allowed = false; 1125b8e80941Smrg sscreen->dfsm_allowed = false; 1126b8e80941Smrg } else if (sscreen->debug_flags & DBG(NO_DFSM)) { 1127b8e80941Smrg sscreen->dfsm_allowed = false; 1128b8e80941Smrg } 1129b8e80941Smrg 1130b8e80941Smrg /* While it would be nice not to have this flag, we are constrained 1131b8e80941Smrg * by the reality that LLVM 5.0 doesn't have working VGPR indexing 1132b8e80941Smrg * on GFX9. 1133b8e80941Smrg */ 1134b8e80941Smrg sscreen->llvm_has_working_vgpr_indexing = sscreen->info.chip_class <= VI; 1135848b8605Smrg 1136b8e80941Smrg /* Some chips have RB+ registers, but don't support RB+. Those must 1137b8e80941Smrg * always disable it. 1138b8e80941Smrg */ 1139b8e80941Smrg if (sscreen->info.family == CHIP_STONEY || 1140b8e80941Smrg sscreen->info.chip_class >= GFX9) { 1141b8e80941Smrg sscreen->has_rbplus = true; 1142b8e80941Smrg 1143b8e80941Smrg sscreen->rbplus_allowed = 1144b8e80941Smrg !(sscreen->debug_flags & DBG(NO_RB_PLUS)) && 1145b8e80941Smrg (sscreen->info.family == CHIP_STONEY || 1146b8e80941Smrg sscreen->info.family == CHIP_VEGA12 || 1147b8e80941Smrg sscreen->info.family == CHIP_RAVEN || 1148b8e80941Smrg sscreen->info.family == CHIP_RAVEN2); 1149b8e80941Smrg } 1150b8e80941Smrg 1151b8e80941Smrg sscreen->dcc_msaa_allowed = 1152b8e80941Smrg !(sscreen->debug_flags & DBG(NO_DCC_MSAA)); 1153b8e80941Smrg 1154b8e80941Smrg sscreen->cpdma_prefetch_writes_memory = sscreen->info.chip_class <= VI; 1155b8e80941Smrg 1156b8e80941Smrg (void) mtx_init(&sscreen->shader_parts_mutex, mtx_plain); 1157b8e80941Smrg sscreen->use_monolithic_shaders = 1158b8e80941Smrg (sscreen->debug_flags & DBG(MONOLITHIC_SHADERS)) != 0; 1159b8e80941Smrg 1160b8e80941Smrg sscreen->barrier_flags.cp_to_L2 = SI_CONTEXT_INV_SMEM_L1 | 1161b8e80941Smrg SI_CONTEXT_INV_VMEM_L1; 1162b8e80941Smrg if (sscreen->info.chip_class <= VI) { 1163b8e80941Smrg sscreen->barrier_flags.cp_to_L2 |= SI_CONTEXT_INV_GLOBAL_L2; 1164b8e80941Smrg sscreen->barrier_flags.L2_to_cp |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; 1165b8e80941Smrg } 1166b8e80941Smrg 1167b8e80941Smrg if (debug_get_bool_option("RADEON_DUMP_SHADERS", false)) 1168b8e80941Smrg sscreen->debug_flags |= DBG_ALL_SHADERS; 1169b8e80941Smrg 1170b8e80941Smrg /* Syntax: 1171b8e80941Smrg * EQAA=s,z,c 1172b8e80941Smrg * Example: 1173b8e80941Smrg * EQAA=8,4,2 1174b8e80941Smrg 1175b8e80941Smrg * That means 8 coverage samples, 4 Z/S samples, and 2 color samples. 1176b8e80941Smrg * Constraints: 1177b8e80941Smrg * s >= z >= c (ignoring this only wastes memory) 1178b8e80941Smrg * s = [2..16] 1179b8e80941Smrg * z = [2..8] 1180b8e80941Smrg * c = [2..8] 1181b8e80941Smrg * 1182b8e80941Smrg * Only MSAA color and depth buffers are overriden. 1183b8e80941Smrg */ 1184b8e80941Smrg if (sscreen->info.has_eqaa_surface_allocator) { 1185b8e80941Smrg const char *eqaa = debug_get_option("EQAA", NULL); 1186b8e80941Smrg unsigned s,z,f; 1187b8e80941Smrg 1188b8e80941Smrg if (eqaa && sscanf(eqaa, "%u,%u,%u", &s, &z, &f) == 3 && s && z && f) { 1189b8e80941Smrg sscreen->eqaa_force_coverage_samples = s; 1190b8e80941Smrg sscreen->eqaa_force_z_samples = z; 1191b8e80941Smrg sscreen->eqaa_force_color_samples = f; 1192b8e80941Smrg } 1193b8e80941Smrg } 1194b8e80941Smrg 1195b8e80941Smrg for (i = 0; i < num_comp_hi_threads; i++) 1196b8e80941Smrg si_init_compiler(sscreen, &sscreen->compiler[i]); 1197b8e80941Smrg for (i = 0; i < num_comp_lo_threads; i++) 1198b8e80941Smrg si_init_compiler(sscreen, &sscreen->compiler_lowp[i]); 1199848b8605Smrg 1200848b8605Smrg /* Create the auxiliary context. This must be done last. */ 1201b8e80941Smrg sscreen->aux_context = si_create_context( 1202b8e80941Smrg &sscreen->b, sscreen->options.aux_debug ? PIPE_CONTEXT_DEBUG : 0); 1203b8e80941Smrg if (sscreen->options.aux_debug) { 1204b8e80941Smrg struct u_log_context *log = CALLOC_STRUCT(u_log_context); 1205b8e80941Smrg u_log_context_init(log); 1206b8e80941Smrg sscreen->aux_context->set_log_context(sscreen->aux_context, log); 1207b8e80941Smrg } 1208b8e80941Smrg 1209b8e80941Smrg if (sscreen->debug_flags & DBG(TEST_DMA)) 1210b8e80941Smrg si_test_dma(sscreen); 1211b8e80941Smrg 1212b8e80941Smrg if (sscreen->debug_flags & DBG(TEST_DMA_PERF)) { 1213b8e80941Smrg si_test_dma_perf(sscreen); 1214b8e80941Smrg } 1215b8e80941Smrg 1216b8e80941Smrg if (sscreen->debug_flags & (DBG(TEST_VMFAULT_CP) | 1217b8e80941Smrg DBG(TEST_VMFAULT_SDMA) | 1218b8e80941Smrg DBG(TEST_VMFAULT_SHADER))) 1219b8e80941Smrg si_test_vmfault(sscreen); 1220b8e80941Smrg 1221b8e80941Smrg if (sscreen->debug_flags & DBG(TEST_GDS)) 1222b8e80941Smrg si_test_gds((struct si_context*)sscreen->aux_context); 1223b8e80941Smrg 1224b8e80941Smrg if (sscreen->debug_flags & DBG(TEST_GDS_MM)) { 1225b8e80941Smrg si_test_gds_memory_management((struct si_context*)sscreen->aux_context, 1226b8e80941Smrg 32 * 1024, 4, RADEON_DOMAIN_GDS); 1227b8e80941Smrg } 1228b8e80941Smrg if (sscreen->debug_flags & DBG(TEST_GDS_OA_MM)) { 1229b8e80941Smrg si_test_gds_memory_management((struct si_context*)sscreen->aux_context, 1230b8e80941Smrg 4, 1, RADEON_DOMAIN_OA); 1231b8e80941Smrg } 1232848b8605Smrg 1233b8e80941Smrg return &sscreen->b; 1234848b8605Smrg} 1235