17ec681f3Smrg/* 27ec681f3Smrg * Copyright 2020 Advanced Micro Devices, Inc. 37ec681f3Smrg * Copyright 2020 Valve Corporation 47ec681f3Smrg * All Rights Reserved. 57ec681f3Smrg * 67ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a 77ec681f3Smrg * copy of this software and associated documentation files (the "Software"), 87ec681f3Smrg * to deal in the Software without restriction, including without limitation 97ec681f3Smrg * on the rights to use, copy, modify, merge, publish, distribute, sub 107ec681f3Smrg * license, and/or sell copies of the Software, and to permit persons to whom 117ec681f3Smrg * the Software is furnished to do so, subject to the following conditions: 127ec681f3Smrg * 137ec681f3Smrg * The above copyright notice and this permission notice (including the next 147ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the 157ec681f3Smrg * Software. 167ec681f3Smrg * 177ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 187ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 197ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 207ec681f3Smrg * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 217ec681f3Smrg * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 227ec681f3Smrg * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 237ec681f3Smrg * USE OR OTHER DEALINGS IN THE SOFTWARE. 247ec681f3Smrg */ 257ec681f3Smrg 267ec681f3Smrg#include "ac_sqtt.h" 277ec681f3Smrg 287ec681f3Smrg#include "ac_gpu_info.h" 297ec681f3Smrg#include "util/u_math.h" 307ec681f3Smrg#include "util/os_time.h" 317ec681f3Smrg 327ec681f3Smrguint64_t 337ec681f3Smrgac_thread_trace_get_info_offset(unsigned se) 347ec681f3Smrg{ 357ec681f3Smrg return sizeof(struct ac_thread_trace_info) * se; 367ec681f3Smrg} 377ec681f3Smrg 387ec681f3Smrguint64_t 397ec681f3Smrgac_thread_trace_get_data_offset(const struct radeon_info *rad_info, 407ec681f3Smrg const struct ac_thread_trace_data *data, unsigned se) 417ec681f3Smrg{ 427ec681f3Smrg unsigned max_se = rad_info->max_se; 437ec681f3Smrg uint64_t data_offset; 447ec681f3Smrg 457ec681f3Smrg data_offset = align64(sizeof(struct ac_thread_trace_info) * max_se, 467ec681f3Smrg 1 << SQTT_BUFFER_ALIGN_SHIFT); 477ec681f3Smrg data_offset += data->buffer_size * se; 487ec681f3Smrg 497ec681f3Smrg return data_offset; 507ec681f3Smrg} 517ec681f3Smrg 527ec681f3Smrguint64_t 537ec681f3Smrgac_thread_trace_get_info_va(uint64_t va, unsigned se) 547ec681f3Smrg{ 557ec681f3Smrg return va + ac_thread_trace_get_info_offset(se); 567ec681f3Smrg} 577ec681f3Smrg 587ec681f3Smrguint64_t 597ec681f3Smrgac_thread_trace_get_data_va(const struct radeon_info *rad_info, 607ec681f3Smrg const struct ac_thread_trace_data *data, uint64_t va, unsigned se) 617ec681f3Smrg{ 627ec681f3Smrg return va + ac_thread_trace_get_data_offset(rad_info, data, se); 637ec681f3Smrg} 647ec681f3Smrg 657ec681f3Smrgbool 667ec681f3Smrgac_is_thread_trace_complete(struct radeon_info *rad_info, 677ec681f3Smrg const struct ac_thread_trace_data *data, 687ec681f3Smrg const struct ac_thread_trace_info *info) 697ec681f3Smrg{ 707ec681f3Smrg if (rad_info->chip_class >= GFX10) { 717ec681f3Smrg /* GFX10 doesn't have THREAD_TRACE_CNTR but it reports the number of 727ec681f3Smrg * dropped bytes per SE via THREAD_TRACE_DROPPED_CNTR. Though, this 737ec681f3Smrg * doesn't seem reliable because it might still report non-zero even if 747ec681f3Smrg * the SQTT buffer isn't full. 757ec681f3Smrg * 767ec681f3Smrg * The solution here is to compare the number of bytes written by the hw 777ec681f3Smrg * (in units of 32 bytes) to the SQTT buffer size. If it's equal, that 787ec681f3Smrg * means that the buffer is full and should be resized. 797ec681f3Smrg */ 807ec681f3Smrg return !(info->cur_offset * 32 == data->buffer_size - 32); 817ec681f3Smrg } 827ec681f3Smrg 837ec681f3Smrg /* Otherwise, compare the current thread trace offset with the number 847ec681f3Smrg * of written bytes. 857ec681f3Smrg */ 867ec681f3Smrg return info->cur_offset == info->gfx9_write_counter; 877ec681f3Smrg} 887ec681f3Smrg 897ec681f3Smrguint32_t 907ec681f3Smrgac_get_expected_buffer_size(struct radeon_info *rad_info, 917ec681f3Smrg const struct ac_thread_trace_info *info) 927ec681f3Smrg{ 937ec681f3Smrg if (rad_info->chip_class >= GFX10) { 947ec681f3Smrg uint32_t dropped_cntr_per_se = info->gfx10_dropped_cntr / rad_info->max_se; 957ec681f3Smrg return ((info->cur_offset * 32) + dropped_cntr_per_se) / 1024; 967ec681f3Smrg } 977ec681f3Smrg 987ec681f3Smrg return (info->gfx9_write_counter * 32) / 1024; 997ec681f3Smrg} 1007ec681f3Smrg 1017ec681f3Smrgbool 1027ec681f3Smrgac_sqtt_add_pso_correlation(struct ac_thread_trace_data *thread_trace_data, 1037ec681f3Smrg uint64_t pipeline_hash) 1047ec681f3Smrg{ 1057ec681f3Smrg struct rgp_pso_correlation *pso_correlation = &thread_trace_data->rgp_pso_correlation; 1067ec681f3Smrg struct rgp_pso_correlation_record *record; 1077ec681f3Smrg 1087ec681f3Smrg record = malloc(sizeof(struct rgp_pso_correlation_record)); 1097ec681f3Smrg if (!record) 1107ec681f3Smrg return false; 1117ec681f3Smrg 1127ec681f3Smrg record->api_pso_hash = pipeline_hash; 1137ec681f3Smrg record->pipeline_hash[0] = pipeline_hash; 1147ec681f3Smrg record->pipeline_hash[1] = pipeline_hash; 1157ec681f3Smrg memset(record->api_level_obj_name, 0, sizeof(record->api_level_obj_name)); 1167ec681f3Smrg 1177ec681f3Smrg simple_mtx_lock(&pso_correlation->lock); 1187ec681f3Smrg list_addtail(&record->list, &pso_correlation->record); 1197ec681f3Smrg pso_correlation->record_count++; 1207ec681f3Smrg simple_mtx_unlock(&pso_correlation->lock); 1217ec681f3Smrg 1227ec681f3Smrg return true; 1237ec681f3Smrg} 1247ec681f3Smrg 1257ec681f3Smrgbool 1267ec681f3Smrgac_sqtt_add_code_object_loader_event(struct ac_thread_trace_data *thread_trace_data, 1277ec681f3Smrg uint64_t pipeline_hash, 1287ec681f3Smrg uint64_t base_address) 1297ec681f3Smrg{ 1307ec681f3Smrg struct rgp_loader_events *loader_events = &thread_trace_data->rgp_loader_events; 1317ec681f3Smrg struct rgp_loader_events_record *record; 1327ec681f3Smrg 1337ec681f3Smrg record = malloc(sizeof(struct rgp_loader_events_record)); 1347ec681f3Smrg if (!record) 1357ec681f3Smrg return false; 1367ec681f3Smrg 1377ec681f3Smrg record->loader_event_type = RGP_LOAD_TO_GPU_MEMORY; 1387ec681f3Smrg record->reserved = 0; 1397ec681f3Smrg record->base_address = base_address & 0xffffffffffff; 1407ec681f3Smrg record->code_object_hash[0] = pipeline_hash; 1417ec681f3Smrg record->code_object_hash[1] = pipeline_hash; 1427ec681f3Smrg record->time_stamp = os_time_get_nano(); 1437ec681f3Smrg 1447ec681f3Smrg simple_mtx_lock(&loader_events->lock); 1457ec681f3Smrg list_addtail(&record->list, &loader_events->record); 1467ec681f3Smrg loader_events->record_count++; 1477ec681f3Smrg simple_mtx_unlock(&loader_events->lock); 1487ec681f3Smrg 1497ec681f3Smrg return true; 1507ec681f3Smrg} 151