/*
 * Copyright © 2021 Google, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "freedreno_autotune.h"
#include "freedreno_batch.h"
#include "freedreno_util.h"

/**
 * Tracks, for a given batch key (which maps to a FBO/framebuffer state), a
 * history of recent rendering results (samples passed), which is used to
 * decide between GMEM and bypass rendering.
 *
 * ralloc parent is fd_autotune::ht
 */
struct fd_batch_history {
   struct fd_batch_key *key;

   /* Entry in fd_autotune::lru: */
   struct list_head node;

   unsigned num_results;

   /**
    * List of recent fd_batch_result's
    */
   struct list_head results;
#define MAX_RESULTS 5
};

static struct fd_batch_history *
get_history(struct fd_autotune *at, struct fd_batch *batch)
{
   struct fd_batch_history *history;

   /* draw batches should still have their key at this point. */
   assert(batch->key || batch->nondraw);
   if (!batch->key)
      return NULL;

   struct hash_entry *entry =
      _mesa_hash_table_search_pre_hashed(at->ht, batch->hash, batch->key);

   if (entry) {
      history = entry->data;
      goto found;
   }

   history = rzalloc_size(at->ht, sizeof(*history));

   history->key = fd_batch_key_clone(history, batch->key);
   list_inithead(&history->node);
   list_inithead(&history->results);

   /* Note: We cap # of cached GMEM states at 20, so assuming double-
    * buffering, 40 should be a good place to cap cached autotune state
    */
   if (at->ht->entries >= 40) {
      struct fd_batch_history *last =
         list_last_entry(&at->lru, struct fd_batch_history, node);
      _mesa_hash_table_remove_key(at->ht, last->key);
      list_del(&last->node);
      ralloc_free(last);
   }

   _mesa_hash_table_insert_pre_hashed(at->ht, batch->hash, history->key,
                                      history);

found:
   /* Move to the head of the LRU: */
   list_delinit(&history->node);
   list_add(&history->node, &at->lru);

   return history;
}

static void
result_destructor(void *r)
{
   struct fd_batch_result *result = r;

   /* Just in case we manage to somehow still be on the pending_results list: */
   list_del(&result->node);
}

static struct fd_batch_result *
get_result(struct fd_autotune *at, struct fd_batch_history *history)
{
   struct fd_batch_result *result = rzalloc_size(history, sizeof(*result));

   result->fence =
      ++at->fence_counter; /* pre-increment so zero isn't valid fence */
   result->idx = at->idx_counter++;

   if (at->idx_counter >= ARRAY_SIZE(at->results->result))
      at->idx_counter = 0;

   result->history = history;
   list_addtail(&result->node, &at->pending_results);

   ralloc_set_destructor(result, result_destructor);

   return result;
}

static void
process_results(struct fd_autotune *at)
{
   uint32_t current_fence = at->results->fence;

   list_for_each_entry_safe (struct fd_batch_result, result,
                             &at->pending_results, node) {
      if (result->fence > current_fence)
         break;

      struct fd_batch_history *history = result->history;

      result->samples_passed = at->results->result[result->idx].samples_end -
                               at->results->result[result->idx].samples_start;

      list_delinit(&result->node);
      list_add(&result->node, &history->results);

      if (history->num_results < MAX_RESULTS) {
         history->num_results++;
      } else {
         /* Once above a limit, start popping old results off the
          * tail of the list:
          */
         struct fd_batch_result *old_result =
            list_last_entry(&history->results, struct fd_batch_result, node);
         list_delinit(&old_result->node);
         ralloc_free(old_result);
      }
   }
}

static bool
fallback_use_bypass(struct fd_batch *batch)
{
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   /* Fallback logic if we have no historical data about the rendertarget: */
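   /* That is: with no history to go on, prefer bypass/sysmem only for small
    * batches (at most 5 draws) with no clears, no gmem_reason flags, and no
    * MSAA; anything heavier is assumed to render faster in GMEM.
    */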
   if (batch->cleared || batch->gmem_reason ||
       (batch->num_draws > 5) || (pfb->samples > 1)) {
      return false;
   }

   return true;
}

/**
 * A magic 8-ball that tells the gmem code whether we should do bypass mode
 * for moar fps.
 */
bool
fd_autotune_use_bypass(struct fd_autotune *at, struct fd_batch *batch)
{
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   process_results(at);

   /* Only enable on gens that opt-in (and actually have sample-passed
    * collection wired up):
    */
   if (!batch->ctx->screen->gmem_reason_mask)
      return fallback_use_bypass(batch);

   if (batch->gmem_reason & ~batch->ctx->screen->gmem_reason_mask)
      return fallback_use_bypass(batch);

   for (unsigned i = 0; i < pfb->nr_cbufs; i++) {
      /* If ms-rtt is involved, force GMEM, as we don't currently
       * implement a temporary render target that we can MSAA resolve
       * from.
       */
      if (pfb->cbufs[i] && pfb->cbufs[i]->nr_samples)
         return fallback_use_bypass(batch);
   }

   struct fd_batch_history *history = get_history(at, batch);
   if (!history)
      return fallback_use_bypass(batch);

   batch->autotune_result = get_result(at, history);
   batch->autotune_result->cost = batch->cost;

   bool use_bypass = fallback_use_bypass(batch);

   if (use_bypass)
      return true;

   if (history->num_results > 0) {
      uint32_t total_samples = 0;

      // TODO we should account for clears somehow
      // TODO should we try to notice if there is a drastic change from
      // frame to frame?
      list_for_each_entry (struct fd_batch_result, result, &history->results,
                           node) {
         total_samples += result->samples_passed;
      }

      float avg_samples = (float)total_samples / (float)history->num_results;

      /* Low sample count could mean there was only a clear.. or there was
       * a clear plus draws that touch no or few samples
       */
      if (avg_samples < 500.0)
         return true;

      /* Cost-per-sample is an estimate for the average number of reads+
       * writes for a given passed sample.
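       *
       * For illustration only (hypothetical numbers): with avg_samples=10000,
       * batch->cost=30 and num_draws=10, sample_cost works out to 30/10 = 3
       * and total_draw_cost to (10000 * 3) / 10 = 3000, which does not pass
       * the < 3000.0 check below, so we would stick with GMEM.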
       */
      float sample_cost = batch->cost;
      sample_cost /= batch->num_draws;

      float total_draw_cost = (avg_samples * sample_cost) / batch->num_draws;
      DBG("%08x:%u\ttotal_samples=%u, avg_samples=%f, sample_cost=%f, "
          "total_draw_cost=%f\n",
          batch->hash, batch->num_draws, total_samples, avg_samples,
          sample_cost, total_draw_cost);

      if (total_draw_cost < 3000.0)
         return true;
   }

   return use_bypass;
}

void
fd_autotune_init(struct fd_autotune *at, struct fd_device *dev)
{
   at->ht =
      _mesa_hash_table_create(NULL, fd_batch_key_hash, fd_batch_key_equals);
   list_inithead(&at->lru);

   at->results_mem = fd_bo_new(dev, sizeof(struct fd_autotune_results),
                               0, "autotune");
   at->results = fd_bo_map(at->results_mem);

   list_inithead(&at->pending_results);
}

void
fd_autotune_fini(struct fd_autotune *at)
{
   _mesa_hash_table_destroy(at->ht, NULL);
   fd_bo_del(at->results_mem);
}