freedreno_autotune.c revision 7ec681f3
/*
 * Copyright © 2021 Google, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "freedreno_autotune.h"
#include "freedreno_batch.h"
#include "freedreno_util.h"

/**
 * Tracks, for a given batch key (which maps to a FBO/framebuffer state),
 * a history of recent rendering results, used to decide whether future
 * batches with the same key should render via GMEM or bypass.
 *
 * ralloc parent is fd_autotune::ht
 */
struct fd_batch_history {
   struct fd_batch_key *key;

   /* Entry in fd_autotune::lru: */
   struct list_head node;

   unsigned num_results;

   /**
    * List of recent fd_batch_result's
    */
   struct list_head results;
#define MAX_RESULTS 5
};

static struct fd_batch_history *
get_history(struct fd_autotune *at, struct fd_batch *batch)
{
   struct fd_batch_history *history;

   /* draw batches should still have their key at this point. */
   assert(batch->key || batch->nondraw);
   if (!batch->key)
      return NULL;

   struct hash_entry *entry =
      _mesa_hash_table_search_pre_hashed(at->ht, batch->hash, batch->key);

   if (entry) {
      history = entry->data;
      goto found;
   }

   history = rzalloc_size(at->ht, sizeof(*history));

   history->key = fd_batch_key_clone(history, batch->key);
   list_inithead(&history->node);
   list_inithead(&history->results);

   /* Note: We cap # of cached GMEM states at 20.. so assuming double-
    * buffering, 40 should be a good place to cap cached autotune state
    */
   if (at->ht->entries >= 40) {
      struct fd_batch_history *last =
         list_last_entry(&at->lru, struct fd_batch_history, node);
      _mesa_hash_table_remove_key(at->ht, last->key);
      list_del(&last->node);
      ralloc_free(last);
   }

   _mesa_hash_table_insert_pre_hashed(at->ht, batch->hash, history->key,
                                      history);

found:
   /* Move to the head of the LRU: */
   list_delinit(&history->node);
   list_add(&history->node, &at->lru);

   return history;
}
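
/*
 * Result lifecycle: each draw batch gets an fd_batch_result carrying a
 * monotonically increasing fence and a slot (idx) in the GPU-visible
 * fd_autotune_results buffer.  The result waits on pending_results until
 * process_results() observes that the GPU has written back a fence at
 * least as large, at which point the passed-sample count is computed
 * from the slot's counters:
 *
 *    samples_passed = result[idx].samples_end - result[idx].samples_start;
 *
 * and the result moves onto its history's list, which is capped at
 * MAX_RESULTS entries by dropping the oldest from the tail.
 */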
static void
result_destructor(void *r)
{
   struct fd_batch_result *result = r;

   /* Just in case we manage to somehow still be on the pending_results list: */
   list_del(&result->node);
}

static struct fd_batch_result *
get_result(struct fd_autotune *at, struct fd_batch_history *history)
{
   struct fd_batch_result *result = rzalloc_size(history, sizeof(*result));

   result->fence =
      ++at->fence_counter; /* pre-increment so zero isn't valid fence */
   result->idx = at->idx_counter++;

   if (at->idx_counter >= ARRAY_SIZE(at->results->result))
      at->idx_counter = 0;

   result->history = history;
   list_addtail(&result->node, &at->pending_results);

   ralloc_set_destructor(result, result_destructor);

   return result;
}

static void
process_results(struct fd_autotune *at)
{
   uint32_t current_fence = at->results->fence;

   list_for_each_entry_safe (struct fd_batch_result, result,
                             &at->pending_results, node) {
      if (result->fence > current_fence)
         break;

      struct fd_batch_history *history = result->history;

      result->samples_passed = at->results->result[result->idx].samples_end -
                               at->results->result[result->idx].samples_start;

      list_delinit(&result->node);
      list_add(&result->node, &history->results);

      if (history->num_results < MAX_RESULTS) {
         history->num_results++;
      } else {
         /* Once above a limit, start popping old results off the
          * tail of the list:
          */
         struct fd_batch_result *old_result =
            list_last_entry(&history->results, struct fd_batch_result, node);
         list_delinit(&old_result->node);
         ralloc_free(old_result);
      }
   }
}

static bool
fallback_use_bypass(struct fd_batch *batch)
{
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   /* Fallback logic if we have no historical data about the rendertarget: */
   if (batch->cleared || batch->gmem_reason ||
       (batch->num_draws > 5) || (pfb->samples > 1)) {
      return false;
   }

   return true;
}
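
/*
 * To make the cost heuristic below concrete, a worked example with
 * made-up numbers (purely illustrative, not measured on any hardware):
 * suppose the recent results for this render target average
 * avg_samples = 10000 passed samples, and the current batch has
 * cost = 30 over num_draws = 10 draws.  Then:
 *
 *    sample_cost     = 30 / 10 = 3.0
 *    total_draw_cost = (10000 * 3.0) / 10 = 3000.0
 *
 * which is not below the 3000.0 threshold, so GMEM is used; fewer
 * passed samples or cheaper draws would tip the decision to bypass.
 */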
/**
 * A magic 8-ball that tells the gmem code whether we should do bypass mode
 * for moar fps.
 */
bool
fd_autotune_use_bypass(struct fd_autotune *at, struct fd_batch *batch)
{
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   process_results(at);

   /* Only enable on gens that opt-in (and actually have sample-passed
    * collection wired up):
    */
   if (!batch->ctx->screen->gmem_reason_mask)
      return fallback_use_bypass(batch);

   if (batch->gmem_reason & ~batch->ctx->screen->gmem_reason_mask)
      return fallback_use_bypass(batch);

   for (unsigned i = 0; i < pfb->nr_cbufs; i++) {
      /* If ms-rtt is involved, force GMEM, as we don't currently
       * implement a temporary render target that we can MSAA resolve
       * from
       */
      if (pfb->cbufs[i] && pfb->cbufs[i]->nr_samples)
         return fallback_use_bypass(batch);
   }

   struct fd_batch_history *history = get_history(at, batch);
   if (!history)
      return fallback_use_bypass(batch);

   batch->autotune_result = get_result(at, history);
   batch->autotune_result->cost = batch->cost;

   bool use_bypass = fallback_use_bypass(batch);

   if (use_bypass)
      return true;

   if (history->num_results > 0) {
      uint32_t total_samples = 0;

      // TODO we should account for clears somehow
      // TODO should we try to notice if there is a drastic change from
      // frame to frame?
      list_for_each_entry (struct fd_batch_result, result, &history->results,
                           node) {
         total_samples += result->samples_passed;
      }

      float avg_samples = (float)total_samples / (float)history->num_results;

      /* Low sample count could mean there was only a clear.. or there was
       * a clear plus draws that touch no or few samples
       */
      if (avg_samples < 500.0)
         return true;

      /* Cost-per-sample is an estimate for the average number of reads+
       * writes for a given passed sample.
       */
      float sample_cost = batch->cost;
      sample_cost /= batch->num_draws;

      float total_draw_cost = (avg_samples * sample_cost) / batch->num_draws;
      DBG("%08x:%u\ttotal_samples=%u, avg_samples=%f, sample_cost=%f, "
          "total_draw_cost=%f\n",
          batch->hash, batch->num_draws, total_samples, avg_samples,
          sample_cost, total_draw_cost);

      if (total_draw_cost < 3000.0)
         return true;
   }

   return use_bypass;
}

void
fd_autotune_init(struct fd_autotune *at, struct fd_device *dev)
{
   at->ht =
      _mesa_hash_table_create(NULL, fd_batch_key_hash, fd_batch_key_equals);
   list_inithead(&at->lru);

   at->results_mem = fd_bo_new(dev, sizeof(struct fd_autotune_results),
                               0, "autotune");
   at->results = fd_bo_map(at->results_mem);

   list_inithead(&at->pending_results);
}

void
fd_autotune_fini(struct fd_autotune *at)
{
   _mesa_hash_table_destroy(at->ht, NULL);
   fd_bo_del(at->results_mem);
}
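
/*
 * Usage sketch (hypothetical caller code, shown only to illustrate call
 * order; the surrounding driver names are assumptions, not part of this
 * file beyond the three fd_autotune_* entry points):
 *
 *    struct fd_autotune at;
 *
 *    fd_autotune_init(&at, dev);                 // at screen/context setup
 *    ...
 *    if (fd_autotune_use_bypass(&at, batch)) {
 *       // render directly to system memory
 *    } else {
 *       // render via GMEM tiles
 *    }
 *    ...
 *    fd_autotune_fini(&at);                      // at teardown
 *
 * fd_autotune_use_bypass() should be consulted before flushing a draw
 * batch, since it also attaches batch->autotune_result, which is how the
 * sample counters written for this batch are matched back to its history
 * on a later process_results() pass.
 */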