17ec681f3Smrg/* 27ec681f3Smrg * Copyright (c) 2020 Collabora, Ltd. 37ec681f3Smrg * 47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a 57ec681f3Smrg * copy of this software and associated documentation files (the "Software"), 67ec681f3Smrg * to deal in the Software without restriction, including without limitation 77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the 97ec681f3Smrg * Software is furnished to do so, subject to the following conditions: 107ec681f3Smrg * 117ec681f3Smrg * The above copyright notice and this permission notice (including the next 127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the 137ec681f3Smrg * Software. 147ec681f3Smrg * 157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 207ec681f3Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 217ec681f3Smrg * SOFTWARE. 227ec681f3Smrg * 237ec681f3Smrg * Authors (Collabora): 247ec681f3Smrg * Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com> 257ec681f3Smrg */ 267ec681f3Smrg 277ec681f3Smrg/* Index buffer min/max cache. We need to calculate the min/max for arbitrary 287ec681f3Smrg * slices (start, start + count) of the index buffer at drawtime. As this can 297ec681f3Smrg * be quite expensive, we cache. Conceptually, we just use a hash table mapping 307ec681f3Smrg * the key (start, count) to the value (min, max). In practice, mesa's hash 317ec681f3Smrg * table implementation is higher overhead than we would like and makes 327ec681f3Smrg * handling memory usage a little complicated. So we use this data structure 337ec681f3Smrg * instead. Searching is O(n) to the size, but the size is capped at the 347ec681f3Smrg * PANFROST_MINMAX_SIZE constant (so this is a tradeoff between cache hit/miss 357ec681f3Smrg * ratio and cache search speed). Note that keys are adjacent so we get cache 367ec681f3Smrg * line alignment benefits. Insertion is O(1) and in-order until the cache 377ec681f3Smrg * fills up, after that it evicts the oldest cached value in a ring facilitated 387ec681f3Smrg * by index. 397ec681f3Smrg */ 407ec681f3Smrg 417ec681f3Smrg#include "pan_minmax_cache.h" 427ec681f3Smrg 437ec681f3Smrgbool 447ec681f3Smrgpanfrost_minmax_cache_get(struct panfrost_minmax_cache *cache, unsigned start, unsigned count, 457ec681f3Smrg unsigned *min_index, unsigned *max_index) 467ec681f3Smrg{ 477ec681f3Smrg uint64_t ht_key = (((uint64_t)count) << 32) | start; 487ec681f3Smrg bool found = false; 497ec681f3Smrg 507ec681f3Smrg if (!cache) 517ec681f3Smrg return false; 527ec681f3Smrg 537ec681f3Smrg for (unsigned i = 0; i < cache->size; ++i) { 547ec681f3Smrg if (cache->keys[i] == ht_key) { 557ec681f3Smrg uint64_t hit = cache->values[i]; 567ec681f3Smrg 577ec681f3Smrg *min_index = hit & 0xffffffff; 587ec681f3Smrg *max_index = hit >> 32; 597ec681f3Smrg found = true; 607ec681f3Smrg break; 617ec681f3Smrg } 627ec681f3Smrg } 637ec681f3Smrg 647ec681f3Smrg return found; 657ec681f3Smrg} 667ec681f3Smrg 677ec681f3Smrgvoid 687ec681f3Smrgpanfrost_minmax_cache_add(struct panfrost_minmax_cache *cache, unsigned start, unsigned count, 697ec681f3Smrg unsigned min_index, unsigned max_index) 707ec681f3Smrg{ 717ec681f3Smrg uint64_t ht_key = (((uint64_t)count) << 32) | start; 727ec681f3Smrg uint64_t value = min_index | (((uint64_t)max_index) << 32); 737ec681f3Smrg unsigned index = 0; 747ec681f3Smrg 757ec681f3Smrg if (!cache) 767ec681f3Smrg return; 777ec681f3Smrg 787ec681f3Smrg if (cache->size == PANFROST_MINMAX_SIZE) { 797ec681f3Smrg index = cache->index++; 807ec681f3Smrg cache->index = cache->index % PANFROST_MINMAX_SIZE; 817ec681f3Smrg } else { 827ec681f3Smrg index = cache->size++; 837ec681f3Smrg } 847ec681f3Smrg 857ec681f3Smrg cache->keys[index] = ht_key; 867ec681f3Smrg cache->values[index] = value; 877ec681f3Smrg 887ec681f3Smrg} 897ec681f3Smrg 907ec681f3Smrg/* If we've been caching min/max indices and we update the index 917ec681f3Smrg * buffer, that may invalidate the min/max. Check what's been cached vs 927ec681f3Smrg * what we've written, and throw out invalid entries. */ 937ec681f3Smrg 947ec681f3Smrgvoid 957ec681f3Smrgpanfrost_minmax_cache_invalidate(struct panfrost_minmax_cache *cache, struct pipe_transfer *transfer) 967ec681f3Smrg{ 977ec681f3Smrg /* Ensure there is a cache to invalidate and a write */ 987ec681f3Smrg if (!cache) 997ec681f3Smrg return; 1007ec681f3Smrg 1017ec681f3Smrg if (!(transfer->usage & PIPE_MAP_WRITE)) 1027ec681f3Smrg return; 1037ec681f3Smrg 1047ec681f3Smrg unsigned valid_count = 0; 1057ec681f3Smrg 1067ec681f3Smrg for (unsigned i = 0; i < cache->size; ++i) { 1077ec681f3Smrg uint64_t key = cache->keys[i]; 1087ec681f3Smrg 1097ec681f3Smrg uint32_t start = key & 0xffffffff; 1107ec681f3Smrg uint32_t count = key >> 32; 1117ec681f3Smrg 1127ec681f3Smrg /* 1D range intersection */ 1137ec681f3Smrg bool invalid = MAX2(transfer->box.x, start) < MIN2(transfer->box.x + transfer->box.width, start + count); 1147ec681f3Smrg if (!invalid) { 1157ec681f3Smrg cache->keys[valid_count] = key; 1167ec681f3Smrg cache->values[valid_count] = cache->values[i]; 1177ec681f3Smrg valid_count++; 1187ec681f3Smrg } 1197ec681f3Smrg } 1207ec681f3Smrg 1217ec681f3Smrg cache->size = valid_count; 1227ec681f3Smrg cache->index = 0; 1237ec681f3Smrg} 124