1b8e80941Smrg/* 2b8e80941Smrg * Mesa 3-D graphics library 3b8e80941Smrg * 4b8e80941Smrg * Copyright 2003 VMware, Inc. 5b8e80941Smrg * Copyright 2009 VMware, Inc. 6b8e80941Smrg * All Rights Reserved. 7b8e80941Smrg * Copyright (C) 2016 Advanced Micro Devices, Inc. 8b8e80941Smrg * 9b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a 10b8e80941Smrg * copy of this software and associated documentation files (the "Software"), 11b8e80941Smrg * to deal in the Software without restriction, including without limitation 12b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 13b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the 14b8e80941Smrg * Software is furnished to do so, subject to the following conditions: 15b8e80941Smrg * 16b8e80941Smrg * The above copyright notice and this permission notice (including the next 17b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the 18b8e80941Smrg * Software. 19b8e80941Smrg * 20b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 21b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 22b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 23b8e80941Smrg * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 24b8e80941Smrg * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 25b8e80941Smrg * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 26b8e80941Smrg * USE OR OTHER DEALINGS IN THE SOFTWARE. 27b8e80941Smrg */ 28b8e80941Smrg 29b8e80941Smrg#include "main/glheader.h" 30b8e80941Smrg#include "main/context.h" 31b8e80941Smrg#include "main/varray.h" 32b8e80941Smrg#include "main/macros.h" 33b8e80941Smrg#include "main/sse_minmax.h" 34b8e80941Smrg#include "x86/common_x86_asm.h" 35b8e80941Smrg#include "util/hash_table.h" 36b8e80941Smrg 37b8e80941Smrg 38b8e80941Smrgstruct minmax_cache_key { 39b8e80941Smrg GLintptr offset; 40b8e80941Smrg GLuint count; 41b8e80941Smrg unsigned index_size; 42b8e80941Smrg}; 43b8e80941Smrg 44b8e80941Smrg 45b8e80941Smrgstruct minmax_cache_entry { 46b8e80941Smrg struct minmax_cache_key key; 47b8e80941Smrg GLuint min; 48b8e80941Smrg GLuint max; 49b8e80941Smrg}; 50b8e80941Smrg 51b8e80941Smrg 52b8e80941Smrgstatic uint32_t 53b8e80941Smrgvbo_minmax_cache_hash(const struct minmax_cache_key *key) 54b8e80941Smrg{ 55b8e80941Smrg return _mesa_hash_data(key, sizeof(*key)); 56b8e80941Smrg} 57b8e80941Smrg 58b8e80941Smrg 59b8e80941Smrgstatic bool 60b8e80941Smrgvbo_minmax_cache_key_equal(const struct minmax_cache_key *a, 61b8e80941Smrg const struct minmax_cache_key *b) 62b8e80941Smrg{ 63b8e80941Smrg return (a->offset == b->offset) && (a->count == b->count) && 64b8e80941Smrg (a->index_size == b->index_size); 65b8e80941Smrg} 66b8e80941Smrg 67b8e80941Smrg 68b8e80941Smrgstatic void 69b8e80941Smrgvbo_minmax_cache_delete_entry(struct hash_entry *entry) 70b8e80941Smrg{ 71b8e80941Smrg free(entry->data); 72b8e80941Smrg} 73b8e80941Smrg 74b8e80941Smrg 75b8e80941Smrgstatic GLboolean 76b8e80941Smrgvbo_use_minmax_cache(struct gl_buffer_object *bufferObj) 77b8e80941Smrg{ 78b8e80941Smrg if (bufferObj->UsageHistory & (USAGE_TEXTURE_BUFFER | 79b8e80941Smrg USAGE_ATOMIC_COUNTER_BUFFER | 80b8e80941Smrg USAGE_SHADER_STORAGE_BUFFER | 81b8e80941Smrg USAGE_TRANSFORM_FEEDBACK_BUFFER | 82b8e80941Smrg USAGE_PIXEL_PACK_BUFFER | 83b8e80941Smrg USAGE_DISABLE_MINMAX_CACHE)) 84b8e80941Smrg return GL_FALSE; 85b8e80941Smrg 86b8e80941Smrg if ((bufferObj->Mappings[MAP_USER].AccessFlags & 87b8e80941Smrg (GL_MAP_PERSISTENT_BIT | GL_MAP_WRITE_BIT)) == 88b8e80941Smrg (GL_MAP_PERSISTENT_BIT | GL_MAP_WRITE_BIT)) 89b8e80941Smrg return GL_FALSE; 90b8e80941Smrg 91b8e80941Smrg return GL_TRUE; 92b8e80941Smrg} 93b8e80941Smrg 94b8e80941Smrg 95b8e80941Smrgvoid 96b8e80941Smrgvbo_delete_minmax_cache(struct gl_buffer_object *bufferObj) 97b8e80941Smrg{ 98b8e80941Smrg _mesa_hash_table_destroy(bufferObj->MinMaxCache, vbo_minmax_cache_delete_entry); 99b8e80941Smrg bufferObj->MinMaxCache = NULL; 100b8e80941Smrg} 101b8e80941Smrg 102b8e80941Smrg 103b8e80941Smrgstatic GLboolean 104b8e80941Smrgvbo_get_minmax_cached(struct gl_buffer_object *bufferObj, 105b8e80941Smrg unsigned index_size, GLintptr offset, GLuint count, 106b8e80941Smrg GLuint *min_index, GLuint *max_index) 107b8e80941Smrg{ 108b8e80941Smrg GLboolean found = GL_FALSE; 109b8e80941Smrg struct minmax_cache_key key; 110b8e80941Smrg uint32_t hash; 111b8e80941Smrg struct hash_entry *result; 112b8e80941Smrg 113b8e80941Smrg if (!bufferObj->MinMaxCache) 114b8e80941Smrg return GL_FALSE; 115b8e80941Smrg if (!vbo_use_minmax_cache(bufferObj)) 116b8e80941Smrg return GL_FALSE; 117b8e80941Smrg 118b8e80941Smrg simple_mtx_lock(&bufferObj->MinMaxCacheMutex); 119b8e80941Smrg 120b8e80941Smrg if (bufferObj->MinMaxCacheDirty) { 121b8e80941Smrg /* Disable the cache permanently for this BO if the number of hits 122b8e80941Smrg * is asymptotically less than the number of misses. This happens when 123b8e80941Smrg * applications use the BO for streaming. 124b8e80941Smrg * 125b8e80941Smrg * However, some initial optimism allows applications that interleave 126b8e80941Smrg * draw calls with glBufferSubData during warmup. 127b8e80941Smrg */ 128b8e80941Smrg unsigned optimism = bufferObj->Size; 129b8e80941Smrg if (bufferObj->MinMaxCacheMissIndices > optimism && 130b8e80941Smrg bufferObj->MinMaxCacheHitIndices < bufferObj->MinMaxCacheMissIndices - optimism) { 131b8e80941Smrg bufferObj->UsageHistory |= USAGE_DISABLE_MINMAX_CACHE; 132b8e80941Smrg vbo_delete_minmax_cache(bufferObj); 133b8e80941Smrg goto out_disable; 134b8e80941Smrg } 135b8e80941Smrg 136b8e80941Smrg _mesa_hash_table_clear(bufferObj->MinMaxCache, vbo_minmax_cache_delete_entry); 137b8e80941Smrg bufferObj->MinMaxCacheDirty = false; 138b8e80941Smrg goto out_invalidate; 139b8e80941Smrg } 140b8e80941Smrg 141b8e80941Smrg key.index_size = index_size; 142b8e80941Smrg key.offset = offset; 143b8e80941Smrg key.count = count; 144b8e80941Smrg hash = vbo_minmax_cache_hash(&key); 145b8e80941Smrg result = _mesa_hash_table_search_pre_hashed(bufferObj->MinMaxCache, hash, &key); 146b8e80941Smrg if (result) { 147b8e80941Smrg struct minmax_cache_entry *entry = result->data; 148b8e80941Smrg *min_index = entry->min; 149b8e80941Smrg *max_index = entry->max; 150b8e80941Smrg found = GL_TRUE; 151b8e80941Smrg } 152b8e80941Smrg 153b8e80941Smrgout_invalidate: 154b8e80941Smrg if (found) { 155b8e80941Smrg /* The hit counter saturates so that we don't accidently disable the 156b8e80941Smrg * cache in a long-running program. 157b8e80941Smrg */ 158b8e80941Smrg unsigned new_hit_count = bufferObj->MinMaxCacheHitIndices + count; 159b8e80941Smrg 160b8e80941Smrg if (new_hit_count >= bufferObj->MinMaxCacheHitIndices) 161b8e80941Smrg bufferObj->MinMaxCacheHitIndices = new_hit_count; 162b8e80941Smrg else 163b8e80941Smrg bufferObj->MinMaxCacheHitIndices = ~(unsigned)0; 164b8e80941Smrg } else { 165b8e80941Smrg bufferObj->MinMaxCacheMissIndices += count; 166b8e80941Smrg } 167b8e80941Smrg 168b8e80941Smrgout_disable: 169b8e80941Smrg simple_mtx_unlock(&bufferObj->MinMaxCacheMutex); 170b8e80941Smrg return found; 171b8e80941Smrg} 172b8e80941Smrg 173b8e80941Smrg 174b8e80941Smrgstatic void 175b8e80941Smrgvbo_minmax_cache_store(struct gl_context *ctx, 176b8e80941Smrg struct gl_buffer_object *bufferObj, 177b8e80941Smrg unsigned index_size, GLintptr offset, GLuint count, 178b8e80941Smrg GLuint min, GLuint max) 179b8e80941Smrg{ 180b8e80941Smrg struct minmax_cache_entry *entry; 181b8e80941Smrg struct hash_entry *table_entry; 182b8e80941Smrg uint32_t hash; 183b8e80941Smrg 184b8e80941Smrg if (!vbo_use_minmax_cache(bufferObj)) 185b8e80941Smrg return; 186b8e80941Smrg 187b8e80941Smrg simple_mtx_lock(&bufferObj->MinMaxCacheMutex); 188b8e80941Smrg 189b8e80941Smrg if (!bufferObj->MinMaxCache) { 190b8e80941Smrg bufferObj->MinMaxCache = 191b8e80941Smrg _mesa_hash_table_create(NULL, 192b8e80941Smrg (uint32_t (*)(const void *))vbo_minmax_cache_hash, 193b8e80941Smrg (bool (*)(const void *, const void *))vbo_minmax_cache_key_equal); 194b8e80941Smrg if (!bufferObj->MinMaxCache) 195b8e80941Smrg goto out; 196b8e80941Smrg } 197b8e80941Smrg 198b8e80941Smrg entry = MALLOC_STRUCT(minmax_cache_entry); 199b8e80941Smrg if (!entry) 200b8e80941Smrg goto out; 201b8e80941Smrg 202b8e80941Smrg entry->key.offset = offset; 203b8e80941Smrg entry->key.count = count; 204b8e80941Smrg entry->key.index_size = index_size; 205b8e80941Smrg entry->min = min; 206b8e80941Smrg entry->max = max; 207b8e80941Smrg hash = vbo_minmax_cache_hash(&entry->key); 208b8e80941Smrg 209b8e80941Smrg table_entry = _mesa_hash_table_search_pre_hashed(bufferObj->MinMaxCache, 210b8e80941Smrg hash, &entry->key); 211b8e80941Smrg if (table_entry) { 212b8e80941Smrg /* It seems like this could happen when two contexts are rendering using 213b8e80941Smrg * the same buffer object from multiple threads. 214b8e80941Smrg */ 215b8e80941Smrg _mesa_debug(ctx, "duplicate entry in minmax cache\n"); 216b8e80941Smrg free(entry); 217b8e80941Smrg goto out; 218b8e80941Smrg } 219b8e80941Smrg 220b8e80941Smrg table_entry = _mesa_hash_table_insert_pre_hashed(bufferObj->MinMaxCache, 221b8e80941Smrg hash, &entry->key, entry); 222b8e80941Smrg if (!table_entry) 223b8e80941Smrg free(entry); 224b8e80941Smrg 225b8e80941Smrgout: 226b8e80941Smrg simple_mtx_unlock(&bufferObj->MinMaxCacheMutex); 227b8e80941Smrg} 228b8e80941Smrg 229b8e80941Smrg 230b8e80941Smrg/** 231b8e80941Smrg * Compute min and max elements by scanning the index buffer for 232b8e80941Smrg * glDraw[Range]Elements() calls. 233b8e80941Smrg * If primitive restart is enabled, we need to ignore restart 234b8e80941Smrg * indexes when computing min/max. 235b8e80941Smrg */ 236b8e80941Smrgstatic void 237b8e80941Smrgvbo_get_minmax_index(struct gl_context *ctx, 238b8e80941Smrg const struct _mesa_prim *prim, 239b8e80941Smrg const struct _mesa_index_buffer *ib, 240b8e80941Smrg GLuint *min_index, GLuint *max_index, 241b8e80941Smrg const GLuint count) 242b8e80941Smrg{ 243b8e80941Smrg const GLboolean restart = ctx->Array._PrimitiveRestart; 244b8e80941Smrg const GLuint restartIndex = 245b8e80941Smrg _mesa_primitive_restart_index(ctx, ib->index_size); 246b8e80941Smrg const char *indices; 247b8e80941Smrg GLuint i; 248b8e80941Smrg GLintptr offset = 0; 249b8e80941Smrg 250b8e80941Smrg indices = (char *) ib->ptr + prim->start * ib->index_size; 251b8e80941Smrg if (_mesa_is_bufferobj(ib->obj)) { 252b8e80941Smrg GLsizeiptr size = MIN2(count * ib->index_size, ib->obj->Size); 253b8e80941Smrg 254b8e80941Smrg if (vbo_get_minmax_cached(ib->obj, ib->index_size, (GLintptr) indices, 255b8e80941Smrg count, min_index, max_index)) 256b8e80941Smrg return; 257b8e80941Smrg 258b8e80941Smrg offset = (GLintptr) indices; 259b8e80941Smrg indices = ctx->Driver.MapBufferRange(ctx, offset, size, 260b8e80941Smrg GL_MAP_READ_BIT, ib->obj, 261b8e80941Smrg MAP_INTERNAL); 262b8e80941Smrg } 263b8e80941Smrg 264b8e80941Smrg switch (ib->index_size) { 265b8e80941Smrg case 4: { 266b8e80941Smrg const GLuint *ui_indices = (const GLuint *)indices; 267b8e80941Smrg GLuint max_ui = 0; 268b8e80941Smrg GLuint min_ui = ~0U; 269b8e80941Smrg if (restart) { 270b8e80941Smrg for (i = 0; i < count; i++) { 271b8e80941Smrg if (ui_indices[i] != restartIndex) { 272b8e80941Smrg if (ui_indices[i] > max_ui) max_ui = ui_indices[i]; 273b8e80941Smrg if (ui_indices[i] < min_ui) min_ui = ui_indices[i]; 274b8e80941Smrg } 275b8e80941Smrg } 276b8e80941Smrg } 277b8e80941Smrg else { 278b8e80941Smrg#if defined(USE_SSE41) 279b8e80941Smrg if (cpu_has_sse4_1) { 280b8e80941Smrg _mesa_uint_array_min_max(ui_indices, &min_ui, &max_ui, count); 281b8e80941Smrg } 282b8e80941Smrg else 283b8e80941Smrg#endif 284b8e80941Smrg for (i = 0; i < count; i++) { 285b8e80941Smrg if (ui_indices[i] > max_ui) max_ui = ui_indices[i]; 286b8e80941Smrg if (ui_indices[i] < min_ui) min_ui = ui_indices[i]; 287b8e80941Smrg } 288b8e80941Smrg } 289b8e80941Smrg *min_index = min_ui; 290b8e80941Smrg *max_index = max_ui; 291b8e80941Smrg break; 292b8e80941Smrg } 293b8e80941Smrg case 2: { 294b8e80941Smrg const GLushort *us_indices = (const GLushort *)indices; 295b8e80941Smrg GLuint max_us = 0; 296b8e80941Smrg GLuint min_us = ~0U; 297b8e80941Smrg if (restart) { 298b8e80941Smrg for (i = 0; i < count; i++) { 299b8e80941Smrg if (us_indices[i] != restartIndex) { 300b8e80941Smrg if (us_indices[i] > max_us) max_us = us_indices[i]; 301b8e80941Smrg if (us_indices[i] < min_us) min_us = us_indices[i]; 302b8e80941Smrg } 303b8e80941Smrg } 304b8e80941Smrg } 305b8e80941Smrg else { 306b8e80941Smrg for (i = 0; i < count; i++) { 307b8e80941Smrg if (us_indices[i] > max_us) max_us = us_indices[i]; 308b8e80941Smrg if (us_indices[i] < min_us) min_us = us_indices[i]; 309b8e80941Smrg } 310b8e80941Smrg } 311b8e80941Smrg *min_index = min_us; 312b8e80941Smrg *max_index = max_us; 313b8e80941Smrg break; 314b8e80941Smrg } 315b8e80941Smrg case 1: { 316b8e80941Smrg const GLubyte *ub_indices = (const GLubyte *)indices; 317b8e80941Smrg GLuint max_ub = 0; 318b8e80941Smrg GLuint min_ub = ~0U; 319b8e80941Smrg if (restart) { 320b8e80941Smrg for (i = 0; i < count; i++) { 321b8e80941Smrg if (ub_indices[i] != restartIndex) { 322b8e80941Smrg if (ub_indices[i] > max_ub) max_ub = ub_indices[i]; 323b8e80941Smrg if (ub_indices[i] < min_ub) min_ub = ub_indices[i]; 324b8e80941Smrg } 325b8e80941Smrg } 326b8e80941Smrg } 327b8e80941Smrg else { 328b8e80941Smrg for (i = 0; i < count; i++) { 329b8e80941Smrg if (ub_indices[i] > max_ub) max_ub = ub_indices[i]; 330b8e80941Smrg if (ub_indices[i] < min_ub) min_ub = ub_indices[i]; 331b8e80941Smrg } 332b8e80941Smrg } 333b8e80941Smrg *min_index = min_ub; 334b8e80941Smrg *max_index = max_ub; 335b8e80941Smrg break; 336b8e80941Smrg } 337b8e80941Smrg default: 338b8e80941Smrg unreachable("not reached"); 339b8e80941Smrg } 340b8e80941Smrg 341b8e80941Smrg if (_mesa_is_bufferobj(ib->obj)) { 342b8e80941Smrg vbo_minmax_cache_store(ctx, ib->obj, ib->index_size, offset, 343b8e80941Smrg count, *min_index, *max_index); 344b8e80941Smrg ctx->Driver.UnmapBuffer(ctx, ib->obj, MAP_INTERNAL); 345b8e80941Smrg } 346b8e80941Smrg} 347b8e80941Smrg 348b8e80941Smrg/** 349b8e80941Smrg * Compute min and max elements for nr_prims 350b8e80941Smrg */ 351b8e80941Smrgvoid 352b8e80941Smrgvbo_get_minmax_indices(struct gl_context *ctx, 353b8e80941Smrg const struct _mesa_prim *prims, 354b8e80941Smrg const struct _mesa_index_buffer *ib, 355b8e80941Smrg GLuint *min_index, 356b8e80941Smrg GLuint *max_index, 357b8e80941Smrg GLuint nr_prims) 358b8e80941Smrg{ 359b8e80941Smrg GLuint tmp_min, tmp_max; 360b8e80941Smrg GLuint i; 361b8e80941Smrg GLuint count; 362b8e80941Smrg 363b8e80941Smrg *min_index = ~0; 364b8e80941Smrg *max_index = 0; 365b8e80941Smrg 366b8e80941Smrg for (i = 0; i < nr_prims; i++) { 367b8e80941Smrg const struct _mesa_prim *start_prim; 368b8e80941Smrg 369b8e80941Smrg start_prim = &prims[i]; 370b8e80941Smrg count = start_prim->count; 371b8e80941Smrg /* Do combination if possible to reduce map/unmap count */ 372b8e80941Smrg while ((i + 1 < nr_prims) && 373b8e80941Smrg (prims[i].start + prims[i].count == prims[i+1].start)) { 374b8e80941Smrg count += prims[i+1].count; 375b8e80941Smrg i++; 376b8e80941Smrg } 377b8e80941Smrg vbo_get_minmax_index(ctx, start_prim, ib, &tmp_min, &tmp_max, count); 378b8e80941Smrg *min_index = MIN2(*min_index, tmp_min); 379b8e80941Smrg *max_index = MAX2(*max_index, tmp_max); 380b8e80941Smrg } 381b8e80941Smrg} 382