1b8e80941Smrg/*
2b8e80941Smrg * Mesa 3-D graphics library
3b8e80941Smrg *
4b8e80941Smrg * Copyright 2003 VMware, Inc.
5b8e80941Smrg * Copyright 2009 VMware, Inc.
6b8e80941Smrg * All Rights Reserved.
7b8e80941Smrg * Copyright (C) 2016 Advanced Micro Devices, Inc.
8b8e80941Smrg *
9b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a
10b8e80941Smrg * copy of this software and associated documentation files (the "Software"),
11b8e80941Smrg * to deal in the Software without restriction, including without limitation
12b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
13b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the
14b8e80941Smrg * Software is furnished to do so, subject to the following conditions:
15b8e80941Smrg *
16b8e80941Smrg * The above copyright notice and this permission notice (including the next
17b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the
18b8e80941Smrg * Software.
19b8e80941Smrg *
20b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
23b8e80941Smrg * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
24b8e80941Smrg * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
25b8e80941Smrg * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
26b8e80941Smrg * USE OR OTHER DEALINGS IN THE SOFTWARE.
27b8e80941Smrg */
28b8e80941Smrg
29b8e80941Smrg#include "main/glheader.h"
30b8e80941Smrg#include "main/context.h"
31b8e80941Smrg#include "main/varray.h"
32b8e80941Smrg#include "main/macros.h"
33b8e80941Smrg#include "main/sse_minmax.h"
34b8e80941Smrg#include "x86/common_x86_asm.h"
35b8e80941Smrg#include "util/hash_table.h"
36b8e80941Smrg
37b8e80941Smrg
38b8e80941Smrgstruct minmax_cache_key {
39b8e80941Smrg   GLintptr offset;
40b8e80941Smrg   GLuint count;
41b8e80941Smrg   unsigned index_size;
42b8e80941Smrg};
43b8e80941Smrg
44b8e80941Smrg
45b8e80941Smrgstruct minmax_cache_entry {
46b8e80941Smrg   struct minmax_cache_key key;
47b8e80941Smrg   GLuint min;
48b8e80941Smrg   GLuint max;
49b8e80941Smrg};
50b8e80941Smrg
51b8e80941Smrg
52b8e80941Smrgstatic uint32_t
53b8e80941Smrgvbo_minmax_cache_hash(const struct minmax_cache_key *key)
54b8e80941Smrg{
55b8e80941Smrg   return _mesa_hash_data(key, sizeof(*key));
56b8e80941Smrg}
57b8e80941Smrg
58b8e80941Smrg
59b8e80941Smrgstatic bool
60b8e80941Smrgvbo_minmax_cache_key_equal(const struct minmax_cache_key *a,
61b8e80941Smrg                           const struct minmax_cache_key *b)
62b8e80941Smrg{
63b8e80941Smrg   return (a->offset == b->offset) && (a->count == b->count) &&
64b8e80941Smrg          (a->index_size == b->index_size);
65b8e80941Smrg}
66b8e80941Smrg
67b8e80941Smrg
68b8e80941Smrgstatic void
69b8e80941Smrgvbo_minmax_cache_delete_entry(struct hash_entry *entry)
70b8e80941Smrg{
71b8e80941Smrg   free(entry->data);
72b8e80941Smrg}
73b8e80941Smrg
74b8e80941Smrg
75b8e80941Smrgstatic GLboolean
76b8e80941Smrgvbo_use_minmax_cache(struct gl_buffer_object *bufferObj)
77b8e80941Smrg{
78b8e80941Smrg   if (bufferObj->UsageHistory & (USAGE_TEXTURE_BUFFER |
79b8e80941Smrg                                  USAGE_ATOMIC_COUNTER_BUFFER |
80b8e80941Smrg                                  USAGE_SHADER_STORAGE_BUFFER |
81b8e80941Smrg                                  USAGE_TRANSFORM_FEEDBACK_BUFFER |
82b8e80941Smrg                                  USAGE_PIXEL_PACK_BUFFER |
83b8e80941Smrg                                  USAGE_DISABLE_MINMAX_CACHE))
84b8e80941Smrg      return GL_FALSE;
85b8e80941Smrg
86b8e80941Smrg   if ((bufferObj->Mappings[MAP_USER].AccessFlags &
87b8e80941Smrg        (GL_MAP_PERSISTENT_BIT | GL_MAP_WRITE_BIT)) ==
88b8e80941Smrg       (GL_MAP_PERSISTENT_BIT | GL_MAP_WRITE_BIT))
89b8e80941Smrg      return GL_FALSE;
90b8e80941Smrg
91b8e80941Smrg   return GL_TRUE;
92b8e80941Smrg}
93b8e80941Smrg
94b8e80941Smrg
95b8e80941Smrgvoid
96b8e80941Smrgvbo_delete_minmax_cache(struct gl_buffer_object *bufferObj)
97b8e80941Smrg{
98b8e80941Smrg   _mesa_hash_table_destroy(bufferObj->MinMaxCache, vbo_minmax_cache_delete_entry);
99b8e80941Smrg   bufferObj->MinMaxCache = NULL;
100b8e80941Smrg}
101b8e80941Smrg
102b8e80941Smrg
103b8e80941Smrgstatic GLboolean
104b8e80941Smrgvbo_get_minmax_cached(struct gl_buffer_object *bufferObj,
105b8e80941Smrg                      unsigned index_size, GLintptr offset, GLuint count,
106b8e80941Smrg                      GLuint *min_index, GLuint *max_index)
107b8e80941Smrg{
108b8e80941Smrg   GLboolean found = GL_FALSE;
109b8e80941Smrg   struct minmax_cache_key key;
110b8e80941Smrg   uint32_t hash;
111b8e80941Smrg   struct hash_entry *result;
112b8e80941Smrg
113b8e80941Smrg   if (!bufferObj->MinMaxCache)
114b8e80941Smrg      return GL_FALSE;
115b8e80941Smrg   if (!vbo_use_minmax_cache(bufferObj))
116b8e80941Smrg      return GL_FALSE;
117b8e80941Smrg
118b8e80941Smrg   simple_mtx_lock(&bufferObj->MinMaxCacheMutex);
119b8e80941Smrg
120b8e80941Smrg   if (bufferObj->MinMaxCacheDirty) {
121b8e80941Smrg      /* Disable the cache permanently for this BO if the number of hits
122b8e80941Smrg       * is asymptotically less than the number of misses. This happens when
123b8e80941Smrg       * applications use the BO for streaming.
124b8e80941Smrg       *
125b8e80941Smrg       * However, some initial optimism allows applications that interleave
126b8e80941Smrg       * draw calls with glBufferSubData during warmup.
127b8e80941Smrg       */
128b8e80941Smrg      unsigned optimism = bufferObj->Size;
129b8e80941Smrg      if (bufferObj->MinMaxCacheMissIndices > optimism &&
130b8e80941Smrg          bufferObj->MinMaxCacheHitIndices < bufferObj->MinMaxCacheMissIndices - optimism) {
131b8e80941Smrg         bufferObj->UsageHistory |= USAGE_DISABLE_MINMAX_CACHE;
132b8e80941Smrg         vbo_delete_minmax_cache(bufferObj);
133b8e80941Smrg         goto out_disable;
134b8e80941Smrg      }
135b8e80941Smrg
136b8e80941Smrg      _mesa_hash_table_clear(bufferObj->MinMaxCache, vbo_minmax_cache_delete_entry);
137b8e80941Smrg      bufferObj->MinMaxCacheDirty = false;
138b8e80941Smrg      goto out_invalidate;
139b8e80941Smrg   }
140b8e80941Smrg
141b8e80941Smrg   key.index_size = index_size;
142b8e80941Smrg   key.offset = offset;
143b8e80941Smrg   key.count = count;
144b8e80941Smrg   hash = vbo_minmax_cache_hash(&key);
145b8e80941Smrg   result = _mesa_hash_table_search_pre_hashed(bufferObj->MinMaxCache, hash, &key);
146b8e80941Smrg   if (result) {
147b8e80941Smrg      struct minmax_cache_entry *entry = result->data;
148b8e80941Smrg      *min_index = entry->min;
149b8e80941Smrg      *max_index = entry->max;
150b8e80941Smrg      found = GL_TRUE;
151b8e80941Smrg   }
152b8e80941Smrg
153b8e80941Smrgout_invalidate:
154b8e80941Smrg   if (found) {
155b8e80941Smrg      /* The hit counter saturates so that we don't accidently disable the
156b8e80941Smrg       * cache in a long-running program.
157b8e80941Smrg       */
158b8e80941Smrg      unsigned new_hit_count = bufferObj->MinMaxCacheHitIndices + count;
159b8e80941Smrg
160b8e80941Smrg      if (new_hit_count >= bufferObj->MinMaxCacheHitIndices)
161b8e80941Smrg         bufferObj->MinMaxCacheHitIndices = new_hit_count;
162b8e80941Smrg      else
163b8e80941Smrg         bufferObj->MinMaxCacheHitIndices = ~(unsigned)0;
164b8e80941Smrg   } else {
165b8e80941Smrg      bufferObj->MinMaxCacheMissIndices += count;
166b8e80941Smrg   }
167b8e80941Smrg
168b8e80941Smrgout_disable:
169b8e80941Smrg   simple_mtx_unlock(&bufferObj->MinMaxCacheMutex);
170b8e80941Smrg   return found;
171b8e80941Smrg}
172b8e80941Smrg
173b8e80941Smrg
174b8e80941Smrgstatic void
175b8e80941Smrgvbo_minmax_cache_store(struct gl_context *ctx,
176b8e80941Smrg                       struct gl_buffer_object *bufferObj,
177b8e80941Smrg                       unsigned index_size, GLintptr offset, GLuint count,
178b8e80941Smrg                       GLuint min, GLuint max)
179b8e80941Smrg{
180b8e80941Smrg   struct minmax_cache_entry *entry;
181b8e80941Smrg   struct hash_entry *table_entry;
182b8e80941Smrg   uint32_t hash;
183b8e80941Smrg
184b8e80941Smrg   if (!vbo_use_minmax_cache(bufferObj))
185b8e80941Smrg      return;
186b8e80941Smrg
187b8e80941Smrg   simple_mtx_lock(&bufferObj->MinMaxCacheMutex);
188b8e80941Smrg
189b8e80941Smrg   if (!bufferObj->MinMaxCache) {
190b8e80941Smrg      bufferObj->MinMaxCache =
191b8e80941Smrg         _mesa_hash_table_create(NULL,
192b8e80941Smrg                                 (uint32_t (*)(const void *))vbo_minmax_cache_hash,
193b8e80941Smrg                                 (bool (*)(const void *, const void *))vbo_minmax_cache_key_equal);
194b8e80941Smrg      if (!bufferObj->MinMaxCache)
195b8e80941Smrg         goto out;
196b8e80941Smrg   }
197b8e80941Smrg
198b8e80941Smrg   entry = MALLOC_STRUCT(minmax_cache_entry);
199b8e80941Smrg   if (!entry)
200b8e80941Smrg      goto out;
201b8e80941Smrg
202b8e80941Smrg   entry->key.offset = offset;
203b8e80941Smrg   entry->key.count = count;
204b8e80941Smrg   entry->key.index_size = index_size;
205b8e80941Smrg   entry->min = min;
206b8e80941Smrg   entry->max = max;
207b8e80941Smrg   hash = vbo_minmax_cache_hash(&entry->key);
208b8e80941Smrg
209b8e80941Smrg   table_entry = _mesa_hash_table_search_pre_hashed(bufferObj->MinMaxCache,
210b8e80941Smrg                                                    hash, &entry->key);
211b8e80941Smrg   if (table_entry) {
212b8e80941Smrg      /* It seems like this could happen when two contexts are rendering using
213b8e80941Smrg       * the same buffer object from multiple threads.
214b8e80941Smrg       */
215b8e80941Smrg      _mesa_debug(ctx, "duplicate entry in minmax cache\n");
216b8e80941Smrg      free(entry);
217b8e80941Smrg      goto out;
218b8e80941Smrg   }
219b8e80941Smrg
220b8e80941Smrg   table_entry = _mesa_hash_table_insert_pre_hashed(bufferObj->MinMaxCache,
221b8e80941Smrg                                                    hash, &entry->key, entry);
222b8e80941Smrg   if (!table_entry)
223b8e80941Smrg      free(entry);
224b8e80941Smrg
225b8e80941Smrgout:
226b8e80941Smrg   simple_mtx_unlock(&bufferObj->MinMaxCacheMutex);
227b8e80941Smrg}
228b8e80941Smrg
229b8e80941Smrg
230b8e80941Smrg/**
231b8e80941Smrg * Compute min and max elements by scanning the index buffer for
232b8e80941Smrg * glDraw[Range]Elements() calls.
233b8e80941Smrg * If primitive restart is enabled, we need to ignore restart
234b8e80941Smrg * indexes when computing min/max.
235b8e80941Smrg */
236b8e80941Smrgstatic void
237b8e80941Smrgvbo_get_minmax_index(struct gl_context *ctx,
238b8e80941Smrg                     const struct _mesa_prim *prim,
239b8e80941Smrg                     const struct _mesa_index_buffer *ib,
240b8e80941Smrg                     GLuint *min_index, GLuint *max_index,
241b8e80941Smrg                     const GLuint count)
242b8e80941Smrg{
243b8e80941Smrg   const GLboolean restart = ctx->Array._PrimitiveRestart;
244b8e80941Smrg   const GLuint restartIndex =
245b8e80941Smrg      _mesa_primitive_restart_index(ctx, ib->index_size);
246b8e80941Smrg   const char *indices;
247b8e80941Smrg   GLuint i;
248b8e80941Smrg   GLintptr offset = 0;
249b8e80941Smrg
250b8e80941Smrg   indices = (char *) ib->ptr + prim->start * ib->index_size;
251b8e80941Smrg   if (_mesa_is_bufferobj(ib->obj)) {
252b8e80941Smrg      GLsizeiptr size = MIN2(count * ib->index_size, ib->obj->Size);
253b8e80941Smrg
254b8e80941Smrg      if (vbo_get_minmax_cached(ib->obj, ib->index_size, (GLintptr) indices,
255b8e80941Smrg                                count, min_index, max_index))
256b8e80941Smrg         return;
257b8e80941Smrg
258b8e80941Smrg      offset = (GLintptr) indices;
259b8e80941Smrg      indices = ctx->Driver.MapBufferRange(ctx, offset, size,
260b8e80941Smrg                                           GL_MAP_READ_BIT, ib->obj,
261b8e80941Smrg                                           MAP_INTERNAL);
262b8e80941Smrg   }
263b8e80941Smrg
264b8e80941Smrg   switch (ib->index_size) {
265b8e80941Smrg   case 4: {
266b8e80941Smrg      const GLuint *ui_indices = (const GLuint *)indices;
267b8e80941Smrg      GLuint max_ui = 0;
268b8e80941Smrg      GLuint min_ui = ~0U;
269b8e80941Smrg      if (restart) {
270b8e80941Smrg         for (i = 0; i < count; i++) {
271b8e80941Smrg            if (ui_indices[i] != restartIndex) {
272b8e80941Smrg               if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
273b8e80941Smrg               if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
274b8e80941Smrg            }
275b8e80941Smrg         }
276b8e80941Smrg      }
277b8e80941Smrg      else {
278b8e80941Smrg#if defined(USE_SSE41)
279b8e80941Smrg         if (cpu_has_sse4_1) {
280b8e80941Smrg            _mesa_uint_array_min_max(ui_indices, &min_ui, &max_ui, count);
281b8e80941Smrg         }
282b8e80941Smrg         else
283b8e80941Smrg#endif
284b8e80941Smrg            for (i = 0; i < count; i++) {
285b8e80941Smrg               if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
286b8e80941Smrg               if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
287b8e80941Smrg            }
288b8e80941Smrg      }
289b8e80941Smrg      *min_index = min_ui;
290b8e80941Smrg      *max_index = max_ui;
291b8e80941Smrg      break;
292b8e80941Smrg   }
293b8e80941Smrg   case 2: {
294b8e80941Smrg      const GLushort *us_indices = (const GLushort *)indices;
295b8e80941Smrg      GLuint max_us = 0;
296b8e80941Smrg      GLuint min_us = ~0U;
297b8e80941Smrg      if (restart) {
298b8e80941Smrg         for (i = 0; i < count; i++) {
299b8e80941Smrg            if (us_indices[i] != restartIndex) {
300b8e80941Smrg               if (us_indices[i] > max_us) max_us = us_indices[i];
301b8e80941Smrg               if (us_indices[i] < min_us) min_us = us_indices[i];
302b8e80941Smrg            }
303b8e80941Smrg         }
304b8e80941Smrg      }
305b8e80941Smrg      else {
306b8e80941Smrg         for (i = 0; i < count; i++) {
307b8e80941Smrg            if (us_indices[i] > max_us) max_us = us_indices[i];
308b8e80941Smrg            if (us_indices[i] < min_us) min_us = us_indices[i];
309b8e80941Smrg         }
310b8e80941Smrg      }
311b8e80941Smrg      *min_index = min_us;
312b8e80941Smrg      *max_index = max_us;
313b8e80941Smrg      break;
314b8e80941Smrg   }
315b8e80941Smrg   case 1: {
316b8e80941Smrg      const GLubyte *ub_indices = (const GLubyte *)indices;
317b8e80941Smrg      GLuint max_ub = 0;
318b8e80941Smrg      GLuint min_ub = ~0U;
319b8e80941Smrg      if (restart) {
320b8e80941Smrg         for (i = 0; i < count; i++) {
321b8e80941Smrg            if (ub_indices[i] != restartIndex) {
322b8e80941Smrg               if (ub_indices[i] > max_ub) max_ub = ub_indices[i];
323b8e80941Smrg               if (ub_indices[i] < min_ub) min_ub = ub_indices[i];
324b8e80941Smrg            }
325b8e80941Smrg         }
326b8e80941Smrg      }
327b8e80941Smrg      else {
328b8e80941Smrg         for (i = 0; i < count; i++) {
329b8e80941Smrg            if (ub_indices[i] > max_ub) max_ub = ub_indices[i];
330b8e80941Smrg            if (ub_indices[i] < min_ub) min_ub = ub_indices[i];
331b8e80941Smrg         }
332b8e80941Smrg      }
333b8e80941Smrg      *min_index = min_ub;
334b8e80941Smrg      *max_index = max_ub;
335b8e80941Smrg      break;
336b8e80941Smrg   }
337b8e80941Smrg   default:
338b8e80941Smrg      unreachable("not reached");
339b8e80941Smrg   }
340b8e80941Smrg
341b8e80941Smrg   if (_mesa_is_bufferobj(ib->obj)) {
342b8e80941Smrg      vbo_minmax_cache_store(ctx, ib->obj, ib->index_size, offset,
343b8e80941Smrg                             count, *min_index, *max_index);
344b8e80941Smrg      ctx->Driver.UnmapBuffer(ctx, ib->obj, MAP_INTERNAL);
345b8e80941Smrg   }
346b8e80941Smrg}
347b8e80941Smrg
348b8e80941Smrg/**
349b8e80941Smrg * Compute min and max elements for nr_prims
350b8e80941Smrg */
351b8e80941Smrgvoid
352b8e80941Smrgvbo_get_minmax_indices(struct gl_context *ctx,
353b8e80941Smrg                       const struct _mesa_prim *prims,
354b8e80941Smrg                       const struct _mesa_index_buffer *ib,
355b8e80941Smrg                       GLuint *min_index,
356b8e80941Smrg                       GLuint *max_index,
357b8e80941Smrg                       GLuint nr_prims)
358b8e80941Smrg{
359b8e80941Smrg   GLuint tmp_min, tmp_max;
360b8e80941Smrg   GLuint i;
361b8e80941Smrg   GLuint count;
362b8e80941Smrg
363b8e80941Smrg   *min_index = ~0;
364b8e80941Smrg   *max_index = 0;
365b8e80941Smrg
366b8e80941Smrg   for (i = 0; i < nr_prims; i++) {
367b8e80941Smrg      const struct _mesa_prim *start_prim;
368b8e80941Smrg
369b8e80941Smrg      start_prim = &prims[i];
370b8e80941Smrg      count = start_prim->count;
371b8e80941Smrg      /* Do combination if possible to reduce map/unmap count */
372b8e80941Smrg      while ((i + 1 < nr_prims) &&
373b8e80941Smrg             (prims[i].start + prims[i].count == prims[i+1].start)) {
374b8e80941Smrg         count += prims[i+1].count;
375b8e80941Smrg         i++;
376b8e80941Smrg      }
377b8e80941Smrg      vbo_get_minmax_index(ctx, start_prim, ib, &tmp_min, &tmp_max, count);
378b8e80941Smrg      *min_index = MIN2(*min_index, tmp_min);
379b8e80941Smrg      *max_index = MAX2(*max_index, tmp_max);
380b8e80941Smrg   }
381b8e80941Smrg}
382