vbo_minmax_index.c revision 01e04c3f
1/*
2 * Mesa 3-D graphics library
3 *
4 * Copyright 2003 VMware, Inc.
5 * Copyright 2009 VMware, Inc.
6 * All Rights Reserved.
7 * Copyright (C) 2016 Advanced Micro Devices, Inc.
8 *
9 * Permission is hereby granted, free of charge, to any person obtaining a
10 * copy of this software and associated documentation files (the "Software"),
11 * to deal in the Software without restriction, including without limitation
12 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
13 * and/or sell copies of the Software, and to permit persons to whom the
14 * Software is furnished to do so, subject to the following conditions:
15 *
16 * The above copyright notice and this permission notice (including the next
17 * paragraph) shall be included in all copies or substantial portions of the
18 * Software.
19 *
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
23 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
24 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
25 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
26 * USE OR OTHER DEALINGS IN THE SOFTWARE.
27 */
28
29#include "main/glheader.h"
30#include "main/context.h"
31#include "main/varray.h"
32#include "main/macros.h"
33#include "main/sse_minmax.h"
34#include "x86/common_x86_asm.h"
35#include "util/hash_table.h"
36
37
38struct minmax_cache_key {
39   GLintptr offset;
40   GLuint count;
41   unsigned index_size;
42};
43
44
45struct minmax_cache_entry {
46   struct minmax_cache_key key;
47   GLuint min;
48   GLuint max;
49};
50
51
52static uint32_t
53vbo_minmax_cache_hash(const struct minmax_cache_key *key)
54{
55   return _mesa_hash_data(key, sizeof(*key));
56}
57
58
59static bool
60vbo_minmax_cache_key_equal(const struct minmax_cache_key *a,
61                           const struct minmax_cache_key *b)
62{
63   return (a->offset == b->offset) && (a->count == b->count) &&
64          (a->index_size == b->index_size);
65}
66
67
68static void
69vbo_minmax_cache_delete_entry(struct hash_entry *entry)
70{
71   free(entry->data);
72}
73
74
75static GLboolean
76vbo_use_minmax_cache(struct gl_buffer_object *bufferObj)
77{
78   if (bufferObj->UsageHistory & (USAGE_TEXTURE_BUFFER |
79                                  USAGE_ATOMIC_COUNTER_BUFFER |
80                                  USAGE_SHADER_STORAGE_BUFFER |
81                                  USAGE_TRANSFORM_FEEDBACK_BUFFER |
82                                  USAGE_PIXEL_PACK_BUFFER |
83                                  USAGE_DISABLE_MINMAX_CACHE))
84      return GL_FALSE;
85
86   if ((bufferObj->Mappings[MAP_USER].AccessFlags &
87        (GL_MAP_PERSISTENT_BIT | GL_MAP_WRITE_BIT)) ==
88       (GL_MAP_PERSISTENT_BIT | GL_MAP_WRITE_BIT))
89      return GL_FALSE;
90
91   return GL_TRUE;
92}
93
94
95void
96vbo_delete_minmax_cache(struct gl_buffer_object *bufferObj)
97{
98   _mesa_hash_table_destroy(bufferObj->MinMaxCache, vbo_minmax_cache_delete_entry);
99   bufferObj->MinMaxCache = NULL;
100}
101
102
103static GLboolean
104vbo_get_minmax_cached(struct gl_buffer_object *bufferObj,
105                      unsigned index_size, GLintptr offset, GLuint count,
106                      GLuint *min_index, GLuint *max_index)
107{
108   GLboolean found = GL_FALSE;
109   struct minmax_cache_key key;
110   uint32_t hash;
111   struct hash_entry *result;
112
113   if (!bufferObj->MinMaxCache)
114      return GL_FALSE;
115   if (!vbo_use_minmax_cache(bufferObj))
116      return GL_FALSE;
117
118   simple_mtx_lock(&bufferObj->MinMaxCacheMutex);
119
120   if (bufferObj->MinMaxCacheDirty) {
121      /* Disable the cache permanently for this BO if the number of hits
122       * is asymptotically less than the number of misses. This happens when
123       * applications use the BO for streaming.
124       *
125       * However, some initial optimism allows applications that interleave
126       * draw calls with glBufferSubData during warmup.
127       */
128      unsigned optimism = bufferObj->Size;
129      if (bufferObj->MinMaxCacheMissIndices > optimism &&
130          bufferObj->MinMaxCacheHitIndices < bufferObj->MinMaxCacheMissIndices - optimism) {
131         bufferObj->UsageHistory |= USAGE_DISABLE_MINMAX_CACHE;
132         vbo_delete_minmax_cache(bufferObj);
133         goto out_disable;
134      }
135
136      _mesa_hash_table_clear(bufferObj->MinMaxCache, vbo_minmax_cache_delete_entry);
137      bufferObj->MinMaxCacheDirty = false;
138      goto out_invalidate;
139   }
140
141   key.index_size = index_size;
142   key.offset = offset;
143   key.count = count;
144   hash = vbo_minmax_cache_hash(&key);
145   result = _mesa_hash_table_search_pre_hashed(bufferObj->MinMaxCache, hash, &key);
146   if (result) {
147      struct minmax_cache_entry *entry = result->data;
148      *min_index = entry->min;
149      *max_index = entry->max;
150      found = GL_TRUE;
151   }
152
153out_invalidate:
154   if (found) {
155      /* The hit counter saturates so that we don't accidently disable the
156       * cache in a long-running program.
157       */
158      unsigned new_hit_count = bufferObj->MinMaxCacheHitIndices + count;
159
160      if (new_hit_count >= bufferObj->MinMaxCacheHitIndices)
161         bufferObj->MinMaxCacheHitIndices = new_hit_count;
162      else
163         bufferObj->MinMaxCacheHitIndices = ~(unsigned)0;
164   } else {
165      bufferObj->MinMaxCacheMissIndices += count;
166   }
167
168out_disable:
169   simple_mtx_unlock(&bufferObj->MinMaxCacheMutex);
170   return found;
171}
172
173
174static void
175vbo_minmax_cache_store(struct gl_context *ctx,
176                       struct gl_buffer_object *bufferObj,
177                       unsigned index_size, GLintptr offset, GLuint count,
178                       GLuint min, GLuint max)
179{
180   struct minmax_cache_entry *entry;
181   struct hash_entry *table_entry;
182   uint32_t hash;
183
184   if (!vbo_use_minmax_cache(bufferObj))
185      return;
186
187   simple_mtx_lock(&bufferObj->MinMaxCacheMutex);
188
189   if (!bufferObj->MinMaxCache) {
190      bufferObj->MinMaxCache =
191         _mesa_hash_table_create(NULL,
192                                 (uint32_t (*)(const void *))vbo_minmax_cache_hash,
193                                 (bool (*)(const void *, const void *))vbo_minmax_cache_key_equal);
194      if (!bufferObj->MinMaxCache)
195         goto out;
196   }
197
198   entry = MALLOC_STRUCT(minmax_cache_entry);
199   if (!entry)
200      goto out;
201
202   entry->key.offset = offset;
203   entry->key.count = count;
204   entry->key.index_size = index_size;
205   entry->min = min;
206   entry->max = max;
207   hash = vbo_minmax_cache_hash(&entry->key);
208
209   table_entry = _mesa_hash_table_search_pre_hashed(bufferObj->MinMaxCache,
210                                                    hash, &entry->key);
211   if (table_entry) {
212      /* It seems like this could happen when two contexts are rendering using
213       * the same buffer object from multiple threads.
214       */
215      _mesa_debug(ctx, "duplicate entry in minmax cache\n");
216      free(entry);
217      goto out;
218   }
219
220   table_entry = _mesa_hash_table_insert_pre_hashed(bufferObj->MinMaxCache,
221                                                    hash, &entry->key, entry);
222   if (!table_entry)
223      free(entry);
224
225out:
226   simple_mtx_unlock(&bufferObj->MinMaxCacheMutex);
227}
228
229
230/**
231 * Compute min and max elements by scanning the index buffer for
232 * glDraw[Range]Elements() calls.
233 * If primitive restart is enabled, we need to ignore restart
234 * indexes when computing min/max.
235 */
236static void
237vbo_get_minmax_index(struct gl_context *ctx,
238                     const struct _mesa_prim *prim,
239                     const struct _mesa_index_buffer *ib,
240                     GLuint *min_index, GLuint *max_index,
241                     const GLuint count)
242{
243   const GLboolean restart = ctx->Array._PrimitiveRestart;
244   const GLuint restartIndex =
245      _mesa_primitive_restart_index(ctx, ib->index_size);
246   const char *indices;
247   GLuint i;
248   GLintptr offset = 0;
249
250   indices = (char *) ib->ptr + prim->start * ib->index_size;
251   if (_mesa_is_bufferobj(ib->obj)) {
252      GLsizeiptr size = MIN2(count * ib->index_size, ib->obj->Size);
253
254      if (vbo_get_minmax_cached(ib->obj, ib->index_size, (GLintptr) indices,
255                                count, min_index, max_index))
256         return;
257
258      offset = (GLintptr) indices;
259      indices = ctx->Driver.MapBufferRange(ctx, offset, size,
260                                           GL_MAP_READ_BIT, ib->obj,
261                                           MAP_INTERNAL);
262   }
263
264   switch (ib->index_size) {
265   case 4: {
266      const GLuint *ui_indices = (const GLuint *)indices;
267      GLuint max_ui = 0;
268      GLuint min_ui = ~0U;
269      if (restart) {
270         for (i = 0; i < count; i++) {
271            if (ui_indices[i] != restartIndex) {
272               if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
273               if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
274            }
275         }
276      }
277      else {
278#if defined(USE_SSE41)
279         if (cpu_has_sse4_1) {
280            _mesa_uint_array_min_max(ui_indices, &min_ui, &max_ui, count);
281         }
282         else
283#endif
284            for (i = 0; i < count; i++) {
285               if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
286               if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
287            }
288      }
289      *min_index = min_ui;
290      *max_index = max_ui;
291      break;
292   }
293   case 2: {
294      const GLushort *us_indices = (const GLushort *)indices;
295      GLuint max_us = 0;
296      GLuint min_us = ~0U;
297      if (restart) {
298         for (i = 0; i < count; i++) {
299            if (us_indices[i] != restartIndex) {
300               if (us_indices[i] > max_us) max_us = us_indices[i];
301               if (us_indices[i] < min_us) min_us = us_indices[i];
302            }
303         }
304      }
305      else {
306         for (i = 0; i < count; i++) {
307            if (us_indices[i] > max_us) max_us = us_indices[i];
308            if (us_indices[i] < min_us) min_us = us_indices[i];
309         }
310      }
311      *min_index = min_us;
312      *max_index = max_us;
313      break;
314   }
315   case 1: {
316      const GLubyte *ub_indices = (const GLubyte *)indices;
317      GLuint max_ub = 0;
318      GLuint min_ub = ~0U;
319      if (restart) {
320         for (i = 0; i < count; i++) {
321            if (ub_indices[i] != restartIndex) {
322               if (ub_indices[i] > max_ub) max_ub = ub_indices[i];
323               if (ub_indices[i] < min_ub) min_ub = ub_indices[i];
324            }
325         }
326      }
327      else {
328         for (i = 0; i < count; i++) {
329            if (ub_indices[i] > max_ub) max_ub = ub_indices[i];
330            if (ub_indices[i] < min_ub) min_ub = ub_indices[i];
331         }
332      }
333      *min_index = min_ub;
334      *max_index = max_ub;
335      break;
336   }
337   default:
338      unreachable("not reached");
339   }
340
341   if (_mesa_is_bufferobj(ib->obj)) {
342      vbo_minmax_cache_store(ctx, ib->obj, ib->index_size, offset,
343                             count, *min_index, *max_index);
344      ctx->Driver.UnmapBuffer(ctx, ib->obj, MAP_INTERNAL);
345   }
346}
347
348/**
349 * Compute min and max elements for nr_prims
350 */
351void
352vbo_get_minmax_indices(struct gl_context *ctx,
353                       const struct _mesa_prim *prims,
354                       const struct _mesa_index_buffer *ib,
355                       GLuint *min_index,
356                       GLuint *max_index,
357                       GLuint nr_prims)
358{
359   GLuint tmp_min, tmp_max;
360   GLuint i;
361   GLuint count;
362
363   *min_index = ~0;
364   *max_index = 0;
365
366   for (i = 0; i < nr_prims; i++) {
367      const struct _mesa_prim *start_prim;
368
369      start_prim = &prims[i];
370      count = start_prim->count;
371      /* Do combination if possible to reduce map/unmap count */
372      while ((i + 1 < nr_prims) &&
373             (prims[i].start + prims[i].count == prims[i+1].start)) {
374         count += prims[i+1].count;
375         i++;
376      }
377      vbo_get_minmax_index(ctx, start_prim, ib, &tmp_min, &tmp_max, count);
378      *min_index = MIN2(*min_index, tmp_min);
379      *max_index = MAX2(*max_index, tmp_max);
380   }
381}
382