vbo_minmax_index.c revision 7ec681f3
1/*
2 * Mesa 3-D graphics library
3 *
4 * Copyright 2003 VMware, Inc.
5 * Copyright 2009 VMware, Inc.
6 * All Rights Reserved.
7 * Copyright (C) 2016 Advanced Micro Devices, Inc.
8 *
9 * Permission is hereby granted, free of charge, to any person obtaining a
10 * copy of this software and associated documentation files (the "Software"),
11 * to deal in the Software without restriction, including without limitation
12 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
13 * and/or sell copies of the Software, and to permit persons to whom the
14 * Software is furnished to do so, subject to the following conditions:
15 *
16 * The above copyright notice and this permission notice (including the next
17 * paragraph) shall be included in all copies or substantial portions of the
18 * Software.
19 *
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
23 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
24 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
25 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
26 * USE OR OTHER DEALINGS IN THE SOFTWARE.
27 */
28
29#include "main/glheader.h"
30#include "main/context.h"
31#include "main/varray.h"
32#include "main/macros.h"
33#include "main/sse_minmax.h"
34#include "x86/common_x86_asm.h"
35#include "util/hash_table.h"
36#include "util/u_memory.h"
37#include "pipe/p_state.h"
38
39
40struct minmax_cache_key {
41   GLintptr offset;
42   GLuint count;
43   unsigned index_size;
44};
45
46
47struct minmax_cache_entry {
48   struct minmax_cache_key key;
49   GLuint min;
50   GLuint max;
51};
52
53
54static uint32_t
55vbo_minmax_cache_hash(const struct minmax_cache_key *key)
56{
57   return _mesa_hash_data(key, sizeof(*key));
58}
59
60
61static bool
62vbo_minmax_cache_key_equal(const struct minmax_cache_key *a,
63                           const struct minmax_cache_key *b)
64{
65   return (a->offset == b->offset) && (a->count == b->count) &&
66          (a->index_size == b->index_size);
67}
68
69
70static void
71vbo_minmax_cache_delete_entry(struct hash_entry *entry)
72{
73   free(entry->data);
74}
75
76
77static GLboolean
78vbo_use_minmax_cache(struct gl_buffer_object *bufferObj)
79{
80   if (bufferObj->UsageHistory & (USAGE_TEXTURE_BUFFER |
81                                  USAGE_ATOMIC_COUNTER_BUFFER |
82                                  USAGE_SHADER_STORAGE_BUFFER |
83                                  USAGE_TRANSFORM_FEEDBACK_BUFFER |
84                                  USAGE_PIXEL_PACK_BUFFER |
85                                  USAGE_DISABLE_MINMAX_CACHE))
86      return GL_FALSE;
87
88   if ((bufferObj->Mappings[MAP_USER].AccessFlags &
89        (GL_MAP_PERSISTENT_BIT | GL_MAP_WRITE_BIT)) ==
90       (GL_MAP_PERSISTENT_BIT | GL_MAP_WRITE_BIT))
91      return GL_FALSE;
92
93   return GL_TRUE;
94}
95
96
97void
98vbo_delete_minmax_cache(struct gl_buffer_object *bufferObj)
99{
100   _mesa_hash_table_destroy(bufferObj->MinMaxCache, vbo_minmax_cache_delete_entry);
101   bufferObj->MinMaxCache = NULL;
102}
103
104
105static GLboolean
106vbo_get_minmax_cached(struct gl_buffer_object *bufferObj,
107                      unsigned index_size, GLintptr offset, GLuint count,
108                      GLuint *min_index, GLuint *max_index)
109{
110   GLboolean found = GL_FALSE;
111   struct minmax_cache_key key;
112   uint32_t hash;
113   struct hash_entry *result;
114
115   if (!bufferObj->MinMaxCache)
116      return GL_FALSE;
117   if (!vbo_use_minmax_cache(bufferObj))
118      return GL_FALSE;
119
120   simple_mtx_lock(&bufferObj->MinMaxCacheMutex);
121
122   if (bufferObj->MinMaxCacheDirty) {
123      /* Disable the cache permanently for this BO if the number of hits
124       * is asymptotically less than the number of misses. This happens when
125       * applications use the BO for streaming.
126       *
127       * However, some initial optimism allows applications that interleave
128       * draw calls with glBufferSubData during warmup.
129       */
130      unsigned optimism = bufferObj->Size;
131      if (bufferObj->MinMaxCacheMissIndices > optimism &&
132          bufferObj->MinMaxCacheHitIndices < bufferObj->MinMaxCacheMissIndices - optimism) {
133         bufferObj->UsageHistory |= USAGE_DISABLE_MINMAX_CACHE;
134         vbo_delete_minmax_cache(bufferObj);
135         goto out_disable;
136      }
137
138      _mesa_hash_table_clear(bufferObj->MinMaxCache, vbo_minmax_cache_delete_entry);
139      bufferObj->MinMaxCacheDirty = false;
140      goto out_invalidate;
141   }
142
143   key.index_size = index_size;
144   key.offset = offset;
145   key.count = count;
146   hash = vbo_minmax_cache_hash(&key);
147   result = _mesa_hash_table_search_pre_hashed(bufferObj->MinMaxCache, hash, &key);
148   if (result) {
149      struct minmax_cache_entry *entry = result->data;
150      *min_index = entry->min;
151      *max_index = entry->max;
152      found = GL_TRUE;
153   }
154
155out_invalidate:
156   if (found) {
157      /* The hit counter saturates so that we don't accidently disable the
158       * cache in a long-running program.
159       */
160      unsigned new_hit_count = bufferObj->MinMaxCacheHitIndices + count;
161
162      if (new_hit_count >= bufferObj->MinMaxCacheHitIndices)
163         bufferObj->MinMaxCacheHitIndices = new_hit_count;
164      else
165         bufferObj->MinMaxCacheHitIndices = ~(unsigned)0;
166   } else {
167      bufferObj->MinMaxCacheMissIndices += count;
168   }
169
170out_disable:
171   simple_mtx_unlock(&bufferObj->MinMaxCacheMutex);
172   return found;
173}
174
175
176static void
177vbo_minmax_cache_store(struct gl_context *ctx,
178                       struct gl_buffer_object *bufferObj,
179                       unsigned index_size, GLintptr offset, GLuint count,
180                       GLuint min, GLuint max)
181{
182   struct minmax_cache_entry *entry;
183   struct hash_entry *table_entry;
184   uint32_t hash;
185
186   if (!vbo_use_minmax_cache(bufferObj))
187      return;
188
189   simple_mtx_lock(&bufferObj->MinMaxCacheMutex);
190
191   if (!bufferObj->MinMaxCache) {
192      bufferObj->MinMaxCache =
193         _mesa_hash_table_create(NULL,
194                                 (uint32_t (*)(const void *))vbo_minmax_cache_hash,
195                                 (bool (*)(const void *, const void *))vbo_minmax_cache_key_equal);
196      if (!bufferObj->MinMaxCache)
197         goto out;
198   }
199
200   entry = MALLOC_STRUCT(minmax_cache_entry);
201   if (!entry)
202      goto out;
203
204   entry->key.offset = offset;
205   entry->key.count = count;
206   entry->key.index_size = index_size;
207   entry->min = min;
208   entry->max = max;
209   hash = vbo_minmax_cache_hash(&entry->key);
210
211   table_entry = _mesa_hash_table_search_pre_hashed(bufferObj->MinMaxCache,
212                                                    hash, &entry->key);
213   if (table_entry) {
214      /* It seems like this could happen when two contexts are rendering using
215       * the same buffer object from multiple threads.
216       */
217      _mesa_debug(ctx, "duplicate entry in minmax cache\n");
218      free(entry);
219      goto out;
220   }
221
222   table_entry = _mesa_hash_table_insert_pre_hashed(bufferObj->MinMaxCache,
223                                                    hash, &entry->key, entry);
224   if (!table_entry)
225      free(entry);
226
227out:
228   simple_mtx_unlock(&bufferObj->MinMaxCacheMutex);
229}
230
231
232void
233vbo_get_minmax_index_mapped(unsigned count, unsigned index_size,
234                            unsigned restartIndex, bool restart,
235                            const void *indices,
236                            unsigned *min_index, unsigned *max_index)
237{
238   switch (index_size) {
239   case 4: {
240      const GLuint *ui_indices = (const GLuint *)indices;
241      GLuint max_ui = 0;
242      GLuint min_ui = ~0U;
243      if (restart) {
244         for (unsigned i = 0; i < count; i++) {
245            if (ui_indices[i] != restartIndex) {
246               if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
247               if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
248            }
249         }
250      }
251      else {
252#if defined(USE_SSE41)
253         if (cpu_has_sse4_1) {
254            _mesa_uint_array_min_max(ui_indices, &min_ui, &max_ui, count);
255         }
256         else
257#endif
258            for (unsigned i = 0; i < count; i++) {
259               if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
260               if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
261            }
262      }
263      *min_index = min_ui;
264      *max_index = max_ui;
265      break;
266   }
267   case 2: {
268      const GLushort *us_indices = (const GLushort *)indices;
269      GLuint max_us = 0;
270      GLuint min_us = ~0U;
271      if (restart) {
272         for (unsigned i = 0; i < count; i++) {
273            if (us_indices[i] != restartIndex) {
274               if (us_indices[i] > max_us) max_us = us_indices[i];
275               if (us_indices[i] < min_us) min_us = us_indices[i];
276            }
277         }
278      }
279      else {
280         for (unsigned i = 0; i < count; i++) {
281            if (us_indices[i] > max_us) max_us = us_indices[i];
282            if (us_indices[i] < min_us) min_us = us_indices[i];
283         }
284      }
285      *min_index = min_us;
286      *max_index = max_us;
287      break;
288   }
289   case 1: {
290      const GLubyte *ub_indices = (const GLubyte *)indices;
291      GLuint max_ub = 0;
292      GLuint min_ub = ~0U;
293      if (restart) {
294         for (unsigned i = 0; i < count; i++) {
295            if (ub_indices[i] != restartIndex) {
296               if (ub_indices[i] > max_ub) max_ub = ub_indices[i];
297               if (ub_indices[i] < min_ub) min_ub = ub_indices[i];
298            }
299         }
300      }
301      else {
302         for (unsigned i = 0; i < count; i++) {
303            if (ub_indices[i] > max_ub) max_ub = ub_indices[i];
304            if (ub_indices[i] < min_ub) min_ub = ub_indices[i];
305         }
306      }
307      *min_index = min_ub;
308      *max_index = max_ub;
309      break;
310   }
311   default:
312      unreachable("not reached");
313   }
314}
315
316
317/**
318 * Compute min and max elements by scanning the index buffer for
319 * glDraw[Range]Elements() calls.
320 * If primitive restart is enabled, we need to ignore restart
321 * indexes when computing min/max.
322 */
323static void
324vbo_get_minmax_index(struct gl_context *ctx, struct gl_buffer_object *obj,
325                     const void *ptr, GLintptr offset, unsigned count,
326                     unsigned index_size, bool primitive_restart,
327                     unsigned restart_index, GLuint *min_index,
328                     GLuint *max_index)
329{
330   const char *indices;
331
332   if (!obj) {
333      indices = (const char *)ptr + offset;
334   } else {
335      GLsizeiptr size = MIN2((GLsizeiptr)count * index_size, obj->Size);
336
337      if (vbo_get_minmax_cached(obj, index_size, offset, count, min_index,
338                                max_index))
339         return;
340
341      indices = ctx->Driver.MapBufferRange(ctx, offset, size, GL_MAP_READ_BIT,
342                                           obj, MAP_INTERNAL);
343   }
344
345   vbo_get_minmax_index_mapped(count, index_size, restart_index,
346                               primitive_restart, indices,
347                               min_index, max_index);
348
349   if (obj) {
350      vbo_minmax_cache_store(ctx, obj, index_size, offset, count, *min_index,
351                             *max_index);
352      ctx->Driver.UnmapBuffer(ctx, obj, MAP_INTERNAL);
353   }
354}
355
356/**
357 * Compute min and max elements for nr_prims
358 */
359void
360vbo_get_minmax_indices(struct gl_context *ctx,
361                       const struct _mesa_prim *prims,
362                       const struct _mesa_index_buffer *ib,
363                       GLuint *min_index,
364                       GLuint *max_index,
365                       GLuint nr_prims,
366                       bool primitive_restart,
367                       unsigned restart_index)
368{
369   GLuint tmp_min, tmp_max;
370   GLuint i;
371   GLuint count;
372
373   *min_index = ~0;
374   *max_index = 0;
375
376   for (i = 0; i < nr_prims; i++) {
377      const struct _mesa_prim *start_prim;
378
379      start_prim = &prims[i];
380      count = start_prim->count;
381      /* Do combination if possible to reduce map/unmap count */
382      while ((i + 1 < nr_prims) &&
383             (prims[i].start + prims[i].count == prims[i+1].start)) {
384         count += prims[i+1].count;
385         i++;
386      }
387      vbo_get_minmax_index(ctx, ib->obj, ib->ptr,
388                           (ib->obj ? (GLintptr)ib->ptr : 0) +
389                           (start_prim->start << ib->index_size_shift),
390                           count, 1 << ib->index_size_shift,
391                           primitive_restart, restart_index,
392                           &tmp_min, &tmp_max);
393      *min_index = MIN2(*min_index, tmp_min);
394      *max_index = MAX2(*max_index, tmp_max);
395   }
396}
397
398/**
399 * Same as vbo_get_minmax_index, but using gallium draw structures.
400 */
401bool
402vbo_get_minmax_indices_gallium(struct gl_context *ctx,
403                               struct pipe_draw_info *info,
404                               const struct pipe_draw_start_count_bias *draws,
405                               unsigned num_draws)
406{
407   info->min_index = ~0;
408   info->max_index = 0;
409
410   for (unsigned i = 0; i < num_draws; i++) {
411      struct pipe_draw_start_count_bias draw = draws[i];
412
413      /* Do combination if possible to reduce map/unmap count */
414      while ((i + 1 < num_draws) &&
415             (draws[i].start + draws[i].count == draws[i+1].start)) {
416         draw.count += draws[i+1].count;
417         i++;
418      }
419
420      if (!draw.count)
421         continue;
422
423      unsigned tmp_min, tmp_max;
424      vbo_get_minmax_index(ctx, info->has_user_indices ?
425                              NULL : info->index.gl_bo,
426                           info->index.user,
427                           (GLintptr)draw.start * info->index_size,
428                           draw.count, info->index_size,
429                           info->primitive_restart, info->restart_index,
430                           &tmp_min, &tmp_max);
431      info->min_index = MIN2(info->min_index, tmp_min);
432      info->max_index = MAX2(info->max_index, tmp_max);
433   }
434
435   return info->min_index <= info->max_index;
436}
437