1/**********************************************************
2 * Copyright 2008-2009 VMware, Inc.  All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person
5 * obtaining a copy of this software and associated documentation
6 * files (the "Software"), to deal in the Software without
7 * restriction, including without limitation the rights to use, copy,
8 * modify, merge, publish, distribute, sublicense, and/or sell copies
9 * of the Software, and to permit persons to whom the Software is
10 * furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 **********************************************************/
25
26#include "util/u_math.h"
27#include "util/u_memory.h"
28#include "util/crc32.h"
29
30#include "svga_debug.h"
31#include "svga_format.h"
32#include "svga_winsys.h"
33#include "svga_screen.h"
34#include "svga_screen_cache.h"
35#include "svga_context.h"
36#include "svga_cmd.h"
37
38#define SVGA_SURFACE_CACHE_ENABLED 1
39
40
41/**
42 * Return the size of the surface described by the key (in bytes).
43 */
44unsigned
45svga_surface_size(const struct svga_host_surface_cache_key *key)
46{
47   unsigned bw, bh, bpb, total_size, i;
48
49   assert(key->numMipLevels > 0);
50   assert(key->numFaces > 0);
51   assert(key->arraySize > 0);
52
53   if (key->format == SVGA3D_BUFFER) {
54      /* Special case: we don't want to count vertex/index buffers
55       * against the cache size limit, so view them as zero-sized.
56       */
57      return 0;
58   }
59
60   svga_format_size(key->format, &bw, &bh, &bpb);
61
62   total_size = 0;
63
64   for (i = 0; i < key->numMipLevels; i++) {
65      unsigned w = u_minify(key->size.width, i);
66      unsigned h = u_minify(key->size.height, i);
67      unsigned d = u_minify(key->size.depth, i);
68      unsigned img_size = ((w + bw - 1) / bw) * ((h + bh - 1) / bh) * d * bpb;
69      total_size += img_size;
70   }
71
72   total_size *= key->numFaces * key->arraySize * MAX2(1, key->sampleCount);
73
74   return total_size;
75}
76
77
78/**
79 * Compute the bucket for this key.
80 */
81static inline unsigned
82svga_screen_cache_bucket(const struct svga_host_surface_cache_key *key)
83{
84   return util_hash_crc32(key, sizeof *key) % SVGA_HOST_SURFACE_CACHE_BUCKETS;
85}
86
87
/**
 * Search the cache for a surface that matches the key.  If a match is
 * found, remove it from the cache and return the surface pointer.
 * Return NULL otherwise.
 *
 * On a hit, the cache entry's reference to the surface is transferred to
 * the caller and the now-surfaceless entry is moved to the 'empty' list
 * for reuse.  The cache mutex is held while the bucket list is walked.
 */
static struct svga_winsys_surface *
svga_screen_cache_lookup(struct svga_screen *svgascreen,
                         const struct svga_host_surface_cache_key *key)
{
   struct svga_host_surface_cache *cache = &svgascreen->cache;
   struct svga_winsys_screen *sws = svgascreen->sws;
   struct svga_host_surface_cache_entry *entry;
   struct svga_winsys_surface *handle = NULL;
   struct list_head *curr, *next;
   unsigned bucket;
   unsigned tries = 0;   /* number of entries examined, for debug output */

   assert(key->cachable);

   bucket = svga_screen_cache_bucket(key);

   mtx_lock(&cache->mutex);

   /* Walk the hash bucket's list.  'next' is captured before the body runs
    * because a matching entry is unlinked from the list inside the loop.
    */
   curr = cache->bucket[bucket].next;
   next = curr->next;
   while (curr != &cache->bucket[bucket]) {
      ++tries;

      entry = LIST_ENTRY(struct svga_host_surface_cache_entry, curr, bucket_head);

      assert(entry->handle);

      /* If the key matches and the fence is signalled (the surface is no
       * longer needed) the lookup was successful.  We found a surface that
       * can be reused.
       * We unlink the surface from the cache entry and we add the entry to
       * the 'empty' list.
       */
      if (memcmp(&entry->key, key, sizeof *key) == 0 &&
          sws->fence_signalled(sws, entry->fence, 0) == 0) {
         unsigned surf_size;

         assert(sws->surface_is_flushed(sws, entry->handle));

         handle = entry->handle; /* Reference is transfered here. */
         entry->handle = NULL;

         /* Remove from hash table */
         list_del(&entry->bucket_head);

         /* remove from LRU list */
         list_del(&entry->head);

         /* Add the cache entry (but not the surface!) to the empty list */
         list_add(&entry->head, &cache->empty);

         /* update the cache size */
         surf_size = svga_surface_size(&entry->key);
         assert(surf_size <= cache->total_size);
         if (surf_size > cache->total_size)
            cache->total_size = 0; /* should never happen, but be safe */
         else
            cache->total_size -= surf_size;

         break;
      }

      curr = next;
      next = curr->next;
   }

   mtx_unlock(&cache->mutex);

   if (SVGA_DEBUG & DEBUG_DMA)
      debug_printf("%s: cache %s after %u tries (bucket %d)\n", __FUNCTION__,
                   handle ? "hit" : "miss", tries, bucket);

   return handle;
}
167
168
/**
 * Free the least recently used entries in the surface cache until the
 * cache size is <= the target size OR there are no unused entries left
 * to discard.  We don't do any flushing to try to free up additional
 * surfaces.
 *
 * Caller must hold the cache mutex (all callers in this file do).
 */
static void
svga_screen_cache_shrink(struct svga_screen *svgascreen,
                         unsigned target_size)
{
   struct svga_host_surface_cache *cache = &svgascreen->cache;
   struct svga_winsys_screen *sws = svgascreen->sws;
   struct svga_host_surface_cache_entry *entry = NULL, *next_entry;

   /* Walk over the list of unused buffers in reverse order: from oldest
    * to newest.
    */
   LIST_FOR_EACH_ENTRY_SAFE_REV(entry, next_entry, &cache->unused, head) {
      if (entry->key.format != SVGA3D_BUFFER) {
         /* we don't want to discard vertex/index buffers */
         /* (note: buffers report a zero size anyway, see svga_surface_size,
          * so discarding them would not reduce total_size)
          */

         cache->total_size -= svga_surface_size(&entry->key);

         assert(entry->handle);
         sws->surface_reference(sws, &entry->handle, NULL);

         /* Unlink from both the hash table and LRU list, then recycle the
          * entry onto the 'empty' list.
          */
         list_del(&entry->bucket_head);
         list_del(&entry->head);
         list_add(&entry->head, &cache->empty);

         /* The target check only runs after a successful eviction, so the
          * loop always makes progress before stopping.
          */
         if (cache->total_size <= target_size) {
            /* all done */
            break;
         }
      }
   }
}
206
207
/**
 * Add a surface to the cache.  This is done when the driver deletes
 * the surface.  Note: transfers a handle reference.
 *
 * The surface reference is released immediately (not cached) if the
 * surface is too large, or if evicting LRU entries cannot make enough
 * room, or if no free cache entry is available.
 */
static void
svga_screen_cache_add(struct svga_screen *svgascreen,
                      const struct svga_host_surface_cache_key *key,
                      struct svga_winsys_surface **p_handle)
{
   struct svga_host_surface_cache *cache = &svgascreen->cache;
   struct svga_winsys_screen *sws = svgascreen->sws;
   struct svga_host_surface_cache_entry *entry = NULL;
   struct svga_winsys_surface *handle = *p_handle;
   unsigned surf_size;

   assert(key->cachable);

   if (!handle)
      return;

   surf_size = svga_surface_size(key);

   /* Take ownership of the caller's reference. */
   *p_handle = NULL;
   mtx_lock(&cache->mutex);

   if (surf_size >= SVGA_HOST_SURFACE_CACHE_BYTES) {
      /* this surface is too large to cache, just free it */
      sws->surface_reference(sws, &handle, NULL);
      mtx_unlock(&cache->mutex);
      return;
   }

   if (cache->total_size + surf_size > SVGA_HOST_SURFACE_CACHE_BYTES) {
      /* Adding this surface would exceed the cache size.
       * Try to discard least recently used entries until we hit the
       * new target cache size.
       */
      unsigned target_size = SVGA_HOST_SURFACE_CACHE_BYTES - surf_size;

      svga_screen_cache_shrink(svgascreen, target_size);

      if (cache->total_size > target_size) {
         /* we weren't able to shrink the cache as much as we wanted so
          * just discard this surface.
          */
         sws->surface_reference(sws, &handle, NULL);
         mtx_unlock(&cache->mutex);
         return;
      }
   }

   /* Find a cache entry to hold the surface: prefer an entry from the
    * 'empty' list, otherwise evict the least recently used surface.
    */
   if (!list_is_empty(&cache->empty)) {
      /* An empty entry has no surface associated with it.
       * Use the first empty entry.
       */
      entry = LIST_ENTRY(struct svga_host_surface_cache_entry,
                         cache->empty.next, head);

      /* Remove from LRU list */
      list_del(&entry->head);
   }
   else if (!list_is_empty(&cache->unused)) {
      /* free the last used buffer and reuse its entry */
      entry = LIST_ENTRY(struct svga_host_surface_cache_entry,
                         cache->unused.prev, head);
      SVGA_DBG(DEBUG_CACHE|DEBUG_DMA,
               "unref sid %p (make space)\n", entry->handle);

      cache->total_size -= svga_surface_size(&entry->key);

      sws->surface_reference(sws, &entry->handle, NULL);

      /* Remove from hash table */
      list_del(&entry->bucket_head);

      /* Remove from LRU list */
      list_del(&entry->head);
   }

   if (entry) {
      assert(entry->handle == NULL);
      entry->handle = handle;
      memcpy(&entry->key, key, sizeof entry->key);

      SVGA_DBG(DEBUG_CACHE|DEBUG_DMA,
               "cache sid %p\n", entry->handle);

      /* If we don't have gb objects, we don't need to invalidate. */
      if (sws->have_gb_objects)
         list_add(&entry->head, &cache->validated);
      else
         list_add(&entry->head, &cache->invalidated);

      cache->total_size += surf_size;
   }
   else {
      /* Couldn't cache the buffer -- this really shouldn't happen */
      SVGA_DBG(DEBUG_CACHE|DEBUG_DMA,
               "unref sid %p (couldn't find space)\n", handle);
      sws->surface_reference(sws, &handle, NULL);
   }

   mtx_unlock(&cache->mutex);
}
312
313
314/* Maximum number of invalidate surface commands in a command buffer */
315# define SVGA_MAX_SURFACE_TO_INVALIDATE 1000
316
/**
 * Called during the screen flush to move all buffers not in a validate list
 * into the unused list.
 *
 * Entries on the 'invalidated' list whose surfaces are flushed get the
 * given fence attached and move to the 'unused' list (and back into the
 * hash table), making them eligible for lookup.  Entries on the
 * 'validated' list (gb-objects path) get their surface content
 * invalidated first, then move to the 'invalidated' list.
 */
void
svga_screen_cache_flush(struct svga_screen *svgascreen,
                        struct svga_context *svga,
                        struct pipe_fence_handle *fence)
{
   struct svga_host_surface_cache *cache = &svgascreen->cache;
   struct svga_winsys_screen *sws = svgascreen->sws;
   struct svga_host_surface_cache_entry *entry;
   struct list_head *curr, *next;
   unsigned bucket;

   mtx_lock(&cache->mutex);

   /* Loop over entries in the invalidated list */
   /* 'next' is captured up front because entries are unlinked in the loop. */
   curr = cache->invalidated.next;
   next = curr->next;
   while (curr != &cache->invalidated) {
      entry = LIST_ENTRY(struct svga_host_surface_cache_entry, curr, head);

      assert(entry->handle);

      if (sws->surface_is_flushed(sws, entry->handle)) {
         /* remove entry from the invalidated list */
         list_del(&entry->head);

         sws->fence_reference(sws, &entry->fence, fence);

         /* Add entry to the unused list */
         list_add(&entry->head, &cache->unused);

         /* Add entry to the hash table bucket */
         bucket = svga_screen_cache_bucket(&entry->key);
         list_add(&entry->bucket_head, &cache->bucket[bucket]);
      }

      curr = next;
      next = curr->next;
   }

   /* Now walk the validated list, invalidating the surface contents of
    * flushed entries (gb-objects path) before moving them to the
    * invalidated list.  nsurf counts invalidate commands issued since the
    * last winsys flush.
    */
   unsigned nsurf = 0;
   curr = cache->validated.next;
   next = curr->next;
   while (curr != &cache->validated) {
      entry = LIST_ENTRY(struct svga_host_surface_cache_entry, curr, head);

      assert(entry->handle);
      assert(svga_have_gb_objects(svga));

      if (sws->surface_is_flushed(sws, entry->handle)) {
         /* remove entry from the validated list */
         list_del(&entry->head);

         /* It is now safe to invalidate the surface content.
          * It will be done using the current context.
          */
         if (SVGA_TRY(SVGA3D_InvalidateGBSurface(svga->swc, entry->handle))
             != PIPE_OK) {
            ASSERTED enum pipe_error ret;

            /* Even though surface invalidation here is done after the command
             * buffer is flushed, it is still possible that it will
             * fail because there might be just enough of this command that is
             * filling up the command buffer, so in this case we will call
             * the winsys flush directly to flush the buffer.
             * Note, we don't want to call svga_context_flush() here because
             * this function itself is called inside svga_context_flush().
             */
            svga_retry_enter(svga);
            svga->swc->flush(svga->swc, NULL);
            nsurf = 0;
            ret = SVGA3D_InvalidateGBSurface(svga->swc, entry->handle);
            svga_retry_exit(svga);
            assert(ret == PIPE_OK);
         }

         /* add the entry to the invalidated list */

         list_add(&entry->head, &cache->invalidated);
         nsurf++;
      }

      curr = next;
      next = curr->next;
   }

   mtx_unlock(&cache->mutex);

   /**
    * In some rare cases (when running ARK survival), we hit the max number
    * of surface relocations with invalidated surfaces during context flush.
    * So if the number of invalidated surface exceeds a certain limit (1000),
    * we'll do another winsys flush.
    */
   if (nsurf > SVGA_MAX_SURFACE_TO_INVALIDATE) {
      svga->swc->flush(svga->swc, NULL);
   }
}
418
419
420/**
421 * Free all the surfaces in the cache.
422 * Called when destroying the svga screen object.
423 */
424void
425svga_screen_cache_cleanup(struct svga_screen *svgascreen)
426{
427   struct svga_host_surface_cache *cache = &svgascreen->cache;
428   struct svga_winsys_screen *sws = svgascreen->sws;
429   unsigned i;
430
431   for (i = 0; i < SVGA_HOST_SURFACE_CACHE_SIZE; ++i) {
432      if (cache->entries[i].handle) {
433	 SVGA_DBG(DEBUG_CACHE|DEBUG_DMA,
434                  "unref sid %p (shutdown)\n", cache->entries[i].handle);
435	 sws->surface_reference(sws, &cache->entries[i].handle, NULL);
436
437         cache->total_size -= svga_surface_size(&cache->entries[i].key);
438      }
439
440      if (cache->entries[i].fence)
441         sws->fence_reference(sws, &cache->entries[i].fence, NULL);
442   }
443
444   mtx_destroy(&cache->mutex);
445}
446
447
448enum pipe_error
449svga_screen_cache_init(struct svga_screen *svgascreen)
450{
451   struct svga_host_surface_cache *cache = &svgascreen->cache;
452   unsigned i;
453
454   assert(cache->total_size == 0);
455
456   (void) mtx_init(&cache->mutex, mtx_plain);
457
458   for (i = 0; i < SVGA_HOST_SURFACE_CACHE_BUCKETS; ++i)
459      list_inithead(&cache->bucket[i]);
460
461   list_inithead(&cache->unused);
462
463   list_inithead(&cache->validated);
464
465   list_inithead(&cache->invalidated);
466
467   list_inithead(&cache->empty);
468   for (i = 0; i < SVGA_HOST_SURFACE_CACHE_SIZE; ++i)
469      list_addtail(&cache->entries[i].head, &cache->empty);
470
471   return PIPE_OK;
472}
473
474
475/**
476 * Allocate a new host-side surface.  If the surface is marked as cachable,
477 * first try re-using a surface in the cache of freed surfaces.  Otherwise,
478 * allocate a new surface.
479 * \param bind_flags  bitmask of PIPE_BIND_x flags
480 * \param usage  one of PIPE_USAGE_x values
481 * \param validated return True if the surface is a reused surface
482 */
483struct svga_winsys_surface *
484svga_screen_surface_create(struct svga_screen *svgascreen,
485                           unsigned bind_flags, enum pipe_resource_usage usage,
486                           boolean *validated,
487                           struct svga_host_surface_cache_key *key)
488{
489   struct svga_winsys_screen *sws = svgascreen->sws;
490   struct svga_winsys_surface *handle = NULL;
491   boolean cachable = SVGA_SURFACE_CACHE_ENABLED && key->cachable;
492
493   SVGA_DBG(DEBUG_CACHE|DEBUG_DMA,
494            "%s sz %dx%dx%d mips %d faces %d arraySize %d cachable %d\n",
495            __FUNCTION__,
496            key->size.width,
497            key->size.height,
498            key->size.depth,
499            key->numMipLevels,
500            key->numFaces,
501            key->arraySize,
502            key->cachable);
503
504   if (cachable) {
505      /* Try to re-cycle a previously freed, cached surface */
506      if (key->format == SVGA3D_BUFFER) {
507         SVGA3dSurfaceAllFlags hint_flag;
508
509         /* For buffers, round the buffer size up to the nearest power
510          * of two to increase the probability of cache hits.  Keep
511          * texture surface dimensions unchanged.
512          */
513         uint32_t size = 1;
514         while (size < key->size.width)
515            size <<= 1;
516         key->size.width = size;
517
518         /* Determine whether the buffer is static or dynamic.
519          * This is a bit of a heuristic which can be tuned as needed.
520          */
521         if (usage == PIPE_USAGE_DEFAULT ||
522             usage == PIPE_USAGE_IMMUTABLE) {
523            hint_flag = SVGA3D_SURFACE_HINT_STATIC;
524         }
525         else if (bind_flags & PIPE_BIND_INDEX_BUFFER) {
526            /* Index buffers don't change too often.  Mark them as static.
527             */
528            hint_flag = SVGA3D_SURFACE_HINT_STATIC;
529         }
530         else {
531            /* Since we're reusing buffers we're effectively transforming all
532             * of them into dynamic buffers.
533             *
534             * It would be nice to not cache long lived static buffers. But there
535             * is no way to detect the long lived from short lived ones yet. A
536             * good heuristic would be buffer size.
537             */
538            hint_flag = SVGA3D_SURFACE_HINT_DYNAMIC;
539         }
540
541         key->flags &= ~(SVGA3D_SURFACE_HINT_STATIC |
542                         SVGA3D_SURFACE_HINT_DYNAMIC);
543         key->flags |= hint_flag;
544      }
545
546      handle = svga_screen_cache_lookup(svgascreen, key);
547      if (handle) {
548         if (key->format == SVGA3D_BUFFER)
549            SVGA_DBG(DEBUG_CACHE|DEBUG_DMA,
550                     "reuse sid %p sz %d (buffer)\n", handle,
551                     key->size.width);
552         else
553            SVGA_DBG(DEBUG_CACHE|DEBUG_DMA,
554                     "reuse sid %p sz %dx%dx%d mips %d faces %d arraySize %d\n", handle,
555                     key->size.width,
556                     key->size.height,
557                     key->size.depth,
558                     key->numMipLevels,
559                     key->numFaces,
560                     key->arraySize);
561         *validated = TRUE;
562      }
563   }
564
565   if (!handle) {
566      /* Unable to recycle surface, allocate a new one */
567      unsigned usage = 0;
568
569      if (!key->cachable)
570         usage |= SVGA_SURFACE_USAGE_SHARED;
571      if (key->scanout)
572         usage |= SVGA_SURFACE_USAGE_SCANOUT;
573      if (key->coherent)
574         usage |= SVGA_SURFACE_USAGE_COHERENT;
575
576      handle = sws->surface_create(sws,
577                                   key->flags,
578                                   key->format,
579                                   usage,
580                                   key->size,
581                                   key->numFaces * key->arraySize,
582                                   key->numMipLevels,
583                                   key->sampleCount);
584      if (handle)
585         SVGA_DBG(DEBUG_CACHE|DEBUG_DMA,
586                  "  CREATE sid %p sz %dx%dx%d\n",
587                  handle,
588                  key->size.width,
589                  key->size.height,
590                  key->size.depth);
591
592      *validated = FALSE;
593   }
594
595   return handle;
596}
597
598
599/**
600 * Release a surface.  We don't actually free the surface- we put
601 * it into the cache of freed surfaces (if it's cachable).
602 */
603void
604svga_screen_surface_destroy(struct svga_screen *svgascreen,
605                            const struct svga_host_surface_cache_key *key,
606                            struct svga_winsys_surface **p_handle)
607{
608   struct svga_winsys_screen *sws = svgascreen->sws;
609
610   /* We only set the cachable flag for surfaces of which we are the
611    * exclusive owner.  So just hold onto our existing reference in
612    * that case.
613    */
614   if (SVGA_SURFACE_CACHE_ENABLED && key->cachable) {
615      svga_screen_cache_add(svgascreen, key, p_handle);
616   }
617   else {
618      SVGA_DBG(DEBUG_DMA,
619               "unref sid %p (uncachable)\n", *p_handle);
620      sws->surface_reference(sws, p_handle, NULL);
621   }
622}
623
624
625/**
626 * Print/dump the contents of the screen cache.  For debugging.
627 */
628void
629svga_screen_cache_dump(const struct svga_screen *svgascreen)
630{
631   const struct svga_host_surface_cache *cache = &svgascreen->cache;
632   unsigned bucket;
633   unsigned count = 0;
634
635   debug_printf("svga3d surface cache:\n");
636   for (bucket = 0; bucket < SVGA_HOST_SURFACE_CACHE_BUCKETS; bucket++) {
637      struct list_head *curr;
638      curr = cache->bucket[bucket].next;
639      while (curr && curr != &cache->bucket[bucket]) {
640         struct svga_host_surface_cache_entry *entry =
641            LIST_ENTRY(struct svga_host_surface_cache_entry,
642                       curr, bucket_head);
643         if (entry->key.format == SVGA3D_BUFFER) {
644            debug_printf("  %p: buffer %u bytes\n",
645                         entry->handle,
646                         entry->key.size.width);
647         }
648         else {
649            debug_printf("  %p: %u x %u x %u format %u\n",
650                         entry->handle,
651                         entry->key.size.width,
652                         entry->key.size.height,
653                         entry->key.size.depth,
654                         entry->key.format);
655         }
656         curr = curr->next;
657         count++;
658      }
659   }
660
661   debug_printf("%u surfaces, %u bytes\n", count, cache->total_size);
662}
663