1361fc4cbSmaya/*
2361fc4cbSmaya * Copyright © 2015 Intel Corporation
3361fc4cbSmaya *
4361fc4cbSmaya * Permission is hereby granted, free of charge, to any person obtaining a
5361fc4cbSmaya * copy of this software and associated documentation files (the "Software"),
6361fc4cbSmaya * to deal in the Software without restriction, including without limitation
7361fc4cbSmaya * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8361fc4cbSmaya * and/or sell copies of the Software, and to permit persons to whom the
9361fc4cbSmaya * Software is furnished to do so, subject to the following conditions:
10361fc4cbSmaya *
11361fc4cbSmaya * The above copyright notice and this permission notice (including the next
12361fc4cbSmaya * paragraph) shall be included in all copies or substantial portions of the
13361fc4cbSmaya * Software.
14361fc4cbSmaya *
15361fc4cbSmaya * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16361fc4cbSmaya * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17361fc4cbSmaya * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18361fc4cbSmaya * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19361fc4cbSmaya * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20361fc4cbSmaya * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21361fc4cbSmaya * DEALINGS IN THE SOFTWARE.
22361fc4cbSmaya */
23361fc4cbSmaya
24361fc4cbSmaya#include "tu_private.h"
25361fc4cbSmaya
26361fc4cbSmaya#include "util/debug.h"
27361fc4cbSmaya#include "util/disk_cache.h"
28361fc4cbSmaya#include "util/mesa-sha1.h"
29361fc4cbSmaya#include "util/u_atomic.h"
307ec681f3Smrg#include "vulkan/util/vk_util.h"
31361fc4cbSmaya
32361fc4cbSmayastruct cache_entry_variant_info
33361fc4cbSmaya{
34361fc4cbSmaya};
35361fc4cbSmaya
36361fc4cbSmayastruct cache_entry
37361fc4cbSmaya{
38361fc4cbSmaya   union {
39361fc4cbSmaya      unsigned char sha1[20];
40361fc4cbSmaya      uint32_t sha1_dw[5];
41361fc4cbSmaya   };
42361fc4cbSmaya   uint32_t code_sizes[MESA_SHADER_STAGES];
43361fc4cbSmaya   struct tu_shader_variant *variants[MESA_SHADER_STAGES];
44361fc4cbSmaya   char code[0];
45361fc4cbSmaya};
46361fc4cbSmaya
477ec681f3Smrgstatic void
48361fc4cbSmayatu_pipeline_cache_init(struct tu_pipeline_cache *cache,
49361fc4cbSmaya                       struct tu_device *device)
50361fc4cbSmaya{
51361fc4cbSmaya   cache->device = device;
52361fc4cbSmaya   pthread_mutex_init(&cache->mutex, NULL);
53361fc4cbSmaya
54361fc4cbSmaya   cache->modified = false;
55361fc4cbSmaya   cache->kernel_count = 0;
56361fc4cbSmaya   cache->total_size = 0;
57361fc4cbSmaya   cache->table_size = 1024;
58361fc4cbSmaya   const size_t byte_size = cache->table_size * sizeof(cache->hash_table[0]);
59361fc4cbSmaya   cache->hash_table = malloc(byte_size);
60361fc4cbSmaya
61361fc4cbSmaya   /* We don't consider allocation failure fatal, we just start with a 0-sized
62361fc4cbSmaya    * cache. Disable caching when we want to keep shader debug info, since
63361fc4cbSmaya    * we don't get the debug info on cached shaders. */
64361fc4cbSmaya   if (cache->hash_table == NULL)
65361fc4cbSmaya      cache->table_size = 0;
66361fc4cbSmaya   else
67361fc4cbSmaya      memset(cache->hash_table, 0, byte_size);
68361fc4cbSmaya}
69361fc4cbSmaya
707ec681f3Smrgstatic void
71361fc4cbSmayatu_pipeline_cache_finish(struct tu_pipeline_cache *cache)
72361fc4cbSmaya{
73361fc4cbSmaya   for (unsigned i = 0; i < cache->table_size; ++i)
74361fc4cbSmaya      if (cache->hash_table[i]) {
75361fc4cbSmaya         vk_free(&cache->alloc, cache->hash_table[i]);
76361fc4cbSmaya      }
77361fc4cbSmaya   pthread_mutex_destroy(&cache->mutex);
78361fc4cbSmaya   free(cache->hash_table);
79361fc4cbSmaya}
80361fc4cbSmaya
81361fc4cbSmayastatic uint32_t
82361fc4cbSmayaentry_size(struct cache_entry *entry)
83361fc4cbSmaya{
84361fc4cbSmaya   size_t ret = sizeof(*entry);
85361fc4cbSmaya   for (int i = 0; i < MESA_SHADER_STAGES; ++i)
86361fc4cbSmaya      if (entry->code_sizes[i])
87361fc4cbSmaya         ret +=
88361fc4cbSmaya            sizeof(struct cache_entry_variant_info) + entry->code_sizes[i];
89361fc4cbSmaya   return ret;
90361fc4cbSmaya}
91361fc4cbSmaya
92361fc4cbSmayastatic struct cache_entry *
93361fc4cbSmayatu_pipeline_cache_search_unlocked(struct tu_pipeline_cache *cache,
94361fc4cbSmaya                                  const unsigned char *sha1)
95361fc4cbSmaya{
96361fc4cbSmaya   const uint32_t mask = cache->table_size - 1;
97361fc4cbSmaya   const uint32_t start = (*(uint32_t *) sha1);
98361fc4cbSmaya
99361fc4cbSmaya   if (cache->table_size == 0)
100361fc4cbSmaya      return NULL;
101361fc4cbSmaya
102361fc4cbSmaya   for (uint32_t i = 0; i < cache->table_size; i++) {
103361fc4cbSmaya      const uint32_t index = (start + i) & mask;
104361fc4cbSmaya      struct cache_entry *entry = cache->hash_table[index];
105361fc4cbSmaya
106361fc4cbSmaya      if (!entry)
107361fc4cbSmaya         return NULL;
108361fc4cbSmaya
109361fc4cbSmaya      if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) {
110361fc4cbSmaya         return entry;
111361fc4cbSmaya      }
112361fc4cbSmaya   }
113361fc4cbSmaya
114361fc4cbSmaya   unreachable("hash table should never be full");
115361fc4cbSmaya}
116361fc4cbSmaya
117361fc4cbSmayastatic struct cache_entry *
118361fc4cbSmayatu_pipeline_cache_search(struct tu_pipeline_cache *cache,
119361fc4cbSmaya                         const unsigned char *sha1)
120361fc4cbSmaya{
121361fc4cbSmaya   struct cache_entry *entry;
122361fc4cbSmaya
123361fc4cbSmaya   pthread_mutex_lock(&cache->mutex);
124361fc4cbSmaya
125361fc4cbSmaya   entry = tu_pipeline_cache_search_unlocked(cache, sha1);
126361fc4cbSmaya
127361fc4cbSmaya   pthread_mutex_unlock(&cache->mutex);
128361fc4cbSmaya
129361fc4cbSmaya   return entry;
130361fc4cbSmaya}
131361fc4cbSmaya
132361fc4cbSmayastatic void
133361fc4cbSmayatu_pipeline_cache_set_entry(struct tu_pipeline_cache *cache,
134361fc4cbSmaya                            struct cache_entry *entry)
135361fc4cbSmaya{
136361fc4cbSmaya   const uint32_t mask = cache->table_size - 1;
137361fc4cbSmaya   const uint32_t start = entry->sha1_dw[0];
138361fc4cbSmaya
139361fc4cbSmaya   /* We'll always be able to insert when we get here. */
140361fc4cbSmaya   assert(cache->kernel_count < cache->table_size / 2);
141361fc4cbSmaya
142361fc4cbSmaya   for (uint32_t i = 0; i < cache->table_size; i++) {
143361fc4cbSmaya      const uint32_t index = (start + i) & mask;
144361fc4cbSmaya      if (!cache->hash_table[index]) {
145361fc4cbSmaya         cache->hash_table[index] = entry;
146361fc4cbSmaya         break;
147361fc4cbSmaya      }
148361fc4cbSmaya   }
149361fc4cbSmaya
150361fc4cbSmaya   cache->total_size += entry_size(entry);
151361fc4cbSmaya   cache->kernel_count++;
152361fc4cbSmaya}
153361fc4cbSmaya
154361fc4cbSmayastatic VkResult
155361fc4cbSmayatu_pipeline_cache_grow(struct tu_pipeline_cache *cache)
156361fc4cbSmaya{
157361fc4cbSmaya   const uint32_t table_size = cache->table_size * 2;
158361fc4cbSmaya   const uint32_t old_table_size = cache->table_size;
159361fc4cbSmaya   const size_t byte_size = table_size * sizeof(cache->hash_table[0]);
160361fc4cbSmaya   struct cache_entry **table;
161361fc4cbSmaya   struct cache_entry **old_table = cache->hash_table;
162361fc4cbSmaya
163361fc4cbSmaya   table = malloc(byte_size);
164361fc4cbSmaya   if (table == NULL)
1657ec681f3Smrg      return vk_error(cache, VK_ERROR_OUT_OF_HOST_MEMORY);
166361fc4cbSmaya
167361fc4cbSmaya   cache->hash_table = table;
168361fc4cbSmaya   cache->table_size = table_size;
169361fc4cbSmaya   cache->kernel_count = 0;
170361fc4cbSmaya   cache->total_size = 0;
171361fc4cbSmaya
172361fc4cbSmaya   memset(cache->hash_table, 0, byte_size);
173361fc4cbSmaya   for (uint32_t i = 0; i < old_table_size; i++) {
174361fc4cbSmaya      struct cache_entry *entry = old_table[i];
175361fc4cbSmaya      if (!entry)
176361fc4cbSmaya         continue;
177361fc4cbSmaya
178361fc4cbSmaya      tu_pipeline_cache_set_entry(cache, entry);
179361fc4cbSmaya   }
180361fc4cbSmaya
181361fc4cbSmaya   free(old_table);
182361fc4cbSmaya
183361fc4cbSmaya   return VK_SUCCESS;
184361fc4cbSmaya}
185361fc4cbSmaya
186361fc4cbSmayastatic void
187361fc4cbSmayatu_pipeline_cache_add_entry(struct tu_pipeline_cache *cache,
188361fc4cbSmaya                            struct cache_entry *entry)
189361fc4cbSmaya{
190361fc4cbSmaya   if (cache->kernel_count == cache->table_size / 2)
191361fc4cbSmaya      tu_pipeline_cache_grow(cache);
192361fc4cbSmaya
193361fc4cbSmaya   /* Failing to grow that hash table isn't fatal, but may mean we don't
194361fc4cbSmaya    * have enough space to add this new kernel. Only add it if there's room.
195361fc4cbSmaya    */
196361fc4cbSmaya   if (cache->kernel_count < cache->table_size / 2)
197361fc4cbSmaya      tu_pipeline_cache_set_entry(cache, entry);
198361fc4cbSmaya}
199361fc4cbSmaya
2007ec681f3Smrgstatic void
201361fc4cbSmayatu_pipeline_cache_load(struct tu_pipeline_cache *cache,
202361fc4cbSmaya                       const void *data,
203361fc4cbSmaya                       size_t size)
204361fc4cbSmaya{
205361fc4cbSmaya   struct tu_device *device = cache->device;
2067ec681f3Smrg   struct vk_pipeline_cache_header header;
207361fc4cbSmaya
208361fc4cbSmaya   if (size < sizeof(header))
209361fc4cbSmaya      return;
210361fc4cbSmaya   memcpy(&header, data, sizeof(header));
211361fc4cbSmaya   if (header.header_size < sizeof(header))
212361fc4cbSmaya      return;
213361fc4cbSmaya   if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
214361fc4cbSmaya      return;
2157ec681f3Smrg   if (header.vendor_id != 0x5143)
216361fc4cbSmaya      return;
2177ec681f3Smrg   if (header.device_id != device->physical_device->dev_id.chip_id)
218361fc4cbSmaya      return;
219361fc4cbSmaya   if (memcmp(header.uuid, device->physical_device->cache_uuid,
220361fc4cbSmaya              VK_UUID_SIZE) != 0)
221361fc4cbSmaya      return;
222361fc4cbSmaya
223361fc4cbSmaya   char *end = (void *) data + size;
224361fc4cbSmaya   char *p = (void *) data + header.header_size;
225361fc4cbSmaya
226361fc4cbSmaya   while (end - p >= sizeof(struct cache_entry)) {
227361fc4cbSmaya      struct cache_entry *entry = (struct cache_entry *) p;
228361fc4cbSmaya      struct cache_entry *dest_entry;
229361fc4cbSmaya      size_t size = entry_size(entry);
230361fc4cbSmaya      if (end - p < size)
231361fc4cbSmaya         break;
232361fc4cbSmaya
233361fc4cbSmaya      dest_entry =
234361fc4cbSmaya         vk_alloc(&cache->alloc, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
235361fc4cbSmaya      if (dest_entry) {
236361fc4cbSmaya         memcpy(dest_entry, entry, size);
237361fc4cbSmaya         for (int i = 0; i < MESA_SHADER_STAGES; ++i)
238361fc4cbSmaya            dest_entry->variants[i] = NULL;
239361fc4cbSmaya         tu_pipeline_cache_add_entry(cache, dest_entry);
240361fc4cbSmaya      }
241361fc4cbSmaya      p += size;
242361fc4cbSmaya   }
243361fc4cbSmaya}
244361fc4cbSmaya
2457ec681f3SmrgVKAPI_ATTR VkResult VKAPI_CALL
246361fc4cbSmayatu_CreatePipelineCache(VkDevice _device,
247361fc4cbSmaya                       const VkPipelineCacheCreateInfo *pCreateInfo,
248361fc4cbSmaya                       const VkAllocationCallbacks *pAllocator,
249361fc4cbSmaya                       VkPipelineCache *pPipelineCache)
250361fc4cbSmaya{
251361fc4cbSmaya   TU_FROM_HANDLE(tu_device, device, _device);
252361fc4cbSmaya   struct tu_pipeline_cache *cache;
253361fc4cbSmaya
254361fc4cbSmaya   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
255361fc4cbSmaya   assert(pCreateInfo->flags == 0);
256361fc4cbSmaya
2577ec681f3Smrg   cache = vk_object_alloc(&device->vk, pAllocator, sizeof(*cache),
2587ec681f3Smrg                           VK_OBJECT_TYPE_PIPELINE_CACHE);
259361fc4cbSmaya   if (cache == NULL)
2607ec681f3Smrg      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
261361fc4cbSmaya
262361fc4cbSmaya   if (pAllocator)
263361fc4cbSmaya      cache->alloc = *pAllocator;
264361fc4cbSmaya   else
2657ec681f3Smrg      cache->alloc = device->vk.alloc;
266361fc4cbSmaya
267361fc4cbSmaya   tu_pipeline_cache_init(cache, device);
268361fc4cbSmaya
269361fc4cbSmaya   if (pCreateInfo->initialDataSize > 0) {
270361fc4cbSmaya      tu_pipeline_cache_load(cache, pCreateInfo->pInitialData,
271361fc4cbSmaya                             pCreateInfo->initialDataSize);
272361fc4cbSmaya   }
273361fc4cbSmaya
274361fc4cbSmaya   *pPipelineCache = tu_pipeline_cache_to_handle(cache);
275361fc4cbSmaya
276361fc4cbSmaya   return VK_SUCCESS;
277361fc4cbSmaya}
278361fc4cbSmaya
2797ec681f3SmrgVKAPI_ATTR void VKAPI_CALL
280361fc4cbSmayatu_DestroyPipelineCache(VkDevice _device,
281361fc4cbSmaya                        VkPipelineCache _cache,
282361fc4cbSmaya                        const VkAllocationCallbacks *pAllocator)
283361fc4cbSmaya{
284361fc4cbSmaya   TU_FROM_HANDLE(tu_device, device, _device);
285361fc4cbSmaya   TU_FROM_HANDLE(tu_pipeline_cache, cache, _cache);
286361fc4cbSmaya
287361fc4cbSmaya   if (!cache)
288361fc4cbSmaya      return;
289361fc4cbSmaya   tu_pipeline_cache_finish(cache);
290361fc4cbSmaya
2917ec681f3Smrg   vk_object_free(&device->vk, pAllocator, cache);
292361fc4cbSmaya}
293361fc4cbSmaya
2947ec681f3SmrgVKAPI_ATTR VkResult VKAPI_CALL
295361fc4cbSmayatu_GetPipelineCacheData(VkDevice _device,
296361fc4cbSmaya                        VkPipelineCache _cache,
297361fc4cbSmaya                        size_t *pDataSize,
298361fc4cbSmaya                        void *pData)
299361fc4cbSmaya{
300361fc4cbSmaya   TU_FROM_HANDLE(tu_device, device, _device);
301361fc4cbSmaya   TU_FROM_HANDLE(tu_pipeline_cache, cache, _cache);
3027ec681f3Smrg   struct vk_pipeline_cache_header *header;
303361fc4cbSmaya   VkResult result = VK_SUCCESS;
304361fc4cbSmaya
305361fc4cbSmaya   pthread_mutex_lock(&cache->mutex);
306361fc4cbSmaya
307361fc4cbSmaya   const size_t size = sizeof(*header) + cache->total_size;
308361fc4cbSmaya   if (pData == NULL) {
309361fc4cbSmaya      pthread_mutex_unlock(&cache->mutex);
310361fc4cbSmaya      *pDataSize = size;
311361fc4cbSmaya      return VK_SUCCESS;
312361fc4cbSmaya   }
313361fc4cbSmaya   if (*pDataSize < sizeof(*header)) {
314361fc4cbSmaya      pthread_mutex_unlock(&cache->mutex);
315361fc4cbSmaya      *pDataSize = 0;
316361fc4cbSmaya      return VK_INCOMPLETE;
317361fc4cbSmaya   }
318361fc4cbSmaya   void *p = pData, *end = pData + *pDataSize;
319361fc4cbSmaya   header = p;
320361fc4cbSmaya   header->header_size = sizeof(*header);
321361fc4cbSmaya   header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
3227ec681f3Smrg   header->vendor_id = 0x5143;
3237ec681f3Smrg   header->device_id = device->physical_device->dev_id.chip_id;
324361fc4cbSmaya   memcpy(header->uuid, device->physical_device->cache_uuid, VK_UUID_SIZE);
325361fc4cbSmaya   p += header->header_size;
326361fc4cbSmaya
327361fc4cbSmaya   struct cache_entry *entry;
328361fc4cbSmaya   for (uint32_t i = 0; i < cache->table_size; i++) {
329361fc4cbSmaya      if (!cache->hash_table[i])
330361fc4cbSmaya         continue;
331361fc4cbSmaya      entry = cache->hash_table[i];
332361fc4cbSmaya      const uint32_t size = entry_size(entry);
333361fc4cbSmaya      if (end < p + size) {
334361fc4cbSmaya         result = VK_INCOMPLETE;
335361fc4cbSmaya         break;
336361fc4cbSmaya      }
337361fc4cbSmaya
338361fc4cbSmaya      memcpy(p, entry, size);
339361fc4cbSmaya      for (int j = 0; j < MESA_SHADER_STAGES; ++j)
340361fc4cbSmaya         ((struct cache_entry *) p)->variants[j] = NULL;
341361fc4cbSmaya      p += size;
342361fc4cbSmaya   }
343361fc4cbSmaya   *pDataSize = p - pData;
344361fc4cbSmaya
345361fc4cbSmaya   pthread_mutex_unlock(&cache->mutex);
346361fc4cbSmaya   return result;
347361fc4cbSmaya}
348361fc4cbSmaya
349361fc4cbSmayastatic void
350361fc4cbSmayatu_pipeline_cache_merge(struct tu_pipeline_cache *dst,
351361fc4cbSmaya                        struct tu_pipeline_cache *src)
352361fc4cbSmaya{
353361fc4cbSmaya   for (uint32_t i = 0; i < src->table_size; i++) {
354361fc4cbSmaya      struct cache_entry *entry = src->hash_table[i];
355361fc4cbSmaya      if (!entry || tu_pipeline_cache_search(dst, entry->sha1))
356361fc4cbSmaya         continue;
357361fc4cbSmaya
358361fc4cbSmaya      tu_pipeline_cache_add_entry(dst, entry);
359361fc4cbSmaya
360361fc4cbSmaya      src->hash_table[i] = NULL;
361361fc4cbSmaya   }
362361fc4cbSmaya}
363361fc4cbSmaya
3647ec681f3SmrgVKAPI_ATTR VkResult VKAPI_CALL
365361fc4cbSmayatu_MergePipelineCaches(VkDevice _device,
366361fc4cbSmaya                       VkPipelineCache destCache,
367361fc4cbSmaya                       uint32_t srcCacheCount,
368361fc4cbSmaya                       const VkPipelineCache *pSrcCaches)
369361fc4cbSmaya{
370361fc4cbSmaya   TU_FROM_HANDLE(tu_pipeline_cache, dst, destCache);
371361fc4cbSmaya
372361fc4cbSmaya   for (uint32_t i = 0; i < srcCacheCount; i++) {
373361fc4cbSmaya      TU_FROM_HANDLE(tu_pipeline_cache, src, pSrcCaches[i]);
374361fc4cbSmaya
375361fc4cbSmaya      tu_pipeline_cache_merge(dst, src);
376361fc4cbSmaya   }
377361fc4cbSmaya
378361fc4cbSmaya   return VK_SUCCESS;
379361fc4cbSmaya}
380