/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "tu_private.h"

#include "util/debug.h"
#include "util/disk_cache.h"
#include "util/mesa-sha1.h"
#include "util/u_atomic.h"

struct cache_entry_variant_info
{
};

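/* A single cache entry, keyed by a SHA-1 of the pipeline shaders and state.
 * The trailing code[] region holds, for each stage with compiled code, a
 * cache_entry_variant_info followed by that stage's code; entry_size()
 * below computes the resulting footprint.
 */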
struct cache_entry
{
   union {
      unsigned char sha1[20];
      uint32_t sha1_dw[5];
   };
   uint32_t code_sizes[MESA_SHADER_STAGES];
   struct tu_shader_variant *variants[MESA_SHADER_STAGES];
   char code[0];
};

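/* Initialize an in-memory cache. The hash table is open-addressed with
 * linear probing, and its size is kept at a power of two so that
 * "& (table_size - 1)" can serve as the bucket mask.
 */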
void
tu_pipeline_cache_init(struct tu_pipeline_cache *cache,
                       struct tu_device *device)
{
   cache->device = device;
   pthread_mutex_init(&cache->mutex, NULL);

   cache->modified = false;
   cache->kernel_count = 0;
   cache->total_size = 0;
   cache->table_size = 1024;
   const size_t byte_size = cache->table_size * sizeof(cache->hash_table[0]);
   cache->hash_table = malloc(byte_size);

   /* We don't consider allocation failure fatal; we just start with a
    * 0-sized cache. Disable caching when we want to keep shader debug info,
    * since we don't get the debug info on cached shaders. */
   if (cache->hash_table == NULL)
      cache->table_size = 0;
   else
      memset(cache->hash_table, 0, byte_size);
}

void
tu_pipeline_cache_finish(struct tu_pipeline_cache *cache)
{
   for (unsigned i = 0; i < cache->table_size; ++i)
      if (cache->hash_table[i]) {
         vk_free(&cache->alloc, cache->hash_table[i]);
      }
   pthread_mutex_destroy(&cache->mutex);
   free(cache->hash_table);
}

static uint32_t
entry_size(struct cache_entry *entry)
{
   size_t ret = sizeof(*entry);
   for (int i = 0; i < MESA_SHADER_STAGES; ++i)
      if (entry->code_sizes[i])
         ret +=
            sizeof(struct cache_entry_variant_info) + entry->code_sizes[i];
   return ret;
}

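/* Compute the cache key for a pipeline: hash the pipeline key, the layout's
 * SHA-1 and, for every active stage, the module SHA-1, entry point name and
 * specialization data, plus the pipeline create flags.
 */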
void
tu_hash_shaders(unsigned char *hash,
                const VkPipelineShaderStageCreateInfo **stages,
                const struct tu_pipeline_layout *layout,
                const struct tu_pipeline_key *key,
                uint32_t flags)
{
   struct mesa_sha1 ctx;

   _mesa_sha1_init(&ctx);
   if (key)
      _mesa_sha1_update(&ctx, key, sizeof(*key));
   if (layout)
      _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));

   for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
      if (stages[i]) {
         TU_FROM_HANDLE(tu_shader_module, module, stages[i]->module);
         const VkSpecializationInfo *spec_info =
            stages[i]->pSpecializationInfo;

         _mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1));
         _mesa_sha1_update(&ctx, stages[i]->pName, strlen(stages[i]->pName));
         if (spec_info) {
            _mesa_sha1_update(
               &ctx, spec_info->pMapEntries,
               spec_info->mapEntryCount * sizeof spec_info->pMapEntries[0]);
            _mesa_sha1_update(&ctx, spec_info->pData, spec_info->dataSize);
         }
      }
   }
   _mesa_sha1_update(&ctx, &flags, sizeof(flags));
   _mesa_sha1_final(&ctx, hash);
}

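/* Look up an entry without taking the cache mutex. Probing starts at the
 * bucket selected by the first 32 bits of the SHA-1 and walks forward
 * linearly; an empty slot terminates the search.
 */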
static struct cache_entry *
tu_pipeline_cache_search_unlocked(struct tu_pipeline_cache *cache,
                                  const unsigned char *sha1)
{
   if (cache->table_size == 0)
      return NULL;

   const uint32_t mask = cache->table_size - 1;
   const uint32_t start = (*(uint32_t *) sha1);

   for (uint32_t i = 0; i < cache->table_size; i++) {
      const uint32_t index = (start + i) & mask;
      struct cache_entry *entry = cache->hash_table[index];

      if (!entry)
         return NULL;

      if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) {
         return entry;
      }
   }

   unreachable("hash table should never be full");
}

static struct cache_entry *
tu_pipeline_cache_search(struct tu_pipeline_cache *cache,
                         const unsigned char *sha1)
{
   struct cache_entry *entry;

   pthread_mutex_lock(&cache->mutex);

   entry = tu_pipeline_cache_search_unlocked(cache, sha1);

   pthread_mutex_unlock(&cache->mutex);

   return entry;
}

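/* Insert an entry into the first free slot along its probe sequence and
 * update the size accounting. The caller guarantees the table is less than
 * half full, so a free slot always exists.
 */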
static void
tu_pipeline_cache_set_entry(struct tu_pipeline_cache *cache,
                            struct cache_entry *entry)
{
   const uint32_t mask = cache->table_size - 1;
   const uint32_t start = entry->sha1_dw[0];

   /* We'll always be able to insert when we get here. */
   assert(cache->kernel_count < cache->table_size / 2);

   for (uint32_t i = 0; i < cache->table_size; i++) {
      const uint32_t index = (start + i) & mask;
      if (!cache->hash_table[index]) {
         cache->hash_table[index] = entry;
         break;
      }
   }

   cache->total_size += entry_size(entry);
   cache->kernel_count++;
}

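/* Double the hash table and rehash every existing entry into it. Only the
 * table of pointers is reallocated; the entries themselves are reused.
 */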
static VkResult
tu_pipeline_cache_grow(struct tu_pipeline_cache *cache)
{
   const uint32_t table_size = cache->table_size * 2;
   const uint32_t old_table_size = cache->table_size;
   const size_t byte_size = table_size * sizeof(cache->hash_table[0]);
   struct cache_entry **table;
   struct cache_entry **old_table = cache->hash_table;

   table = malloc(byte_size);
   if (table == NULL)
      return vk_error(cache->device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   cache->hash_table = table;
   cache->table_size = table_size;
   cache->kernel_count = 0;
   cache->total_size = 0;

   memset(cache->hash_table, 0, byte_size);
   for (uint32_t i = 0; i < old_table_size; i++) {
      struct cache_entry *entry = old_table[i];
      if (!entry)
         continue;

      tu_pipeline_cache_set_entry(cache, entry);
   }

   free(old_table);

   return VK_SUCCESS;
}

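/* Add an entry, growing the table first once it reaches half occupancy so
 * that probe sequences stay short.
 */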
static void
tu_pipeline_cache_add_entry(struct tu_pipeline_cache *cache,
                            struct cache_entry *entry)
{
   if (cache->kernel_count == cache->table_size / 2)
      tu_pipeline_cache_grow(cache);

   /* Failing to grow the hash table isn't fatal, but it may mean we don't
    * have enough space to add this new kernel. Only add it if there's room.
    */
   if (cache->kernel_count < cache->table_size / 2)
      tu_pipeline_cache_set_entry(cache, entry);
}

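/* Serialized cache header. This matches the layout Vulkan requires at the
 * start of pipeline cache data: header length, header version, vendor ID,
 * device ID and pipeline cache UUID.
 */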
struct cache_header
{
   uint32_t header_size;
   uint32_t header_version;
   uint32_t vendor_id;
   uint32_t device_id;
   uint8_t uuid[VK_UUID_SIZE];
};

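/* Populate the cache from a serialized blob (e.g. VkPipelineCacheCreateInfo
 * initial data). The header is validated against this device before any
 * entries are copied; on any mismatch the data is ignored and the cache
 * simply starts out empty.
 */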
void
tu_pipeline_cache_load(struct tu_pipeline_cache *cache,
                       const void *data,
                       size_t size)
{
   struct tu_device *device = cache->device;
   struct cache_header header;

   if (size < sizeof(header))
      return;
   memcpy(&header, data, sizeof(header));
   if (header.header_size < sizeof(header))
      return;
   if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
      return;
   if (header.vendor_id != 0 /* TODO */)
      return;
   if (header.device_id != 0 /* TODO */)
      return;
   if (memcmp(header.uuid, device->physical_device->cache_uuid,
              VK_UUID_SIZE) != 0)
      return;

   char *end = (void *) data + size;
   char *p = (void *) data + header.header_size;

   while (end - p >= sizeof(struct cache_entry)) {
      struct cache_entry *entry = (struct cache_entry *) p;
      struct cache_entry *dest_entry;
      size_t entry_sz = entry_size(entry);
      if (end - p < entry_sz)
         break;

      dest_entry = vk_alloc(&cache->alloc, entry_sz, 8,
                            VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
      if (dest_entry) {
         memcpy(dest_entry, entry, entry_sz);
         for (int i = 0; i < MESA_SHADER_STAGES; ++i)
            dest_entry->variants[i] = NULL;
         tu_pipeline_cache_add_entry(cache, dest_entry);
      }
      p += entry_sz;
   }
}

VkResult
tu_CreatePipelineCache(VkDevice _device,
                       const VkPipelineCacheCreateInfo *pCreateInfo,
                       const VkAllocationCallbacks *pAllocator,
                       VkPipelineCache *pPipelineCache)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   struct tu_pipeline_cache *cache;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
   assert(pCreateInfo->flags == 0);

   cache = vk_alloc2(&device->alloc, pAllocator, sizeof(*cache), 8,
                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (cache == NULL)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   if (pAllocator)
      cache->alloc = *pAllocator;
   else
      cache->alloc = device->alloc;

   tu_pipeline_cache_init(cache, device);

   if (pCreateInfo->initialDataSize > 0) {
      tu_pipeline_cache_load(cache, pCreateInfo->pInitialData,
                             pCreateInfo->initialDataSize);
   }

   *pPipelineCache = tu_pipeline_cache_to_handle(cache);

   return VK_SUCCESS;
}

void
tu_DestroyPipelineCache(VkDevice _device,
                        VkPipelineCache _cache,
                        const VkAllocationCallbacks *pAllocator)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   TU_FROM_HANDLE(tu_pipeline_cache, cache, _cache);

   if (!cache)
      return;
   tu_pipeline_cache_finish(cache);

   vk_free2(&device->alloc, pAllocator, cache);
}

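/* Serialize the cache using the usual Vulkan two-call idiom: with pData
 * NULL only the required size is returned; otherwise entries are written
 * after the header until the buffer runs out, in which case VK_INCOMPLETE
 * is returned. Variant pointers are cleared in the copy since they are
 * only meaningful within this process.
 */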
VkResult
tu_GetPipelineCacheData(VkDevice _device,
                        VkPipelineCache _cache,
                        size_t *pDataSize,
                        void *pData)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   TU_FROM_HANDLE(tu_pipeline_cache, cache, _cache);
   struct cache_header *header;
   VkResult result = VK_SUCCESS;

   pthread_mutex_lock(&cache->mutex);

   const size_t size = sizeof(*header) + cache->total_size;
   if (pData == NULL) {
      pthread_mutex_unlock(&cache->mutex);
      *pDataSize = size;
      return VK_SUCCESS;
   }
   if (*pDataSize < sizeof(*header)) {
      pthread_mutex_unlock(&cache->mutex);
      *pDataSize = 0;
      return VK_INCOMPLETE;
   }
   void *p = pData, *end = pData + *pDataSize;
   header = p;
   header->header_size = sizeof(*header);
   header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
   header->vendor_id = 0 /* TODO */;
   header->device_id = 0 /* TODO */;
   memcpy(header->uuid, device->physical_device->cache_uuid, VK_UUID_SIZE);
   p += header->header_size;

   struct cache_entry *entry;
   for (uint32_t i = 0; i < cache->table_size; i++) {
      if (!cache->hash_table[i])
         continue;
      entry = cache->hash_table[i];
      const uint32_t entry_sz = entry_size(entry);
      if (end < p + entry_sz) {
         result = VK_INCOMPLETE;
         break;
      }

      memcpy(p, entry, entry_sz);
      for (int j = 0; j < MESA_SHADER_STAGES; ++j)
         ((struct cache_entry *) p)->variants[j] = NULL;
      p += entry_sz;
   }
   *pDataSize = p - pData;

   pthread_mutex_unlock(&cache->mutex);
   return result;
}

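/* Move entries from src into dst, skipping SHA-1s dst already contains.
 * The entry pointers themselves are transferred and the source slots are
 * cleared, so ownership moves to dst.
 */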
static void
tu_pipeline_cache_merge(struct tu_pipeline_cache *dst,
                        struct tu_pipeline_cache *src)
{
   for (uint32_t i = 0; i < src->table_size; i++) {
      struct cache_entry *entry = src->hash_table[i];
      if (!entry || tu_pipeline_cache_search(dst, entry->sha1))
         continue;

      tu_pipeline_cache_add_entry(dst, entry);

      src->hash_table[i] = NULL;
   }
}

VkResult
tu_MergePipelineCaches(VkDevice _device,
                       VkPipelineCache destCache,
                       uint32_t srcCacheCount,
                       const VkPipelineCache *pSrcCaches)
{
   TU_FROM_HANDLE(tu_pipeline_cache, dst, destCache);

   for (uint32_t i = 0; i < srcCacheCount; i++) {
      TU_FROM_HANDLE(tu_pipeline_cache, src, pSrcCaches[i]);

      tu_pipeline_cache_merge(dst, src);
   }

   return VK_SUCCESS;
}