1b8e80941Smrg/* 2b8e80941Smrg * Copyright © 2015 Intel Corporation 3b8e80941Smrg * 4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a 5b8e80941Smrg * copy of this software and associated documentation files (the "Software"), 6b8e80941Smrg * to deal in the Software without restriction, including without limitation 7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the 9b8e80941Smrg * Software is furnished to do so, subject to the following conditions: 10b8e80941Smrg * 11b8e80941Smrg * The above copyright notice and this permission notice (including the next 12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the 13b8e80941Smrg * Software. 14b8e80941Smrg * 15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21b8e80941Smrg * DEALINGS IN THE SOFTWARE. 22b8e80941Smrg */ 23b8e80941Smrg 24b8e80941Smrg#include "tu_private.h" 25b8e80941Smrg 26b8e80941Smrg#include "util/debug.h" 27b8e80941Smrg#include "util/disk_cache.h" 28b8e80941Smrg#include "util/mesa-sha1.h" 29b8e80941Smrg#include "util/u_atomic.h" 30b8e80941Smrg 31b8e80941Smrgstruct cache_entry_variant_info 32b8e80941Smrg{ 33b8e80941Smrg}; 34b8e80941Smrg 35b8e80941Smrgstruct cache_entry 36b8e80941Smrg{ 37b8e80941Smrg union { 38b8e80941Smrg unsigned char sha1[20]; 39b8e80941Smrg uint32_t sha1_dw[5]; 40b8e80941Smrg }; 41b8e80941Smrg uint32_t code_sizes[MESA_SHADER_STAGES]; 42b8e80941Smrg struct tu_shader_variant *variants[MESA_SHADER_STAGES]; 43b8e80941Smrg char code[0]; 44b8e80941Smrg}; 45b8e80941Smrg 46b8e80941Smrgvoid 47b8e80941Smrgtu_pipeline_cache_init(struct tu_pipeline_cache *cache, 48b8e80941Smrg struct tu_device *device) 49b8e80941Smrg{ 50b8e80941Smrg cache->device = device; 51b8e80941Smrg pthread_mutex_init(&cache->mutex, NULL); 52b8e80941Smrg 53b8e80941Smrg cache->modified = false; 54b8e80941Smrg cache->kernel_count = 0; 55b8e80941Smrg cache->total_size = 0; 56b8e80941Smrg cache->table_size = 1024; 57b8e80941Smrg const size_t byte_size = cache->table_size * sizeof(cache->hash_table[0]); 58b8e80941Smrg cache->hash_table = malloc(byte_size); 59b8e80941Smrg 60b8e80941Smrg /* We don't consider allocation failure fatal, we just start with a 0-sized 61b8e80941Smrg * cache. Disable caching when we want to keep shader debug info, since 62b8e80941Smrg * we don't get the debug info on cached shaders. */ 63b8e80941Smrg if (cache->hash_table == NULL) 64b8e80941Smrg cache->table_size = 0; 65b8e80941Smrg else 66b8e80941Smrg memset(cache->hash_table, 0, byte_size); 67b8e80941Smrg} 68b8e80941Smrg 69b8e80941Smrgvoid 70b8e80941Smrgtu_pipeline_cache_finish(struct tu_pipeline_cache *cache) 71b8e80941Smrg{ 72b8e80941Smrg for (unsigned i = 0; i < cache->table_size; ++i) 73b8e80941Smrg if (cache->hash_table[i]) { 74b8e80941Smrg vk_free(&cache->alloc, cache->hash_table[i]); 75b8e80941Smrg } 76b8e80941Smrg pthread_mutex_destroy(&cache->mutex); 77b8e80941Smrg free(cache->hash_table); 78b8e80941Smrg} 79b8e80941Smrg 80b8e80941Smrgstatic uint32_t 81b8e80941Smrgentry_size(struct cache_entry *entry) 82b8e80941Smrg{ 83b8e80941Smrg size_t ret = sizeof(*entry); 84b8e80941Smrg for (int i = 0; i < MESA_SHADER_STAGES; ++i) 85b8e80941Smrg if (entry->code_sizes[i]) 86b8e80941Smrg ret += 87b8e80941Smrg sizeof(struct cache_entry_variant_info) + entry->code_sizes[i]; 88b8e80941Smrg return ret; 89b8e80941Smrg} 90b8e80941Smrg 91b8e80941Smrgvoid 92b8e80941Smrgtu_hash_shaders(unsigned char *hash, 93b8e80941Smrg const VkPipelineShaderStageCreateInfo **stages, 94b8e80941Smrg const struct tu_pipeline_layout *layout, 95b8e80941Smrg const struct tu_pipeline_key *key, 96b8e80941Smrg uint32_t flags) 97b8e80941Smrg{ 98b8e80941Smrg struct mesa_sha1 ctx; 99b8e80941Smrg 100b8e80941Smrg _mesa_sha1_init(&ctx); 101b8e80941Smrg if (key) 102b8e80941Smrg _mesa_sha1_update(&ctx, key, sizeof(*key)); 103b8e80941Smrg if (layout) 104b8e80941Smrg _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1)); 105b8e80941Smrg 106b8e80941Smrg for (int i = 0; i < MESA_SHADER_STAGES; ++i) { 107b8e80941Smrg if (stages[i]) { 108b8e80941Smrg TU_FROM_HANDLE(tu_shader_module, module, stages[i]->module); 109b8e80941Smrg const VkSpecializationInfo *spec_info = 110b8e80941Smrg stages[i]->pSpecializationInfo; 111b8e80941Smrg 112b8e80941Smrg _mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1)); 113b8e80941Smrg _mesa_sha1_update(&ctx, stages[i]->pName, strlen(stages[i]->pName)); 114b8e80941Smrg if (spec_info) { 115b8e80941Smrg _mesa_sha1_update( 116b8e80941Smrg &ctx, spec_info->pMapEntries, 117b8e80941Smrg spec_info->mapEntryCount * sizeof spec_info->pMapEntries[0]); 118b8e80941Smrg _mesa_sha1_update(&ctx, spec_info->pData, spec_info->dataSize); 119b8e80941Smrg } 120b8e80941Smrg } 121b8e80941Smrg } 122b8e80941Smrg _mesa_sha1_update(&ctx, &flags, 4); 123b8e80941Smrg _mesa_sha1_final(&ctx, hash); 124b8e80941Smrg} 125b8e80941Smrg 126b8e80941Smrgstatic struct cache_entry * 127b8e80941Smrgtu_pipeline_cache_search_unlocked(struct tu_pipeline_cache *cache, 128b8e80941Smrg const unsigned char *sha1) 129b8e80941Smrg{ 130b8e80941Smrg const uint32_t mask = cache->table_size - 1; 131b8e80941Smrg const uint32_t start = (*(uint32_t *) sha1); 132b8e80941Smrg 133b8e80941Smrg if (cache->table_size == 0) 134b8e80941Smrg return NULL; 135b8e80941Smrg 136b8e80941Smrg for (uint32_t i = 0; i < cache->table_size; i++) { 137b8e80941Smrg const uint32_t index = (start + i) & mask; 138b8e80941Smrg struct cache_entry *entry = cache->hash_table[index]; 139b8e80941Smrg 140b8e80941Smrg if (!entry) 141b8e80941Smrg return NULL; 142b8e80941Smrg 143b8e80941Smrg if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) { 144b8e80941Smrg return entry; 145b8e80941Smrg } 146b8e80941Smrg } 147b8e80941Smrg 148b8e80941Smrg unreachable("hash table should never be full"); 149b8e80941Smrg} 150b8e80941Smrg 151b8e80941Smrgstatic struct cache_entry * 152b8e80941Smrgtu_pipeline_cache_search(struct tu_pipeline_cache *cache, 153b8e80941Smrg const unsigned char *sha1) 154b8e80941Smrg{ 155b8e80941Smrg struct cache_entry *entry; 156b8e80941Smrg 157b8e80941Smrg pthread_mutex_lock(&cache->mutex); 158b8e80941Smrg 159b8e80941Smrg entry = tu_pipeline_cache_search_unlocked(cache, sha1); 160b8e80941Smrg 161b8e80941Smrg pthread_mutex_unlock(&cache->mutex); 162b8e80941Smrg 163b8e80941Smrg return entry; 164b8e80941Smrg} 165b8e80941Smrg 166b8e80941Smrgstatic void 167b8e80941Smrgtu_pipeline_cache_set_entry(struct tu_pipeline_cache *cache, 168b8e80941Smrg struct cache_entry *entry) 169b8e80941Smrg{ 170b8e80941Smrg const uint32_t mask = cache->table_size - 1; 171b8e80941Smrg const uint32_t start = entry->sha1_dw[0]; 172b8e80941Smrg 173b8e80941Smrg /* We'll always be able to insert when we get here. */ 174b8e80941Smrg assert(cache->kernel_count < cache->table_size / 2); 175b8e80941Smrg 176b8e80941Smrg for (uint32_t i = 0; i < cache->table_size; i++) { 177b8e80941Smrg const uint32_t index = (start + i) & mask; 178b8e80941Smrg if (!cache->hash_table[index]) { 179b8e80941Smrg cache->hash_table[index] = entry; 180b8e80941Smrg break; 181b8e80941Smrg } 182b8e80941Smrg } 183b8e80941Smrg 184b8e80941Smrg cache->total_size += entry_size(entry); 185b8e80941Smrg cache->kernel_count++; 186b8e80941Smrg} 187b8e80941Smrg 188b8e80941Smrgstatic VkResult 189b8e80941Smrgtu_pipeline_cache_grow(struct tu_pipeline_cache *cache) 190b8e80941Smrg{ 191b8e80941Smrg const uint32_t table_size = cache->table_size * 2; 192b8e80941Smrg const uint32_t old_table_size = cache->table_size; 193b8e80941Smrg const size_t byte_size = table_size * sizeof(cache->hash_table[0]); 194b8e80941Smrg struct cache_entry **table; 195b8e80941Smrg struct cache_entry **old_table = cache->hash_table; 196b8e80941Smrg 197b8e80941Smrg table = malloc(byte_size); 198b8e80941Smrg if (table == NULL) 199b8e80941Smrg return vk_error(cache->device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); 200b8e80941Smrg 201b8e80941Smrg cache->hash_table = table; 202b8e80941Smrg cache->table_size = table_size; 203b8e80941Smrg cache->kernel_count = 0; 204b8e80941Smrg cache->total_size = 0; 205b8e80941Smrg 206b8e80941Smrg memset(cache->hash_table, 0, byte_size); 207b8e80941Smrg for (uint32_t i = 0; i < old_table_size; i++) { 208b8e80941Smrg struct cache_entry *entry = old_table[i]; 209b8e80941Smrg if (!entry) 210b8e80941Smrg continue; 211b8e80941Smrg 212b8e80941Smrg tu_pipeline_cache_set_entry(cache, entry); 213b8e80941Smrg } 214b8e80941Smrg 215b8e80941Smrg free(old_table); 216b8e80941Smrg 217b8e80941Smrg return VK_SUCCESS; 218b8e80941Smrg} 219b8e80941Smrg 220b8e80941Smrgstatic void 221b8e80941Smrgtu_pipeline_cache_add_entry(struct tu_pipeline_cache *cache, 222b8e80941Smrg struct cache_entry *entry) 223b8e80941Smrg{ 224b8e80941Smrg if (cache->kernel_count == cache->table_size / 2) 225b8e80941Smrg tu_pipeline_cache_grow(cache); 226b8e80941Smrg 227b8e80941Smrg /* Failing to grow that hash table isn't fatal, but may mean we don't 228b8e80941Smrg * have enough space to add this new kernel. Only add it if there's room. 229b8e80941Smrg */ 230b8e80941Smrg if (cache->kernel_count < cache->table_size / 2) 231b8e80941Smrg tu_pipeline_cache_set_entry(cache, entry); 232b8e80941Smrg} 233b8e80941Smrg 234b8e80941Smrgstruct cache_header 235b8e80941Smrg{ 236b8e80941Smrg uint32_t header_size; 237b8e80941Smrg uint32_t header_version; 238b8e80941Smrg uint32_t vendor_id; 239b8e80941Smrg uint32_t device_id; 240b8e80941Smrg uint8_t uuid[VK_UUID_SIZE]; 241b8e80941Smrg}; 242b8e80941Smrg 243b8e80941Smrgvoid 244b8e80941Smrgtu_pipeline_cache_load(struct tu_pipeline_cache *cache, 245b8e80941Smrg const void *data, 246b8e80941Smrg size_t size) 247b8e80941Smrg{ 248b8e80941Smrg struct tu_device *device = cache->device; 249b8e80941Smrg struct cache_header header; 250b8e80941Smrg 251b8e80941Smrg if (size < sizeof(header)) 252b8e80941Smrg return; 253b8e80941Smrg memcpy(&header, data, sizeof(header)); 254b8e80941Smrg if (header.header_size < sizeof(header)) 255b8e80941Smrg return; 256b8e80941Smrg if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE) 257b8e80941Smrg return; 258b8e80941Smrg if (header.vendor_id != 0 /* TODO */) 259b8e80941Smrg return; 260b8e80941Smrg if (header.device_id != 0 /* TODO */) 261b8e80941Smrg return; 262b8e80941Smrg if (memcmp(header.uuid, device->physical_device->cache_uuid, 263b8e80941Smrg VK_UUID_SIZE) != 0) 264b8e80941Smrg return; 265b8e80941Smrg 266b8e80941Smrg char *end = (void *) data + size; 267b8e80941Smrg char *p = (void *) data + header.header_size; 268b8e80941Smrg 269b8e80941Smrg while (end - p >= sizeof(struct cache_entry)) { 270b8e80941Smrg struct cache_entry *entry = (struct cache_entry *) p; 271b8e80941Smrg struct cache_entry *dest_entry; 272b8e80941Smrg size_t size = entry_size(entry); 273b8e80941Smrg if (end - p < size) 274b8e80941Smrg break; 275b8e80941Smrg 276b8e80941Smrg dest_entry = 277b8e80941Smrg vk_alloc(&cache->alloc, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_CACHE); 278b8e80941Smrg if (dest_entry) { 279b8e80941Smrg memcpy(dest_entry, entry, size); 280b8e80941Smrg for (int i = 0; i < MESA_SHADER_STAGES; ++i) 281b8e80941Smrg dest_entry->variants[i] = NULL; 282b8e80941Smrg tu_pipeline_cache_add_entry(cache, dest_entry); 283b8e80941Smrg } 284b8e80941Smrg p += size; 285b8e80941Smrg } 286b8e80941Smrg} 287b8e80941Smrg 288b8e80941SmrgVkResult 289b8e80941Smrgtu_CreatePipelineCache(VkDevice _device, 290b8e80941Smrg const VkPipelineCacheCreateInfo *pCreateInfo, 291b8e80941Smrg const VkAllocationCallbacks *pAllocator, 292b8e80941Smrg VkPipelineCache *pPipelineCache) 293b8e80941Smrg{ 294b8e80941Smrg TU_FROM_HANDLE(tu_device, device, _device); 295b8e80941Smrg struct tu_pipeline_cache *cache; 296b8e80941Smrg 297b8e80941Smrg assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO); 298b8e80941Smrg assert(pCreateInfo->flags == 0); 299b8e80941Smrg 300b8e80941Smrg cache = vk_alloc2(&device->alloc, pAllocator, sizeof(*cache), 8, 301b8e80941Smrg VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 302b8e80941Smrg if (cache == NULL) 303b8e80941Smrg return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); 304b8e80941Smrg 305b8e80941Smrg if (pAllocator) 306b8e80941Smrg cache->alloc = *pAllocator; 307b8e80941Smrg else 308b8e80941Smrg cache->alloc = device->alloc; 309b8e80941Smrg 310b8e80941Smrg tu_pipeline_cache_init(cache, device); 311b8e80941Smrg 312b8e80941Smrg if (pCreateInfo->initialDataSize > 0) { 313b8e80941Smrg tu_pipeline_cache_load(cache, pCreateInfo->pInitialData, 314b8e80941Smrg pCreateInfo->initialDataSize); 315b8e80941Smrg } 316b8e80941Smrg 317b8e80941Smrg *pPipelineCache = tu_pipeline_cache_to_handle(cache); 318b8e80941Smrg 319b8e80941Smrg return VK_SUCCESS; 320b8e80941Smrg} 321b8e80941Smrg 322b8e80941Smrgvoid 323b8e80941Smrgtu_DestroyPipelineCache(VkDevice _device, 324b8e80941Smrg VkPipelineCache _cache, 325b8e80941Smrg const VkAllocationCallbacks *pAllocator) 326b8e80941Smrg{ 327b8e80941Smrg TU_FROM_HANDLE(tu_device, device, _device); 328b8e80941Smrg TU_FROM_HANDLE(tu_pipeline_cache, cache, _cache); 329b8e80941Smrg 330b8e80941Smrg if (!cache) 331b8e80941Smrg return; 332b8e80941Smrg tu_pipeline_cache_finish(cache); 333b8e80941Smrg 334b8e80941Smrg vk_free2(&device->alloc, pAllocator, cache); 335b8e80941Smrg} 336b8e80941Smrg 337b8e80941SmrgVkResult 338b8e80941Smrgtu_GetPipelineCacheData(VkDevice _device, 339b8e80941Smrg VkPipelineCache _cache, 340b8e80941Smrg size_t *pDataSize, 341b8e80941Smrg void *pData) 342b8e80941Smrg{ 343b8e80941Smrg TU_FROM_HANDLE(tu_device, device, _device); 344b8e80941Smrg TU_FROM_HANDLE(tu_pipeline_cache, cache, _cache); 345b8e80941Smrg struct cache_header *header; 346b8e80941Smrg VkResult result = VK_SUCCESS; 347b8e80941Smrg 348b8e80941Smrg pthread_mutex_lock(&cache->mutex); 349b8e80941Smrg 350b8e80941Smrg const size_t size = sizeof(*header) + cache->total_size; 351b8e80941Smrg if (pData == NULL) { 352b8e80941Smrg pthread_mutex_unlock(&cache->mutex); 353b8e80941Smrg *pDataSize = size; 354b8e80941Smrg return VK_SUCCESS; 355b8e80941Smrg } 356b8e80941Smrg if (*pDataSize < sizeof(*header)) { 357b8e80941Smrg pthread_mutex_unlock(&cache->mutex); 358b8e80941Smrg *pDataSize = 0; 359b8e80941Smrg return VK_INCOMPLETE; 360b8e80941Smrg } 361b8e80941Smrg void *p = pData, *end = pData + *pDataSize; 362b8e80941Smrg header = p; 363b8e80941Smrg header->header_size = sizeof(*header); 364b8e80941Smrg header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE; 365b8e80941Smrg header->vendor_id = 0 /* TODO */; 366b8e80941Smrg header->device_id = 0 /* TODO */; 367b8e80941Smrg memcpy(header->uuid, device->physical_device->cache_uuid, VK_UUID_SIZE); 368b8e80941Smrg p += header->header_size; 369b8e80941Smrg 370b8e80941Smrg struct cache_entry *entry; 371b8e80941Smrg for (uint32_t i = 0; i < cache->table_size; i++) { 372b8e80941Smrg if (!cache->hash_table[i]) 373b8e80941Smrg continue; 374b8e80941Smrg entry = cache->hash_table[i]; 375b8e80941Smrg const uint32_t size = entry_size(entry); 376b8e80941Smrg if (end < p + size) { 377b8e80941Smrg result = VK_INCOMPLETE; 378b8e80941Smrg break; 379b8e80941Smrg } 380b8e80941Smrg 381b8e80941Smrg memcpy(p, entry, size); 382b8e80941Smrg for (int j = 0; j < MESA_SHADER_STAGES; ++j) 383b8e80941Smrg ((struct cache_entry *) p)->variants[j] = NULL; 384b8e80941Smrg p += size; 385b8e80941Smrg } 386b8e80941Smrg *pDataSize = p - pData; 387b8e80941Smrg 388b8e80941Smrg pthread_mutex_unlock(&cache->mutex); 389b8e80941Smrg return result; 390b8e80941Smrg} 391b8e80941Smrg 392b8e80941Smrgstatic void 393b8e80941Smrgtu_pipeline_cache_merge(struct tu_pipeline_cache *dst, 394b8e80941Smrg struct tu_pipeline_cache *src) 395b8e80941Smrg{ 396b8e80941Smrg for (uint32_t i = 0; i < src->table_size; i++) { 397b8e80941Smrg struct cache_entry *entry = src->hash_table[i]; 398b8e80941Smrg if (!entry || tu_pipeline_cache_search(dst, entry->sha1)) 399b8e80941Smrg continue; 400b8e80941Smrg 401b8e80941Smrg tu_pipeline_cache_add_entry(dst, entry); 402b8e80941Smrg 403b8e80941Smrg src->hash_table[i] = NULL; 404b8e80941Smrg } 405b8e80941Smrg} 406b8e80941Smrg 407b8e80941SmrgVkResult 408b8e80941Smrgtu_MergePipelineCaches(VkDevice _device, 409b8e80941Smrg VkPipelineCache destCache, 410b8e80941Smrg uint32_t srcCacheCount, 411b8e80941Smrg const VkPipelineCache *pSrcCaches) 412b8e80941Smrg{ 413b8e80941Smrg TU_FROM_HANDLE(tu_pipeline_cache, dst, destCache); 414b8e80941Smrg 415b8e80941Smrg for (uint32_t i = 0; i < srcCacheCount; i++) { 416b8e80941Smrg TU_FROM_HANDLE(tu_pipeline_cache, src, pSrcCaches[i]); 417b8e80941Smrg 418b8e80941Smrg tu_pipeline_cache_merge(dst, src); 419b8e80941Smrg } 420b8e80941Smrg 421b8e80941Smrg return VK_SUCCESS; 422b8e80941Smrg} 423