/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "tu_private.h"

#include "util/debug.h"
#include "util/disk_cache.h"
#include "util/mesa-sha1.h"
#include "util/u_atomic.h"
#include "vulkan/util/vk_util.h"

struct cache_entry_variant_info
{
};

struct cache_entry
{
   union {
      unsigned char sha1[20];
      uint32_t sha1_dw[5];
   };
   uint32_t code_sizes[MESA_SHADER_STAGES];
   struct tu_shader_variant *variants[MESA_SHADER_STAGES];
   char code[0];
};
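
/* Cache entries are kept in an open-addressed hash table with linear
 * probing, indexed by the low 32 bits of the pipeline SHA-1. The table is
 * never allowed to become more than half full (see
 * tu_pipeline_cache_add_entry) and is doubled in size once that limit is
 * reached, so a probe sequence always terminates.
 */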

static void
tu_pipeline_cache_init(struct tu_pipeline_cache *cache,
                       struct tu_device *device)
{
   cache->device = device;
   pthread_mutex_init(&cache->mutex, NULL);

   cache->modified = false;
   cache->kernel_count = 0;
   cache->total_size = 0;
   cache->table_size = 1024;
   const size_t byte_size = cache->table_size * sizeof(cache->hash_table[0]);
   cache->hash_table = malloc(byte_size);

   /* We don't consider allocation failure fatal, we just start with a 0-sized
    * cache. Disable caching when we want to keep shader debug info, since
    * we don't get the debug info on cached shaders.
    */
   if (cache->hash_table == NULL)
      cache->table_size = 0;
   else
      memset(cache->hash_table, 0, byte_size);
}

static void
tu_pipeline_cache_finish(struct tu_pipeline_cache *cache)
{
   for (unsigned i = 0; i < cache->table_size; ++i)
      if (cache->hash_table[i]) {
         vk_free(&cache->alloc, cache->hash_table[i]);
      }
   pthread_mutex_destroy(&cache->mutex);
   free(cache->hash_table);
}

static uint32_t
entry_size(struct cache_entry *entry)
{
   size_t ret = sizeof(*entry);
   for (int i = 0; i < MESA_SHADER_STAGES; ++i)
      if (entry->code_sizes[i])
         ret +=
            sizeof(struct cache_entry_variant_info) + entry->code_sizes[i];
   return ret;
}

static struct cache_entry *
tu_pipeline_cache_search_unlocked(struct tu_pipeline_cache *cache,
                                  const unsigned char *sha1)
{
   const uint32_t mask = cache->table_size - 1;
   const uint32_t start = (*(uint32_t *) sha1);

   if (cache->table_size == 0)
      return NULL;

   for (uint32_t i = 0; i < cache->table_size; i++) {
      const uint32_t index = (start + i) & mask;
      struct cache_entry *entry = cache->hash_table[index];

      if (!entry)
         return NULL;

      if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) {
         return entry;
      }
   }

   unreachable("hash table should never be full");
}

static struct cache_entry *
tu_pipeline_cache_search(struct tu_pipeline_cache *cache,
                         const unsigned char *sha1)
{
   struct cache_entry *entry;

   pthread_mutex_lock(&cache->mutex);

   entry = tu_pipeline_cache_search_unlocked(cache, sha1);

   pthread_mutex_unlock(&cache->mutex);

   return entry;
}

static void
tu_pipeline_cache_set_entry(struct tu_pipeline_cache *cache,
                            struct cache_entry *entry)
{
   const uint32_t mask = cache->table_size - 1;
   const uint32_t start = entry->sha1_dw[0];

   /* We'll always be able to insert when we get here. */
   assert(cache->kernel_count < cache->table_size / 2);

   for (uint32_t i = 0; i < cache->table_size; i++) {
      const uint32_t index = (start + i) & mask;
      if (!cache->hash_table[index]) {
         cache->hash_table[index] = entry;
         break;
      }
   }

   cache->total_size += entry_size(entry);
   cache->kernel_count++;
}

static VkResult
tu_pipeline_cache_grow(struct tu_pipeline_cache *cache)
{
   const uint32_t table_size = cache->table_size * 2;
   const uint32_t old_table_size = cache->table_size;
   const size_t byte_size = table_size * sizeof(cache->hash_table[0]);
   struct cache_entry **table;
   struct cache_entry **old_table = cache->hash_table;

   table = malloc(byte_size);
   if (table == NULL)
      return vk_error(cache, VK_ERROR_OUT_OF_HOST_MEMORY);

   cache->hash_table = table;
   cache->table_size = table_size;
   cache->kernel_count = 0;
   cache->total_size = 0;

   memset(cache->hash_table, 0, byte_size);
   for (uint32_t i = 0; i < old_table_size; i++) {
      struct cache_entry *entry = old_table[i];
      if (!entry)
         continue;

      tu_pipeline_cache_set_entry(cache, entry);
   }

   free(old_table);

   return VK_SUCCESS;
}

static void
tu_pipeline_cache_add_entry(struct tu_pipeline_cache *cache,
                            struct cache_entry *entry)
{
   if (cache->kernel_count == cache->table_size / 2)
      tu_pipeline_cache_grow(cache);

   /* Failing to grow that hash table isn't fatal, but may mean we don't
    * have enough space to add this new kernel. Only add it if there's room.
    */
   if (cache->kernel_count < cache->table_size / 2)
      tu_pipeline_cache_set_entry(cache, entry);
}
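
/* Serialized caches, both the initial data accepted by
 * tu_pipeline_cache_load and the blob produced by tu_GetPipelineCacheData,
 * consist of a vk_pipeline_cache_header (validated against the Qualcomm
 * vendor id 0x5143, the chip id and the device cache UUID) followed by
 * tightly packed cache_entry records. The variants[] pointers are
 * meaningless outside the process that wrote them and are cleared on both
 * paths.
 */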

static void
tu_pipeline_cache_load(struct tu_pipeline_cache *cache,
                       const void *data,
                       size_t size)
{
   struct tu_device *device = cache->device;
   struct vk_pipeline_cache_header header;

   if (size < sizeof(header))
      return;
   memcpy(&header, data, sizeof(header));
   if (header.header_size < sizeof(header))
      return;
   if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
      return;
   if (header.vendor_id != 0x5143)
      return;
   if (header.device_id != device->physical_device->dev_id.chip_id)
      return;
   if (memcmp(header.uuid, device->physical_device->cache_uuid,
              VK_UUID_SIZE) != 0)
      return;

   char *end = (void *) data + size;
   char *p = (void *) data + header.header_size;

   while (end - p >= sizeof(struct cache_entry)) {
      struct cache_entry *entry = (struct cache_entry *) p;
      struct cache_entry *dest_entry;
      size_t size = entry_size(entry);
      if (end - p < size)
         break;

      dest_entry =
         vk_alloc(&cache->alloc, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
      if (dest_entry) {
         memcpy(dest_entry, entry, size);
         for (int i = 0; i < MESA_SHADER_STAGES; ++i)
            dest_entry->variants[i] = NULL;
         tu_pipeline_cache_add_entry(cache, dest_entry);
      }
      p += size;
   }
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_CreatePipelineCache(VkDevice _device,
                       const VkPipelineCacheCreateInfo *pCreateInfo,
                       const VkAllocationCallbacks *pAllocator,
                       VkPipelineCache *pPipelineCache)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   struct tu_pipeline_cache *cache;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
   assert(pCreateInfo->flags == 0);

   cache = vk_object_alloc(&device->vk, pAllocator, sizeof(*cache),
                           VK_OBJECT_TYPE_PIPELINE_CACHE);
   if (cache == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   if (pAllocator)
      cache->alloc = *pAllocator;
   else
      cache->alloc = device->vk.alloc;

   tu_pipeline_cache_init(cache, device);

   if (pCreateInfo->initialDataSize > 0) {
      tu_pipeline_cache_load(cache, pCreateInfo->pInitialData,
                             pCreateInfo->initialDataSize);
   }

   *pPipelineCache = tu_pipeline_cache_to_handle(cache);

   return VK_SUCCESS;
}

VKAPI_ATTR void VKAPI_CALL
tu_DestroyPipelineCache(VkDevice _device,
                        VkPipelineCache _cache,
                        const VkAllocationCallbacks *pAllocator)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   TU_FROM_HANDLE(tu_pipeline_cache, cache, _cache);

   if (!cache)
      return;
   tu_pipeline_cache_finish(cache);

   vk_object_free(&device->vk, pAllocator, cache);
}
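
/* vkGetPipelineCacheData follows the usual Vulkan two-call idiom: called
 * with pData == NULL it only reports the required size, and with a buffer
 * that is too small it copies as many whole entries as fit and returns
 * VK_INCOMPLETE.
 */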

VKAPI_ATTR VkResult VKAPI_CALL
tu_GetPipelineCacheData(VkDevice _device,
                        VkPipelineCache _cache,
                        size_t *pDataSize,
                        void *pData)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   TU_FROM_HANDLE(tu_pipeline_cache, cache, _cache);
   struct vk_pipeline_cache_header *header;
   VkResult result = VK_SUCCESS;

   pthread_mutex_lock(&cache->mutex);

   const size_t size = sizeof(*header) + cache->total_size;
   if (pData == NULL) {
      pthread_mutex_unlock(&cache->mutex);
      *pDataSize = size;
      return VK_SUCCESS;
   }
   if (*pDataSize < sizeof(*header)) {
      pthread_mutex_unlock(&cache->mutex);
      *pDataSize = 0;
      return VK_INCOMPLETE;
   }
   void *p = pData, *end = pData + *pDataSize;
   header = p;
   header->header_size = sizeof(*header);
   header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
   header->vendor_id = 0x5143;
   header->device_id = device->physical_device->dev_id.chip_id;
   memcpy(header->uuid, device->physical_device->cache_uuid, VK_UUID_SIZE);
   p += header->header_size;

   struct cache_entry *entry;
   for (uint32_t i = 0; i < cache->table_size; i++) {
      if (!cache->hash_table[i])
         continue;
      entry = cache->hash_table[i];
      const uint32_t size = entry_size(entry);
      if (end < p + size) {
         result = VK_INCOMPLETE;
         break;
      }

      memcpy(p, entry, size);
      for (int j = 0; j < MESA_SHADER_STAGES; ++j)
         ((struct cache_entry *) p)->variants[j] = NULL;
      p += size;
   }
   *pDataSize = p - pData;

   pthread_mutex_unlock(&cache->mutex);
   return result;
}

static void
tu_pipeline_cache_merge(struct tu_pipeline_cache *dst,
                        struct tu_pipeline_cache *src)
{
   for (uint32_t i = 0; i < src->table_size; i++) {
      struct cache_entry *entry = src->hash_table[i];
      if (!entry || tu_pipeline_cache_search(dst, entry->sha1))
         continue;

      tu_pipeline_cache_add_entry(dst, entry);

      src->hash_table[i] = NULL;
   }
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_MergePipelineCaches(VkDevice _device,
                       VkPipelineCache destCache,
                       uint32_t srcCacheCount,
                       const VkPipelineCache *pSrcCaches)
{
   TU_FROM_HANDLE(tu_pipeline_cache, dst, destCache);

   for (uint32_t i = 0; i < srcCacheCount; i++) {
      TU_FROM_HANDLE(tu_pipeline_cache, src, pSrcCaches[i]);

      tu_pipeline_cache_merge(dst, src);
   }

   return VK_SUCCESS;
}
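
/* For illustration only, not part of the driver: a minimal sketch of how an
 * application might round-trip this cache through the core Vulkan entry
 * points that dispatch to the tu_* functions above. `device`, `blob` and
 * `blob_size` are assumed to come from the application (NULL/0 on a first
 * run with no cache on disk).
 *
 *    VkPipelineCacheCreateInfo info = {
 *       .sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO,
 *       .initialDataSize = blob_size,
 *       .pInitialData = blob,
 *    };
 *    VkPipelineCache cache;
 *    vkCreatePipelineCache(device, &info, NULL, &cache);
 *
 *    // ... create pipelines against `cache` ...
 *
 *    size_t size = 0;
 *    vkGetPipelineCacheData(device, cache, &size, NULL);   // query size
 *    void *data = malloc(size);
 *    vkGetPipelineCacheData(device, cache, &size, data);   // fill blob
 *    // ... write `data` to disk for the next run ...
 *
 *    free(data);
 *    vkDestroyPipelineCache(device, cache, NULL);
 */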