1/*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23/**
24 * @file iris_program_cache.c
25 *
26 * The in-memory program cache.  This is basically a hash table mapping
27 * API-specified shaders and a state key to a compiled variant.  It also
28 * takes care of uploading shader assembly into a BO for use on the GPU.
29 */
30
31#include <stdio.h>
32#include <errno.h>
33#include "pipe/p_defines.h"
34#include "pipe/p_state.h"
35#include "pipe/p_context.h"
36#include "pipe/p_screen.h"
37#include "util/u_atomic.h"
38#include "util/u_upload_mgr.h"
39#include "compiler/nir/nir.h"
40#include "compiler/nir/nir_builder.h"
41#include "intel/compiler/brw_compiler.h"
42#include "intel/compiler/brw_eu.h"
43#include "intel/compiler/brw_nir.h"
44#include "iris_context.h"
45#include "iris_resource.h"
46
47struct keybox {
48   uint16_t size;
49   enum iris_program_cache_id cache_id;
50   uint8_t data[0];
51};
52
53static struct keybox *
54make_keybox(void *mem_ctx,
55            enum iris_program_cache_id cache_id,
56            const void *key,
57            uint32_t key_size)
58{
59   struct keybox *keybox =
60      ralloc_size(mem_ctx, sizeof(struct keybox) + key_size);
61
62   keybox->cache_id = cache_id;
63   keybox->size = key_size;
64   memcpy(keybox->data, key, key_size);
65
66   return keybox;
67}
68
69static uint32_t
70keybox_hash(const void *void_key)
71{
72   const struct keybox *key = void_key;
73   return _mesa_hash_data(&key->cache_id, key->size + sizeof(key->cache_id));
74}
75
76static bool
77keybox_equals(const void *void_a, const void *void_b)
78{
79   const struct keybox *a = void_a, *b = void_b;
80   if (a->size != b->size)
81      return false;
82
83   return memcmp(a->data, b->data, a->size) == 0;
84}
85
86static unsigned
87get_program_string_id(enum iris_program_cache_id cache_id, const void *key)
88{
89   switch (cache_id) {
90   case IRIS_CACHE_VS:
91      return ((struct brw_vs_prog_key *) key)->program_string_id;
92   case IRIS_CACHE_TCS:
93      return ((struct brw_tcs_prog_key *) key)->program_string_id;
94   case IRIS_CACHE_TES:
95      return ((struct brw_tes_prog_key *) key)->program_string_id;
96   case IRIS_CACHE_GS:
97      return ((struct brw_gs_prog_key *) key)->program_string_id;
98   case IRIS_CACHE_CS:
99      return ((struct brw_cs_prog_key *) key)->program_string_id;
100   case IRIS_CACHE_FS:
101      return ((struct brw_wm_prog_key *) key)->program_string_id;
102   default:
103      unreachable("no program string id for this kind of program");
104   }
105}
106
107struct iris_compiled_shader *
108iris_find_cached_shader(struct iris_context *ice,
109                        enum iris_program_cache_id cache_id,
110                        uint32_t key_size,
111                        const void *key)
112{
113   struct keybox *keybox =
114      make_keybox(ice->shaders.cache, cache_id, key, key_size);
115   struct hash_entry *entry =
116      _mesa_hash_table_search(ice->shaders.cache, keybox);
117
118   ralloc_free(keybox);
119
120   return entry ? entry->data : NULL;
121}
122
123const void *
124iris_find_previous_compile(const struct iris_context *ice,
125                           enum iris_program_cache_id cache_id,
126                           unsigned program_string_id)
127{
128   hash_table_foreach(ice->shaders.cache, entry) {
129      const struct keybox *keybox = entry->key;
130      if (keybox->cache_id == cache_id &&
131          get_program_string_id(cache_id, keybox->data) == program_string_id) {
132         return keybox->data;
133      }
134   }
135
136   return NULL;
137}
138
139/**
140 * Look for an existing entry in the cache that has identical assembly code.
141 *
142 * This is useful for programs generating shaders at runtime, where multiple
143 * distinct shaders (from an API perspective) may compile to the same assembly
144 * in our backend.  This saves space in the program cache buffer.
145 */
146static const struct iris_compiled_shader *
147find_existing_assembly(struct hash_table *cache,
148                       const void *assembly,
149                       unsigned assembly_size)
150{
151   hash_table_foreach(cache, entry) {
152      const struct iris_compiled_shader *existing = entry->data;
153      if (existing->prog_data->program_size == assembly_size &&
154          memcmp(existing->map, assembly, assembly_size) == 0)
155         return existing;
156   }
157   return NULL;
158}
159
160struct iris_compiled_shader *
161iris_upload_shader(struct iris_context *ice,
162                   enum iris_program_cache_id cache_id,
163                   uint32_t key_size,
164                   const void *key,
165                   const void *assembly,
166                   struct brw_stage_prog_data *prog_data,
167                   uint32_t *streamout,
168                   enum brw_param_builtin *system_values,
169                   unsigned num_system_values,
170                   unsigned num_cbufs)
171{
172   struct hash_table *cache = ice->shaders.cache;
173   struct iris_compiled_shader *shader =
174      rzalloc_size(cache, sizeof(struct iris_compiled_shader) +
175                   ice->vtbl.derived_program_state_size(cache_id));
176   const struct iris_compiled_shader *existing =
177      find_existing_assembly(cache, assembly, prog_data->program_size);
178
179   /* If we can find a matching prog in the cache already, then reuse the
180    * existing stuff without creating new copy into the underlying buffer
181    * object.  This is notably useful for programs generating shaders at
182    * runtime, where multiple shaders may compile to the same thing in our
183    * backend.
184    */
185   if (existing) {
186      pipe_resource_reference(&shader->assembly.res, existing->assembly.res);
187      shader->assembly.offset = existing->assembly.offset;
188      shader->map = existing->map;
189   } else {
190      shader->assembly.res = NULL;
191      u_upload_alloc(ice->shaders.uploader, 0, prog_data->program_size, 64,
192                     &shader->assembly.offset, &shader->assembly.res,
193                     &shader->map);
194      memcpy(shader->map, assembly, prog_data->program_size);
195   }
196
197   shader->prog_data = prog_data;
198   shader->streamout = streamout;
199   shader->system_values = system_values;
200   shader->num_system_values = num_system_values;
201   shader->num_cbufs = num_cbufs;
202
203   ralloc_steal(shader, shader->prog_data);
204   ralloc_steal(shader->prog_data, prog_data->param);
205   ralloc_steal(shader->prog_data, prog_data->pull_param);
206   ralloc_steal(shader, shader->streamout);
207   ralloc_steal(shader, shader->system_values);
208
209   /* Store the 3DSTATE shader packets and other derived state. */
210   ice->vtbl.store_derived_program_state(ice, cache_id, shader);
211
212   struct keybox *keybox = make_keybox(cache, cache_id, key, key_size);
213   _mesa_hash_table_insert(ice->shaders.cache, keybox, shader);
214
215   return shader;
216}
217
218bool
219iris_blorp_lookup_shader(struct blorp_batch *blorp_batch,
220                         const void *key, uint32_t key_size,
221                         uint32_t *kernel_out, void *prog_data_out)
222{
223   struct blorp_context *blorp = blorp_batch->blorp;
224   struct iris_context *ice = blorp->driver_ctx;
225   struct iris_batch *batch = blorp_batch->driver_batch;
226   struct iris_compiled_shader *shader =
227      iris_find_cached_shader(ice, IRIS_CACHE_BLORP, key_size, key);
228
229   if (!shader)
230      return false;
231
232   struct iris_bo *bo = iris_resource_bo(shader->assembly.res);
233   *kernel_out =
234      iris_bo_offset_from_base_address(bo) + shader->assembly.offset;
235   *((void **) prog_data_out) = shader->prog_data;
236
237   iris_use_pinned_bo(batch, bo, false);
238
239   return true;
240}
241
242bool
243iris_blorp_upload_shader(struct blorp_batch *blorp_batch,
244                         const void *key, uint32_t key_size,
245                         const void *kernel, UNUSED uint32_t kernel_size,
246                         const struct brw_stage_prog_data *prog_data_templ,
247                         UNUSED uint32_t prog_data_size,
248                         uint32_t *kernel_out, void *prog_data_out)
249{
250   struct blorp_context *blorp = blorp_batch->blorp;
251   struct iris_context *ice = blorp->driver_ctx;
252   struct iris_batch *batch = blorp_batch->driver_batch;
253
254   void *prog_data = ralloc_size(NULL, prog_data_size);
255   memcpy(prog_data, prog_data_templ, prog_data_size);
256
257   struct iris_compiled_shader *shader =
258      iris_upload_shader(ice, IRIS_CACHE_BLORP, key_size, key, kernel,
259                         prog_data, NULL, NULL, 0, 0);
260
261   struct iris_bo *bo = iris_resource_bo(shader->assembly.res);
262   *kernel_out =
263      iris_bo_offset_from_base_address(bo) + shader->assembly.offset;
264   *((void **) prog_data_out) = shader->prog_data;
265
266   iris_use_pinned_bo(batch, bo, false);
267
268   return true;
269}
270
271void
272iris_init_program_cache(struct iris_context *ice)
273{
274   ice->shaders.cache =
275      _mesa_hash_table_create(ice, keybox_hash, keybox_equals);
276
277   ice->shaders.uploader =
278      u_upload_create(&ice->ctx, 16384, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE,
279                      IRIS_RESOURCE_FLAG_SHADER_MEMZONE);
280}
281
282void
283iris_destroy_program_cache(struct iris_context *ice)
284{
285   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
286      ice->shaders.prog[i] = NULL;
287   }
288
289   hash_table_foreach(ice->shaders.cache, entry) {
290      struct iris_compiled_shader *shader = entry->data;
291      pipe_resource_reference(&shader->assembly.res, NULL);
292   }
293
294   u_upload_destroy(ice->shaders.uploader);
295
296   ralloc_free(ice->shaders.cache);
297}
298
299static const char *
300cache_name(enum iris_program_cache_id cache_id)
301{
302   if (cache_id == IRIS_CACHE_BLORP)
303      return "BLORP";
304
305   return _mesa_shader_stage_to_string(cache_id);
306}
307
308void
309iris_print_program_cache(struct iris_context *ice)
310{
311   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
312   const struct gen_device_info *devinfo = &screen->devinfo;
313
314   hash_table_foreach(ice->shaders.cache, entry) {
315      const struct keybox *keybox = entry->key;
316      struct iris_compiled_shader *shader = entry->data;
317      fprintf(stderr, "%s:\n", cache_name(keybox->cache_id));
318      brw_disassemble(devinfo, shader->map, 0,
319                      shader->prog_data->program_size, stderr);
320   }
321}
322