1/*
2 * Copyright (c) 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24#include <stdint.h>
25
26#include "compiler/nir/nir_serialize.h"
27#include "util/build_id.h"
28#include "util/mesa-sha1.h"
29
30#include "brw_context.h"
31#include "brw_program.h"
32#include "brw_state.h"
33
34static uint8_t driver_sha1[20];
35
36void
37brw_program_binary_init(unsigned device_id)
38{
39   const struct build_id_note *note =
40      build_id_find_nhdr_for_addr(brw_program_binary_init);
41   assert(note);
42
43   /**
44    * With Mesa's megadrivers, taking the sha1 of i965_dri.so may not be
45    * unique. Therefore, we make a sha1 of the "i965" string and the sha1
46    * build id from i965_dri.so.
47    */
48   struct mesa_sha1 ctx;
49   _mesa_sha1_init(&ctx);
50   char renderer[10];
51   assert(device_id < 0x10000);
52   int len = snprintf(renderer, sizeof(renderer), "i965_%04x", device_id);
53   assert(len == sizeof(renderer) - 1);
54   _mesa_sha1_update(&ctx, renderer, len);
55   _mesa_sha1_update(&ctx, build_id_data(note), build_id_length(note));
56   _mesa_sha1_final(&ctx, driver_sha1);
57}
58
59void
60brw_get_program_binary_driver_sha1(struct gl_context *ctx, uint8_t *sha1)
61{
62   memcpy(sha1, driver_sha1, sizeof(uint8_t) * 20);
63}
64
/**
 * Tags for the parts stored in a program's driver cache blob.
 *
 * Each part other than END_PART is laid out as the uint32_t tag, a uint32_t
 * payload size, and then the payload itself.
 */
enum driver_cache_blob_part {
   /* Terminates the part list; the blob must end right after this tag. */
   END_PART = 0,
   /* Program key followed by the prog_data, program and params. */
   INTEL_PART = 1,
   /* The serialized NIR shader. */
   NIR_PART = 2,
};
70
71static bool
72blob_parts_valid(void *blob, uint32_t size)
73{
74   struct blob_reader reader;
75   blob_reader_init(&reader, blob, size);
76
77   do {
78      uint32_t part_type = blob_read_uint32(&reader);
79      if (reader.overrun)
80         return false;
81      if (part_type == END_PART)
82         return reader.current == reader.end;
83      switch ((enum driver_cache_blob_part)part_type) {
84      case INTEL_PART:
85      case NIR_PART:
86         /* Read the uint32_t part-size and skip over it */
87         blob_skip_bytes(&reader, blob_read_uint32(&reader));
88         if (reader.overrun)
89            return false;
90         break;
91      default:
92         return false;
93      }
94   } while (true);
95}
96
97static bool
98blob_has_part(void *blob, uint32_t size, enum driver_cache_blob_part part)
99{
100   struct blob_reader reader;
101   blob_reader_init(&reader, blob, size);
102
103   assert(blob_parts_valid(blob, size));
104   do {
105      uint32_t part_type = blob_read_uint32(&reader);
106      if (part_type == END_PART)
107         return false;
108      if (part_type == part)
109         return true;
110      blob_skip_bytes(&reader, blob_read_uint32(&reader));
111   } while (true);
112}
113
114static bool
115driver_blob_is_ready(void *blob, uint32_t size, bool with_intel_program)
116{
117   if (!blob) {
118      return false;
119   } else if (!blob_parts_valid(blob, size)) {
120      unreachable("Driver blob format is bad!");
121      return false;
122   } else if (blob_has_part(blob, size, INTEL_PART) == with_intel_program) {
123      return true;
124   } else {
125      return false;
126   }
127}
128
129static void
130serialize_nir_part(struct blob *writer, struct gl_program *prog)
131{
132   blob_write_uint32(writer, NIR_PART);
133   intptr_t size_offset = blob_reserve_uint32(writer);
134   size_t nir_start = writer->size;
135   nir_serialize(writer, prog->nir, false);
136   blob_overwrite_uint32(writer, size_offset, writer->size - nir_start);
137}
138
139void
140brw_program_serialize_nir(struct gl_context *ctx, struct gl_program *prog)
141{
142   if (driver_blob_is_ready(prog->driver_cache_blob,
143                            prog->driver_cache_blob_size, false))
144      return;
145
146   if (prog->driver_cache_blob)
147      ralloc_free(prog->driver_cache_blob);
148
149   struct blob writer;
150   blob_init(&writer);
151   serialize_nir_part(&writer, prog);
152   blob_write_uint32(&writer, END_PART);
153   prog->driver_cache_blob = ralloc_size(NULL, writer.size);
154   memcpy(prog->driver_cache_blob, writer.data, writer.size);
155   prog->driver_cache_blob_size = writer.size;
156   blob_finish(&writer);
157}
158
159static bool
160deserialize_intel_program(struct blob_reader *reader, struct gl_context *ctx,
161                        struct gl_program *prog, gl_shader_stage stage)
162{
163   struct brw_context *brw = brw_context(ctx);
164
165   union brw_any_prog_key prog_key;
166   blob_copy_bytes(reader, &prog_key, brw_prog_key_size(stage));
167   prog_key.base.program_string_id = brw_program(prog)->id;
168
169   enum brw_cache_id cache_id = brw_stage_cache_id(stage);
170
171   const uint8_t *program;
172   struct brw_stage_prog_data *prog_data =
173      ralloc_size(NULL, sizeof(union brw_any_prog_data));
174
175   if (!brw_read_blob_program_data(reader, prog, stage, &program, prog_data)) {
176      ralloc_free(prog_data);
177      return false;
178   }
179
180   uint32_t offset;
181   void *out_prog_data;
182   brw_upload_cache(&brw->cache, cache_id, &prog_key, brw_prog_key_size(stage),
183                    program, prog_data->program_size, prog_data,
184                    brw_prog_data_size(stage), &offset, &out_prog_data);
185
186   ralloc_free(prog_data);
187
188   return true;
189}
190
191void
192brw_program_deserialize_driver_blob(struct gl_context *ctx,
193                                    struct gl_program *prog,
194                                    gl_shader_stage stage)
195{
196   if (!prog->driver_cache_blob)
197      return;
198
199   struct blob_reader reader;
200   blob_reader_init(&reader, prog->driver_cache_blob,
201                    prog->driver_cache_blob_size);
202
203   do {
204      uint32_t part_type = blob_read_uint32(&reader);
205      if ((enum driver_cache_blob_part)part_type == END_PART)
206         break;
207      switch ((enum driver_cache_blob_part)part_type) {
208      case INTEL_PART: {
209         ASSERTED uint32_t gen_size = blob_read_uint32(&reader);
210         assert(!reader.overrun &&
211                (uintptr_t)(reader.end - reader.current) > gen_size);
212         deserialize_intel_program(&reader, ctx, prog, stage);
213         break;
214      }
215      case NIR_PART: {
216         ASSERTED uint32_t nir_size = blob_read_uint32(&reader);
217         assert(!reader.overrun &&
218                (uintptr_t)(reader.end - reader.current) > nir_size);
219         const struct nir_shader_compiler_options *options =
220            ctx->Const.ShaderCompilerOptions[stage].NirOptions;
221         prog->nir = nir_deserialize(NULL, options, &reader);
222         break;
223      }
224      default:
225         unreachable("Unsupported blob part type!");
226         break;
227      }
228   } while (true);
229
230   ralloc_free(prog->driver_cache_blob);
231   prog->driver_cache_blob = NULL;
232   prog->driver_cache_blob_size = 0;
233}
234
235/* This is just a wrapper around brw_program_deserialize_nir() as i965
236 * doesn't need gl_shader_program like other drivers do.
237 */
238void
239brw_deserialize_program_binary(struct gl_context *ctx,
240                               struct gl_shader_program *shProg,
241                               struct gl_program *prog)
242{
243   brw_program_deserialize_driver_blob(ctx, prog, prog->info.stage);
244}
245
246static void
247serialize_intel_part(struct blob *writer, struct gl_context *ctx,
248                   struct gl_shader_program *sh_prog,
249                   struct gl_program *prog)
250{
251   struct brw_context *brw = brw_context(ctx);
252
253   union brw_any_prog_key key;
254   brw_populate_default_key(brw->screen->compiler, &key, sh_prog, prog);
255
256   const gl_shader_stage stage = prog->info.stage;
257   uint32_t offset = 0;
258   void *prog_data = NULL;
259   if (brw_search_cache(&brw->cache, brw_stage_cache_id(stage), &key,
260                        brw_prog_key_size(stage), &offset, &prog_data,
261                        false)) {
262      const void *program_map = brw->cache.map + offset;
263      /* TODO: Improve perf for non-LLC. It would be best to save it at
264       * program generation time when the program is in normal memory
265       * accessible with cache to the CPU. Another easier change would be to
266       * use _mesa_streaming_load_memcpy to read from the program mapped
267       * memory.
268       */
269      blob_write_uint32(writer, INTEL_PART);
270      intptr_t size_offset = blob_reserve_uint32(writer);
271      size_t gen_start = writer->size;
272      blob_write_bytes(writer, &key, brw_prog_key_size(stage));
273      brw_write_blob_program_data(writer, stage, program_map, prog_data);
274      blob_overwrite_uint32(writer, size_offset, writer->size - gen_start);
275   }
276}
277
278void
279brw_serialize_program_binary(struct gl_context *ctx,
280                             struct gl_shader_program *sh_prog,
281                             struct gl_program *prog)
282{
283   if (driver_blob_is_ready(prog->driver_cache_blob,
284                            prog->driver_cache_blob_size, true))
285      return;
286
287   if (prog->driver_cache_blob) {
288      if (!prog->nir) {
289         /* If we loaded from the disk shader cache, then the nir might not
290          * have been deserialized yet.
291          */
292         brw_program_deserialize_driver_blob(ctx, prog, prog->info.stage);
293      }
294      ralloc_free(prog->driver_cache_blob);
295   }
296
297   struct blob writer;
298   blob_init(&writer);
299   serialize_nir_part(&writer, prog);
300   serialize_intel_part(&writer, ctx, sh_prog, prog);
301   blob_write_uint32(&writer, END_PART);
302   prog->driver_cache_blob = ralloc_size(NULL, writer.size);
303   memcpy(prog->driver_cache_blob, writer.data, writer.size);
304   prog->driver_cache_blob_size = writer.size;
305   blob_finish(&writer);
306}
307
308void
309brw_write_blob_program_data(struct blob *binary, gl_shader_stage stage,
310                            const void *program,
311                            struct brw_stage_prog_data *prog_data)
312{
313   /* Write prog_data to blob. */
314   blob_write_bytes(binary, prog_data, brw_prog_data_size(stage));
315
316   /* Write program to blob. */
317   blob_write_bytes(binary, program, prog_data->program_size);
318
319   /* Write push params */
320   blob_write_bytes(binary, prog_data->param,
321                    sizeof(uint32_t) * prog_data->nr_params);
322
323   /* Write pull params */
324   blob_write_bytes(binary, prog_data->pull_param,
325                    sizeof(uint32_t) * prog_data->nr_pull_params);
326}
327
328bool
329brw_read_blob_program_data(struct blob_reader *binary, struct gl_program *prog,
330                           gl_shader_stage stage, const uint8_t **program,
331                           struct brw_stage_prog_data *prog_data)
332{
333   /* Read shader prog_data from blob. */
334   blob_copy_bytes(binary, prog_data, brw_prog_data_size(stage));
335   if (binary->overrun)
336      return false;
337
338   /* Read shader program from blob. */
339   *program = blob_read_bytes(binary, prog_data->program_size);
340
341   /* Read push params */
342   prog_data->param = rzalloc_array(NULL, uint32_t, prog_data->nr_params);
343   blob_copy_bytes(binary, prog_data->param,
344                   sizeof(uint32_t) * prog_data->nr_params);
345
346   /* Read pull params */
347   prog_data->pull_param = rzalloc_array(NULL, uint32_t,
348                                         prog_data->nr_pull_params);
349   blob_copy_bytes(binary, prog_data->pull_param,
350                   sizeof(uint32_t) * prog_data->nr_pull_params);
351
352   return !binary->overrun;
353}
354