vtn_opencl.c revision 7ec681f3
/*
 * Copyright © 2018 Red Hat
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Rob Clark (robdclark@gmail.com)
 */

#include "math.h"
#include "nir/nir_builtin_builder.h"

#include "util/u_printf.h"
#include "vtn_private.h"
#include "OpenCL.std.h"

typedef nir_ssa_def *(*nir_handler)(struct vtn_builder *b,
                                    uint32_t opcode,
                                    unsigned num_srcs, nir_ssa_def **srcs,
                                    struct vtn_type **src_types,
                                    const struct vtn_type *dest_type);

static int to_llvm_address_space(SpvStorageClass mode)
{
   switch (mode) {
   case SpvStorageClassPrivate:
   case SpvStorageClassFunction: return 0;
   case SpvStorageClassCrossWorkgroup: return 1;
   case SpvStorageClassUniform:
   case SpvStorageClassUniformConstant: return 2;
   case SpvStorageClassWorkgroup: return 3;
   default: return -1;
   }
}


static void
vtn_opencl_mangle(const char *in_name,
                  uint32_t const_mask,
                  int ntypes, struct vtn_type **src_types,
                  char **outstring)
{
   char local_name[256] = "";
   char *args_str = local_name + sprintf(local_name, "_Z%zu%s", strlen(in_name), in_name);

   for (unsigned i = 0; i < ntypes; ++i) {
      const struct glsl_type *type = src_types[i]->type;
      enum vtn_base_type base_type = src_types[i]->base_type;
      if (src_types[i]->base_type == vtn_base_type_pointer) {
         *(args_str++) = 'P';
         int address_space = to_llvm_address_space(src_types[i]->storage_class);
         if (address_space > 0)
            args_str += sprintf(args_str, "U3AS%d", address_space);

         type = src_types[i]->deref->type;
         base_type = src_types[i]->deref->base_type;
      }

      if (const_mask & (1 << i))
         *(args_str++) = 'K';

      unsigned num_elements = glsl_get_components(type);
      if (num_elements > 1) {
         /* Vectors are not treated as built-ins for mangling, so check for substitution.
          * In theory, we'd need to know which substitution value this is. In practice,
          * the functions we need from libclc only support 1
          */
         bool substitution = false;
         for (unsigned j = 0; j < i; ++j) {
            const struct glsl_type *other_type = src_types[j]->base_type == vtn_base_type_pointer ?
               src_types[j]->deref->type : src_types[j]->type;
            if (type == other_type) {
               substitution = true;
               break;
            }
         }

         if (substitution) {
            args_str += sprintf(args_str, "S_");
            continue;
         } else
            args_str += sprintf(args_str, "Dv%d_", num_elements);
      }

      const char *suffix = NULL;
      switch (base_type) {
      case vtn_base_type_sampler: suffix = "11ocl_sampler"; break;
      case vtn_base_type_event: suffix = "9ocl_event"; break;
      default: {
         const char *primitives[] = {
            [GLSL_TYPE_UINT] = "j",
            [GLSL_TYPE_INT] = "i",
            [GLSL_TYPE_FLOAT] = "f",
            [GLSL_TYPE_FLOAT16] = "Dh",
            [GLSL_TYPE_DOUBLE] = "d",
            [GLSL_TYPE_UINT8] = "h",
            [GLSL_TYPE_INT8] = "c",
            [GLSL_TYPE_UINT16] = "t",
            [GLSL_TYPE_INT16] = "s",
            [GLSL_TYPE_UINT64] = "m",
            [GLSL_TYPE_INT64] = "l",
            [GLSL_TYPE_BOOL] = "b",
            [GLSL_TYPE_ERROR] = NULL,
         };
         enum glsl_base_type glsl_base_type = glsl_get_base_type(type);
         assert(glsl_base_type < ARRAY_SIZE(primitives) && primitives[glsl_base_type]);
         suffix = primitives[glsl_base_type];
         break;
      }
      }
      args_str += sprintf(args_str, "%s", suffix);
   }

   *outstring = strdup(local_name);
}
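
/* As illustrative examples (assuming the usual libclc overloads), the
 * mangling above turns
 *
 *    length(float4)                into  _Z6lengthDv4_f
 *    frexp(float4, global int4 *)  into  _Z5frexpDv4_fPU3AS1Dv4_i
 *
 * i.e. Itanium-style names with a length-prefixed function name, Dv<n>_
 * vector prefixes and U3AS<n> address-space qualifiers on pointers.
 */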

static nir_function *mangle_and_find(struct vtn_builder *b,
                                     const char *name,
                                     uint32_t const_mask,
                                     uint32_t num_srcs,
                                     struct vtn_type **src_types)
{
   char *mname;
   nir_function *found = NULL;

   vtn_opencl_mangle(name, const_mask, num_srcs, src_types, &mname);
   /* Try to find it in the current shader first. */
   nir_foreach_function(funcs, b->shader) {
      if (!strcmp(funcs->name, mname)) {
         found = funcs;
         break;
      }
   }
   /* If not found there, look in the clc shader and create a decl mirroring it. */
   if (!found && b->options->clc_shader && b->options->clc_shader != b->shader) {
      nir_foreach_function(funcs, b->options->clc_shader) {
         if (!strcmp(funcs->name, mname)) {
            found = funcs;
            break;
         }
      }
      if (found) {
         nir_function *decl = nir_function_create(b->shader, mname);
         decl->num_params = found->num_params;
         decl->params = ralloc_array(b->shader, nir_parameter, decl->num_params);
         for (unsigned i = 0; i < decl->num_params; i++) {
            decl->params[i] = found->params[i];
         }
         found = decl;
      }
   }
   if (!found)
      vtn_fail("Can't find clc function %s\n", mname);
   free(mname);
   return found;
}

static bool call_mangled_function(struct vtn_builder *b,
                                  const char *name,
                                  uint32_t const_mask,
                                  uint32_t num_srcs,
                                  struct vtn_type **src_types,
                                  const struct vtn_type *dest_type,
                                  nir_ssa_def **srcs,
                                  nir_deref_instr **ret_deref_ptr)
{
   nir_function *found = mangle_and_find(b, name, const_mask, num_srcs, src_types);
   if (!found)
      return false;

   nir_call_instr *call = nir_call_instr_create(b->shader, found);

   nir_deref_instr *ret_deref = NULL;
   uint32_t param_idx = 0;
   if (dest_type) {
      nir_variable *ret_tmp = nir_local_variable_create(b->nb.impl,
                                                        glsl_get_bare_type(dest_type->type),
                                                        "return_tmp");
      ret_deref = nir_build_deref_var(&b->nb, ret_tmp);
      call->params[param_idx++] = nir_src_for_ssa(&ret_deref->dest.ssa);
   }

   for (unsigned i = 0; i < num_srcs; i++)
      call->params[param_idx++] = nir_src_for_ssa(srcs[i]);
   nir_builder_instr_insert(&b->nb, &call->instr);

   *ret_deref_ptr = ret_deref;
   return true;
}

static void
handle_instr(struct vtn_builder *b, uint32_t opcode,
             const uint32_t *w_src, unsigned num_srcs, const uint32_t *w_dest, nir_handler handler)
{
   struct vtn_type *dest_type = w_dest ? vtn_get_type(b, w_dest[0]) : NULL;

   nir_ssa_def *srcs[5] = { NULL };
   struct vtn_type *src_types[5] = { NULL };
   vtn_assert(num_srcs <= ARRAY_SIZE(srcs));
   for (unsigned i = 0; i < num_srcs; i++) {
      struct vtn_value *val = vtn_untyped_value(b, w_src[i]);
      struct vtn_ssa_value *ssa = vtn_ssa_value(b, w_src[i]);
      srcs[i] = ssa->def;
      src_types[i] = val->type;
   }

   nir_ssa_def *result = handler(b, opcode, num_srcs, srcs, src_types, dest_type);
   if (result) {
      vtn_push_nir_ssa(b, w_dest[1], result);
   } else {
      vtn_assert(dest_type == NULL);
   }
}

static nir_op
nir_alu_op_for_opencl_opcode(struct vtn_builder *b,
                             enum OpenCLstd_Entrypoints opcode)
{
   switch (opcode) {
   case OpenCLstd_Fabs: return nir_op_fabs;
   case OpenCLstd_SAbs: return nir_op_iabs;
   case OpenCLstd_SAdd_sat: return nir_op_iadd_sat;
   case OpenCLstd_UAdd_sat: return nir_op_uadd_sat;
   case OpenCLstd_Ceil: return nir_op_fceil;
   case OpenCLstd_Floor: return nir_op_ffloor;
   case OpenCLstd_SHadd: return nir_op_ihadd;
   case OpenCLstd_UHadd: return nir_op_uhadd;
   case OpenCLstd_Fmax: return nir_op_fmax;
   case OpenCLstd_SMax: return nir_op_imax;
   case OpenCLstd_UMax: return nir_op_umax;
   case OpenCLstd_Fmin: return nir_op_fmin;
   case OpenCLstd_SMin: return nir_op_imin;
   case OpenCLstd_UMin: return nir_op_umin;
   case OpenCLstd_Mix: return nir_op_flrp;
   case OpenCLstd_Native_cos: return nir_op_fcos;
   case OpenCLstd_Native_divide: return nir_op_fdiv;
   case OpenCLstd_Native_exp2: return nir_op_fexp2;
   case OpenCLstd_Native_log2: return nir_op_flog2;
   case OpenCLstd_Native_powr: return nir_op_fpow;
   case OpenCLstd_Native_recip: return nir_op_frcp;
   case OpenCLstd_Native_rsqrt: return nir_op_frsq;
   case OpenCLstd_Native_sin: return nir_op_fsin;
   case OpenCLstd_Native_sqrt: return nir_op_fsqrt;
   case OpenCLstd_SMul_hi: return nir_op_imul_high;
   case OpenCLstd_UMul_hi: return nir_op_umul_high;
   case OpenCLstd_Popcount: return nir_op_bit_count;
   case OpenCLstd_SRhadd: return nir_op_irhadd;
   case OpenCLstd_URhadd: return nir_op_urhadd;
   case OpenCLstd_Rsqrt: return nir_op_frsq;
   case OpenCLstd_Sign: return nir_op_fsign;
   case OpenCLstd_Sqrt: return nir_op_fsqrt;
   case OpenCLstd_SSub_sat: return nir_op_isub_sat;
   case OpenCLstd_USub_sat: return nir_op_usub_sat;
   case OpenCLstd_Trunc: return nir_op_ftrunc;
   case OpenCLstd_Rint: return nir_op_fround_even;
   case OpenCLstd_Half_divide: return nir_op_fdiv;
   case OpenCLstd_Half_recip: return nir_op_frcp;
   /* Absolute value of an unsigned value is a no-op; just move it. */
   case OpenCLstd_UAbs: return nir_op_mov;
   default:
      vtn_fail("No NIR equivalent");
   }
}

static nir_ssa_def *
handle_alu(struct vtn_builder *b, uint32_t opcode,
           unsigned num_srcs, nir_ssa_def **srcs, struct vtn_type **src_types,
           const struct vtn_type *dest_type)
{
   nir_ssa_def *ret = nir_build_alu(&b->nb, nir_alu_op_for_opencl_opcode(b, (enum OpenCLstd_Entrypoints)opcode),
                                    srcs[0], srcs[1], srcs[2], NULL);
   if (opcode == OpenCLstd_Popcount)
      ret = nir_u2u(&b->nb, ret, glsl_get_bit_size(dest_type->type));
   return ret;
}

#define REMAP(op, str) [OpenCLstd_##op] = { str }
static const struct {
   const char *fn;
} remap_table[] = {
   REMAP(Distance, "distance"),
   REMAP(Fast_distance, "fast_distance"),
   REMAP(Fast_length, "fast_length"),
   REMAP(Fast_normalize, "fast_normalize"),
   REMAP(Half_rsqrt, "half_rsqrt"),
   REMAP(Half_sqrt, "half_sqrt"),
   REMAP(Length, "length"),
   REMAP(Normalize, "normalize"),
   REMAP(Degrees, "degrees"),
   REMAP(Radians, "radians"),
   REMAP(Rotate, "rotate"),
   REMAP(Smoothstep, "smoothstep"),
   REMAP(Step, "step"),

   REMAP(Pow, "pow"),
   REMAP(Pown, "pown"),
   REMAP(Powr, "powr"),
   REMAP(Rootn, "rootn"),
   REMAP(Modf, "modf"),

   REMAP(Acos, "acos"),
   REMAP(Acosh, "acosh"),
   REMAP(Acospi, "acospi"),
   REMAP(Asin, "asin"),
   REMAP(Asinh, "asinh"),
   REMAP(Asinpi, "asinpi"),
   REMAP(Atan, "atan"),
   REMAP(Atan2, "atan2"),
   REMAP(Atanh, "atanh"),
   REMAP(Atanpi, "atanpi"),
   REMAP(Atan2pi, "atan2pi"),
   REMAP(Cos, "cos"),
   REMAP(Cosh, "cosh"),
   REMAP(Cospi, "cospi"),
   REMAP(Sin, "sin"),
   REMAP(Sinh, "sinh"),
   REMAP(Sinpi, "sinpi"),
   REMAP(Tan, "tan"),
   REMAP(Tanh, "tanh"),
   REMAP(Tanpi, "tanpi"),
   REMAP(Sincos, "sincos"),
   REMAP(Fract, "fract"),
   REMAP(Frexp, "frexp"),
   REMAP(Fma, "fma"),
   REMAP(Fmod, "fmod"),

   REMAP(Half_cos, "cos"),
   REMAP(Half_exp, "exp"),
   REMAP(Half_exp2, "exp2"),
   REMAP(Half_exp10, "exp10"),
   REMAP(Half_log, "log"),
   REMAP(Half_log2, "log2"),
   REMAP(Half_log10, "log10"),
   REMAP(Half_powr, "powr"),
   REMAP(Half_sin, "sin"),
   REMAP(Half_tan, "tan"),

   REMAP(Remainder, "remainder"),
   REMAP(Remquo, "remquo"),
   REMAP(Hypot, "hypot"),
   REMAP(Exp, "exp"),
   REMAP(Exp2, "exp2"),
   REMAP(Exp10, "exp10"),
   REMAP(Expm1, "expm1"),
   REMAP(Ldexp, "ldexp"),

   REMAP(Ilogb, "ilogb"),
   REMAP(Log, "log"),
   REMAP(Log2, "log2"),
   REMAP(Log10, "log10"),
   REMAP(Log1p, "log1p"),
   REMAP(Logb, "logb"),

   REMAP(Cbrt, "cbrt"),
   REMAP(Erfc, "erfc"),
   REMAP(Erf, "erf"),

   REMAP(Lgamma, "lgamma"),
   REMAP(Lgamma_r, "lgamma_r"),
   REMAP(Tgamma, "tgamma"),

   REMAP(UMad_sat, "mad_sat"),
   REMAP(SMad_sat, "mad_sat"),

   REMAP(Shuffle, "shuffle"),
   REMAP(Shuffle2, "shuffle2"),
};
#undef REMAP

static const char *remap_clc_opcode(enum OpenCLstd_Entrypoints opcode)
{
   if (opcode >= ARRAY_SIZE(remap_table))
      return NULL;
   return remap_table[opcode].fn;
}

static struct vtn_type *
get_vtn_type_for_glsl_type(struct vtn_builder *b, const struct glsl_type *type)
{
   struct vtn_type *ret = rzalloc(b, struct vtn_type);
   assert(glsl_type_is_vector_or_scalar(type));
   ret->type = type;
   ret->length = glsl_get_vector_elements(type);
   ret->base_type = glsl_type_is_vector(type) ? vtn_base_type_vector : vtn_base_type_scalar;
   return ret;
}

static struct vtn_type *
get_pointer_type(struct vtn_builder *b, struct vtn_type *t, SpvStorageClass storage_class)
{
   struct vtn_type *ret = rzalloc(b, struct vtn_type);
   ret->type = nir_address_format_to_glsl_type(
            vtn_mode_to_address_format(
               b, vtn_storage_class_to_mode(b, storage_class, NULL, NULL)));
   ret->base_type = vtn_base_type_pointer;
   ret->storage_class = storage_class;
   ret->deref = t;
   return ret;
}

static struct vtn_type *
get_signed_type(struct vtn_builder *b, struct vtn_type *t)
{
   if (t->base_type == vtn_base_type_pointer) {
      return get_pointer_type(b, get_signed_type(b, t->deref), t->storage_class);
   }
   return get_vtn_type_for_glsl_type(
      b, glsl_vector_type(glsl_signed_base_type_of(glsl_get_base_type(t->type)),
                          glsl_get_vector_elements(t->type)));
}

static nir_ssa_def *
handle_clc_fn(struct vtn_builder *b, enum OpenCLstd_Entrypoints opcode,
              int num_srcs,
              nir_ssa_def **srcs,
              struct vtn_type **src_types,
              const struct vtn_type *dest_type)
{
   const char *name = remap_clc_opcode(opcode);
   if (!name)
      return NULL;

   /* Some functions take params that end up being passed as uint (or
    * pointer-to-uint), which doesn't mangle correctly when the function
    * expects int or pointer-to-int.
    * See https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.html#_a_id_unsignedsigned_a_unsigned_versus_signed_integers
    */
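   /* For example (illustrative): pown(float4, uint4) would mangle as
    * _Z4pownDv4_fDv4_j, but libclc only provides the OpenCL signature
    * pown(float4, int4), i.e. _Z4pownDv4_fDv4_i, so operand 1 has to be
    * remangled as a signed type below.
    */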
   int signed_param = -1;
   switch (opcode) {
   case OpenCLstd_Frexp:
   case OpenCLstd_Lgamma_r:
   case OpenCLstd_Pown:
   case OpenCLstd_Rootn:
   case OpenCLstd_Ldexp:
      signed_param = 1;
      break;
   case OpenCLstd_Remquo:
      signed_param = 2;
      break;
   case OpenCLstd_SMad_sat: {
      /* All parameters need to be converted to signed */
      src_types[0] = src_types[1] = src_types[2] = get_signed_type(b, src_types[0]);
      break;
   }
   default: break;
   }

   if (signed_param >= 0) {
      src_types[signed_param] = get_signed_type(b, src_types[signed_param]);
   }

   nir_deref_instr *ret_deref = NULL;

   if (!call_mangled_function(b, name, 0, num_srcs, src_types,
                              dest_type, srcs, &ret_deref))
      return NULL;

   return ret_deref ? nir_load_deref(&b->nb, ret_deref) : NULL;
}

static nir_ssa_def *
handle_special(struct vtn_builder *b, uint32_t opcode,
               unsigned num_srcs, nir_ssa_def **srcs, struct vtn_type **src_types,
               const struct vtn_type *dest_type)
{
   nir_builder *nb = &b->nb;
   enum OpenCLstd_Entrypoints cl_opcode = (enum OpenCLstd_Entrypoints)opcode;

   switch (cl_opcode) {
   case OpenCLstd_SAbs_diff:
      /* These are easier to do directly in NIR. */
      return nir_iabs_diff(nb, srcs[0], srcs[1]);
   case OpenCLstd_UAbs_diff:
      return nir_uabs_diff(nb, srcs[0], srcs[1]);
   case OpenCLstd_Bitselect:
      return nir_bitselect(nb, srcs[0], srcs[1], srcs[2]);
   case OpenCLstd_SMad_hi:
      return nir_imad_hi(nb, srcs[0], srcs[1], srcs[2]);
   case OpenCLstd_UMad_hi:
      return nir_umad_hi(nb, srcs[0], srcs[1], srcs[2]);
   case OpenCLstd_SMul24:
      return nir_imul24_relaxed(nb, srcs[0], srcs[1]);
   case OpenCLstd_UMul24:
      return nir_umul24_relaxed(nb, srcs[0], srcs[1]);
   case OpenCLstd_SMad24:
      return nir_iadd(nb, nir_imul24_relaxed(nb, srcs[0], srcs[1]), srcs[2]);
   case OpenCLstd_UMad24:
      return nir_umad24_relaxed(nb, srcs[0], srcs[1], srcs[2]);
   case OpenCLstd_FClamp:
      return nir_fclamp(nb, srcs[0], srcs[1], srcs[2]);
   case OpenCLstd_SClamp:
      return nir_iclamp(nb, srcs[0], srcs[1], srcs[2]);
   case OpenCLstd_UClamp:
      return nir_uclamp(nb, srcs[0], srcs[1], srcs[2]);
   case OpenCLstd_Copysign:
      return nir_copysign(nb, srcs[0], srcs[1]);
   case OpenCLstd_Cross:
      if (dest_type->length == 4)
         return nir_cross4(nb, srcs[0], srcs[1]);
      return nir_cross3(nb, srcs[0], srcs[1]);
   case OpenCLstd_Fdim:
      return nir_fdim(nb, srcs[0], srcs[1]);
   case OpenCLstd_Fmod:
      if (nb->shader->options->lower_fmod)
         break;
      return nir_fmod(nb, srcs[0], srcs[1]);
   case OpenCLstd_Mad:
      return nir_fmad(nb, srcs[0], srcs[1], srcs[2]);
   case OpenCLstd_Maxmag:
      return nir_maxmag(nb, srcs[0], srcs[1]);
   case OpenCLstd_Minmag:
      return nir_minmag(nb, srcs[0], srcs[1]);
   case OpenCLstd_Nan:
      return nir_nan(nb, srcs[0]);
   case OpenCLstd_Nextafter:
      return nir_nextafter(nb, srcs[0], srcs[1]);
   case OpenCLstd_Normalize:
      return nir_normalize(nb, srcs[0]);
   case OpenCLstd_Clz:
      return nir_clz_u(nb, srcs[0]);
   case OpenCLstd_Ctz:
      return nir_ctz_u(nb, srcs[0]);
   case OpenCLstd_Select:
      return nir_select(nb, srcs[0], srcs[1], srcs[2]);
   case OpenCLstd_S_Upsample:
   case OpenCLstd_U_Upsample:
      /* SPIR-V and CL have different definitions of upsample; just implement it in NIR. */
      return nir_upsample(nb, srcs[0], srcs[1]);
   case OpenCLstd_Native_exp:
      return nir_fexp(nb, srcs[0]);
   case OpenCLstd_Native_exp10:
      return nir_fexp2(nb, nir_fmul_imm(nb, srcs[0], log(10) / log(2)));
   case OpenCLstd_Native_log:
      return nir_flog(nb, srcs[0]);
   case OpenCLstd_Native_log10:
      return nir_fmul_imm(nb, nir_flog2(nb, srcs[0]), log(2) / log(10));
   case OpenCLstd_Native_tan:
      return nir_ftan(nb, srcs[0]);
   case OpenCLstd_Ldexp:
      if (nb->shader->options->lower_ldexp)
         break;
      return nir_ldexp(nb, srcs[0], srcs[1]);
   case OpenCLstd_Fma:
      /* FIXME: the software implementation only supports fp32 for now. */
      if (nb->shader->options->lower_ffma32 && srcs[0]->bit_size == 32)
         break;
      return nir_ffma(nb, srcs[0], srcs[1], srcs[2]);
   default:
      break;
   }

   nir_ssa_def *ret = handle_clc_fn(b, opcode, num_srcs, srcs, src_types, dest_type);
   if (!ret)
      vtn_fail("No NIR equivalent");

   return ret;
}

static nir_ssa_def *
handle_core(struct vtn_builder *b, uint32_t opcode,
            unsigned num_srcs, nir_ssa_def **srcs, struct vtn_type **src_types,
            const struct vtn_type *dest_type)
{
   nir_deref_instr *ret_deref = NULL;

   switch ((SpvOp)opcode) {
   case SpvOpGroupAsyncCopy: {
      /* Libclc doesn't include 3-component overloads of the async copy functions.
       * However, the CLC spec says:
       * async_work_group_copy and async_work_group_strided_copy for 3-component vector types
       * behave as async_work_group_copy and async_work_group_strided_copy respectively for 4-component
       * vector types
       */
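      /* So remap any 3-component pointee to a 4-component one below; the
       * call then resolves to libclc's vec4 overload, e.g. (illustrative,
       * for a global-to-local float4 copy on a 64-bit target):
       *    _Z29async_work_group_strided_copyPU3AS3Dv4_fPU3AS1KS_mm9ocl_event
       */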
      for (unsigned i = 0; i < num_srcs; ++i) {
         if (src_types[i]->base_type == vtn_base_type_pointer &&
             src_types[i]->deref->base_type == vtn_base_type_vector &&
             src_types[i]->deref->length == 3) {
            src_types[i] =
               get_pointer_type(b,
                                get_vtn_type_for_glsl_type(b, glsl_replace_vector_type(src_types[i]->deref->type, 4)),
                                src_types[i]->storage_class);
         }
      }
      if (!call_mangled_function(b, "async_work_group_strided_copy", (1 << 1), num_srcs, src_types, dest_type, srcs, &ret_deref))
         return NULL;
      break;
   }
   case SpvOpGroupWaitEvents: {
      src_types[0] = get_vtn_type_for_glsl_type(b, glsl_int_type());
      if (!call_mangled_function(b, "wait_group_events", 0, num_srcs, src_types, dest_type, srcs, &ret_deref))
         return NULL;
      break;
   }
   default:
      return NULL;
   }

   return ret_deref ? nir_load_deref(&b->nb, ret_deref) : NULL;
}


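/* Shared implementation of the vloadn/vstoren families: the components of an
 * n-element vector are loaded from or stored to (p + offset * n) one scalar
 * at a time.  The _half variants additionally convert between half and
 * float/double, and the vloada/vstorea variants use the aligned element
 * stride (a 3-component vector is padded to 4 elements).
 */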
static void
_handle_v_load_store(struct vtn_builder *b, enum OpenCLstd_Entrypoints opcode,
                     const uint32_t *w, unsigned count, bool load,
                     bool vec_aligned, nir_rounding_mode rounding)
{
   struct vtn_type *type;
   if (load)
      type = vtn_get_type(b, w[1]);
   else
      type = vtn_get_value_type(b, w[5]);
   unsigned a = load ? 0 : 1;

   enum glsl_base_type base_type = glsl_get_base_type(type->type);
   unsigned components = glsl_get_vector_elements(type->type);

   nir_ssa_def *offset = vtn_get_nir_ssa(b, w[5 + a]);
   struct vtn_value *p = vtn_value(b, w[6 + a], vtn_value_type_pointer);

   struct vtn_ssa_value *comps[NIR_MAX_VEC_COMPONENTS];
   nir_ssa_def *ncomps[NIR_MAX_VEC_COMPONENTS];

   nir_ssa_def *moffset = nir_imul_imm(&b->nb, offset,
      (vec_aligned && components == 3) ? 4 : components);
   nir_deref_instr *deref = vtn_pointer_to_deref(b, p->pointer);

   unsigned alignment = vec_aligned ? glsl_get_cl_alignment(type->type) :
                                      glsl_get_bit_size(type->type) / 8;
   enum glsl_base_type ptr_base_type =
      glsl_get_base_type(p->pointer->type->type);
   if (base_type != ptr_base_type) {
      vtn_fail_if(ptr_base_type != GLSL_TYPE_FLOAT16 ||
                  (base_type != GLSL_TYPE_FLOAT &&
                   base_type != GLSL_TYPE_DOUBLE),
                  "vload/vstore cannot do type conversion. "
                  "vload/vstore_half can only convert from half to other "
                  "floating-point types.");

      /* Above-computed alignment was for floats/doubles, not halves */
      alignment /= glsl_get_bit_size(type->type) / glsl_base_type_get_bit_size(ptr_base_type);
   }

   deref = nir_alignment_deref_cast(&b->nb, deref, alignment, 0);

   for (int i = 0; i < components; i++) {
      nir_ssa_def *coffset = nir_iadd_imm(&b->nb, moffset, i);
      nir_deref_instr *arr_deref = nir_build_deref_ptr_as_array(&b->nb, deref, coffset);

      if (load) {
         comps[i] = vtn_local_load(b, arr_deref, p->type->access);
         ncomps[i] = comps[i]->def;
         if (base_type != ptr_base_type) {
            assert(ptr_base_type == GLSL_TYPE_FLOAT16 &&
                   (base_type == GLSL_TYPE_FLOAT ||
                    base_type == GLSL_TYPE_DOUBLE));
            ncomps[i] = nir_f2fN(&b->nb, ncomps[i],
                                 glsl_base_type_get_bit_size(base_type));
         }
      } else {
         struct vtn_ssa_value *ssa = vtn_create_ssa_value(b, glsl_scalar_type(base_type));
         struct vtn_ssa_value *val = vtn_ssa_value(b, w[5]);
         ssa->def = nir_channel(&b->nb, val->def, i);
         if (base_type != ptr_base_type) {
            assert(ptr_base_type == GLSL_TYPE_FLOAT16 &&
                   (base_type == GLSL_TYPE_FLOAT ||
                    base_type == GLSL_TYPE_DOUBLE));
            if (rounding == nir_rounding_mode_undef) {
               ssa->def = nir_f2f16(&b->nb, ssa->def);
            } else {
               ssa->def = nir_convert_alu_types(&b->nb, 16, ssa->def,
                                                nir_type_float | ssa->def->bit_size,
                                                nir_type_float16,
                                                rounding, false);
            }
         }
         vtn_local_store(b, ssa, arr_deref, p->type->access);
      }
   }
   if (load) {
      vtn_push_nir_ssa(b, w[2], nir_vec(&b->nb, ncomps, components));
   }
}

static void
vtn_handle_opencl_vload(struct vtn_builder *b, enum OpenCLstd_Entrypoints opcode,
                        const uint32_t *w, unsigned count)
{
   _handle_v_load_store(b, opcode, w, count, true,
                        opcode == OpenCLstd_Vloada_halfn,
                        nir_rounding_mode_undef);
}

static void
vtn_handle_opencl_vstore(struct vtn_builder *b, enum OpenCLstd_Entrypoints opcode,
                         const uint32_t *w, unsigned count)
{
   _handle_v_load_store(b, opcode, w, count, false,
                        opcode == OpenCLstd_Vstorea_halfn,
                        nir_rounding_mode_undef);
}

static void
vtn_handle_opencl_vstore_half_r(struct vtn_builder *b, enum OpenCLstd_Entrypoints opcode,
                                const uint32_t *w, unsigned count)
{
   _handle_v_load_store(b, opcode, w, count, false,
                        opcode == OpenCLstd_Vstorea_halfn_r,
                        vtn_rounding_mode_to_nir(b, w[8]));
}

static unsigned
vtn_add_printf_string(struct vtn_builder *b, uint32_t id, nir_printf_info *info)
{
   nir_deref_instr *deref = vtn_nir_deref(b, id);

   while (deref && deref->deref_type != nir_deref_type_var)
      deref = nir_deref_instr_parent(deref);

   vtn_fail_if(deref == NULL || !nir_deref_mode_is(deref, nir_var_mem_constant),
               "Printf string argument must be a pointer to a constant variable");
   vtn_fail_if(deref->var->constant_initializer == NULL,
               "Printf string argument must have an initializer");
   vtn_fail_if(!glsl_type_is_array(deref->var->type),
               "Printf string must be a char array");
   const struct glsl_type *char_type = glsl_get_array_element(deref->var->type);
   vtn_fail_if(char_type != glsl_uint8_t_type() &&
               char_type != glsl_int8_t_type(),
               "Printf string must be a char array");

   nir_constant *c = deref->var->constant_initializer;
   assert(c->num_elements == glsl_get_length(deref->var->type));

   unsigned idx = info->string_size;
   info->strings = reralloc_size(b->shader, info->strings,
                                 idx + c->num_elements);
   info->string_size += c->num_elements;

   char *str = &info->strings[idx];
   bool found_null = false;
   for (unsigned i = 0; i < c->num_elements; i++) {
      memcpy((char *)str + i, c->elements[i]->values, 1);
      if (str[i] == '\0')
         found_null = true;
   }
   vtn_fail_if(!found_null, "Printf string must be null terminated");
   return idx;
}

/* printf is special because there are no limits on args */
static void
handle_printf(struct vtn_builder *b, uint32_t opcode,
              const uint32_t *w_src, unsigned num_srcs, const uint32_t *w_dest)
{
   if (!b->options->caps.printf) {
      vtn_push_nir_ssa(b, w_dest[1], nir_imm_int(&b->nb, -1));
      return;
   }

   /* Step 1: extract the format string */

   /*
    * info_idx is 1-based to match clover/llvm;
    * the backend indexes the info table at info_idx - 1.
    */
   b->shader->printf_info_count++;
   unsigned info_idx = b->shader->printf_info_count;

   b->shader->printf_info = reralloc(b->shader, b->shader->printf_info,
                                     nir_printf_info, info_idx);
   nir_printf_info *info = &b->shader->printf_info[info_idx - 1];

   info->strings = NULL;
   info->string_size = 0;

   vtn_add_printf_string(b, w_src[0], info);

   info->num_args = num_srcs - 1;
   info->arg_sizes = ralloc_array(b->shader, unsigned, info->num_args);

   /* Step 2: build an ad-hoc struct type out of the args */
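   /* E.g. (illustrative) printf("%u %s", x, s) yields a two-field struct
    * { arg_1, arg_2 }; the %s argument is stored below as an index into
    * info->strings rather than as the pointer value itself.
    */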
   unsigned field_offset = 0;
   struct glsl_struct_field *fields =
      rzalloc_array(b, struct glsl_struct_field, num_srcs - 1);
   for (unsigned i = 1; i < num_srcs; ++i) {
      struct vtn_value *val = vtn_untyped_value(b, w_src[i]);
      struct vtn_type *src_type = val->type;
      fields[i - 1].type = src_type->type;
      fields[i - 1].name = ralloc_asprintf(b->shader, "arg_%u", i);
      field_offset = align(field_offset, 4);
      fields[i - 1].offset = field_offset;
      info->arg_sizes[i - 1] = glsl_get_cl_size(src_type->type);
      field_offset += glsl_get_cl_size(src_type->type);
   }
   const struct glsl_type *struct_type =
      glsl_struct_type(fields, num_srcs - 1, "printf", true);

   /* Step 3: create a variable of that type and populate its fields */
   nir_variable *var = nir_local_variable_create(b->func->nir_func->impl,
                                                 struct_type, NULL);
   nir_deref_instr *deref_var = nir_build_deref_var(&b->nb, var);
   size_t fmt_pos = 0;
   for (unsigned i = 1; i < num_srcs; ++i) {
      nir_deref_instr *field_deref =
         nir_build_deref_struct(&b->nb, deref_var, i - 1);
      nir_ssa_def *field_src = vtn_ssa_value(b, w_src[i])->def;
      /* extract strings */
      fmt_pos = util_printf_next_spec_pos(info->strings, fmt_pos);
      if (fmt_pos != -1 && info->strings[fmt_pos] == 's') {
         unsigned idx = vtn_add_printf_string(b, w_src[i], info);
         nir_store_deref(&b->nb, field_deref,
                         nir_imm_intN_t(&b->nb, idx, field_src->bit_size),
                         ~0 /* write_mask */);
      } else
         nir_store_deref(&b->nb, field_deref, field_src, ~0);
   }

   /* Lastly, the actual intrinsic */
   nir_ssa_def *fmt_idx = nir_imm_int(&b->nb, info_idx);
   nir_ssa_def *ret = nir_printf(&b->nb, fmt_idx, &deref_var->dest.ssa);
   vtn_push_nir_ssa(b, w_dest[1], ret);
}

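/* OpenCLstd_Round: round to the nearest integer, with halfway cases rounded
 * away from zero (e.g. round(2.5f) == 3.0f, round(-2.5f) == -3.0f).  NIR has
 * no dedicated opcode for this mode, so build it from ftrunc/fsign/bcsel.
 */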
static nir_ssa_def *
handle_round(struct vtn_builder *b, uint32_t opcode,
             unsigned num_srcs, nir_ssa_def **srcs, struct vtn_type **src_types,
             const struct vtn_type *dest_type)
{
   nir_ssa_def *src = srcs[0];
   nir_builder *nb = &b->nb;
   nir_ssa_def *half = nir_imm_floatN_t(nb, 0.5, src->bit_size);
   nir_ssa_def *truncated = nir_ftrunc(nb, src);
   nir_ssa_def *remainder = nir_fsub(nb, src, truncated);

   return nir_bcsel(nb, nir_fge(nb, nir_fabs(nb, remainder), half),
                    nir_fadd(nb, truncated, nir_fsign(nb, src)), truncated);
}

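/* OpenCL shuffle(): each mask component selects one component of the input
 * vector.  Only the low bits of each mask component are significant, hence
 * the iand with in_elems - 1.
 */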
static nir_ssa_def *
handle_shuffle(struct vtn_builder *b, uint32_t opcode,
               unsigned num_srcs, nir_ssa_def **srcs, struct vtn_type **src_types,
               const struct vtn_type *dest_type)
{
   struct nir_ssa_def *input = srcs[0];
   struct nir_ssa_def *mask = srcs[1];

   unsigned out_elems = dest_type->length;
   nir_ssa_def *outres[NIR_MAX_VEC_COMPONENTS];
   unsigned in_elems = input->num_components;
   if (mask->bit_size != 32)
      mask = nir_u2u32(&b->nb, mask);
   mask = nir_iand(&b->nb, mask, nir_imm_intN_t(&b->nb, in_elems - 1, mask->bit_size));
   for (unsigned i = 0; i < out_elems; i++)
      outres[i] = nir_vector_extract(&b->nb, input, nir_channel(&b->nb, mask, i));

   return nir_vec(&b->nb, outres, out_elems);
}

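/* OpenCL shuffle2(): like shuffle(), but selects from the concatenation of
 * two input vectors; mask values below in_elems pick from input0, the rest
 * from input1.
 */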
static nir_ssa_def *
handle_shuffle2(struct vtn_builder *b, uint32_t opcode,
                unsigned num_srcs, nir_ssa_def **srcs, struct vtn_type **src_types,
                const struct vtn_type *dest_type)
{
   struct nir_ssa_def *input0 = srcs[0];
   struct nir_ssa_def *input1 = srcs[1];
   struct nir_ssa_def *mask = srcs[2];

   unsigned out_elems = dest_type->length;
   nir_ssa_def *outres[NIR_MAX_VEC_COMPONENTS];
   unsigned in_elems = input0->num_components;
   unsigned total_mask = 2 * in_elems - 1;
   unsigned half_mask = in_elems - 1;
   if (mask->bit_size != 32)
      mask = nir_u2u32(&b->nb, mask);
   mask = nir_iand(&b->nb, mask, nir_imm_intN_t(&b->nb, total_mask, mask->bit_size));
   for (unsigned i = 0; i < out_elems; i++) {
      nir_ssa_def *this_mask = nir_channel(&b->nb, mask, i);
      nir_ssa_def *vmask = nir_iand(&b->nb, this_mask, nir_imm_intN_t(&b->nb, half_mask, mask->bit_size));
      nir_ssa_def *val0 = nir_vector_extract(&b->nb, input0, vmask);
      nir_ssa_def *val1 = nir_vector_extract(&b->nb, input1, vmask);
      nir_ssa_def *sel = nir_ilt(&b->nb, this_mask, nir_imm_intN_t(&b->nb, in_elems, mask->bit_size));
      outres[i] = nir_bcsel(&b->nb, sel, val0, val1);
   }
   return nir_vec(&b->nb, outres, out_elems);
}

bool
vtn_handle_opencl_instruction(struct vtn_builder *b, SpvOp ext_opcode,
                              const uint32_t *w, unsigned count)
{
   enum OpenCLstd_Entrypoints cl_opcode = (enum OpenCLstd_Entrypoints) ext_opcode;

   switch (cl_opcode) {
   case OpenCLstd_Fabs:
   case OpenCLstd_SAbs:
   case OpenCLstd_UAbs:
   case OpenCLstd_SAdd_sat:
   case OpenCLstd_UAdd_sat:
   case OpenCLstd_Ceil:
   case OpenCLstd_Floor:
   case OpenCLstd_Fmax:
   case OpenCLstd_SHadd:
   case OpenCLstd_UHadd:
   case OpenCLstd_SMax:
   case OpenCLstd_UMax:
   case OpenCLstd_Fmin:
   case OpenCLstd_SMin:
   case OpenCLstd_UMin:
   case OpenCLstd_Mix:
   case OpenCLstd_Native_cos:
   case OpenCLstd_Native_divide:
   case OpenCLstd_Native_exp2:
   case OpenCLstd_Native_log2:
   case OpenCLstd_Native_powr:
   case OpenCLstd_Native_recip:
   case OpenCLstd_Native_rsqrt:
   case OpenCLstd_Native_sin:
   case OpenCLstd_Native_sqrt:
   case OpenCLstd_SMul_hi:
   case OpenCLstd_UMul_hi:
   case OpenCLstd_Popcount:
   case OpenCLstd_SRhadd:
   case OpenCLstd_URhadd:
   case OpenCLstd_Rsqrt:
   case OpenCLstd_Sign:
   case OpenCLstd_Sqrt:
   case OpenCLstd_SSub_sat:
   case OpenCLstd_USub_sat:
   case OpenCLstd_Trunc:
   case OpenCLstd_Rint:
   case OpenCLstd_Half_divide:
   case OpenCLstd_Half_recip:
      handle_instr(b, ext_opcode, w + 5, count - 5, w + 1, handle_alu);
      return true;
   case OpenCLstd_SAbs_diff:
   case OpenCLstd_UAbs_diff:
   case OpenCLstd_SMad_hi:
   case OpenCLstd_UMad_hi:
   case OpenCLstd_SMad24:
   case OpenCLstd_UMad24:
   case OpenCLstd_SMul24:
   case OpenCLstd_UMul24:
   case OpenCLstd_Bitselect:
   case OpenCLstd_FClamp:
   case OpenCLstd_SClamp:
   case OpenCLstd_UClamp:
   case OpenCLstd_Copysign:
   case OpenCLstd_Cross:
   case OpenCLstd_Degrees:
   case OpenCLstd_Fdim:
   case OpenCLstd_Fma:
   case OpenCLstd_Distance:
   case OpenCLstd_Fast_distance:
   case OpenCLstd_Fast_length:
   case OpenCLstd_Fast_normalize:
   case OpenCLstd_Half_rsqrt:
   case OpenCLstd_Half_sqrt:
   case OpenCLstd_Length:
   case OpenCLstd_Mad:
   case OpenCLstd_Maxmag:
   case OpenCLstd_Minmag:
   case OpenCLstd_Nan:
   case OpenCLstd_Nextafter:
   case OpenCLstd_Normalize:
   case OpenCLstd_Radians:
   case OpenCLstd_Rotate:
   case OpenCLstd_Select:
   case OpenCLstd_Step:
   case OpenCLstd_Smoothstep:
   case OpenCLstd_S_Upsample:
   case OpenCLstd_U_Upsample:
   case OpenCLstd_Clz:
   case OpenCLstd_Ctz:
   case OpenCLstd_Native_exp:
   case OpenCLstd_Native_exp10:
   case OpenCLstd_Native_log:
   case OpenCLstd_Native_log10:
   case OpenCLstd_Acos:
   case OpenCLstd_Acosh:
   case OpenCLstd_Acospi:
   case OpenCLstd_Asin:
   case OpenCLstd_Asinh:
   case OpenCLstd_Asinpi:
   case OpenCLstd_Atan:
   case OpenCLstd_Atan2:
   case OpenCLstd_Atanh:
   case OpenCLstd_Atanpi:
   case OpenCLstd_Atan2pi:
   case OpenCLstd_Fract:
   case OpenCLstd_Frexp:
   case OpenCLstd_Exp:
   case OpenCLstd_Exp2:
   case OpenCLstd_Expm1:
   case OpenCLstd_Exp10:
   case OpenCLstd_Fmod:
   case OpenCLstd_Ilogb:
   case OpenCLstd_Log:
   case OpenCLstd_Log2:
   case OpenCLstd_Log10:
   case OpenCLstd_Log1p:
   case OpenCLstd_Logb:
   case OpenCLstd_Ldexp:
   case OpenCLstd_Cos:
   case OpenCLstd_Cosh:
   case OpenCLstd_Cospi:
   case OpenCLstd_Sin:
   case OpenCLstd_Sinh:
   case OpenCLstd_Sinpi:
   case OpenCLstd_Tan:
   case OpenCLstd_Tanh:
   case OpenCLstd_Tanpi:
   case OpenCLstd_Cbrt:
   case OpenCLstd_Erfc:
   case OpenCLstd_Erf:
   case OpenCLstd_Lgamma:
   case OpenCLstd_Lgamma_r:
   case OpenCLstd_Tgamma:
   case OpenCLstd_Pow:
   case OpenCLstd_Powr:
   case OpenCLstd_Pown:
   case OpenCLstd_Rootn:
   case OpenCLstd_Remainder:
   case OpenCLstd_Remquo:
   case OpenCLstd_Hypot:
   case OpenCLstd_Sincos:
   case OpenCLstd_Modf:
   case OpenCLstd_UMad_sat:
   case OpenCLstd_SMad_sat:
   case OpenCLstd_Native_tan:
   case OpenCLstd_Half_cos:
   case OpenCLstd_Half_exp:
   case OpenCLstd_Half_exp2:
   case OpenCLstd_Half_exp10:
   case OpenCLstd_Half_log:
   case OpenCLstd_Half_log2:
   case OpenCLstd_Half_log10:
   case OpenCLstd_Half_powr:
   case OpenCLstd_Half_sin:
   case OpenCLstd_Half_tan:
      handle_instr(b, ext_opcode, w + 5, count - 5, w + 1, handle_special);
      return true;
   case OpenCLstd_Vloadn:
   case OpenCLstd_Vload_half:
   case OpenCLstd_Vload_halfn:
   case OpenCLstd_Vloada_halfn:
      vtn_handle_opencl_vload(b, cl_opcode, w, count);
      return true;
   case OpenCLstd_Vstoren:
   case OpenCLstd_Vstore_half:
   case OpenCLstd_Vstore_halfn:
   case OpenCLstd_Vstorea_halfn:
      vtn_handle_opencl_vstore(b, cl_opcode, w, count);
      return true;
   case OpenCLstd_Vstore_half_r:
   case OpenCLstd_Vstore_halfn_r:
   case OpenCLstd_Vstorea_halfn_r:
      vtn_handle_opencl_vstore_half_r(b, cl_opcode, w, count);
      return true;
   case OpenCLstd_Shuffle:
      handle_instr(b, ext_opcode, w + 5, count - 5, w + 1, handle_shuffle);
      return true;
   case OpenCLstd_Shuffle2:
      handle_instr(b, ext_opcode, w + 5, count - 5, w + 1, handle_shuffle2);
      return true;
   case OpenCLstd_Round:
      handle_instr(b, ext_opcode, w + 5, count - 5, w + 1, handle_round);
      return true;
   case OpenCLstd_Printf:
      handle_printf(b, ext_opcode, w + 5, count - 5, w + 1);
      return true;
   case OpenCLstd_Prefetch:
      /* TODO maybe add a nir instruction for this? */
      return true;
   default:
      vtn_fail("unhandled opencl opc: %u\n", ext_opcode);
      return false;
   }
}

bool
vtn_handle_opencl_core_instruction(struct vtn_builder *b, SpvOp opcode,
                                   const uint32_t *w, unsigned count)
{
   switch (opcode) {
   case SpvOpGroupAsyncCopy:
      handle_instr(b, opcode, w + 4, count - 4, w + 1, handle_core);
      return true;
   case SpvOpGroupWaitEvents:
      handle_instr(b, opcode, w + 2, count - 2, NULL, handle_core);
      return true;
   default:
      return false;
   }
   return true;
}