1b8e80941Smrg/*
2b8e80941Smrg * Copyright © 2018 Red Hat
3b8e80941Smrg *
4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5b8e80941Smrg * copy of this software and associated documentation files (the "Software"),
6b8e80941Smrg * to deal in the Software without restriction, including without limitation
7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the
9b8e80941Smrg * Software is furnished to do so, subject to the following conditions:
10b8e80941Smrg *
11b8e80941Smrg * The above copyright notice and this permission notice (including the next
12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the
13b8e80941Smrg * Software.
14b8e80941Smrg *
15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21b8e80941Smrg * IN THE SOFTWARE.
22b8e80941Smrg *
23b8e80941Smrg * Authors:
24b8e80941Smrg *    Rob Clark (robdclark@gmail.com)
25b8e80941Smrg */
26b8e80941Smrg
27b8e80941Smrg#include "math.h"
28b8e80941Smrg
29b8e80941Smrg#include "nir/nir_builtin_builder.h"
30b8e80941Smrg
31b8e80941Smrg#include "vtn_private.h"
32b8e80941Smrg#include "OpenCL.std.h"
33b8e80941Smrg
34b8e80941Smrgtypedef nir_ssa_def *(*nir_handler)(struct vtn_builder *b, enum OpenCLstd opcode,
35b8e80941Smrg                                    unsigned num_srcs, nir_ssa_def **srcs,
36b8e80941Smrg                                    const struct glsl_type *dest_type);
37b8e80941Smrg
38b8e80941Smrgstatic void
39b8e80941Smrghandle_instr(struct vtn_builder *b, enum OpenCLstd opcode, const uint32_t *w,
40b8e80941Smrg             unsigned count, nir_handler handler)
41b8e80941Smrg{
42b8e80941Smrg   const struct glsl_type *dest_type =
43b8e80941Smrg      vtn_value(b, w[1], vtn_value_type_type)->type->type;
44b8e80941Smrg
45b8e80941Smrg   unsigned num_srcs = count - 5;
46b8e80941Smrg   nir_ssa_def *srcs[3] = { NULL };
47b8e80941Smrg   vtn_assert(num_srcs <= ARRAY_SIZE(srcs));
48b8e80941Smrg   for (unsigned i = 0; i < num_srcs; i++) {
49b8e80941Smrg      srcs[i] = vtn_ssa_value(b, w[i + 5])->def;
50b8e80941Smrg   }
51b8e80941Smrg
52b8e80941Smrg   nir_ssa_def *result = handler(b, opcode, num_srcs, srcs, dest_type);
53b8e80941Smrg   if (result) {
54b8e80941Smrg      struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
55b8e80941Smrg      val->ssa = vtn_create_ssa_value(b, dest_type);
56b8e80941Smrg      val->ssa->def = result;
57b8e80941Smrg   } else {
58b8e80941Smrg      vtn_assert(dest_type == glsl_void_type());
59b8e80941Smrg   }
60b8e80941Smrg}
61b8e80941Smrg
62b8e80941Smrgstatic nir_op
63b8e80941Smrgnir_alu_op_for_opencl_opcode(struct vtn_builder *b, enum OpenCLstd opcode)
64b8e80941Smrg{
65b8e80941Smrg   switch (opcode) {
66b8e80941Smrg   case Fabs: return nir_op_fabs;
67b8e80941Smrg   case SAbs: return nir_op_iabs;
68b8e80941Smrg   case SAdd_sat: return nir_op_iadd_sat;
69b8e80941Smrg   case UAdd_sat: return nir_op_uadd_sat;
70b8e80941Smrg   case Ceil: return nir_op_fceil;
71b8e80941Smrg   case Cos: return nir_op_fcos;
72b8e80941Smrg   case Exp2: return nir_op_fexp2;
73b8e80941Smrg   case Log2: return nir_op_flog2;
74b8e80941Smrg   case Floor: return nir_op_ffloor;
75b8e80941Smrg   case SHadd: return nir_op_ihadd;
76b8e80941Smrg   case UHadd: return nir_op_uhadd;
77b8e80941Smrg   case Fma: return nir_op_ffma;
78b8e80941Smrg   case Fmax: return nir_op_fmax;
79b8e80941Smrg   case SMax: return nir_op_imax;
80b8e80941Smrg   case UMax: return nir_op_umax;
81b8e80941Smrg   case Fmin: return nir_op_fmin;
82b8e80941Smrg   case SMin: return nir_op_imin;
83b8e80941Smrg   case UMin: return nir_op_umin;
84b8e80941Smrg   case Fmod: return nir_op_fmod;
85b8e80941Smrg   case Mix: return nir_op_flrp;
86b8e80941Smrg   case SMul_hi: return nir_op_imul_high;
87b8e80941Smrg   case UMul_hi: return nir_op_umul_high;
88b8e80941Smrg   case Popcount: return nir_op_bit_count;
89b8e80941Smrg   case Pow: return nir_op_fpow;
90b8e80941Smrg   case Remainder: return nir_op_frem;
91b8e80941Smrg   case SRhadd: return nir_op_irhadd;
92b8e80941Smrg   case URhadd: return nir_op_urhadd;
93b8e80941Smrg   case Rsqrt: return nir_op_frsq;
94b8e80941Smrg   case Sign: return nir_op_fsign;
95b8e80941Smrg   case Sin: return nir_op_fsin;
96b8e80941Smrg   case Sqrt: return nir_op_fsqrt;
97b8e80941Smrg   case SSub_sat: return nir_op_isub_sat;
98b8e80941Smrg   case USub_sat: return nir_op_usub_sat;
99b8e80941Smrg   case Trunc: return nir_op_ftrunc;
100b8e80941Smrg   /* uhm... */
101b8e80941Smrg   case UAbs: return nir_op_imov;
102b8e80941Smrg   default:
103b8e80941Smrg      vtn_fail("No NIR equivalent");
104b8e80941Smrg   }
105b8e80941Smrg}
106b8e80941Smrg
107b8e80941Smrgstatic nir_ssa_def *
108b8e80941Smrghandle_alu(struct vtn_builder *b, enum OpenCLstd opcode, unsigned num_srcs,
109b8e80941Smrg           nir_ssa_def **srcs, const struct glsl_type *dest_type)
110b8e80941Smrg{
111b8e80941Smrg   return nir_build_alu(&b->nb, nir_alu_op_for_opencl_opcode(b, opcode),
112b8e80941Smrg                        srcs[0], srcs[1], srcs[2], NULL);
113b8e80941Smrg}
114b8e80941Smrg
115b8e80941Smrgstatic nir_ssa_def *
116b8e80941Smrghandle_special(struct vtn_builder *b, enum OpenCLstd opcode, unsigned num_srcs,
117b8e80941Smrg               nir_ssa_def **srcs, const struct glsl_type *dest_type)
118b8e80941Smrg{
119b8e80941Smrg   nir_builder *nb = &b->nb;
120b8e80941Smrg
121b8e80941Smrg   switch (opcode) {
122b8e80941Smrg   case SAbs_diff:
123b8e80941Smrg      return nir_iabs_diff(nb, srcs[0], srcs[1]);
124b8e80941Smrg   case UAbs_diff:
125b8e80941Smrg      return nir_uabs_diff(nb, srcs[0], srcs[1]);
126b8e80941Smrg   case Bitselect:
127b8e80941Smrg      return nir_bitselect(nb, srcs[0], srcs[1], srcs[2]);
128b8e80941Smrg   case FClamp:
129b8e80941Smrg      return nir_fclamp(nb, srcs[0], srcs[1], srcs[2]);
130b8e80941Smrg   case SClamp:
131b8e80941Smrg      return nir_iclamp(nb, srcs[0], srcs[1], srcs[2]);
132b8e80941Smrg   case UClamp:
133b8e80941Smrg      return nir_uclamp(nb, srcs[0], srcs[1], srcs[2]);
134b8e80941Smrg   case Copysign:
135b8e80941Smrg      return nir_copysign(nb, srcs[0], srcs[1]);
136b8e80941Smrg   case Cross:
137b8e80941Smrg      if (glsl_get_components(dest_type) == 4)
138b8e80941Smrg         return nir_cross4(nb, srcs[0], srcs[1]);
139b8e80941Smrg      return nir_cross3(nb, srcs[0], srcs[1]);
140b8e80941Smrg   case Degrees:
141b8e80941Smrg      return nir_degrees(nb, srcs[0]);
142b8e80941Smrg   case Fdim:
143b8e80941Smrg      return nir_fdim(nb, srcs[0], srcs[1]);
144b8e80941Smrg   case Distance:
145b8e80941Smrg      return nir_distance(nb, srcs[0], srcs[1]);
146b8e80941Smrg   case Fast_distance:
147b8e80941Smrg      return nir_fast_distance(nb, srcs[0], srcs[1]);
148b8e80941Smrg   case Fast_length:
149b8e80941Smrg      return nir_fast_length(nb, srcs[0]);
150b8e80941Smrg   case Fast_normalize:
151b8e80941Smrg      return nir_fast_normalize(nb, srcs[0]);
152b8e80941Smrg   case Length:
153b8e80941Smrg      return nir_length(nb, srcs[0]);
154b8e80941Smrg   case Mad:
155b8e80941Smrg      return nir_fmad(nb, srcs[0], srcs[1], srcs[2]);
156b8e80941Smrg   case Maxmag:
157b8e80941Smrg      return nir_maxmag(nb, srcs[0], srcs[1]);
158b8e80941Smrg   case Minmag:
159b8e80941Smrg      return nir_minmag(nb, srcs[0], srcs[1]);
160b8e80941Smrg   case Nan:
161b8e80941Smrg      return nir_nan(nb, srcs[0]);
162b8e80941Smrg   case Nextafter:
163b8e80941Smrg      return nir_nextafter(nb, srcs[0], srcs[1]);
164b8e80941Smrg   case Normalize:
165b8e80941Smrg      return nir_normalize(nb, srcs[0]);
166b8e80941Smrg   case Radians:
167b8e80941Smrg      return nir_radians(nb, srcs[0]);
168b8e80941Smrg   case Rotate:
169b8e80941Smrg      return nir_rotate(nb, srcs[0], srcs[1]);
170b8e80941Smrg   case Smoothstep:
171b8e80941Smrg      return nir_smoothstep(nb, srcs[0], srcs[1], srcs[2]);
172b8e80941Smrg   case Select:
173b8e80941Smrg      return nir_select(nb, srcs[0], srcs[1], srcs[2]);
174b8e80941Smrg   case Step:
175b8e80941Smrg      return nir_sge(nb, srcs[1], srcs[0]);
176b8e80941Smrg   case S_Upsample:
177b8e80941Smrg   case U_Upsample:
178b8e80941Smrg      return nir_upsample(nb, srcs[0], srcs[1]);
179b8e80941Smrg   default:
180b8e80941Smrg      vtn_fail("No NIR equivalent");
181b8e80941Smrg      return NULL;
182b8e80941Smrg   }
183b8e80941Smrg}
184b8e80941Smrg
185b8e80941Smrgstatic void
186b8e80941Smrg_handle_v_load_store(struct vtn_builder *b, enum OpenCLstd opcode,
187b8e80941Smrg                     const uint32_t *w, unsigned count, bool load)
188b8e80941Smrg{
189b8e80941Smrg   struct vtn_type *type;
190b8e80941Smrg   if (load)
191b8e80941Smrg      type = vtn_value(b, w[1], vtn_value_type_type)->type;
192b8e80941Smrg   else
193b8e80941Smrg      type = vtn_untyped_value(b, w[5])->type;
194b8e80941Smrg   unsigned a = load ? 0 : 1;
195b8e80941Smrg
196b8e80941Smrg   const struct glsl_type *dest_type = type->type;
197b8e80941Smrg   unsigned components = glsl_get_vector_elements(dest_type);
198b8e80941Smrg   unsigned stride = components * glsl_get_bit_size(dest_type) / 8;
199b8e80941Smrg
200b8e80941Smrg   nir_ssa_def *offset = vtn_ssa_value(b, w[5 + a])->def;
201b8e80941Smrg   struct vtn_value *p = vtn_value(b, w[6 + a], vtn_value_type_pointer);
202b8e80941Smrg
203b8e80941Smrg   nir_deref_instr *deref = vtn_pointer_to_deref(b, p->pointer);
204b8e80941Smrg
205b8e80941Smrg   /* 1. cast to vec type with adjusted stride */
206b8e80941Smrg   deref = nir_build_deref_cast(&b->nb, &deref->dest.ssa, deref->mode,
207b8e80941Smrg                                dest_type, stride);
208b8e80941Smrg   /* 2. deref ptr_as_array */
209b8e80941Smrg   deref = nir_build_deref_ptr_as_array(&b->nb, deref, offset);
210b8e80941Smrg
211b8e80941Smrg   if (load) {
212b8e80941Smrg      struct vtn_ssa_value *val = vtn_local_load(b, deref, p->type->access);
213b8e80941Smrg      vtn_push_ssa(b, w[2], type, val);
214b8e80941Smrg   } else {
215b8e80941Smrg      struct vtn_ssa_value *val = vtn_ssa_value(b, w[5]);
216b8e80941Smrg      vtn_local_store(b, val, deref, p->type->access);
217b8e80941Smrg   }
218b8e80941Smrg}
219b8e80941Smrg
220b8e80941Smrgstatic void
221b8e80941Smrgvtn_handle_opencl_vload(struct vtn_builder *b, enum OpenCLstd opcode,
222b8e80941Smrg                        const uint32_t *w, unsigned count)
223b8e80941Smrg{
224b8e80941Smrg   _handle_v_load_store(b, opcode, w, count, true);
225b8e80941Smrg}
226b8e80941Smrg
227b8e80941Smrgstatic void
228b8e80941Smrgvtn_handle_opencl_vstore(struct vtn_builder *b, enum OpenCLstd opcode,
229b8e80941Smrg                         const uint32_t *w, unsigned count)
230b8e80941Smrg{
231b8e80941Smrg   _handle_v_load_store(b, opcode, w, count, false);
232b8e80941Smrg}
233b8e80941Smrg
234b8e80941Smrgstatic nir_ssa_def *
235b8e80941Smrghandle_printf(struct vtn_builder *b, enum OpenCLstd opcode, unsigned num_srcs,
236b8e80941Smrg              nir_ssa_def **srcs, const struct glsl_type *dest_type)
237b8e80941Smrg{
238b8e80941Smrg   /* hahah, yeah, right.. */
239b8e80941Smrg   return nir_imm_int(&b->nb, -1);
240b8e80941Smrg}
241b8e80941Smrg
242b8e80941Smrgbool
243b8e80941Smrgvtn_handle_opencl_instruction(struct vtn_builder *b, uint32_t ext_opcode,
244b8e80941Smrg                              const uint32_t *w, unsigned count)
245b8e80941Smrg{
246b8e80941Smrg   switch (ext_opcode) {
247b8e80941Smrg   case Fabs:
248b8e80941Smrg   case SAbs:
249b8e80941Smrg   case UAbs:
250b8e80941Smrg   case SAdd_sat:
251b8e80941Smrg   case UAdd_sat:
252b8e80941Smrg   case Ceil:
253b8e80941Smrg   case Cos:
254b8e80941Smrg   case Exp2:
255b8e80941Smrg   case Log2:
256b8e80941Smrg   case Floor:
257b8e80941Smrg   case Fma:
258b8e80941Smrg   case Fmax:
259b8e80941Smrg   case SHadd:
260b8e80941Smrg   case UHadd:
261b8e80941Smrg   case SMax:
262b8e80941Smrg   case UMax:
263b8e80941Smrg   case Fmin:
264b8e80941Smrg   case SMin:
265b8e80941Smrg   case UMin:
266b8e80941Smrg   case Mix:
267b8e80941Smrg   case Fmod:
268b8e80941Smrg   case SMul_hi:
269b8e80941Smrg   case UMul_hi:
270b8e80941Smrg   case Popcount:
271b8e80941Smrg   case Pow:
272b8e80941Smrg   case Remainder:
273b8e80941Smrg   case SRhadd:
274b8e80941Smrg   case URhadd:
275b8e80941Smrg   case Rsqrt:
276b8e80941Smrg   case Sign:
277b8e80941Smrg   case Sin:
278b8e80941Smrg   case Sqrt:
279b8e80941Smrg   case SSub_sat:
280b8e80941Smrg   case USub_sat:
281b8e80941Smrg   case Trunc:
282b8e80941Smrg      handle_instr(b, ext_opcode, w, count, handle_alu);
283b8e80941Smrg      return true;
284b8e80941Smrg   case SAbs_diff:
285b8e80941Smrg   case UAbs_diff:
286b8e80941Smrg   case Bitselect:
287b8e80941Smrg   case FClamp:
288b8e80941Smrg   case SClamp:
289b8e80941Smrg   case UClamp:
290b8e80941Smrg   case Copysign:
291b8e80941Smrg   case Cross:
292b8e80941Smrg   case Degrees:
293b8e80941Smrg   case Fdim:
294b8e80941Smrg   case Distance:
295b8e80941Smrg   case Fast_distance:
296b8e80941Smrg   case Fast_length:
297b8e80941Smrg   case Fast_normalize:
298b8e80941Smrg   case Length:
299b8e80941Smrg   case Mad:
300b8e80941Smrg   case Maxmag:
301b8e80941Smrg   case Minmag:
302b8e80941Smrg   case Nan:
303b8e80941Smrg   case Nextafter:
304b8e80941Smrg   case Normalize:
305b8e80941Smrg   case Radians:
306b8e80941Smrg   case Rotate:
307b8e80941Smrg   case Select:
308b8e80941Smrg   case Step:
309b8e80941Smrg   case Smoothstep:
310b8e80941Smrg   case S_Upsample:
311b8e80941Smrg   case U_Upsample:
312b8e80941Smrg      handle_instr(b, ext_opcode, w, count, handle_special);
313b8e80941Smrg      return true;
314b8e80941Smrg   case Vloadn:
315b8e80941Smrg      vtn_handle_opencl_vload(b, ext_opcode, w, count);
316b8e80941Smrg      return true;
317b8e80941Smrg   case Vstoren:
318b8e80941Smrg      vtn_handle_opencl_vstore(b, ext_opcode, w, count);
319b8e80941Smrg      return true;
320b8e80941Smrg   case Printf:
321b8e80941Smrg      handle_instr(b, ext_opcode, w, count, handle_printf);
322b8e80941Smrg      return true;
323b8e80941Smrg   case Prefetch:
324b8e80941Smrg      /* TODO maybe add a nir instruction for this? */
325b8e80941Smrg      return true;
326b8e80941Smrg   default:
327b8e80941Smrg      vtn_fail("unhandled opencl opc: %u\n", ext_opcode);
328b8e80941Smrg      return false;
329b8e80941Smrg   }
330b8e80941Smrg}
331