1/*
2 * Copyright © Microsoft Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include <stdio.h>
25#include <stdint.h>
26#include <stdexcept>
27#include <vector>
28
29#include <directx/d3d12.h>
30#include <dxgi1_4.h>
31#include <gtest/gtest.h>
32#include <wrl.h>
33
34#include "compute_test.h"
35
36using std::vector;
37
38TEST_F(ComputeTest, runtime_memcpy)
39{
40   struct shift { uint8_t val; uint8_t shift; uint16_t ret; };
41   const char *kernel_source =
42   "struct shift { uchar val; uchar shift; ushort ret; };\n\
43   __kernel void main_test(__global struct shift *inout)\n\
44   {\n\
45      uint id = get_global_id(0);\n\
46      uint id2 = id + get_global_id(1);\n\
47      struct shift lc[4] = { { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }};\n\
48      lc[id] = inout[id];\n\
49      inout[id2].ret = (ushort) lc[id2].val << (ushort) lc[id2].shift;\n\
50   }\n";
51
52   auto inout = ShaderArg<struct shift>({
53         { 0x10, 1, 0xffff },
54         { 0x20, 2, 0xffff },
55         { 0x30, 3, 0xffff },
56         { 0x40, 4, 0xffff },
57      },
58      SHADER_ARG_INOUT);
59   const uint16_t expected[] = { 0x20, 0x80, 0x180, 0x400 };
60   run_shader(kernel_source, inout.size(), 1, 1, inout);
61   for (int i = 0; i < inout.size(); ++i)
62      EXPECT_EQ(inout[i].ret, expected[i]);
63}
64
65TEST_F(ComputeTest, two_global_arrays)
66{
67   const char *kernel_source =
68   "__kernel void main_test(__global uint *g1, __global uint *g2)\n\
69   {\n\
70       uint idx = get_global_id(0);\n\
71       g1[idx] -= g2[idx];\n\
72   }\n";
73   auto g1 = ShaderArg<uint32_t>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT);
74   auto g2 = ShaderArg<uint32_t>({ 1, 2, 3, 4 }, SHADER_ARG_INPUT);
75   const uint32_t expected[] = {
76      9, 18, 27, 36
77   };
78
79   run_shader(kernel_source, g1.size(), 1, 1, g1, g2);
80   for (int i = 0; i < g1.size(); ++i)
81      EXPECT_EQ(g1[i], expected[i]);
82}
83
84/* Disabled until saturated conversions from f32->i64 fixed (mesa/mesa#3824) */
85TEST_F(ComputeTest, DISABLED_i64tof32)
86{
87   const char *kernel_source =
88   "__kernel void main_test(__global long *out, __constant long *in)\n\
89   {\n\
90       __local float tmp[12];\n\
91       uint idx = get_global_id(0);\n\
92       tmp[idx] = in[idx];\n\
93       barrier(CLK_LOCAL_MEM_FENCE);\n\
94       out[idx] = tmp[idx + get_global_id(1)];\n\
95   }\n";
96   auto in = ShaderArg<int64_t>({ 0x100000000LL,
97                                  -0x100000000LL,
98                                  0x7fffffffffffffffLL,
99                                  0x4000004000000000LL,
100                                  0x4000003fffffffffLL,
101                                  0x4000004000000001LL,
102                                  -1,
103                                  -0x4000004000000000LL,
104                                  -0x4000003fffffffffLL,
105                                  -0x4000004000000001LL,
106                                  0,
107				  INT64_MIN },
108                                SHADER_ARG_INPUT);
109   auto out = ShaderArg<int64_t>(std::vector<int64_t>(12, 0xdeadbeed), SHADER_ARG_OUTPUT);
110   const int64_t expected[] = {
111      0x100000000LL,
112      -0x100000000LL,
113      0x7fffffffffffffffLL,
114      0x4000000000000000LL,
115      0x4000000000000000LL,
116      0x4000008000000000LL,
117      -1,
118      -0x4000000000000000LL,
119      -0x4000000000000000LL,
120      -0x4000008000000000LL,
121      0,
122      INT64_MIN,
123   };
124
125   run_shader(kernel_source, out.size(), 1, 1, out, in);
126   for (int i = 0; i < out.size(); ++i) {
127      EXPECT_EQ((int64_t)out[i], expected[i]);
128   }
129}
130TEST_F(ComputeTest, two_constant_arrays)
131{
132   const char *kernel_source =
133   "__kernel void main_test(__constant uint *c1, __global uint *g1, __constant uint *c2)\n\
134   {\n\
135       uint idx = get_global_id(0);\n\
136       g1[idx] -= c1[idx] + c2[idx];\n\
137   }\n";
138   auto g1 = ShaderArg<uint32_t>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT);
139   auto c1 = ShaderArg<uint32_t>({ 1, 2, 3, 4 }, SHADER_ARG_INPUT);
140   auto c2 = ShaderArg<uint32_t>(std::vector<uint32_t>(16384, 5), SHADER_ARG_INPUT);
141   const uint32_t expected[] = {
142      4, 13, 22, 31
143   };
144
145   run_shader(kernel_source, g1.size(), 1, 1, c1, g1, c2);
146   for (int i = 0; i < g1.size(); ++i)
147      EXPECT_EQ(g1[i], expected[i]);
148}
149
150TEST_F(ComputeTest, null_constant_ptr)
151{
152   const char *kernel_source =
153   "__kernel void main_test(__global uint *g1, __constant uint *c1)\n\
154   {\n\
155       __constant uint fallback[] = {2, 3, 4, 5};\n\
156       __constant uint *c = c1 ? c1 : fallback;\n\
157       uint idx = get_global_id(0);\n\
158       g1[idx] -= c[idx];\n\
159   }\n";
160   auto g1 = ShaderArg<uint32_t>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT);
161   auto c1 = ShaderArg<uint32_t>({ 1, 2, 3, 4 }, SHADER_ARG_INPUT);
162   const uint32_t expected1[] = {
163      9, 18, 27, 36
164   };
165
166   run_shader(kernel_source, g1.size(), 1, 1, g1, c1);
167   for (int i = 0; i < g1.size(); ++i)
168      EXPECT_EQ(g1[i], expected1[i]);
169
170   const uint32_t expected2[] = {
171      8, 17, 26, 35
172   };
173
174   g1 = ShaderArg<uint32_t>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT);
175   auto c2 = NullShaderArg();
176   run_shader(kernel_source, g1.size(), 1, 1, g1, c2);
177   for (int i = 0; i < g1.size(); ++i)
178      EXPECT_EQ(g1[i], expected2[i]);
179}
180
181/* This test seems to fail on older versions of WARP. */
182TEST_F(ComputeTest, DISABLED_null_global_ptr)
183{
184   const char *kernel_source =
185   "__kernel void main_test(__global uint *g1, __global uint *g2)\n\
186   {\n\
187       __constant uint fallback[] = {2, 3, 4, 5};\n\
188       uint idx = get_global_id(0);\n\
189       g1[idx] -= g2 ? g2[idx] : fallback[idx];\n\
190   }\n";
191   auto g1 = ShaderArg<uint32_t>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT);
192   auto g2 = ShaderArg<uint32_t>({ 1, 2, 3, 4 }, SHADER_ARG_INPUT);
193   const uint32_t expected1[] = {
194      9, 18, 27, 36
195   };
196
197   run_shader(kernel_source, g1.size(), 1, 1, g1, g2);
198   for (int i = 0; i < g1.size(); ++i)
199      EXPECT_EQ(g1[i], expected1[i]);
200
201   const uint32_t expected2[] = {
202      8, 17, 26, 35
203   };
204
205   g1 = ShaderArg<uint32_t>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT);
206   auto g2null = NullShaderArg();
207   run_shader(kernel_source, g1.size(), 1, 1, g1, g2null);
208   for (int i = 0; i < g1.size(); ++i)
209      EXPECT_EQ(g1[i], expected2[i]);
210}
211
212TEST_F(ComputeTest, ret_constant_ptr)
213{
214   struct s { uint64_t ptr; uint32_t val; };
215   const char *kernel_source =
216   "struct s { __constant uint *ptr; uint val; };\n\
217   __kernel void main_test(__global struct s *out, __constant uint *in)\n\
218   {\n\
219       __constant uint foo[] = { 1, 2 };\n\
220       uint idx = get_global_id(0);\n\
221       if (idx == 0)\n\
222          out[idx].ptr = foo;\n\
223       else\n\
224          out[idx].ptr = in;\n\
225       out[idx].val = out[idx].ptr[idx];\n\
226   }\n";
227   auto out = ShaderArg<struct s>(std::vector<struct s>(2, {0xdeadbeefdeadbeef, 0}), SHADER_ARG_OUTPUT);
228   auto in = ShaderArg<uint32_t>({ 3, 4 }, SHADER_ARG_INPUT);
229   const uint32_t expected_val[] = {
230      1, 4
231   };
232   const uint64_t expected_ptr[] = {
233      2ull << 32, 1ull << 32
234   };
235
236   run_shader(kernel_source, out.size(), 1, 1, out, in);
237   for (int i = 0; i < out.size(); ++i) {
238      EXPECT_EQ(out[i].val, expected_val[i]);
239      EXPECT_EQ(out[i].ptr, expected_ptr[i]);
240   }
241}
242
243TEST_F(ComputeTest, ret_global_ptr)
244{
245   struct s { uint64_t ptr; uint32_t val; };
246   const char *kernel_source =
247   "struct s { __global uint *ptr; uint val; };\n\
248   __kernel void main_test(__global struct s *out, __global uint *in1, __global uint *in2)\n\
249   {\n\
250       uint idx = get_global_id(0);\n\
251       out[idx].ptr = idx ? in2 : in1;\n\
252       out[idx].val = out[idx].ptr[idx];\n\
253   }\n";
254   auto out = ShaderArg<struct s>(std::vector<struct s>(2, {0xdeadbeefdeadbeef, 0}), SHADER_ARG_OUTPUT);
255   auto in1 = ShaderArg<uint32_t>({ 1, 2 }, SHADER_ARG_INPUT);
256   auto in2 = ShaderArg<uint32_t>({ 3, 4 }, SHADER_ARG_INPUT);
257   const uint32_t expected_val[] = {
258      1, 4
259   };
260   const uint64_t expected_ptr[] = {
261      1ull << 32, 2ull << 32
262   };
263
264   run_shader(kernel_source, out.size(), 1, 1, out, in1, in2);
265   for (int i = 0; i < out.size(); ++i) {
266      EXPECT_EQ(out[i].val, expected_val[i]);
267      EXPECT_EQ(out[i].ptr, expected_ptr[i]);
268   }
269}
270
271TEST_F(ComputeTest, ret_local_ptr)
272{
273   struct s { uint64_t ptr; };
274   const char *kernel_source =
275   "struct s { __local uint *ptr; };\n\
276   __kernel void main_test(__global struct s *out)\n\
277   {\n\
278       __local uint tmp[2];\n\
279       uint idx = get_global_id(0);\n\
280       tmp[idx] = idx;\n\
281       out[idx].ptr = &tmp[idx];\n\
282   }\n";
283   auto out = ShaderArg<struct s>(std::vector<struct s>(2, { 0xdeadbeefdeadbeef }), SHADER_ARG_OUTPUT);
284   const uint64_t expected_ptr[] = {
285      0, 4,
286   };
287
288   run_shader(kernel_source, out.size(), 1, 1, out);
289   for (int i = 0; i < out.size(); ++i) {
290      EXPECT_EQ(out[i].ptr, expected_ptr[i]);
291   }
292}
293
294TEST_F(ComputeTest, ret_private_ptr)
295{
296   struct s { uint64_t ptr; uint32_t value; };
297   const char *kernel_source =
298   "struct s { __private uint *ptr; uint value; };\n\
299   __kernel void main_test(__global struct s *out)\n\
300   {\n\
301       uint tmp[2] = {1, 2};\n\
302       uint idx = get_global_id(0);\n\
303       out[idx].ptr = &tmp[idx];\n\
304       out[idx].value = *out[idx].ptr;\n\
305   }\n";
306   auto out = ShaderArg<struct s>(std::vector<struct s>(2, { 0xdeadbeefdeadbeef }), SHADER_ARG_OUTPUT);
307   const uint64_t expected_ptr[] = {
308      0, 4,
309   };
310   const uint32_t expected_value[] = {
311      1, 2
312   };
313
314   run_shader(kernel_source, out.size(), 1, 1, out);
315   for (int i = 0; i < out.size(); ++i) {
316      EXPECT_EQ(out[i].ptr, expected_ptr[i]);
317   }
318}
319
320TEST_F(ComputeTest, globals_8bit)
321{
322   const char *kernel_source =
323   "__kernel void main_test(__global unsigned char *inout)\n\
324   {\n\
325       uint idx = get_global_id(0);\n\
326       inout[idx] = inout[idx] + 1;\n\
327   }\n";
328   auto inout = ShaderArg<uint8_t> ({ 100, 110, 120, 130 }, SHADER_ARG_INOUT);
329   const uint8_t expected[] = {
330      101, 111, 121, 131
331   };
332   run_shader(kernel_source, inout.size(), 1, 1, inout);
333   for (int i = 0; i < inout.size(); ++i)
334      EXPECT_EQ(inout[i], expected[i]);
335}
336
337TEST_F(ComputeTest, globals_16bit)
338{
339   const char *kernel_source =
340   "__kernel void main_test(__global unsigned short *inout)\n\
341   {\n\
342       uint idx = get_global_id(0);\n\
343       inout[idx] = inout[idx] + 1;\n\
344   }\n";
345   auto inout = ShaderArg<uint16_t> ({ 10000, 10010, 10020, 10030 }, SHADER_ARG_INOUT);
346   const uint16_t expected[] = {
347      10001, 10011, 10021, 10031
348   };
349   run_shader(kernel_source, inout.size(), 1, 1, inout);
350   for (int i = 0; i < inout.size(); ++i)
351      EXPECT_EQ(inout[i], expected[i]);
352}
353
354TEST_F(ComputeTest, DISABLED_globals_64bit)
355{
356   /* Test disabled, because we need a fixed version of WARP that hasn't
357      been officially shipped yet */
358
359   const char *kernel_source =
360   "__kernel void main_test(__global unsigned long *inout)\n\
361   {\n\
362       uint idx = get_global_id(0);\n\
363       inout[idx] = inout[idx] + 1;\n\
364   }\n";
365   uint64_t base = 1ull << 50;
366   auto inout = ShaderArg<uint64_t>({ base, base + 10, base + 20, base + 30 },
367                                    SHADER_ARG_INOUT);
368   const uint64_t expected[] = {
369      base + 1, base + 11, base + 21, base + 31
370   };
371   run_shader(kernel_source, inout.size(), 1, 1, inout);
372   for (int i = 0; i < inout.size(); ++i)
373      EXPECT_EQ(inout[i], expected[i]);
374}
375
376TEST_F(ComputeTest, built_ins_global_id)
377{
378   const char *kernel_source =
379   "__kernel void main_test(__global uint *output)\n\
380   {\n\
381       output[get_global_id(0)] = get_global_id(0);\n\
382   }\n";
383   auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
384                                     SHADER_ARG_OUTPUT);
385   const uint32_t expected[] = {
386      0, 1, 2, 3
387   };
388
389   run_shader(kernel_source, output.size(), 1, 1, output);
390   for (int i = 0; i < output.size(); ++i)
391      EXPECT_EQ(output[i], expected[i]);
392}
393
394TEST_F(ComputeTest, built_ins_global_id_rmw)
395{
396   const char *kernel_source =
397   "__kernel void main_test(__global uint *output)\n\
398   {\n\
399       uint id = get_global_id(0);\n\
400       output[id] = output[id] * (id + 1);\n\
401   }\n";
402   auto inout = ShaderArg<uint32_t>({0x00000001, 0x10000001, 0x00020002, 0x04010203},
403                                    SHADER_ARG_INOUT);
404   const uint32_t expected[] = {
405      0x00000001, 0x20000002, 0x00060006, 0x1004080c
406   };
407   run_shader(kernel_source, inout.size(), 1, 1, inout);
408   for (int i = 0; i < inout.size(); ++i)
409      EXPECT_EQ(inout[i], expected[i]);
410}
411
412TEST_F(ComputeTest, types_float_basics)
413{
414   const char *kernel_source =
415   "__kernel void main_test(__global uint *output)\n\
416   {\n\
417       output[get_global_id(0)] = (uint)((float)get_global_id(0) + 1.5f);\n\
418   }\n";
419   auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
420                                     SHADER_ARG_OUTPUT);
421   const uint32_t expected[] = {
422      1, 2, 3, 4
423   };
424   run_shader(kernel_source, output.size(), 1, 1, output);
425   for (int i = 0; i < output.size(); ++i)
426      EXPECT_EQ(output[i], expected[i]);
427}
428
429TEST_F(ComputeTest, DISABLED_types_double_basics)
430{
431   const char *kernel_source =
432   "__kernel void main_test(__global uint *output)\n\
433   {\n\
434       output[get_global_id(0)] = (uint)((double)get_global_id(0) + 1.5);\n\
435   }\n";
436   auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
437                                     SHADER_ARG_OUTPUT);
438   const uint32_t expected[] = {
439      1, 2, 3, 4
440   };
441   run_shader(kernel_source, output.size(), 1, 1, output);
442   for (int i = 0; i < output.size(); ++i)
443      EXPECT_EQ(output[i], expected[i]);
444}
445
446TEST_F(ComputeTest, types_short_basics)
447{
448   const char *kernel_source =
449   "__kernel void main_test(__global uint *output)\n\
450   {\n\
451       output[get_global_id(0)] = (uint)((short)get_global_id(0) + (short)1);\n\
452   }\n";
453   auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
454                                     SHADER_ARG_OUTPUT);
455   const uint32_t expected[] = {
456      1, 2, 3, 4
457   };
458   run_shader(kernel_source, output.size(), 1, 1, output);
459   for (int i = 0; i < output.size(); ++i)
460      EXPECT_EQ(output[i], expected[i]);
461}
462
463TEST_F(ComputeTest, types_char_basics)
464{
465   const char *kernel_source =
466   "__kernel void main_test(__global uint *output)\n\
467   {\n\
468       output[get_global_id(0)] = (uint)((char)get_global_id(0) + (char)1);\n\
469   }\n";
470   auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
471                                     SHADER_ARG_OUTPUT);
472   const uint32_t expected[] = {
473      1, 2, 3, 4
474   };
475   run_shader(kernel_source, output.size(), 1, 1, output);
476   for (int i = 0; i < output.size(); ++i)
477      EXPECT_EQ(output[i], expected[i]);
478}
479
480TEST_F(ComputeTest, types_if_statement)
481{
482   const char *kernel_source =
483   "__kernel void main_test(__global uint *output)\n\
484   {\n\
485       int idx = get_global_id(0);\n\
486       if (idx > 0)\n\
487           output[idx] = ~idx;\n\
488       else\n\
489           output[0] = 0xff;\n\
490   }\n";
491   auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
492                                     SHADER_ARG_OUTPUT);
493   const uint32_t expected[] = {
494      0xff, ~1u, ~2u, ~3u
495   };
496   run_shader(kernel_source, output.size(), 1, 1, output);
497   for (int i = 0; i < output.size(); ++i)
498      EXPECT_EQ(output[i], expected[i]);
499}
500
501TEST_F(ComputeTest, types_do_while_loop)
502{
503   const char *kernel_source =
504   "__kernel void main_test(__global uint *output)\n\
505   {\n\
506       int value = 1;\n\
507       int i = 1, n = get_global_id(0);\n\
508       do {\n\
509          value *= i++;\n\
510       } while (i <= n);\n\
511       output[n] = value;\n\
512   }\n";
513   auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(5, 0xdeadbeef),
514                                     SHADER_ARG_OUTPUT);
515   const uint32_t expected[] = {
516      1, 1, 1*2, 1*2*3, 1*2*3*4
517   };
518   run_shader(kernel_source, output.size(), 1, 1, output);
519   for (int i = 0; i < output.size(); ++i)
520      EXPECT_EQ(output[i], expected[i]);
521}
522
523TEST_F(ComputeTest, types_for_loop)
524{
525   const char *kernel_source =
526   "__kernel void main_test(__global uint *output)\n\
527   {\n\
528       int value = 1;\n\
529       int n = get_global_id(0);\n\
530       for (int i = 1; i <= n; ++i)\n\
531          value *= i;\n\
532       output[n] = value;\n\
533   }\n";
534   auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(5, 0xdeadbeef),
535                                     SHADER_ARG_OUTPUT);
536   const uint32_t expected[] = {
537      1, 1, 1*2, 1*2*3, 1*2*3*4
538   };
539   run_shader(kernel_source, output.size(), 1, 1, output);
540   for (int i = 0; i < output.size(); ++i)
541      EXPECT_EQ(output[i], expected[i]);
542}
543
544TEST_F(ComputeTest, DISABLED_complex_types_local_array_long)
545{
546   const char *kernel_source =
547   "__kernel void main_test(__global ulong *inout)\n\
548   {\n\
549      ushort tmp[] = {\n\
550         get_global_id(1) + 0x00000000,\n\
551         get_global_id(1) + 0x10000001,\n\
552         get_global_id(1) + 0x20000020,\n\
553         get_global_id(1) + 0x30000300,\n\
554      };\n\
555      uint idx = get_global_id(0);\n\
556      inout[idx] = tmp[idx];\n\
557   }\n";
558   auto inout = ShaderArg<uint64_t>({ 0, 0, 0, 0 }, SHADER_ARG_INOUT);
559   const uint16_t expected[] = {
560      0x00000000, 0x10000001, 0x20000020, 0x30000300,
561   };
562   run_shader(kernel_source, inout.size(), 1, 1, inout);
563   for (int i = 0; i < inout.size(); ++i)
564      EXPECT_EQ(inout[i], expected[i]);
565}
566
567TEST_F(ComputeTest, complex_types_local_array_short)
568{
569   const char *kernel_source =
570   "__kernel void main_test(__global ushort *inout)\n\
571   {\n\
572      ushort tmp[] = {\n\
573         get_global_id(1) + 0x00,\n\
574         get_global_id(1) + 0x10,\n\
575         get_global_id(1) + 0x20,\n\
576         get_global_id(1) + 0x30,\n\
577      };\n\
578      uint idx = get_global_id(0);\n\
579      inout[idx] = tmp[idx];\n\
580   }\n";
581   auto inout = ShaderArg<uint16_t>({ 0, 0, 0, 0 }, SHADER_ARG_INOUT);
582   const uint16_t expected[] = {
583      0x00, 0x10, 0x20, 0x30,
584   };
585   run_shader(kernel_source, inout.size(), 1, 1, inout);
586   for (int i = 0; i < inout.size(); ++i)
587      EXPECT_EQ(inout[i], expected[i]);
588}
589
590TEST_F(ComputeTest, complex_types_local_array_struct_vec_float_misaligned)
591{
592   const char *kernel_source =
593   "struct has_vecs { uchar c; ushort s; float2 f; };\n\
594   __kernel void main_test(__global uint *inout)\n\
595   {\n\
596      struct has_vecs tmp[] = {\n\
597         { 10 + get_global_id(0), get_global_id(1), { 10.0f, 1.0f } },\n\
598         { 19 + get_global_id(0), get_global_id(1), { 20.0f, 4.0f } },\n\
599         { 28 + get_global_id(0), get_global_id(1), { 30.0f, 9.0f } },\n\
600         { 37 + get_global_id(0), get_global_id(1), { 40.0f, 16.0f } },\n\
601      };\n\
602      uint idx = get_global_id(0);\n\
603      uint mul = (tmp[idx].c + tmp[idx].s) * trunc(tmp[idx].f[0]);\n\
604      inout[idx] = mul + trunc(tmp[idx].f[1]);\n\
605   }\n";
606   auto inout = ShaderArg<uint32_t>({ 0, 0, 0, 0 }, SHADER_ARG_INOUT);
607   const uint16_t expected[] = { 101, 404, 909, 1616 };
608   run_shader(kernel_source, inout.size(), 1, 1, inout);
609   for (int i = 0; i < inout.size(); ++i)
610      EXPECT_EQ(inout[i], expected[i]);
611}
612
613TEST_F(ComputeTest, complex_types_local_array)
614{
615   const char *kernel_source =
616   "__kernel void main_test(__global uint *inout)\n\
617   {\n\
618      uint tmp[] = {\n\
619         get_global_id(1) + 0x00,\n\
620         get_global_id(1) + 0x10,\n\
621         get_global_id(1) + 0x20,\n\
622         get_global_id(1) + 0x30,\n\
623      };\n\
624      uint idx = get_global_id(0);\n\
625      inout[idx] = tmp[idx];\n\
626   }\n";
627   auto inout = ShaderArg<uint32_t>({ 0, 0, 0, 0 }, SHADER_ARG_INOUT);
628   const uint32_t expected[] = {
629      0x00, 0x10, 0x20, 0x30,
630   };
631   run_shader(kernel_source, inout.size(), 1, 1, inout);
632   for (int i = 0; i < inout.size(); ++i)
633      EXPECT_EQ(inout[i], expected[i]);
634}
635
636TEST_F(ComputeTest, complex_types_global_struct_array)
637{
638   struct two_vals { uint32_t add; uint32_t mul; };
639   const char *kernel_source =
640   "struct two_vals { uint add; uint mul; };\n\
641   __kernel void main_test(__global struct two_vals *in_out)\n\
642   {\n\
643      uint id = get_global_id(0);\n\
644      in_out[id].add = in_out[id].add + id;\n\
645      in_out[id].mul = in_out[id].mul * id;\n\
646   }\n";
647   auto inout = ShaderArg<struct two_vals>({ { 8, 8 }, { 16, 16 }, { 64, 64 }, { 65536, 65536 } },
648                                           SHADER_ARG_INOUT);
649   const struct two_vals expected[] = {
650      { 8 + 0, 8 * 0 },
651      { 16 + 1, 16 * 1 },
652      { 64 + 2, 64 * 2 },
653      { 65536 + 3, 65536 * 3 }
654   };
655   run_shader(kernel_source, inout.size(), 1, 1, inout);
656   for (int i = 0; i < inout.size(); ++i) {
657      EXPECT_EQ(inout[i].add, expected[i].add);
658      EXPECT_EQ(inout[i].mul, expected[i].mul);
659   }
660}
661
662TEST_F(ComputeTest, complex_types_global_uint2)
663{
664   struct uint2 { uint32_t x; uint32_t y; };
665   const char *kernel_source =
666   "__kernel void main_test(__global uint2 *inout)\n\
667   {\n\
668      uint id = get_global_id(0);\n\
669      inout[id].x = inout[id].x + id;\n\
670      inout[id].y = inout[id].y * id;\n\
671   }\n";
672   auto inout = ShaderArg<struct uint2>({ { 8, 8 }, { 16, 16 }, { 64, 64 }, { 65536, 65536 } },
673                                        SHADER_ARG_INOUT);
674   const struct uint2 expected[] = {
675      { 8 + 0, 8 * 0 },
676      { 16 + 1, 16 * 1 },
677      { 64 + 2, 64 * 2 },
678      { 65536 + 3, 65536 * 3 }
679   };
680   run_shader(kernel_source, inout.size(), 1, 1, inout);
681   for (int i = 0; i < inout.size(); ++i) {
682      EXPECT_EQ(inout[i].x, expected[i].x);
683      EXPECT_EQ(inout[i].y, expected[i].y);
684   }
685}
686
687TEST_F(ComputeTest, complex_types_global_ushort2)
688{
689   struct ushort2 { uint16_t x; uint16_t y; };
690   const char *kernel_source =
691   "__kernel void main_test(__global ushort2 *inout)\n\
692   {\n\
693      uint id = get_global_id(0);\n\
694      inout[id].x = inout[id].x + id;\n\
695      inout[id].y = inout[id].y * id;\n\
696   }\n";
697   auto inout = ShaderArg<struct ushort2>({ { 8, 8 }, { 16, 16 }, { 64, 64 },
698                                            { (uint16_t)65536, (uint16_t)65536 } },
699                                          SHADER_ARG_INOUT);
700   const struct ushort2 expected[] = {
701      { 8 + 0, 8 * 0 },
702      { 16 + 1, 16 * 1 },
703      { 64 + 2, 64 * 2 },
704      { (uint16_t)(65536 + 3), (uint16_t)(65536 * 3) }
705   };
706   run_shader(kernel_source, inout.size(), 1, 1, inout);
707   for (int i = 0; i < inout.size(); ++i) {
708      EXPECT_EQ(inout[i].x, expected[i].x);
709      EXPECT_EQ(inout[i].y, expected[i].y);
710   }
711}
712
713TEST_F(ComputeTest, complex_types_global_uchar3)
714{
715   struct uchar3 { uint8_t x; uint8_t y; uint8_t z; uint8_t pad; };
716   const char *kernel_source =
717   "__kernel void main_test(__global uchar3 *inout)\n\
718   {\n\
719      uint id = get_global_id(0);\n\
720      inout[id].x = inout[id].x + id;\n\
721      inout[id].y = inout[id].y * id;\n\
722      inout[id].z = inout[id].y + inout[id].x;\n\
723   }\n";
724   auto inout = ShaderArg<struct uchar3>({ { 8, 8, 8 }, { 16, 16, 16 }, { 64, 64, 64 }, { 255, 255, 255 } },
725                                         SHADER_ARG_INOUT);
726   const struct uchar3 expected[] = {
727      { 8 + 0, 8 * 0, (8 + 0) + (8 * 0) },
728      { 16 + 1, 16 * 1, (16 + 1) + (16 * 1) },
729      { 64 + 2, 64 * 2, (64 + 2) + (64 * 2) },
730      { (uint8_t)(255 + 3), (uint8_t)(255 * 3), (uint8_t)((255 + 3) + (255 * 3)) }
731   };
732   run_shader(kernel_source, inout.size(), 1, 1, inout);
733   for (int i = 0; i < inout.size(); ++i) {
734      EXPECT_EQ(inout[i].x, expected[i].x);
735      EXPECT_EQ(inout[i].y, expected[i].y);
736      EXPECT_EQ(inout[i].z, expected[i].z);
737   }
738}
739
740TEST_F(ComputeTest, complex_types_constant_uchar3)
741{
742   struct uchar3 { uint8_t x; uint8_t y; uint8_t z; uint8_t pad; };
743   const char *kernel_source =
744   "__kernel void main_test(__global uchar3 *out, __constant uchar3 *in)\n\
745   {\n\
746      uint id = get_global_id(0);\n\
747      out[id].x = in[id].x + id;\n\
748      out[id].y = in[id].y * id;\n\
749      out[id].z = out[id].y + out[id].x;\n\
750   }\n";
751   auto in = ShaderArg<struct uchar3>({ { 8, 8, 8 }, { 16, 16, 16 }, { 64, 64, 64 }, { 255, 255, 255 } },
752                                      SHADER_ARG_INPUT);
753   auto out = ShaderArg<struct uchar3>(std::vector<struct uchar3>(4, { 0xff, 0xff, 0xff }),
754                                      SHADER_ARG_OUTPUT);
755   const struct uchar3 expected[] = {
756      { 8 + 0, 8 * 0, (8 + 0) + (8 * 0) },
757      { 16 + 1, 16 * 1, (16 + 1) + (16 * 1) },
758      { 64 + 2, 64 * 2, (64 + 2) + (64 * 2) },
759      { (uint8_t)(255 + 3), (uint8_t)(255 * 3), (uint8_t)((255 + 3) + (255 * 3)) }
760   };
761   run_shader(kernel_source, out.size(), 1, 1, out, in);
762   for (int i = 0; i < out.size(); ++i) {
763      EXPECT_EQ(out[i].x, expected[i].x);
764      EXPECT_EQ(out[i].y, expected[i].y);
765      EXPECT_EQ(out[i].z, expected[i].z);
766   }
767}
768
769TEST_F(ComputeTest, complex_types_global_uint8)
770{
771   struct uint8 {
772      uint32_t s0; uint32_t s1; uint32_t s2; uint32_t s3;
773      uint32_t s4; uint32_t s5; uint32_t s6; uint32_t s7;
774   };
775   const char *kernel_source =
776   "__kernel void main_test(__global uint8 *inout)\n\
777   {\n\
778      uint id = get_global_id(0);\n\
779      inout[id].s01234567 = inout[id].s01234567 * 2;\n\
780   }\n";
781   auto inout = ShaderArg<struct uint8>({ { 1, 2, 3, 4, 5, 6, 7, 8 } },
782                                        SHADER_ARG_INOUT);
783   const struct uint8 expected[] = {
784      { 2, 4, 6, 8, 10, 12, 14, 16 }
785   };
786   run_shader(kernel_source, inout.size(), 1, 1, inout);
787   for (int i = 0; i < inout.size(); ++i) {
788      EXPECT_EQ(inout[i].s0, expected[i].s0);
789      EXPECT_EQ(inout[i].s1, expected[i].s1);
790      EXPECT_EQ(inout[i].s2, expected[i].s2);
791      EXPECT_EQ(inout[i].s3, expected[i].s3);
792      EXPECT_EQ(inout[i].s4, expected[i].s4);
793      EXPECT_EQ(inout[i].s5, expected[i].s5);
794      EXPECT_EQ(inout[i].s6, expected[i].s6);
795      EXPECT_EQ(inout[i].s7, expected[i].s7);
796   }
797}
798
799TEST_F(ComputeTest, complex_types_local_ulong16)
800{
801   struct ulong16 {
802      uint64_t values[16];
803   };
804   const char *kernel_source =
805   R"(__kernel void main_test(__global ulong16 *inout)
806   {
807      __local ulong16 local_array[2];
808      uint id = get_global_id(0);
809      local_array[id] = inout[id];
810      barrier(CLK_LOCAL_MEM_FENCE);
811      inout[id] = local_array[0] * 2;
812   })";
813   auto inout = ShaderArg<struct ulong16>({ { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } },
814                                        SHADER_ARG_INOUT);
815   const struct ulong16 expected[] = {
816      { 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 }
817   };
818   run_shader(kernel_source, inout.size(), 1, 1, inout);
819   for (int i = 0; i < inout.size(); ++i) {
820      for (int j = 0; j < 16; ++j) {
821         EXPECT_EQ(inout[i].values[j], expected[i].values[j]);
822      }
823   }
824}
825
826TEST_F(ComputeTest, complex_types_constant_uint8)
827{
828   struct uint8 {
829      uint32_t s0; uint32_t s1; uint32_t s2; uint32_t s3;
830      uint32_t s4; uint32_t s5; uint32_t s6; uint32_t s7;
831   };
832   const char *kernel_source =
833   "__kernel void main_test(__global uint8 *out, __constant uint8 *in)\n\
834   {\n\
835      uint id = get_global_id(0);\n\
836      out[id].s01234567 = in[id].s01234567 * 2;\n\
837   }\n";
838   auto in = ShaderArg<struct uint8>({ { 1, 2, 3, 4, 5, 6, 7, 8 } },
839                                     SHADER_ARG_INPUT);
840   auto out = ShaderArg<struct uint8>({ { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff } },
841                                      SHADER_ARG_INOUT);
842   const struct uint8 expected[] = {
843      { 2, 4, 6, 8, 10, 12, 14, 16 }
844   };
845   run_shader(kernel_source, out.size(), 1, 1, out, in);
846   for (int i = 0; i < out.size(); ++i) {
847      EXPECT_EQ(out[i].s0, expected[i].s0);
848      EXPECT_EQ(out[i].s1, expected[i].s1);
849      EXPECT_EQ(out[i].s2, expected[i].s2);
850      EXPECT_EQ(out[i].s3, expected[i].s3);
851      EXPECT_EQ(out[i].s4, expected[i].s4);
852      EXPECT_EQ(out[i].s5, expected[i].s5);
853      EXPECT_EQ(out[i].s6, expected[i].s6);
854      EXPECT_EQ(out[i].s7, expected[i].s7);
855   }
856}
857
858TEST_F(ComputeTest, DISABLED_complex_types_const_array)
859{
860   /* DISABLED because current release versions of WARP either return
861    * rubbish from reads or crash: they are not prepared to handle
862    * non-float global constants */
863   const char *kernel_source =
864   "__kernel void main_test(__global uint *output)\n\
865   {\n\
866       const uint foo[] = { 100, 101, 102, 103 };\n\
867       output[get_global_id(0)] = foo[get_global_id(0) % 4];\n\
868   }\n";
869   auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
870                                     SHADER_ARG_OUTPUT);
871   const uint32_t expected[] = {
872      100, 101, 102, 103
873   };
874   run_shader(kernel_source, output.size(), 1, 1, output);
875   for (int i = 0; i < output.size(); ++i)
876      EXPECT_EQ(output[i], expected[i]);
877}
878
879TEST_F(ComputeTest, mem_access_load_store_ordering)
880{
881   const char *kernel_source =
882   "__kernel void main_test(__global uint *output)\n\
883   {\n\
884       uint foo[4];\n\
885       foo[0] = 0x11111111;\n\
886       foo[1] = 0x22222222;\n\
887       foo[2] = 0x44444444;\n\
888       foo[3] = 0x88888888;\n\
889       foo[get_global_id(1)] -= 0x11111111; // foo[0] = 0 \n\
890       foo[0] += get_global_id(0); // foo[0] = tid\n\
891       foo[foo[get_global_id(1)]] = get_global_id(0); // foo[tid] = tid\n\
892       output[get_global_id(0)] = foo[get_global_id(0)]; // output[tid] = tid\n\
893   }\n";
894   auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
895                                     SHADER_ARG_OUTPUT);
896   const uint16_t expected[] = {
897      0, 1, 2, 3
898   };
899   run_shader(kernel_source, output.size(), 1, 1, output);
900   for (int i = 0; i < output.size(); ++i)
901      EXPECT_EQ(output[i], expected[i]);
902}
903
904TEST_F(ComputeTest, DISABLED_two_const_arrays)
905{
906   /* DISABLED because current release versions of WARP either return
907    * rubbish from reads or crash: they are not prepared to handle
908    * non-float global constants */
909   const char *kernel_source =
910   "__kernel void main_test(__global uint *output)\n\
911   {\n\
912      uint id = get_global_id(0);\n\
913      uint foo[4] = {100, 101, 102, 103};\n\
914      uint bar[4] = {1, 2, 3, 4};\n\
915      output[id] = foo[id] * bar[id];\n\
916   }\n";
917   auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
918                                     SHADER_ARG_OUTPUT);
919   const uint32_t expected[] = {
920      100, 202, 306, 412
921   };
922   run_shader(kernel_source, output.size(), 1, 1, output);
923   for (int i = 0; i < output.size(); ++i)
924      EXPECT_EQ(output[i], expected[i]);
925}
926
927TEST_F(ComputeTest, imod_pos)
928{
929   const char *kernel_source =
930   "__kernel void main_test(__global int *inout)\n\
931   {\n\
932       inout[get_global_id(0)] = inout[get_global_id(0)] % 3;\n\
933   }\n";
934   auto inout = ShaderArg<int32_t>({ -4, -3, -2, -1, 0, 1, 2, 3, 4 },
935                                   SHADER_ARG_INOUT);
936   const int32_t expected[] = {
937      -1, 0, -2, -1,  0, 1, 2, 0, 1
938   };
939   run_shader(kernel_source, inout.size(), 1, 1, inout);
940   for (int i = 0; i < inout.size(); ++i)
941      EXPECT_EQ(inout[i], expected[i]);
942}
943
944TEST_F(ComputeTest, imod_neg)
945{
946   const char *kernel_source =
947   "__kernel void main_test(__global int *inout)\n\
948   {\n\
949       inout[get_global_id(0)] = inout[get_global_id(0)] % -3;\n\
950   }\n";
951   auto inout = ShaderArg<int32_t>({ -4, -3, -2, -1, 0, 1, 2, 3, 4 },
952                                   SHADER_ARG_INOUT);
953   const int32_t expected[] = {
954      -1, 0, -2, -1,  0, 1, 2, 0, 1
955   };
956   run_shader(kernel_source, inout.size(), 1, 1, inout);
957   for (int i = 0; i < inout.size(); ++i)
958      EXPECT_EQ(inout[i], expected[i]);
959}
960
961TEST_F(ComputeTest, umod)
962{
963   const char *kernel_source =
964   "__kernel void main_test(__global uint *inout)\n\
965   {\n\
966       inout[get_global_id(0)] = inout[get_global_id(0)] % 0xfffffffc;\n\
967   }\n";
968   auto inout = ShaderArg<uint32_t>({ 0xfffffffa, 0xfffffffb, 0xfffffffc, 0xfffffffd, 0xfffffffe },
969                                    SHADER_ARG_INOUT);
970   const uint32_t expected[] = {
971      0xfffffffa, 0xfffffffb, 0, 1, 2
972   };
973   run_shader(kernel_source, inout.size(), 1, 1, inout);
974   for (int i = 0; i < inout.size(); ++i)
975      EXPECT_EQ(inout[i], expected[i]);
976}
977
978TEST_F(ComputeTest, rotate)
979{
980   const char *kernel_source =
981   "__kernel void main_test(__global uint *inout)\n\
982   {\n\
983       inout[get_global_id(0)] = rotate(inout[get_global_id(0)], (uint)get_global_id(0) * 4);\n\
984   }\n";
985   auto inout = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
986                                    SHADER_ARG_INOUT);
987   const uint32_t expected[] = {
988      0xdeadbeef, 0xeadbeefd, 0xadbeefde, 0xdbeefdea
989   };
990   run_shader(kernel_source, inout.size(), 1, 1, inout);
991   for (int i = 0; i < inout.size(); ++i)
992      EXPECT_EQ(inout[i], expected[i]);
993}
994
995TEST_F(ComputeTest, popcount)
996{
997   const char *kernel_source =
998   "__kernel void main_test(__global uint *inout)\n\
999   {\n\
1000       inout[get_global_id(0)] = popcount(inout[get_global_id(0)]);\n\
1001   }\n";
1002   auto inout = ShaderArg<uint32_t>({ 0, 0x1, 0x3, 0x101, 0x110011, ~0u },
1003                                    SHADER_ARG_INOUT);
1004   const uint32_t expected[] = {
1005      0, 1, 2, 2, 4, 32
1006   };
1007   run_shader(kernel_source, inout.size(), 1, 1, inout);
1008   for (int i = 0; i < inout.size(); ++i)
1009      EXPECT_EQ(inout[i], expected[i]);
1010}
1011
1012TEST_F(ComputeTest, hadd)
1013{
1014   const char *kernel_source =
1015   "__kernel void main_test(__global uint *inout)\n\
1016   {\n\
1017       inout[get_global_id(0)] = hadd(inout[get_global_id(0)], 1u << 31);\n\
1018   }\n";
1019   auto inout = ShaderArg<uint32_t>({ 0, 1, 2, 3, 0xfffffffc, 0xfffffffd, 0xfffffffe, 0xffffffff },
1020                                    SHADER_ARG_INOUT);
1021   const uint32_t expected[] = {
1022      (1u << 31) >> 1,
1023      ((1u << 31) + 1) >> 1,
1024      ((1u << 31) + 2) >> 1,
1025      ((1u << 31) + 3) >> 1,
1026      ((1ull << 31) + 0xfffffffc) >> 1,
1027      ((1ull << 31) + 0xfffffffd) >> 1,
1028      ((1ull << 31) + 0xfffffffe) >> 1,
1029      ((1ull << 31) + 0xffffffff) >> 1,
1030   };
1031   run_shader(kernel_source, inout.size(), 1, 1, inout);
1032   for (int i = 0; i < inout.size(); ++i)
1033      EXPECT_EQ(inout[i], expected[i]);
1034}
1035
1036TEST_F(ComputeTest, rhadd)
1037{
1038   const char *kernel_source =
1039   "__kernel void main_test(__global uint *inout)\n\
1040   {\n\
1041       inout[get_global_id(0)] = rhadd(inout[get_global_id(0)], 1u << 31);\n\
1042   }\n";
1043   auto inout = ShaderArg<uint32_t>({ 0, 1, 2, 3, 0xfffffffc, 0xfffffffd, 0xfffffffe, 0xffffffff },
1044                                    SHADER_ARG_INOUT);
1045   const uint32_t expected[] = {
1046      ((1u << 31) + 1) >> 1,
1047      ((1u << 31) + 2) >> 1,
1048      ((1u << 31) + 3) >> 1,
1049      ((1u << 31) + 4) >> 1,
1050      ((1ull << 31) + 0xfffffffd) >> 1,
1051      ((1ull << 31) + 0xfffffffe) >> 1,
1052      ((1ull << 31) + 0xffffffff) >> 1,
1053      ((1ull << 31) + (1ull << 32)) >> 1,
1054   };
1055   run_shader(kernel_source, inout.size(), 1, 1, inout);
1056   for (int i = 0; i < inout.size(); ++i)
1057      EXPECT_EQ(inout[i], expected[i]);
1058}
1059
1060TEST_F(ComputeTest, add_sat)
1061{
1062   const char *kernel_source =
1063   "__kernel void main_test(__global uint *inout)\n\
1064   {\n\
1065       inout[get_global_id(0)] = add_sat(inout[get_global_id(0)], 2u);\n\
1066   }\n";
1067   auto inout = ShaderArg<uint32_t>({ 0xffffffff - 3, 0xffffffff - 2, 0xffffffff - 1, 0xffffffff },
1068                                    SHADER_ARG_INOUT);
1069   const uint32_t expected[] = {
1070      0xffffffff - 1, 0xffffffff, 0xffffffff, 0xffffffff
1071   };
1072   run_shader(kernel_source, inout.size(), 1, 1, inout);
1073   for (int i = 0; i < inout.size(); ++i)
1074      EXPECT_EQ(inout[i], expected[i]);
1075}
1076
1077TEST_F(ComputeTest, sub_sat)
1078{
1079   const char *kernel_source =
1080   "__kernel void main_test(__global uint *inout)\n\
1081   {\n\
1082       inout[get_global_id(0)] = sub_sat(inout[get_global_id(0)], 2u);\n\
1083   }\n";
1084   auto inout = ShaderArg<uint32_t>({ 0, 1, 2, 3 }, SHADER_ARG_INOUT);
1085   const uint32_t expected[] = {
1086      0, 0, 0, 1
1087   };
1088   run_shader(kernel_source, inout.size(), 1, 1, inout);
1089   for (int i = 0; i < inout.size(); ++i)
1090      EXPECT_EQ(inout[i], expected[i]);
1091}
1092
1093TEST_F(ComputeTest, mul_hi)
1094{
1095   const char *kernel_source =
1096   "__kernel void main_test(__global uint *inout)\n\
1097   {\n\
1098       inout[get_global_id(0)] = mul_hi(inout[get_global_id(0)], 1u << 31);\n\
1099   }\n";
1100   auto inout = ShaderArg<uint32_t>({ 0, 1, 2, 3, (1u << 31) }, SHADER_ARG_INOUT);
1101   const uint32_t expected[] = {
1102      0, 0, 1, 1, (1u << 30)
1103   };
1104   run_shader(kernel_source, inout.size(), 1, 1, inout);
1105   for (int i = 0; i < inout.size(); ++i)
1106      EXPECT_EQ(inout[i], expected[i]);
1107}
1108
1109TEST_F(ComputeTest, ldexp_x)
1110{
1111   const char *kernel_source =
1112   "__kernel void main_test(__global float *inout)\n\
1113   {\n\
1114       inout[get_global_id(0)] = ldexp(inout[get_global_id(0)], 5);\n\
1115   }\n";
1116   auto inout = ShaderArg<float>({ 0.0f, 0.5f, 1.0f, 2.0f }, SHADER_ARG_INOUT);
1117   const float expected[] = {
1118      ldexp(0.0f, 5), ldexp(0.5f, 5), ldexp(1.0f, 5), ldexp(2.0f, 5)
1119   };
1120   run_shader(kernel_source, inout.size(), 1, 1, inout);
1121   for (int i = 0; i < inout.size(); ++i)
1122      EXPECT_FLOAT_EQ(inout[i], expected[i]);
1123}
1124
1125TEST_F(ComputeTest, ldexp_y)
1126{
1127   const char *kernel_source =
1128   "__kernel void main_test(__global float *inout)\n\
1129   {\n\
1130       inout[get_global_id(0)] = ldexp(inout[get_global_id(0)], get_global_id(0));\n\
1131   }\n";
1132   auto inout = ShaderArg<float>({ 0.25f, 0.5f, 0.75f, 1.0f }, SHADER_ARG_INOUT);
1133   const float expected[] = {
1134      ldexp(0.25f, 0), ldexp(0.5f, 1), ldexp(0.75f, 2), ldexp(1.0f, 3)
1135   };
1136   run_shader(kernel_source, inout.size(), 1, 1, inout);
1137   for (int i = 0; i < inout.size(); ++i)
1138      EXPECT_FLOAT_EQ(inout[i], expected[i]);
1139}
1140
1141TEST_F(ComputeTest, frexp_ret)
1142{
1143   const char *kernel_source =
1144   "__kernel void main_test(__global float *inout)\n\
1145   {\n\
1146       int exp;\n\
1147       inout[get_global_id(0)] = frexp(inout[get_global_id(0)], &exp);\n\
1148   }\n";
1149   auto inout = ShaderArg<float>({ 0.0f, 0.5f, 1.0f, 3.0f }, SHADER_ARG_INOUT);
1150   const float expected[] = {
1151      0.0f, 0.5f, 0.5f, 0.75f
1152   };
1153   run_shader(kernel_source, inout.size(), 1, 1, inout);
1154   for (int i = 0; i < inout.size(); ++i)
1155      EXPECT_FLOAT_EQ(inout[i], expected[i]);
1156}
1157
1158TEST_F(ComputeTest, frexp_exp)
1159{
1160   const char *kernel_source =
1161   "__kernel void main_test(__global float *inout)\n\
1162   {\n\
1163       int exp;\n\
1164       frexp(inout[get_global_id(0)], &exp);\n\
1165       inout[get_global_id(0)] = (float)exp;\n\
1166   }\n";
1167   auto inout = ShaderArg<float>({ 0.0f, 0.5f, 1.0f, 3.0f }, SHADER_ARG_INOUT);
1168   const float expected[] = {
1169      0.0f, 0.0f, 1.0f, 2.0f
1170   };
1171   run_shader(kernel_source, inout.size(), 1, 1, inout);
1172   for (int i = 0; i < inout.size(); ++i)
1173      EXPECT_FLOAT_EQ(inout[i], expected[i]);
1174}
1175
1176TEST_F(ComputeTest, clz)
1177{
1178   const char *kernel_source =
1179   "__kernel void main_test(__global uint *inout)\n\
1180   {\n\
1181       inout[get_global_id(0)] = clz(inout[get_global_id(0)]);\n\
1182   }\n";
1183   auto inout = ShaderArg<uint32_t>({ 0, 1, 0xffff,  (1u << 30), (1u << 31) }, SHADER_ARG_INOUT);
1184   const uint32_t expected[] = {
1185      32, 31, 16, 1, 0
1186   };
1187   run_shader(kernel_source, inout.size(), 1, 1, inout);
1188   for (int i = 0; i < inout.size(); ++i)
1189      EXPECT_FLOAT_EQ(inout[i], expected[i]);
1190}
1191
1192TEST_F(ComputeTest, sin)
1193{
1194   struct sin_vals { float in; float clc; float native; };
1195   const char *kernel_source =
1196   "struct sin_vals { float in; float clc; float native; };\n\
1197   __kernel void main_test(__global struct sin_vals *inout)\n\
1198   {\n\
1199       inout[get_global_id(0)].clc = sin(inout[get_global_id(0)].in);\n\
1200       inout[get_global_id(0)].native = native_sin(inout[get_global_id(0)].in);\n\
1201   }\n";
1202   const vector<sin_vals> input = {
1203      { 0.0f, 0.0f, 0.0f },
1204      { 1.0f, 0.0f, 0.0f },
1205      { 2.0f, 0.0f, 0.0f },
1206      { 3.0f, 0.0f, 0.0f },
1207   };
1208   auto inout = ShaderArg<sin_vals>(input, SHADER_ARG_INOUT);
1209   const struct sin_vals expected[] = {
1210      { 0.0f, 0.0f,       0.0f       },
1211      { 1.0f, sin(1.0f), sin(1.0f) },
1212      { 2.0f, sin(2.0f), sin(2.0f) },
1213      { 3.0f, sin(3.0f), sin(3.0f) },
1214   };
1215   run_shader(kernel_source, inout.size(), 1, 1, inout);
1216   for (int i = 0; i < inout.size(); ++i) {
1217      EXPECT_FLOAT_EQ(inout[i].in, inout[i].in);
1218      EXPECT_FLOAT_EQ(inout[i].clc, inout[i].clc);
1219      EXPECT_NEAR(inout[i].clc, inout[i].native, 0.008f); // range from DXIL spec
1220   }
1221}
1222
1223TEST_F(ComputeTest, DISABLED_cosh)
1224{
1225   /* Disabled because of WARP failures, where we fetch incorrect results when
1226    * sourcing from non-float ICBs */
1227   const char *kernel_source =
1228   "__kernel void main_test(__global float *inout)\n\
1229   {\n\
1230       inout[get_global_id(0)] = cosh(inout[get_global_id(0)]);\n\
1231   }\n";
1232   auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);
1233   const float expected[] = {
1234      cosh(0.0f), cosh(1.0f), cosh(2.0f), cosh(3.0f)
1235   };
1236   run_shader(kernel_source, inout.size(), 1, 1, inout);
1237   for (int i = 0; i < inout.size(); ++i)
1238      EXPECT_FLOAT_EQ(inout[i], expected[i]);
1239}
1240
1241TEST_F(ComputeTest, exp)
1242{
1243   const char *kernel_source =
1244   "__kernel void main_test(__global float *inout)\n\
1245   {\n\
1246       inout[get_global_id(0)] = native_exp(inout[get_global_id(0)]);\n\
1247   }\n";
1248   auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);
1249   const float expected[] = {
1250      exp(0.0f), exp(1.0f), exp(2.0f), exp(3.0f)
1251   };
1252   run_shader(kernel_source, inout.size(), 1, 1, inout);
1253   for (int i = 0; i < inout.size(); ++i)
1254      EXPECT_FLOAT_EQ(inout[i], expected[i]);
1255}
1256
1257TEST_F(ComputeTest, exp10)
1258{
1259   const char *kernel_source =
1260   "__kernel void main_test(__global float *inout)\n\
1261   {\n\
1262       inout[get_global_id(0)] = native_exp10(inout[get_global_id(0)]);\n\
1263   }\n";
1264   auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);
1265   const float expected[] = {
1266      pow(10.0f, 0.0f), pow(10.0f, 1.0f), pow(10.0f, 2.0f), pow(10.0f, 3.0f)
1267   };
1268   run_shader(kernel_source, inout.size(), 1, 1, inout);
1269   for (int i = 0; i < inout.size(); ++i)
1270      EXPECT_FLOAT_EQ(inout[i], expected[i]);
1271}
1272
1273TEST_F(ComputeTest, exp2)
1274{
1275   const char *kernel_source =
1276   "__kernel void main_test(__global float *inout)\n\
1277   {\n\
1278       inout[get_global_id(0)] = native_exp2(inout[get_global_id(0)]);\n\
1279   }\n";
1280   auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);
1281   const float expected[] = {
1282      pow(2.0f, 0.0f), pow(2.0f, 1.0f), pow(2.0f, 2.0f), pow(2.0f, 3.0f)
1283   };
1284   run_shader(kernel_source, inout.size(), 1, 1, inout);
1285   for (int i = 0; i < inout.size(); ++i)
1286      EXPECT_FLOAT_EQ(inout[i], expected[i]);
1287}
1288
1289TEST_F(ComputeTest, log)
1290{
1291   const char *kernel_source =
1292   "__kernel void main_test(__global float *inout)\n\
1293   {\n\
1294       inout[get_global_id(0)] = native_log(inout[get_global_id(0)]);\n\
1295   }\n";
1296   auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);
1297   const float expected[] = {
1298      log(0.0f), log(1.0f), log(2.0f), log(3.0f)
1299   };
1300   run_shader(kernel_source, inout.size(), 1, 1, inout);
1301   for (int i = 0; i < inout.size(); ++i)
1302      EXPECT_FLOAT_EQ(inout[i], expected[i]);
1303}
1304
1305TEST_F(ComputeTest, log10)
1306{
1307   const char *kernel_source =
1308   "__kernel void main_test(__global float *inout)\n\
1309   {\n\
1310       inout[get_global_id(0)] = native_log10(inout[get_global_id(0)]);\n\
1311   }\n";
1312   auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);
1313   const float expected[] = {
1314      log10(0.0f), log10(1.0f), log10(2.0f), log10(3.0f)
1315   };
1316   run_shader(kernel_source, inout.size(), 1, 1, inout);
1317   for (int i = 0; i < inout.size(); ++i)
1318      EXPECT_FLOAT_EQ(inout[i], expected[i]);
1319}
1320
1321TEST_F(ComputeTest, log2)
1322{
1323   const char *kernel_source =
1324   "__kernel void main_test(__global float *inout)\n\
1325   {\n\
1326       inout[get_global_id(0)] = native_log2(inout[get_global_id(0)]);\n\
1327   }\n";
1328   auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);
1329   const float expected[] = {
1330      log(0.0f) / log(2), log(1.0f) / log(2), log(2.0f) / log(2), log(3.0f) / log(2)
1331   };
1332   run_shader(kernel_source, inout.size(), 1, 1, inout);
1333   for (int i = 0; i < inout.size(); ++i)
1334      EXPECT_FLOAT_EQ(inout[i], expected[i]);
1335}
1336
1337TEST_F(ComputeTest, rint)
1338{
1339   const char *kernel_source =
1340   "__kernel void main_test(__global float *inout)\n\
1341   {\n\
1342      inout[get_global_id(0)] = rint(inout[get_global_id(0)]);\n\
1343   }\n";
1344
1345   auto inout = ShaderArg<float>({ 0.5f, 1.5f, -0.5f, -1.5f, 1.4f }, SHADER_ARG_INOUT);
1346   const float expected[] = {
1347      0.0f, 2.0f, 0.0f, -2.0f, 1.0f,
1348   };
1349   run_shader(kernel_source, inout.size(), 1, 1, inout);
1350   for (int i = 0; i < inout.size(); ++i)
1351      EXPECT_FLOAT_EQ(inout[i], expected[i]);
1352}
1353
1354TEST_F(ComputeTest, round)
1355{
1356   const char *kernel_source =
1357   "__kernel void main_test(__global float *inout)\n\
1358   {\n\
1359       inout[get_global_id(0)] = round(inout[get_global_id(0)]);\n\
1360   }\n";
1361   auto inout = ShaderArg<float>({ 0, 0.3f, -0.3f, 0.5f, -0.5f, 1.1f, -1.1f },
1362                                 SHADER_ARG_INOUT);
1363   const float expected[] = {
1364      0.0f, 0.0f, -0.0f, 1.0f, -1.0f, 1.0f, -1.0f
1365   };
1366   run_shader(kernel_source, inout.size(), 1, 1, inout);
1367   for (int i = 0; i < inout.size(); ++i)
1368      EXPECT_FLOAT_EQ(inout[i], expected[i]);
1369}
1370
1371TEST_F(ComputeTest, arg_by_val)
1372{
1373   const char *kernel_source =
1374   "__kernel void main_test(__global float *inout, float mul)\n\
1375   {\n\
1376       inout[get_global_id(0)] = inout[get_global_id(0)] * mul;\n\
1377   }\n";
1378   auto inout = ShaderArg<float>({ 0, 0.3f, -0.3f, 0.5f, -0.5f, 1.1f, -1.1f },
1379                                 SHADER_ARG_INOUT);
1380   auto mul = ShaderArg<float>(10.0f, SHADER_ARG_INPUT);
1381   const float expected[] = {
1382      0.0f, 3.0f, -3.0f, 5.0f, -5.0f, 11.0f, -11.0f
1383   };
1384   run_shader(kernel_source, inout.size(), 1, 1, inout, mul);
1385   for (int i = 0; i < inout.size(); ++i)
1386      EXPECT_FLOAT_EQ(inout[i], expected[i]);
1387}
1388
1389TEST_F(ComputeTest, uint8_by_val)
1390{
1391   struct uint8 {
1392      uint32_t s0; uint32_t s1; uint32_t s2; uint32_t s3;
1393      uint32_t s4; uint32_t s5; uint32_t s6; uint32_t s7;
1394   };
1395   const char *kernel_source =
1396   "__kernel void main_test(__global uint *out, uint8 val)\n\
1397   {\n\
1398       out[get_global_id(0)] = val.s0 + val.s1 + val.s2 + val.s3 +\n\
1399                               val.s4 + val.s5 + val.s6 + val.s7;\n\
1400   }\n";
1401   auto out = ShaderArg<uint32_t>({ 0 }, SHADER_ARG_OUTPUT);
1402   auto val = ShaderArg<struct uint8>({ {0, 1, 2, 3, 4, 5, 6, 7 }}, SHADER_ARG_INPUT);
1403   const uint32_t expected[] = { 0 + 1 + 2 + 3 + 4 + 5 + 6 + 7 };
1404   run_shader(kernel_source, out.size(), 1, 1, out, val);
1405   for (int i = 0; i < out.size(); ++i)
1406      EXPECT_EQ(out[i], expected[i]);
1407}
1408
1409TEST_F(ComputeTest, link)
1410{
1411   const char *foo_src =
1412   "float foo(float in)\n\
1413   {\n\
1414       return in * in;\n\
1415   }\n";
1416   const char *kernel_source =
1417   "float foo(float in);\n\
1418   __kernel void main_test(__global float *inout)\n\
1419   {\n\
1420       inout[get_global_id(0)] = foo(inout[get_global_id(0)]);\n\
1421   }\n";
1422   std::vector<const char *> srcs = { foo_src, kernel_source };
1423   auto inout = ShaderArg<float>({ 2.0f }, SHADER_ARG_INOUT);
1424   const float expected[] = {
1425      4.0f,
1426   };
1427   run_shader(srcs, inout.size(), 1, 1, inout);
1428   for (int i = 0; i < inout.size(); ++i)
1429      EXPECT_EQ(inout[i], expected[i]);
1430}
1431
1432TEST_F(ComputeTest, link_library)
1433{
1434   const char *bar_src =
1435   "float bar(float in)\n\
1436   {\n\
1437      return in * 5;\n\
1438   }\n";
1439   const char *foo_src =
1440   "float bar(float in);\n\
1441   float foo(float in)\n\
1442   {\n\
1443       return in * bar(in);\n\
1444   }\n";
1445   const char *kernel_source =
1446   "float foo(float in);\n\
1447   __kernel void main_test(__global float *inout)\n\
1448   {\n\
1449       inout[get_global_id(0)] = foo(inout[get_global_id(0)]);\n\
1450   }\n";
1451   std::vector<Shader> libraries = {
1452      compile({ bar_src, kernel_source }, {}, true),
1453      compile({ foo_src }, {}, true)
1454   };
1455   Shader exe = link(libraries);
1456   auto inout = ShaderArg<float>({ 2.0f }, SHADER_ARG_INOUT);
1457   const float expected[] = {
1458      20.0f,
1459   };
1460   run_shader(exe, { (unsigned)inout.size(), 1, 1 }, inout);
1461   for (int i = 0; i < inout.size(); ++i)
1462      EXPECT_EQ(inout[i], expected[i]);
1463}
1464
1465TEST_F(ComputeTest, localvar)
1466{
1467   const char *kernel_source =
1468   "__kernel __attribute__((reqd_work_group_size(2, 1, 1)))\n\
1469   void main_test(__global float *inout)\n\
1470   {\n\
1471      __local float2 tmp[2];\n\
1472      tmp[get_local_id(0)].x = inout[get_global_id(0)] + 1;\n\
1473      tmp[get_local_id(0)].y = inout[get_global_id(0)] - 1;\n\
1474      barrier(CLK_LOCAL_MEM_FENCE);\n\
1475      inout[get_global_id(0)] = tmp[get_local_id(0) % 2].x * tmp[(get_local_id(0) + 1) % 2].y;\n\
1476   }\n";
1477
1478   auto inout = ShaderArg<float>({ 2.0f, 4.0f }, SHADER_ARG_INOUT);
1479   const float expected[] = {
1480      9.0f, 5.0f
1481   };
1482   run_shader(kernel_source, inout.size(), 1, 1, inout);
1483   for (int i = 0; i < inout.size(); ++i)
1484      EXPECT_EQ(inout[i], expected[i]);
1485}
1486
1487TEST_F(ComputeTest, localvar_uchar2)
1488{
1489   const char *kernel_source =
1490   "__attribute__((reqd_work_group_size(2, 1, 1)))\n\
1491   __kernel void main_test(__global uchar *inout)\n\
1492   {\n\
1493      __local uchar2 tmp[2];\n\
1494      tmp[get_local_id(0)].x = inout[get_global_id(0)] + 1;\n\
1495      tmp[get_local_id(0)].y = inout[get_global_id(0)] - 1;\n\
1496      barrier(CLK_LOCAL_MEM_FENCE);\n\
1497      inout[get_global_id(0)] = tmp[get_local_id(0) % 2].x * tmp[(get_local_id(0) + 1) % 2].y;\n\
1498   }\n";
1499
1500   auto inout = ShaderArg<uint8_t>({ 2, 4 }, SHADER_ARG_INOUT);
1501   const uint8_t expected[] = { 9, 5 };
1502   run_shader(kernel_source, inout.size(), 1, 1, inout);
1503   for (int i = 0; i < inout.size(); ++i)
1504      EXPECT_EQ(inout[i], expected[i]);
1505}
1506
1507TEST_F(ComputeTest, work_group_size_hint)
1508{
1509   const char *kernel_source =
1510   "__attribute__((work_group_size_hint(2, 1, 1)))\n\
1511   __kernel void main_test(__global uint *output)\n\
1512   {\n\
1513       output[get_global_id(0)] = get_local_id(0);\n\
1514   }\n";
1515   auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
1516                                     SHADER_ARG_OUTPUT);
1517   const uint32_t expected[] = {
1518      0, 1, 2, 3
1519   };
1520   run_shader(kernel_source, output.size(), 1, 1, output);
1521   for (int i = 0; i < output.size(); ++i)
1522      EXPECT_EQ(output[i], expected[i]);
1523}
1524
1525TEST_F(ComputeTest, reqd_work_group_size)
1526{
1527   const char *kernel_source =
1528   "__attribute__((reqd_work_group_size(2, 1, 1)))\n\
1529   __kernel void main_test(__global uint *output)\n\
1530   {\n\
1531       output[get_global_id(0)] = get_local_id(0);\n\
1532   }\n";
1533   auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
1534                                     SHADER_ARG_OUTPUT);
1535   const uint32_t expected[] = {
1536      0, 1, 0, 1
1537   };
1538   run_shader(kernel_source, output.size(), 1, 1, output);
1539   for (int i = 0; i < output.size(); ++i)
1540      EXPECT_EQ(output[i], expected[i]);
1541}
1542
1543TEST_F(ComputeTest, image)
1544{
1545   const char* kernel_source =
1546   "__kernel void main_test(read_only image2d_t input, write_only image2d_t output)\n\
1547   {\n\
1548      int2 coords = (int2)(get_global_id(0), get_global_id(1));\n\
1549      write_imagef(output, coords, read_imagef(input, coords));\n\
1550   }\n";
1551   Shader shader = compile(std::vector<const char*>({ kernel_source }));
1552   validate(shader);
1553}
1554
1555TEST_F(ComputeTest, image_two_reads)
1556{
1557   const char* kernel_source =
1558   "__kernel void main_test(image2d_t image, int is_float, __global float* output)\n\
1559   {\n\
1560      if (is_float)\n\
1561         output[get_global_id(0)] = read_imagef(image, (int2)(0, 0)).x;\n\
1562      else \n\
1563         output[get_global_id(0)] = (float)read_imagei(image, (int2)(0, 0)).x;\n\
1564   }\n";
1565   Shader shader = compile(std::vector<const char*>({ kernel_source }));
1566   validate(shader);
1567}
1568
1569TEST_F(ComputeTest, image_read_write)
1570{
1571   const char *kernel_source =
1572   R"(__kernel void main_test(read_write image2d_t image)
1573   {
1574      int2 coords = (int2)(get_global_id(0), get_global_id(1));
1575      write_imagef(image, coords, read_imagef(image, coords) + (float4)(1.0f, 1.0f, 1.0f, 1.0f));
1576   })";
1577   Shader shader = compile(std::vector<const char*>({ kernel_source }), { "-cl-std=cl3.0" });
1578   validate(shader);
1579}
1580
1581TEST_F(ComputeTest, sampler)
1582{
1583   const char* kernel_source =
1584   "__kernel void main_test(image2d_t image, sampler_t sampler, __global float* output)\n\
1585   {\n\
1586      output[get_global_id(0)] = read_imagef(image, sampler, (int2)(0, 0)).x;\n\
1587   }\n";
1588   Shader shader = compile(std::vector<const char*>({ kernel_source }));
1589   validate(shader);
1590}
1591
1592TEST_F(ComputeTest, image_dims)
1593{
1594   const char* kernel_source =
1595   "__kernel void main_test(image2d_t roimage, write_only image2d_t woimage, __global uint* output)\n\
1596   {\n\
1597      output[get_global_id(0)] = get_image_width(roimage);\n\
1598      output[get_global_id(0) + 1] = get_image_width(woimage);\n\
1599   }\n";
1600   Shader shader = compile(std::vector<const char*>({ kernel_source }));
1601   validate(shader);
1602}
1603
1604TEST_F(ComputeTest, image_format)
1605{
1606   const char* kernel_source =
1607   "__kernel void main_test(image2d_t roimage, write_only image2d_t woimage, __global uint* output)\n\
1608   {\n\
1609      output[get_global_id(0)] = get_image_channel_data_type(roimage);\n\
1610      output[get_global_id(0) + 1] = get_image_channel_order(woimage);\n\
1611   }\n";
1612   Shader shader = compile(std::vector<const char*>({ kernel_source }));
1613   validate(shader);
1614}
1615
1616TEST_F(ComputeTest, image1d_buffer_t)
1617{
1618   const char* kernel_source =
1619   "__kernel void main_test(read_only image1d_buffer_t input, write_only image1d_buffer_t output)\n\
1620   {\n\
1621      write_imageui(output, get_global_id(0), read_imageui(input, get_global_id(0)));\n\
1622   }\n";
1623   Shader shader = compile(std::vector<const char*>({ kernel_source }));
1624   validate(shader);
1625}
1626
1627TEST_F(ComputeTest, local_ptr)
1628{
1629   struct uint2 { uint32_t x, y; };
1630   const char *kernel_source =
1631   "__kernel void main_test(__global uint *inout, __local uint2 *tmp)\n\
1632   {\n\
1633      tmp[get_local_id(0)].x = inout[get_global_id(0)] + 1;\n\
1634      tmp[get_local_id(0)].y = inout[get_global_id(0)] - 1;\n\
1635      barrier(CLK_LOCAL_MEM_FENCE);\n\
1636      inout[get_global_id(0)] = tmp[get_local_id(0) % 2].x * tmp[(get_local_id(0) + 1) % 2].y;\n\
1637   }\n";
1638   auto inout = ShaderArg<uint32_t>({ 2, 4 }, SHADER_ARG_INOUT);
1639   auto tmp = ShaderArg<struct uint2>(std::vector<struct uint2>(4096), SHADER_ARG_INPUT);
1640   const uint8_t expected[] = { 9, 5 };
1641   run_shader(kernel_source, inout.size(), 1, 1, inout, tmp);
1642   for (int i = 0; i < inout.size(); ++i)
1643      EXPECT_EQ(inout[i], expected[i]);
1644}
1645
1646TEST_F(ComputeTest, two_local_ptrs)
1647{
1648   struct uint2 { uint32_t x, y; };
1649   const char *kernel_source =
1650   "__kernel void main_test(__global uint *inout, __local uint2 *tmp, __local uint *tmp2)\n\
1651   {\n\
1652      tmp[get_local_id(0)].x = inout[get_global_id(0)] + 1;\n\
1653      tmp[get_local_id(0)].y = inout[get_global_id(0)] - 1;\n\
1654      tmp2[get_local_id(0)] = get_global_id(0);\n\
1655      barrier(CLK_LOCAL_MEM_FENCE);\n\
1656      inout[get_global_id(0)] = tmp[get_local_id(0) % 2].x * tmp[(get_local_id(0) + 1) % 2].y + tmp2[get_local_id(0) % 2];\n\
1657   }\n";
1658   auto inout = ShaderArg<uint32_t>({ 2, 4 }, SHADER_ARG_INOUT);
1659   auto tmp = ShaderArg<struct uint2>(std::vector<struct uint2>(1024), SHADER_ARG_INPUT);
1660   auto tmp2 = ShaderArg<uint32_t>(std::vector<uint32_t>(1024), SHADER_ARG_INPUT);
1661   const uint8_t expected[] = { 9, 6 };
1662   run_shader(kernel_source, inout.size(), 1, 1, inout, tmp, tmp2);
1663   for (int i = 0; i < inout.size(); ++i)
1664      EXPECT_EQ(inout[i], expected[i]);
1665}
1666
1667TEST_F(ComputeTest, int8_to_float)
1668{
1669   const char *kernel_source =
1670   "__kernel void main_test(__global char* in, __global float* out)\n\
1671   {\n\
1672      uint pos = get_global_id(0);\n\
1673      out[pos] = in[pos] / 100.0f;\n\
1674   }";
1675   auto in = ShaderArg<char>({ 10, 20, 30, 40 }, SHADER_ARG_INPUT);
1676   auto out = ShaderArg<float>(std::vector<float>(4, std::numeric_limits<float>::infinity()), SHADER_ARG_OUTPUT);
1677   const float expected[] = { 0.1f, 0.2f, 0.3f, 0.4f };
1678   run_shader(kernel_source, in.size(), 1, 1, in, out);
1679   for (int i = 0; i < in.size(); ++i)
1680      EXPECT_FLOAT_EQ(out[i], expected[i]);
1681}
1682
1683TEST_F(ComputeTest, vec_hint_float4)
1684{
1685   const char *kernel_source =
1686   "__kernel __attribute__((vec_type_hint(float4))) void main_test(__global float *inout)\n\
1687   {\n\
1688      inout[get_global_id(0)] *= inout[get_global_id(1)];\n\
1689   }";
1690   Shader shader = compile({ kernel_source });
1691   EXPECT_EQ(shader.metadata->kernels[0].vec_hint_size, 4);
1692   EXPECT_EQ(shader.metadata->kernels[0].vec_hint_type, CLC_VEC_HINT_TYPE_FLOAT);
1693}
1694
1695TEST_F(ComputeTest, vec_hint_uchar2)
1696{
1697   const char *kernel_source =
1698   "__kernel __attribute__((vec_type_hint(uchar2))) void main_test(__global float *inout)\n\
1699   {\n\
1700      inout[get_global_id(0)] *= inout[get_global_id(1)];\n\
1701   }";
1702   Shader shader = compile({ kernel_source });
1703   EXPECT_EQ(shader.metadata->kernels[0].vec_hint_size, 2);
1704   EXPECT_EQ(shader.metadata->kernels[0].vec_hint_type, CLC_VEC_HINT_TYPE_CHAR);
1705}
1706
1707TEST_F(ComputeTest, vec_hint_none)
1708{
1709   const char *kernel_source =
1710   "__kernel void main_test(__global float *inout)\n\
1711   {\n\
1712      inout[get_global_id(0)] *= inout[get_global_id(1)];\n\
1713   }";
1714   Shader shader = compile({ kernel_source });
1715   EXPECT_EQ(shader.metadata->kernels[0].vec_hint_size, 0);
1716}
1717
1718TEST_F(ComputeTest, DISABLED_debug_layer_failure)
1719{
1720   const char *kernel_source =
1721   "__kernel void main_test(__global float *inout, float mul)\n\
1722   {\n\
1723       inout[get_global_id(0)] = inout[get_global_id(0)] * mul;\n\
1724   }\n";
1725   auto inout = ShaderArg<float>({ 0, 0.3f, -0.3f, 0.5f, -0.5f, 1.1f, -1.1f },
1726                                 SHADER_ARG_INOUT);
1727   auto mul = ShaderArg<float>(10.0f, SHADER_ARG_INPUT);
1728   const float expected[] = {
1729      0.0f, 3.0f, -3.0f, 5.0f, -5.0f, 11.0f, -11.0f
1730   };
1731   ComPtr<ID3D12InfoQueue> info_queue;
1732   dev->QueryInterface(info_queue.ReleaseAndGetAddressOf());
1733   if (!info_queue) {
1734      GTEST_SKIP() << "No info queue";
1735      return;
1736   }
1737
1738   info_queue->AddApplicationMessage(D3D12_MESSAGE_SEVERITY_ERROR, "This should cause the test to fail");
1739   run_shader(kernel_source, inout.size(), 1, 1, inout, mul);
1740   for (int i = 0; i < inout.size(); ++i)
1741      EXPECT_FLOAT_EQ(inout[i], expected[i]);
1742}
1743
1744TEST_F(ComputeTest, compiler_defines)
1745{
1746   const char *kernel_source =
1747      "__kernel void main_test(__global int* out)\n\
1748   {\n\
1749      out[0] = OUT_VAL0;\n\
1750      out[1] = __OPENCL_C_VERSION__;\n\
1751   }";
1752   auto out = ShaderArg<int>(std::vector<int>(2, 0), SHADER_ARG_OUTPUT);
1753   CompileArgs compile_args = { 1, 1, 1 };
1754   compile_args.compiler_command_line = { "-DOUT_VAL0=5", "-cl-std=cl" };
1755   std::vector<RawShaderArg *> raw_args = { &out };
1756   run_shader({ kernel_source }, compile_args, out);
1757   EXPECT_EQ(out[0], 5);
1758   EXPECT_EQ(out[1], 100);
1759}
1760
1761/* There's a bug in WARP turning atomic_add(ptr, x) into
1762 * atomic_add(ptr, x * 4). Works fine on intel HW.
1763 */
1764TEST_F(ComputeTest, DISABLED_global_atomic_add)
1765{
1766   const char *kernel_source =
1767   "__kernel void main_test(__global int *inout, __global int *old)\n\
1768   {\n\
1769      old[get_global_id(0)] = atomic_add(inout + get_global_id(0), 3);\n\
1770   }\n";
1771   auto inout = ShaderArg<int32_t>({ 2, 4 }, SHADER_ARG_INOUT);
1772   auto old = ShaderArg<int32_t>(std::vector<int32_t>(2, 0xdeadbeef), SHADER_ARG_OUTPUT);
1773   const int32_t expected_inout[] = { 5, 7 };
1774   const int32_t expected_old[] = { 2, 4 };
1775   run_shader(kernel_source, inout.size(), 1, 1, inout, old);
1776   for (int i = 0; i < inout.size(); ++i) {
1777      EXPECT_EQ(inout[i], expected_inout[i]);
1778      EXPECT_EQ(old[i], expected_old[i]);
1779   }
1780}
1781
1782TEST_F(ComputeTest, global_atomic_imin)
1783{
1784   const char *kernel_source =
1785   "__kernel void main_test(__global int *inout, __global int *old)\n\
1786   {\n\
1787      old[get_global_id(0)] = atomic_min(inout + get_global_id(0), 1);\n\
1788   }\n";
1789   auto inout = ShaderArg<int32_t>({ 0, 2, -1 }, SHADER_ARG_INOUT);
1790   auto old = ShaderArg<int32_t>(std::vector<int32_t>(3, 0xdeadbeef), SHADER_ARG_OUTPUT);
1791   const int32_t expected_inout[] = { 0, 1, -1 };
1792   const int32_t expected_old[] = { 0, 2, -1 };
1793   run_shader(kernel_source, inout.size(), 1, 1, inout, old);
1794   for (int i = 0; i < inout.size(); ++i) {
1795      EXPECT_EQ(inout[i], expected_inout[i]);
1796      EXPECT_EQ(old[i], expected_old[i]);
1797   }
1798}
1799
1800TEST_F(ComputeTest, global_atomic_and_or)
1801{
1802   const char *kernel_source =
1803   "__attribute__((reqd_work_group_size(3, 1, 1)))\n\
1804   __kernel void main_test(__global int *inout)\n\
1805   {\n\
1806      atomic_and(inout, ~(1 << get_global_id(0)));\n\
1807      atomic_or(inout, (1 << (get_global_id(0) + 4)));\n\
1808   }\n";
1809   auto inout = ShaderArg<int32_t>(0xf, SHADER_ARG_INOUT);
1810   const int32_t expected[] = { 0x78 };
1811   run_shader(kernel_source, 3, 1, 1, inout);
1812   for (int i = 0; i < inout.size(); ++i)
1813      EXPECT_EQ(inout[i], expected[i]);
1814}
1815
1816TEST_F(ComputeTest, global_atomic_cmpxchg)
1817{
1818   const char *kernel_source =
1819   "__attribute__((reqd_work_group_size(2, 1, 1)))\n\
1820   __kernel void main_test(__global int *inout)\n\
1821   {\n\
1822      while (atomic_cmpxchg(inout, get_global_id(0), get_global_id(0) + 1) != get_global_id(0))\n\
1823         ;\n\
1824   }\n";
1825   auto inout = ShaderArg<int32_t>(0, SHADER_ARG_INOUT);
1826   const int32_t expected_inout[] = { 2 };
1827   run_shader(kernel_source, 2, 1, 1, inout);
1828   for (int i = 0; i < inout.size(); ++i)
1829      EXPECT_EQ(inout[i], expected_inout[i]);
1830}
1831
1832TEST_F(ComputeTest, local_atomic_and_or)
1833{
1834   const char *kernel_source =
1835   "__attribute__((reqd_work_group_size(2, 1, 1)))\n\
1836   __kernel void main_test(__global ushort *inout)\n\
1837   {\n\
1838      __local ushort tmp;\n\
1839      atomic_and(&tmp, ~(0xff << (get_global_id(0) * 8)));\n\
1840      atomic_or(&tmp, inout[get_global_id(0)] << (get_global_id(0) * 8));\n\
1841      barrier(CLK_LOCAL_MEM_FENCE);\n\
1842      inout[get_global_id(0)] = tmp;\n\
1843   }\n";
1844   auto inout = ShaderArg<uint16_t>({ 2, 4 }, SHADER_ARG_INOUT);
1845   const uint16_t expected[] = { 0x402, 0x402 };
1846   run_shader(kernel_source, inout.size(), 1, 1, inout);
1847   for (int i = 0; i < inout.size(); ++i)
1848      EXPECT_EQ(inout[i], expected[i]);
1849}
1850
1851TEST_F(ComputeTest, local_atomic_cmpxchg)
1852{
1853   const char *kernel_source =
1854   "__attribute__((reqd_work_group_size(2, 1, 1)))\n\
1855   __kernel void main_test(__global int *out)\n\
1856   {\n\
1857      __local uint tmp;\n\
1858      tmp = 0;\n\
1859      barrier(CLK_LOCAL_MEM_FENCE);\n\
1860      while (atomic_cmpxchg(&tmp, get_global_id(0), get_global_id(0) + 1) != get_global_id(0))\n\
1861         ;\n\
1862      barrier(CLK_LOCAL_MEM_FENCE);\n\
1863      out[0] = tmp;\n\
1864   }\n";
1865
1866   auto out = ShaderArg<uint32_t>(0xdeadbeef, SHADER_ARG_OUTPUT);
1867   const uint16_t expected[] = { 2 };
1868   run_shader(kernel_source, 2, 1, 1, out);
1869   for (int i = 0; i < out.size(); ++i)
1870      EXPECT_EQ(out[i], expected[i]);
1871}
1872
1873TEST_F(ComputeTest, constant_sampler)
1874{
1875   const char* kernel_source =
1876   "__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_LINEAR;\n\
1877   __kernel void main_test(read_only image2d_t input, write_only image2d_t output)\n\
1878   {\n\
1879      int2 coordsi = (int2)(get_global_id(0), get_global_id(1));\n\
1880      float2 coordsf = (float2)((float)coordsi.x / get_image_width(input), (float)coordsi.y / get_image_height(input));\n\
1881      write_imagef(output, coordsi, \n\
1882         read_imagef(input, sampler, coordsf) + \n\
1883         read_imagef(input, sampler, coordsf + (float2)(0.1, 0.1)));\n\
1884   }\n";
1885   Shader shader = compile(std::vector<const char*>({ kernel_source }));
1886   validate(shader);
1887   EXPECT_EQ(shader.dxil->metadata.num_const_samplers, 1);
1888}
1889
1890TEST_F(ComputeTest, hi)
1891{
1892   const char *kernel_source = R"(
1893   __kernel void main_test(__global char3 *srcA, __global char2 *dst)
1894   {
1895       int  tid = get_global_id(0);
1896
1897       char2 tmp = srcA[tid].hi;
1898       dst[tid] = tmp;
1899   })";
1900   Shader shader = compile(std::vector<const char*>({ kernel_source }));
1901   validate(shader);
1902}
1903
1904TEST_F(ComputeTest, system_values)
1905{
1906   const char *kernel_source =
1907   "__kernel void main_test(__global uint* outputs)\n\
1908   {\n\
1909      outputs[0] = get_work_dim();\n\
1910      outputs[1] = get_global_size(0);\n\
1911      outputs[2] = get_local_size(0);\n\
1912      outputs[3] = get_num_groups(0);\n\
1913      outputs[4] = get_group_id(0);\n\
1914      outputs[5] = get_global_offset(0);\n\
1915      outputs[6] = get_global_id(0);\n\
1916   }\n";
1917   auto out = ShaderArg<uint32_t>(std::vector<uint32_t>(6, 0xdeadbeef), SHADER_ARG_OUTPUT);
1918   const uint16_t expected[] = { 3, 1, 1, 1, 0, 0, 0, };
1919   CompileArgs args = { 1, 1, 1 };
1920   Shader shader = compile({ kernel_source });
1921   run_shader(shader, args, out);
1922   for (int i = 0; i < out.size(); ++i)
1923      EXPECT_EQ(out[i], expected[i]);
1924
1925   args.work_props.work_dim = 2;
1926   args.work_props.global_offset_x = 100;
1927   args.work_props.group_id_offset_x = 2;
1928   args.work_props.group_count_total_x = 5;
1929   const uint32_t expected_withoffsets[] = { 2, 5, 1, 5, 2, 100, 102 };
1930   run_shader(shader, args, out);
1931   for (int i = 0; i < out.size(); ++i)
1932      EXPECT_EQ(out[i], expected_withoffsets[i]);
1933}
1934
1935TEST_F(ComputeTest, convert_round_sat)
1936{
1937   const char *kernel_source =
1938   "__kernel void main_test(__global float *f, __global uchar *u)\n\
1939   {\n\
1940       uint idx = get_global_id(0);\n\
1941       u[idx] = convert_uchar_sat_rtp(f[idx]);\n\
1942   }\n";
1943   auto f = ShaderArg<float>({ -1.0f, 1.1f, 20.0f, 255.5f }, SHADER_ARG_INPUT);
1944   auto u = ShaderArg<uint8_t>({ 255, 0, 0, 0 }, SHADER_ARG_OUTPUT);
1945   const uint8_t expected[] = {
1946      0, 2, 20, 255
1947   };
1948
1949   run_shader(kernel_source, f.size(), 1, 1, f, u);
1950   for (int i = 0; i < u.size(); ++i)
1951      EXPECT_EQ(u[i], expected[i]);
1952}
1953
1954TEST_F(ComputeTest, convert_round_sat_vec)
1955{
1956   const char *kernel_source =
1957   "__kernel void main_test(__global float16 *f, __global uchar16 *u)\n\
1958   {\n\
1959       uint idx = get_global_id(0);\n\
1960       u[idx] = convert_uchar16_sat_rtp(f[idx]);\n\
1961   }\n";
1962   auto f = ShaderArg<float>({
1963      -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f,
1964      -0.5f, 1.9f, 20.0f, 254.5f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f,
1965       0.0f, 1.3f, 20.0f, 255.1f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f,
1966      -0.0f, 1.5555f, 20.0f, 254.9f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f,
1967   }, SHADER_ARG_INPUT);
1968   auto u = ShaderArg<uint8_t>({
1969      255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0,
1970      255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0,
1971      255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0,
1972      255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0,
1973   }, SHADER_ARG_OUTPUT);
1974   const uint8_t expected[] = {
1975      0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255,
1976      0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255,
1977      0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255,
1978      0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255,
1979   };
1980
1981   run_shader(kernel_source, 4, 1, 1, f, u);
1982   for (int i = 0; i < u.size(); ++i)
1983      EXPECT_EQ(u[i], expected[i]);
1984}
1985
1986TEST_F(ComputeTest, convert_char2_uchar2)
1987{
1988   const char *kernel_source =
1989   "__kernel void main_test( __global char2 *src, __global uchar2 *dest )\n\
1990   {\n\
1991      size_t i = get_global_id(0);\n\
1992      dest[i] = convert_uchar2_sat( src[i] );\n\
1993   }\n";
1994
1995   auto c = ShaderArg<int8_t>({ -127, -4, 0, 4, 126, 127, 16, 32 }, SHADER_ARG_INPUT);
1996   auto u = ShaderArg<uint8_t>({ 99, 99, 99, 99, 99, 99, 99, 99 }, SHADER_ARG_OUTPUT);
1997   const uint8_t expected[] = { 0, 0, 0, 4, 126, 127, 16, 32 };
1998   run_shader(kernel_source, 4, 1, 1, c, u);
1999   for (int i = 0; i < u.size(); i++)
2000      EXPECT_EQ(u[i], expected[i]);
2001}
2002
2003TEST_F(ComputeTest, async_copy)
2004{
2005   const char *kernel_source = R"(
2006   __kernel void main_test( const __global char *src, __global char *dst, __local char *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem )
2007   {
2008    int i;
2009    for(i=0; i<copiesPerWorkItem; i++)
2010        localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ] = (char)(char)0;
2011       barrier( CLK_LOCAL_MEM_FENCE );
2012       event_t event;
2013       event = async_work_group_copy( (__local char*)localBuffer, (__global const char*)(src+copiesPerWorkgroup*get_group_id(0)), (size_t)copiesPerWorkgroup, 0 );
2014       wait_group_events( 1, &event );
2015    for(i=0; i<copiesPerWorkItem; i++)
2016     dst[ get_global_id( 0 )*copiesPerWorkItem+i ] = localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ];
2017   })";
2018   Shader shader = compile({ kernel_source });
2019   validate(shader);
2020}
2021
2022TEST_F(ComputeTest, packed_struct_global)
2023{
2024#pragma pack(push, 1)
2025   struct s { uint8_t uc; uint64_t ul; uint16_t us; };
2026#pragma pack(pop)
2027
2028   const char *kernel_source =
2029   "struct __attribute__((packed)) s {uchar uc; ulong ul; ushort us; };\n\
2030   __kernel void main_test(__global struct s *inout, global uint *size)\n\
2031   {\n\
2032       uint idx = get_global_id(0);\n\
2033       inout[idx].uc = idx + 1;\n\
2034       inout[idx].ul = ((ulong)(idx + 1 + 0xfbfcfdfe) << 32) | 0x12345678;\n\
2035       inout[idx].us = ((ulong)(idx + 1 + 0xa0) << 8) | 0x12;\n\
2036       *size = sizeof(struct s);\n\
2037   }\n";
2038   auto inout = ShaderArg<struct s>({0, 0, 0}, SHADER_ARG_OUTPUT);
2039   auto size = ShaderArg<uint32_t>(0, SHADER_ARG_OUTPUT);
2040   const struct s expected[] = {
2041      { 1, 0xfbfcfdff12345678, 0xa112 }
2042   };
2043
2044   run_shader(kernel_source, inout.size(), 1, 1, inout, size);
2045   for (int i = 0; i < inout.size(); ++i) {
2046      EXPECT_EQ(inout[i].uc, expected[i].uc);
2047      EXPECT_EQ(inout[i].ul, expected[i].ul);
2048      EXPECT_EQ(inout[i].us, expected[i].us);
2049   }
2050   EXPECT_EQ(size, sizeof(struct s));
2051}
2052
2053TEST_F(ComputeTest, packed_struct_arg)
2054{
2055#pragma pack(push, 1)
2056   struct s { uint8_t uc; uint64_t ul; uint16_t us; };
2057#pragma pack(pop)
2058
2059   const char *kernel_source =
2060   "struct __attribute__((packed)) s {uchar uc; ulong ul; ushort us; };\n\
2061   __kernel void main_test(__global struct s *out, struct s in)\n\
2062   {\n\
2063       uint idx = get_global_id(0);\n\
2064       out[idx].uc = in.uc + 0x12;\n\
2065       out[idx].ul = in.ul + 0x123456789abcdef;\n\
2066       out[idx].us = in.us + 0x1234;\n\
2067   }\n";
2068   auto out = ShaderArg<struct s>({0, 0, 0}, SHADER_ARG_OUTPUT);
2069   auto in = ShaderArg<struct s>({1, 2, 3}, SHADER_ARG_INPUT);
2070   const struct s expected[] = {
2071      { 0x12 + 1, 0x123456789abcdef + 2, 0x1234 + 3 }
2072   };
2073
2074   run_shader(kernel_source, out.size(), 1, 1, out, in);
2075   for (int i = 0; i < out.size(); ++i) {
2076      EXPECT_EQ(out[i].uc, expected[i].uc);
2077      EXPECT_EQ(out[i].ul, expected[i].ul);
2078      EXPECT_EQ(out[i].us, expected[i].us);
2079   }
2080}
2081
2082TEST_F(ComputeTest, packed_struct_local)
2083{
2084#pragma pack(push, 1)
2085   struct s { uint8_t uc; uint64_t ul; uint16_t us; };
2086#pragma pack(pop)
2087
2088   const char *kernel_source =
2089   "struct __attribute__((packed)) s {uchar uc; ulong ul; ushort us; };\n\
2090   __kernel void main_test(__global struct s *out, __constant struct s *in)\n\
2091   {\n\
2092       uint idx = get_global_id(0);\n\
2093       __local struct s tmp[2];\n\
2094       tmp[get_local_id(0)] = in[idx];\n\
2095       barrier(CLK_LOCAL_MEM_FENCE);\n\
2096       out[idx] = tmp[(get_local_id(0) + 1) % 2];\n\
2097   }\n";
2098   auto out = ShaderArg<struct s>({{0, 0, 0}, {0, 0, 0}}, SHADER_ARG_OUTPUT);
2099   auto in = ShaderArg<struct s>({{1, 2, 3}, {0x12, 0x123456789abcdef, 0x1234} }, SHADER_ARG_INPUT);
2100   const struct s expected[] = {
2101      { 0x12, 0x123456789abcdef, 0x1234 },
2102      { 1, 2, 3 },
2103   };
2104
2105   run_shader(kernel_source, out.size(), 1, 1, out, in);
2106   for (int i = 0; i < out.size(); ++i) {
2107      EXPECT_EQ(out[i].uc, expected[i].uc);
2108      EXPECT_EQ(out[i].ul, expected[i].ul);
2109      EXPECT_EQ(out[i].us, expected[i].us);
2110   }
2111}
2112
2113/* DISABLED because current release versions of WARP either return
2114 * rubbish from reads or crash: they are not prepared to handle
2115 * non-float global constants */
2116TEST_F(ComputeTest, DISABLED_packed_struct_const)
2117{
2118#pragma pack(push, 1)
2119   struct s { uint8_t uc; uint64_t ul; uint16_t us; };
2120#pragma pack(pop)
2121
2122   const char *kernel_source =
2123   "struct __attribute__((packed)) s {uchar uc; ulong ul; ushort us; };\n\
2124   __kernel void main_test(__global struct s *out, struct s in)\n\
2125   {\n\
2126       __constant struct s base[] = {\n\
2127          {0x12, 0x123456789abcdef, 0x1234},\n\
2128          {0x11, 0x123456789abcdee, 0x1233},\n\
2129       };\n\
2130       uint idx = get_global_id(0);\n\
2131       out[idx].uc = base[idx % 2].uc + in.uc;\n\
2132       out[idx].ul = base[idx % 2].ul + in.ul;\n\
2133       out[idx].us = base[idx % 2].us + in.us;\n\
2134   }\n";
2135   auto out = ShaderArg<struct s>(std::vector<struct s>(2, {0, 0, 0}), SHADER_ARG_OUTPUT);
2136   auto in = ShaderArg<struct s>({1, 2, 3}, SHADER_ARG_INPUT);
2137   const struct s expected[] = {
2138      { 0x12 + 1, 0x123456789abcdef + 2, 0x1234 + 3 },
2139      { 0x11 + 1, 0x123456789abcdee + 2, 0x1233 + 3 },
2140   };
2141
2142   run_shader(kernel_source, out.size(), 1, 1, out, in);
2143   for (int i = 0; i < out.size(); ++i) {
2144      EXPECT_EQ(out[i].uc, expected[i].uc);
2145      EXPECT_EQ(out[i].ul, expected[i].ul);
2146      EXPECT_EQ(out[i].us, expected[i].us);
2147   }
2148}
2149
2150TEST_F(ComputeTest, DISABLED_printf)
2151{
2152   const char *kernel_source = R"(
2153   __kernel void main_test(__global float *src, __global uint *dest)
2154   {
2155      __constant char *format_str = "%s: %f";
2156      __constant char *str_val = "Test";
2157      *dest = printf(format_str, str_val, src[0]);
2158   })";
2159
2160   auto src = ShaderArg<float>({ 1.0f }, SHADER_ARG_INPUT);
2161   auto dest = ShaderArg<uint32_t>({ 0xdeadbeef }, SHADER_ARG_OUTPUT);
2162   run_shader(kernel_source, 1, 1, 1, src, dest);
2163   EXPECT_EQ(dest[0], 0);
2164}
2165
2166TEST_F(ComputeTest, vload_half)
2167{
2168   const char *kernel_source = R"(
2169   __kernel void main_test(__global half *src, __global float4 *dest)
2170   {
2171      int offset = get_global_id(0);
2172      dest[offset] = vload_half4(offset, src);
2173   })";
2174   auto src = ShaderArg<uint16_t>({ 0x3c00, 0x4000, 0x4200, 0x4400,
2175                                    0x4500, 0x4600, 0x4700, 0x4800 }, SHADER_ARG_INPUT);
2176   auto dest = ShaderArg<float>({ FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX,
2177                                  FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX }, SHADER_ARG_OUTPUT);
2178   run_shader(kernel_source, 2, 1, 1, src, dest);
2179   for (unsigned i = 0; i < 8; ++i)
2180      EXPECT_FLOAT_EQ(dest[i], (float)(i + 1));
2181}
2182
2183TEST_F(ComputeTest, vstore_half)
2184{
2185   const char *kernel_source = R"(
2186   __kernel void main_test(__global half *dst, __global float4 *src)
2187   {
2188      int offset = get_global_id(0);
2189      vstore_half4(src[offset], offset, dst);
2190   })";
2191   auto dest = ShaderArg<uint16_t>({0xdead, 0xdead, 0xdead, 0xdead,
2192                                   0xdead, 0xdead, 0xdead, 0xdead}, SHADER_ARG_OUTPUT);
2193   auto src = ShaderArg<float>({ 1.0, 2.0, 3.0, 4.0,
2194                                  5.0, 6.0, 7.0, 8.0 }, SHADER_ARG_INPUT);
2195   run_shader(kernel_source, 2, 1, 1, dest, src);
2196   const uint16_t expected[] = { 0x3c00, 0x4000, 0x4200, 0x4400,
2197                                 0x4500, 0x4600, 0x4700, 0x4800 };
2198   for (unsigned i = 0; i < 8; ++i)
2199      EXPECT_EQ(dest[i], expected[i]);
2200}
2201
2202TEST_F(ComputeTest, inline_function)
2203{
2204   const char *kernel_source = R"(
2205   inline float helper(float foo)
2206   {
2207      return foo * 2;
2208   }
2209
2210   __kernel void main_test(__global float *dst, __global float *src)
2211   {
2212      *dst = helper(*src);
2213   })";
2214   auto dest = ShaderArg<float>({ NAN }, SHADER_ARG_OUTPUT);
2215   auto src = ShaderArg<float>({ 1.0f }, SHADER_ARG_INPUT);
2216   run_shader(kernel_source, 1, 1, 1, dest, src);
2217   EXPECT_EQ(dest[0], 2.0f);
2218}
2219
2220TEST_F(ComputeTest, unused_arg)
2221{
2222   const char *kernel_source = R"(
2223   __kernel void main_test(__global int *dst, __global int *unused, __global int *src)
2224   {
2225      int i = get_global_id(0);
2226      dst[i] = src[i];
2227   })";
2228   auto dest = ShaderArg<int>({ -1, -1, -1, -1 }, SHADER_ARG_OUTPUT);
2229   auto src = ShaderArg<int>({ 1, 2, 3, 4 }, SHADER_ARG_INPUT);
2230   auto unused = ShaderArg<int>({ -1, -1, -1, -1 }, SHADER_ARG_INPUT);
2231   run_shader(kernel_source, 4, 1, 1, dest, unused, src);
2232   for (int i = 0; i < 4; ++i)
2233      EXPECT_EQ(dest[i], i + 1);
2234}
2235
2236TEST_F(ComputeTest, spec_constant)
2237{
2238   const char *spirv_asm = R"(
2239               OpCapability Addresses
2240               OpCapability Kernel
2241               OpCapability Int64
2242          %1 = OpExtInstImport "OpenCL.std"
2243               OpMemoryModel Physical64 OpenCL
2244               OpEntryPoint Kernel %2 "main_test" %__spirv_BuiltInGlobalInvocationId
2245          %4 = OpString "kernel_arg_type.main_test.uint*,"
2246               OpSource OpenCL_C 102000
2247               OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId"
2248               OpName %output "output"
2249               OpName %entry "entry"
2250               OpName %output_addr "output.addr"
2251               OpName %id "id"
2252               OpName %call "call"
2253               OpName %conv "conv"
2254               OpName %idxprom "idxprom"
2255               OpName %arrayidx "arrayidx"
2256               OpName %add "add"
2257               OpName %mul "mul"
2258               OpName %idxprom1 "idxprom1"
2259               OpName %arrayidx2 "arrayidx2"
2260               OpDecorate %__spirv_BuiltInGlobalInvocationId BuiltIn GlobalInvocationId
2261               OpDecorate %__spirv_BuiltInGlobalInvocationId Constant
2262               OpDecorate %id Alignment 4
2263               OpDecorate %output_addr Alignment 8
2264               OpDecorate %uint_1 SpecId 1
2265      %ulong = OpTypeInt 64 0
2266       %uint = OpTypeInt 32 0
2267     %uint_1 = OpSpecConstant %uint 1
2268    %v3ulong = OpTypeVector %ulong 3
2269%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
2270       %void = OpTypeVoid
2271%_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
2272         %24 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint
2273%_ptr_Function__ptr_CrossWorkgroup_uint = OpTypePointer Function %_ptr_CrossWorkgroup_uint
2274%_ptr_Function_uint = OpTypePointer Function %uint
2275%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_Input_v3ulong Input
2276          %2 = OpFunction %void DontInline %24
2277     %output = OpFunctionParameter %_ptr_CrossWorkgroup_uint
2278      %entry = OpLabel
2279%output_addr = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uint Function
2280         %id = OpVariable %_ptr_Function_uint Function
2281               OpStore %output_addr %output Aligned 8
2282         %27 = OpLoad %v3ulong %__spirv_BuiltInGlobalInvocationId Aligned 32
2283       %call = OpCompositeExtract %ulong %27 0
2284       %conv = OpUConvert %uint %call
2285               OpStore %id %conv Aligned 4
2286         %28 = OpLoad %_ptr_CrossWorkgroup_uint %output_addr Aligned 8
2287         %29 = OpLoad %uint %id Aligned 4
2288    %idxprom = OpUConvert %ulong %29
2289   %arrayidx = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uint %28 %idxprom
2290         %30 = OpLoad %uint %arrayidx Aligned 4
2291         %31 = OpLoad %uint %id Aligned 4
2292        %add = OpIAdd %uint %31 %uint_1
2293        %mul = OpIMul %uint %30 %add
2294         %32 = OpLoad %_ptr_CrossWorkgroup_uint %output_addr Aligned 8
2295         %33 = OpLoad %uint %id Aligned 4
2296   %idxprom1 = OpUConvert %ulong %33
2297  %arrayidx2 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uint %32 %idxprom1
2298               OpStore %arrayidx2 %mul Aligned 4
2299               OpReturn
2300               OpFunctionEnd)";
2301   Shader shader = assemble(spirv_asm);
2302   Shader spec_shader = specialize(shader, 1, 5);
2303
2304   auto inout = ShaderArg<uint32_t>({ 0x00000001, 0x10000001, 0x00020002, 0x04010203 },
2305      SHADER_ARG_INOUT);
2306   const uint32_t expected[] = {
2307      0x00000005, 0x60000006, 0x000e000e, 0x20081018
2308   };
2309   CompileArgs args = { inout.size(), 1, 1 };
2310   run_shader(spec_shader, args, inout);
2311   for (int i = 0; i < inout.size(); ++i)
2312      EXPECT_EQ(inout[i], expected[i]);
2313}
2314