1/* 2 * Copyright © Microsoft Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include <stdio.h> 25#include <stdint.h> 26#include <stdexcept> 27#include <vector> 28 29#include <directx/d3d12.h> 30#include <dxgi1_4.h> 31#include <gtest/gtest.h> 32#include <wrl.h> 33 34#include "compute_test.h" 35 36using std::vector; 37 38TEST_F(ComputeTest, runtime_memcpy) 39{ 40 struct shift { uint8_t val; uint8_t shift; uint16_t ret; }; 41 const char *kernel_source = 42 "struct shift { uchar val; uchar shift; ushort ret; };\n\ 43 __kernel void main_test(__global struct shift *inout)\n\ 44 {\n\ 45 uint id = get_global_id(0);\n\ 46 uint id2 = id + get_global_id(1);\n\ 47 struct shift lc[4] = { { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }};\n\ 48 lc[id] = inout[id];\n\ 49 inout[id2].ret = (ushort) lc[id2].val << (ushort) lc[id2].shift;\n\ 50 }\n"; 51 52 auto inout = ShaderArg<struct shift>({ 53 { 0x10, 1, 0xffff }, 54 { 0x20, 2, 0xffff }, 55 { 0x30, 3, 0xffff }, 56 { 0x40, 4, 0xffff }, 57 }, 58 SHADER_ARG_INOUT); 59 const uint16_t expected[] = { 0x20, 0x80, 0x180, 0x400 }; 60 run_shader(kernel_source, inout.size(), 1, 1, inout); 61 for (int i = 0; i < inout.size(); ++i) 62 EXPECT_EQ(inout[i].ret, expected[i]); 63} 64 65TEST_F(ComputeTest, two_global_arrays) 66{ 67 const char *kernel_source = 68 "__kernel void main_test(__global uint *g1, __global uint *g2)\n\ 69 {\n\ 70 uint idx = get_global_id(0);\n\ 71 g1[idx] -= g2[idx];\n\ 72 }\n"; 73 auto g1 = ShaderArg<uint32_t>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT); 74 auto g2 = ShaderArg<uint32_t>({ 1, 2, 3, 4 }, SHADER_ARG_INPUT); 75 const uint32_t expected[] = { 76 9, 18, 27, 36 77 }; 78 79 run_shader(kernel_source, g1.size(), 1, 1, g1, g2); 80 for (int i = 0; i < g1.size(); ++i) 81 EXPECT_EQ(g1[i], expected[i]); 82} 83 84/* Disabled until saturated conversions from f32->i64 fixed (mesa/mesa#3824) */ 85TEST_F(ComputeTest, DISABLED_i64tof32) 86{ 87 const char *kernel_source = 88 "__kernel void main_test(__global long *out, __constant long *in)\n\ 89 {\n\ 90 __local float tmp[12];\n\ 91 uint idx = get_global_id(0);\n\ 92 tmp[idx] = in[idx];\n\ 93 barrier(CLK_LOCAL_MEM_FENCE);\n\ 94 out[idx] = tmp[idx + get_global_id(1)];\n\ 95 }\n"; 96 auto in = ShaderArg<int64_t>({ 0x100000000LL, 97 -0x100000000LL, 98 0x7fffffffffffffffLL, 99 0x4000004000000000LL, 100 0x4000003fffffffffLL, 101 0x4000004000000001LL, 102 -1, 103 -0x4000004000000000LL, 104 -0x4000003fffffffffLL, 105 -0x4000004000000001LL, 106 0, 107 INT64_MIN }, 108 SHADER_ARG_INPUT); 109 auto out = ShaderArg<int64_t>(std::vector<int64_t>(12, 0xdeadbeed), SHADER_ARG_OUTPUT); 110 const int64_t expected[] = { 111 0x100000000LL, 112 -0x100000000LL, 113 0x7fffffffffffffffLL, 114 0x4000000000000000LL, 115 0x4000000000000000LL, 116 0x4000008000000000LL, 117 -1, 118 -0x4000000000000000LL, 119 -0x4000000000000000LL, 120 -0x4000008000000000LL, 121 0, 122 INT64_MIN, 123 }; 124 125 run_shader(kernel_source, out.size(), 1, 1, out, in); 126 for (int i = 0; i < out.size(); ++i) { 127 EXPECT_EQ((int64_t)out[i], expected[i]); 128 } 129} 130TEST_F(ComputeTest, two_constant_arrays) 131{ 132 const char *kernel_source = 133 "__kernel void main_test(__constant uint *c1, __global uint *g1, __constant uint *c2)\n\ 134 {\n\ 135 uint idx = get_global_id(0);\n\ 136 g1[idx] -= c1[idx] + c2[idx];\n\ 137 }\n"; 138 auto g1 = ShaderArg<uint32_t>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT); 139 auto c1 = ShaderArg<uint32_t>({ 1, 2, 3, 4 }, SHADER_ARG_INPUT); 140 auto c2 = ShaderArg<uint32_t>(std::vector<uint32_t>(16384, 5), SHADER_ARG_INPUT); 141 const uint32_t expected[] = { 142 4, 13, 22, 31 143 }; 144 145 run_shader(kernel_source, g1.size(), 1, 1, c1, g1, c2); 146 for (int i = 0; i < g1.size(); ++i) 147 EXPECT_EQ(g1[i], expected[i]); 148} 149 150TEST_F(ComputeTest, null_constant_ptr) 151{ 152 const char *kernel_source = 153 "__kernel void main_test(__global uint *g1, __constant uint *c1)\n\ 154 {\n\ 155 __constant uint fallback[] = {2, 3, 4, 5};\n\ 156 __constant uint *c = c1 ? c1 : fallback;\n\ 157 uint idx = get_global_id(0);\n\ 158 g1[idx] -= c[idx];\n\ 159 }\n"; 160 auto g1 = ShaderArg<uint32_t>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT); 161 auto c1 = ShaderArg<uint32_t>({ 1, 2, 3, 4 }, SHADER_ARG_INPUT); 162 const uint32_t expected1[] = { 163 9, 18, 27, 36 164 }; 165 166 run_shader(kernel_source, g1.size(), 1, 1, g1, c1); 167 for (int i = 0; i < g1.size(); ++i) 168 EXPECT_EQ(g1[i], expected1[i]); 169 170 const uint32_t expected2[] = { 171 8, 17, 26, 35 172 }; 173 174 g1 = ShaderArg<uint32_t>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT); 175 auto c2 = NullShaderArg(); 176 run_shader(kernel_source, g1.size(), 1, 1, g1, c2); 177 for (int i = 0; i < g1.size(); ++i) 178 EXPECT_EQ(g1[i], expected2[i]); 179} 180 181/* This test seems to fail on older versions of WARP. */ 182TEST_F(ComputeTest, DISABLED_null_global_ptr) 183{ 184 const char *kernel_source = 185 "__kernel void main_test(__global uint *g1, __global uint *g2)\n\ 186 {\n\ 187 __constant uint fallback[] = {2, 3, 4, 5};\n\ 188 uint idx = get_global_id(0);\n\ 189 g1[idx] -= g2 ? g2[idx] : fallback[idx];\n\ 190 }\n"; 191 auto g1 = ShaderArg<uint32_t>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT); 192 auto g2 = ShaderArg<uint32_t>({ 1, 2, 3, 4 }, SHADER_ARG_INPUT); 193 const uint32_t expected1[] = { 194 9, 18, 27, 36 195 }; 196 197 run_shader(kernel_source, g1.size(), 1, 1, g1, g2); 198 for (int i = 0; i < g1.size(); ++i) 199 EXPECT_EQ(g1[i], expected1[i]); 200 201 const uint32_t expected2[] = { 202 8, 17, 26, 35 203 }; 204 205 g1 = ShaderArg<uint32_t>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT); 206 auto g2null = NullShaderArg(); 207 run_shader(kernel_source, g1.size(), 1, 1, g1, g2null); 208 for (int i = 0; i < g1.size(); ++i) 209 EXPECT_EQ(g1[i], expected2[i]); 210} 211 212TEST_F(ComputeTest, ret_constant_ptr) 213{ 214 struct s { uint64_t ptr; uint32_t val; }; 215 const char *kernel_source = 216 "struct s { __constant uint *ptr; uint val; };\n\ 217 __kernel void main_test(__global struct s *out, __constant uint *in)\n\ 218 {\n\ 219 __constant uint foo[] = { 1, 2 };\n\ 220 uint idx = get_global_id(0);\n\ 221 if (idx == 0)\n\ 222 out[idx].ptr = foo;\n\ 223 else\n\ 224 out[idx].ptr = in;\n\ 225 out[idx].val = out[idx].ptr[idx];\n\ 226 }\n"; 227 auto out = ShaderArg<struct s>(std::vector<struct s>(2, {0xdeadbeefdeadbeef, 0}), SHADER_ARG_OUTPUT); 228 auto in = ShaderArg<uint32_t>({ 3, 4 }, SHADER_ARG_INPUT); 229 const uint32_t expected_val[] = { 230 1, 4 231 }; 232 const uint64_t expected_ptr[] = { 233 2ull << 32, 1ull << 32 234 }; 235 236 run_shader(kernel_source, out.size(), 1, 1, out, in); 237 for (int i = 0; i < out.size(); ++i) { 238 EXPECT_EQ(out[i].val, expected_val[i]); 239 EXPECT_EQ(out[i].ptr, expected_ptr[i]); 240 } 241} 242 243TEST_F(ComputeTest, ret_global_ptr) 244{ 245 struct s { uint64_t ptr; uint32_t val; }; 246 const char *kernel_source = 247 "struct s { __global uint *ptr; uint val; };\n\ 248 __kernel void main_test(__global struct s *out, __global uint *in1, __global uint *in2)\n\ 249 {\n\ 250 uint idx = get_global_id(0);\n\ 251 out[idx].ptr = idx ? in2 : in1;\n\ 252 out[idx].val = out[idx].ptr[idx];\n\ 253 }\n"; 254 auto out = ShaderArg<struct s>(std::vector<struct s>(2, {0xdeadbeefdeadbeef, 0}), SHADER_ARG_OUTPUT); 255 auto in1 = ShaderArg<uint32_t>({ 1, 2 }, SHADER_ARG_INPUT); 256 auto in2 = ShaderArg<uint32_t>({ 3, 4 }, SHADER_ARG_INPUT); 257 const uint32_t expected_val[] = { 258 1, 4 259 }; 260 const uint64_t expected_ptr[] = { 261 1ull << 32, 2ull << 32 262 }; 263 264 run_shader(kernel_source, out.size(), 1, 1, out, in1, in2); 265 for (int i = 0; i < out.size(); ++i) { 266 EXPECT_EQ(out[i].val, expected_val[i]); 267 EXPECT_EQ(out[i].ptr, expected_ptr[i]); 268 } 269} 270 271TEST_F(ComputeTest, ret_local_ptr) 272{ 273 struct s { uint64_t ptr; }; 274 const char *kernel_source = 275 "struct s { __local uint *ptr; };\n\ 276 __kernel void main_test(__global struct s *out)\n\ 277 {\n\ 278 __local uint tmp[2];\n\ 279 uint idx = get_global_id(0);\n\ 280 tmp[idx] = idx;\n\ 281 out[idx].ptr = &tmp[idx];\n\ 282 }\n"; 283 auto out = ShaderArg<struct s>(std::vector<struct s>(2, { 0xdeadbeefdeadbeef }), SHADER_ARG_OUTPUT); 284 const uint64_t expected_ptr[] = { 285 0, 4, 286 }; 287 288 run_shader(kernel_source, out.size(), 1, 1, out); 289 for (int i = 0; i < out.size(); ++i) { 290 EXPECT_EQ(out[i].ptr, expected_ptr[i]); 291 } 292} 293 294TEST_F(ComputeTest, ret_private_ptr) 295{ 296 struct s { uint64_t ptr; uint32_t value; }; 297 const char *kernel_source = 298 "struct s { __private uint *ptr; uint value; };\n\ 299 __kernel void main_test(__global struct s *out)\n\ 300 {\n\ 301 uint tmp[2] = {1, 2};\n\ 302 uint idx = get_global_id(0);\n\ 303 out[idx].ptr = &tmp[idx];\n\ 304 out[idx].value = *out[idx].ptr;\n\ 305 }\n"; 306 auto out = ShaderArg<struct s>(std::vector<struct s>(2, { 0xdeadbeefdeadbeef }), SHADER_ARG_OUTPUT); 307 const uint64_t expected_ptr[] = { 308 0, 4, 309 }; 310 const uint32_t expected_value[] = { 311 1, 2 312 }; 313 314 run_shader(kernel_source, out.size(), 1, 1, out); 315 for (int i = 0; i < out.size(); ++i) { 316 EXPECT_EQ(out[i].ptr, expected_ptr[i]); 317 } 318} 319 320TEST_F(ComputeTest, globals_8bit) 321{ 322 const char *kernel_source = 323 "__kernel void main_test(__global unsigned char *inout)\n\ 324 {\n\ 325 uint idx = get_global_id(0);\n\ 326 inout[idx] = inout[idx] + 1;\n\ 327 }\n"; 328 auto inout = ShaderArg<uint8_t> ({ 100, 110, 120, 130 }, SHADER_ARG_INOUT); 329 const uint8_t expected[] = { 330 101, 111, 121, 131 331 }; 332 run_shader(kernel_source, inout.size(), 1, 1, inout); 333 for (int i = 0; i < inout.size(); ++i) 334 EXPECT_EQ(inout[i], expected[i]); 335} 336 337TEST_F(ComputeTest, globals_16bit) 338{ 339 const char *kernel_source = 340 "__kernel void main_test(__global unsigned short *inout)\n\ 341 {\n\ 342 uint idx = get_global_id(0);\n\ 343 inout[idx] = inout[idx] + 1;\n\ 344 }\n"; 345 auto inout = ShaderArg<uint16_t> ({ 10000, 10010, 10020, 10030 }, SHADER_ARG_INOUT); 346 const uint16_t expected[] = { 347 10001, 10011, 10021, 10031 348 }; 349 run_shader(kernel_source, inout.size(), 1, 1, inout); 350 for (int i = 0; i < inout.size(); ++i) 351 EXPECT_EQ(inout[i], expected[i]); 352} 353 354TEST_F(ComputeTest, DISABLED_globals_64bit) 355{ 356 /* Test disabled, because we need a fixed version of WARP that hasn't 357 been officially shipped yet */ 358 359 const char *kernel_source = 360 "__kernel void main_test(__global unsigned long *inout)\n\ 361 {\n\ 362 uint idx = get_global_id(0);\n\ 363 inout[idx] = inout[idx] + 1;\n\ 364 }\n"; 365 uint64_t base = 1ull << 50; 366 auto inout = ShaderArg<uint64_t>({ base, base + 10, base + 20, base + 30 }, 367 SHADER_ARG_INOUT); 368 const uint64_t expected[] = { 369 base + 1, base + 11, base + 21, base + 31 370 }; 371 run_shader(kernel_source, inout.size(), 1, 1, inout); 372 for (int i = 0; i < inout.size(); ++i) 373 EXPECT_EQ(inout[i], expected[i]); 374} 375 376TEST_F(ComputeTest, built_ins_global_id) 377{ 378 const char *kernel_source = 379 "__kernel void main_test(__global uint *output)\n\ 380 {\n\ 381 output[get_global_id(0)] = get_global_id(0);\n\ 382 }\n"; 383 auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef), 384 SHADER_ARG_OUTPUT); 385 const uint32_t expected[] = { 386 0, 1, 2, 3 387 }; 388 389 run_shader(kernel_source, output.size(), 1, 1, output); 390 for (int i = 0; i < output.size(); ++i) 391 EXPECT_EQ(output[i], expected[i]); 392} 393 394TEST_F(ComputeTest, built_ins_global_id_rmw) 395{ 396 const char *kernel_source = 397 "__kernel void main_test(__global uint *output)\n\ 398 {\n\ 399 uint id = get_global_id(0);\n\ 400 output[id] = output[id] * (id + 1);\n\ 401 }\n"; 402 auto inout = ShaderArg<uint32_t>({0x00000001, 0x10000001, 0x00020002, 0x04010203}, 403 SHADER_ARG_INOUT); 404 const uint32_t expected[] = { 405 0x00000001, 0x20000002, 0x00060006, 0x1004080c 406 }; 407 run_shader(kernel_source, inout.size(), 1, 1, inout); 408 for (int i = 0; i < inout.size(); ++i) 409 EXPECT_EQ(inout[i], expected[i]); 410} 411 412TEST_F(ComputeTest, types_float_basics) 413{ 414 const char *kernel_source = 415 "__kernel void main_test(__global uint *output)\n\ 416 {\n\ 417 output[get_global_id(0)] = (uint)((float)get_global_id(0) + 1.5f);\n\ 418 }\n"; 419 auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef), 420 SHADER_ARG_OUTPUT); 421 const uint32_t expected[] = { 422 1, 2, 3, 4 423 }; 424 run_shader(kernel_source, output.size(), 1, 1, output); 425 for (int i = 0; i < output.size(); ++i) 426 EXPECT_EQ(output[i], expected[i]); 427} 428 429TEST_F(ComputeTest, DISABLED_types_double_basics) 430{ 431 const char *kernel_source = 432 "__kernel void main_test(__global uint *output)\n\ 433 {\n\ 434 output[get_global_id(0)] = (uint)((double)get_global_id(0) + 1.5);\n\ 435 }\n"; 436 auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef), 437 SHADER_ARG_OUTPUT); 438 const uint32_t expected[] = { 439 1, 2, 3, 4 440 }; 441 run_shader(kernel_source, output.size(), 1, 1, output); 442 for (int i = 0; i < output.size(); ++i) 443 EXPECT_EQ(output[i], expected[i]); 444} 445 446TEST_F(ComputeTest, types_short_basics) 447{ 448 const char *kernel_source = 449 "__kernel void main_test(__global uint *output)\n\ 450 {\n\ 451 output[get_global_id(0)] = (uint)((short)get_global_id(0) + (short)1);\n\ 452 }\n"; 453 auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef), 454 SHADER_ARG_OUTPUT); 455 const uint32_t expected[] = { 456 1, 2, 3, 4 457 }; 458 run_shader(kernel_source, output.size(), 1, 1, output); 459 for (int i = 0; i < output.size(); ++i) 460 EXPECT_EQ(output[i], expected[i]); 461} 462 463TEST_F(ComputeTest, types_char_basics) 464{ 465 const char *kernel_source = 466 "__kernel void main_test(__global uint *output)\n\ 467 {\n\ 468 output[get_global_id(0)] = (uint)((char)get_global_id(0) + (char)1);\n\ 469 }\n"; 470 auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef), 471 SHADER_ARG_OUTPUT); 472 const uint32_t expected[] = { 473 1, 2, 3, 4 474 }; 475 run_shader(kernel_source, output.size(), 1, 1, output); 476 for (int i = 0; i < output.size(); ++i) 477 EXPECT_EQ(output[i], expected[i]); 478} 479 480TEST_F(ComputeTest, types_if_statement) 481{ 482 const char *kernel_source = 483 "__kernel void main_test(__global uint *output)\n\ 484 {\n\ 485 int idx = get_global_id(0);\n\ 486 if (idx > 0)\n\ 487 output[idx] = ~idx;\n\ 488 else\n\ 489 output[0] = 0xff;\n\ 490 }\n"; 491 auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef), 492 SHADER_ARG_OUTPUT); 493 const uint32_t expected[] = { 494 0xff, ~1u, ~2u, ~3u 495 }; 496 run_shader(kernel_source, output.size(), 1, 1, output); 497 for (int i = 0; i < output.size(); ++i) 498 EXPECT_EQ(output[i], expected[i]); 499} 500 501TEST_F(ComputeTest, types_do_while_loop) 502{ 503 const char *kernel_source = 504 "__kernel void main_test(__global uint *output)\n\ 505 {\n\ 506 int value = 1;\n\ 507 int i = 1, n = get_global_id(0);\n\ 508 do {\n\ 509 value *= i++;\n\ 510 } while (i <= n);\n\ 511 output[n] = value;\n\ 512 }\n"; 513 auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(5, 0xdeadbeef), 514 SHADER_ARG_OUTPUT); 515 const uint32_t expected[] = { 516 1, 1, 1*2, 1*2*3, 1*2*3*4 517 }; 518 run_shader(kernel_source, output.size(), 1, 1, output); 519 for (int i = 0; i < output.size(); ++i) 520 EXPECT_EQ(output[i], expected[i]); 521} 522 523TEST_F(ComputeTest, types_for_loop) 524{ 525 const char *kernel_source = 526 "__kernel void main_test(__global uint *output)\n\ 527 {\n\ 528 int value = 1;\n\ 529 int n = get_global_id(0);\n\ 530 for (int i = 1; i <= n; ++i)\n\ 531 value *= i;\n\ 532 output[n] = value;\n\ 533 }\n"; 534 auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(5, 0xdeadbeef), 535 SHADER_ARG_OUTPUT); 536 const uint32_t expected[] = { 537 1, 1, 1*2, 1*2*3, 1*2*3*4 538 }; 539 run_shader(kernel_source, output.size(), 1, 1, output); 540 for (int i = 0; i < output.size(); ++i) 541 EXPECT_EQ(output[i], expected[i]); 542} 543 544TEST_F(ComputeTest, DISABLED_complex_types_local_array_long) 545{ 546 const char *kernel_source = 547 "__kernel void main_test(__global ulong *inout)\n\ 548 {\n\ 549 ushort tmp[] = {\n\ 550 get_global_id(1) + 0x00000000,\n\ 551 get_global_id(1) + 0x10000001,\n\ 552 get_global_id(1) + 0x20000020,\n\ 553 get_global_id(1) + 0x30000300,\n\ 554 };\n\ 555 uint idx = get_global_id(0);\n\ 556 inout[idx] = tmp[idx];\n\ 557 }\n"; 558 auto inout = ShaderArg<uint64_t>({ 0, 0, 0, 0 }, SHADER_ARG_INOUT); 559 const uint16_t expected[] = { 560 0x00000000, 0x10000001, 0x20000020, 0x30000300, 561 }; 562 run_shader(kernel_source, inout.size(), 1, 1, inout); 563 for (int i = 0; i < inout.size(); ++i) 564 EXPECT_EQ(inout[i], expected[i]); 565} 566 567TEST_F(ComputeTest, complex_types_local_array_short) 568{ 569 const char *kernel_source = 570 "__kernel void main_test(__global ushort *inout)\n\ 571 {\n\ 572 ushort tmp[] = {\n\ 573 get_global_id(1) + 0x00,\n\ 574 get_global_id(1) + 0x10,\n\ 575 get_global_id(1) + 0x20,\n\ 576 get_global_id(1) + 0x30,\n\ 577 };\n\ 578 uint idx = get_global_id(0);\n\ 579 inout[idx] = tmp[idx];\n\ 580 }\n"; 581 auto inout = ShaderArg<uint16_t>({ 0, 0, 0, 0 }, SHADER_ARG_INOUT); 582 const uint16_t expected[] = { 583 0x00, 0x10, 0x20, 0x30, 584 }; 585 run_shader(kernel_source, inout.size(), 1, 1, inout); 586 for (int i = 0; i < inout.size(); ++i) 587 EXPECT_EQ(inout[i], expected[i]); 588} 589 590TEST_F(ComputeTest, complex_types_local_array_struct_vec_float_misaligned) 591{ 592 const char *kernel_source = 593 "struct has_vecs { uchar c; ushort s; float2 f; };\n\ 594 __kernel void main_test(__global uint *inout)\n\ 595 {\n\ 596 struct has_vecs tmp[] = {\n\ 597 { 10 + get_global_id(0), get_global_id(1), { 10.0f, 1.0f } },\n\ 598 { 19 + get_global_id(0), get_global_id(1), { 20.0f, 4.0f } },\n\ 599 { 28 + get_global_id(0), get_global_id(1), { 30.0f, 9.0f } },\n\ 600 { 37 + get_global_id(0), get_global_id(1), { 40.0f, 16.0f } },\n\ 601 };\n\ 602 uint idx = get_global_id(0);\n\ 603 uint mul = (tmp[idx].c + tmp[idx].s) * trunc(tmp[idx].f[0]);\n\ 604 inout[idx] = mul + trunc(tmp[idx].f[1]);\n\ 605 }\n"; 606 auto inout = ShaderArg<uint32_t>({ 0, 0, 0, 0 }, SHADER_ARG_INOUT); 607 const uint16_t expected[] = { 101, 404, 909, 1616 }; 608 run_shader(kernel_source, inout.size(), 1, 1, inout); 609 for (int i = 0; i < inout.size(); ++i) 610 EXPECT_EQ(inout[i], expected[i]); 611} 612 613TEST_F(ComputeTest, complex_types_local_array) 614{ 615 const char *kernel_source = 616 "__kernel void main_test(__global uint *inout)\n\ 617 {\n\ 618 uint tmp[] = {\n\ 619 get_global_id(1) + 0x00,\n\ 620 get_global_id(1) + 0x10,\n\ 621 get_global_id(1) + 0x20,\n\ 622 get_global_id(1) + 0x30,\n\ 623 };\n\ 624 uint idx = get_global_id(0);\n\ 625 inout[idx] = tmp[idx];\n\ 626 }\n"; 627 auto inout = ShaderArg<uint32_t>({ 0, 0, 0, 0 }, SHADER_ARG_INOUT); 628 const uint32_t expected[] = { 629 0x00, 0x10, 0x20, 0x30, 630 }; 631 run_shader(kernel_source, inout.size(), 1, 1, inout); 632 for (int i = 0; i < inout.size(); ++i) 633 EXPECT_EQ(inout[i], expected[i]); 634} 635 636TEST_F(ComputeTest, complex_types_global_struct_array) 637{ 638 struct two_vals { uint32_t add; uint32_t mul; }; 639 const char *kernel_source = 640 "struct two_vals { uint add; uint mul; };\n\ 641 __kernel void main_test(__global struct two_vals *in_out)\n\ 642 {\n\ 643 uint id = get_global_id(0);\n\ 644 in_out[id].add = in_out[id].add + id;\n\ 645 in_out[id].mul = in_out[id].mul * id;\n\ 646 }\n"; 647 auto inout = ShaderArg<struct two_vals>({ { 8, 8 }, { 16, 16 }, { 64, 64 }, { 65536, 65536 } }, 648 SHADER_ARG_INOUT); 649 const struct two_vals expected[] = { 650 { 8 + 0, 8 * 0 }, 651 { 16 + 1, 16 * 1 }, 652 { 64 + 2, 64 * 2 }, 653 { 65536 + 3, 65536 * 3 } 654 }; 655 run_shader(kernel_source, inout.size(), 1, 1, inout); 656 for (int i = 0; i < inout.size(); ++i) { 657 EXPECT_EQ(inout[i].add, expected[i].add); 658 EXPECT_EQ(inout[i].mul, expected[i].mul); 659 } 660} 661 662TEST_F(ComputeTest, complex_types_global_uint2) 663{ 664 struct uint2 { uint32_t x; uint32_t y; }; 665 const char *kernel_source = 666 "__kernel void main_test(__global uint2 *inout)\n\ 667 {\n\ 668 uint id = get_global_id(0);\n\ 669 inout[id].x = inout[id].x + id;\n\ 670 inout[id].y = inout[id].y * id;\n\ 671 }\n"; 672 auto inout = ShaderArg<struct uint2>({ { 8, 8 }, { 16, 16 }, { 64, 64 }, { 65536, 65536 } }, 673 SHADER_ARG_INOUT); 674 const struct uint2 expected[] = { 675 { 8 + 0, 8 * 0 }, 676 { 16 + 1, 16 * 1 }, 677 { 64 + 2, 64 * 2 }, 678 { 65536 + 3, 65536 * 3 } 679 }; 680 run_shader(kernel_source, inout.size(), 1, 1, inout); 681 for (int i = 0; i < inout.size(); ++i) { 682 EXPECT_EQ(inout[i].x, expected[i].x); 683 EXPECT_EQ(inout[i].y, expected[i].y); 684 } 685} 686 687TEST_F(ComputeTest, complex_types_global_ushort2) 688{ 689 struct ushort2 { uint16_t x; uint16_t y; }; 690 const char *kernel_source = 691 "__kernel void main_test(__global ushort2 *inout)\n\ 692 {\n\ 693 uint id = get_global_id(0);\n\ 694 inout[id].x = inout[id].x + id;\n\ 695 inout[id].y = inout[id].y * id;\n\ 696 }\n"; 697 auto inout = ShaderArg<struct ushort2>({ { 8, 8 }, { 16, 16 }, { 64, 64 }, 698 { (uint16_t)65536, (uint16_t)65536 } }, 699 SHADER_ARG_INOUT); 700 const struct ushort2 expected[] = { 701 { 8 + 0, 8 * 0 }, 702 { 16 + 1, 16 * 1 }, 703 { 64 + 2, 64 * 2 }, 704 { (uint16_t)(65536 + 3), (uint16_t)(65536 * 3) } 705 }; 706 run_shader(kernel_source, inout.size(), 1, 1, inout); 707 for (int i = 0; i < inout.size(); ++i) { 708 EXPECT_EQ(inout[i].x, expected[i].x); 709 EXPECT_EQ(inout[i].y, expected[i].y); 710 } 711} 712 713TEST_F(ComputeTest, complex_types_global_uchar3) 714{ 715 struct uchar3 { uint8_t x; uint8_t y; uint8_t z; uint8_t pad; }; 716 const char *kernel_source = 717 "__kernel void main_test(__global uchar3 *inout)\n\ 718 {\n\ 719 uint id = get_global_id(0);\n\ 720 inout[id].x = inout[id].x + id;\n\ 721 inout[id].y = inout[id].y * id;\n\ 722 inout[id].z = inout[id].y + inout[id].x;\n\ 723 }\n"; 724 auto inout = ShaderArg<struct uchar3>({ { 8, 8, 8 }, { 16, 16, 16 }, { 64, 64, 64 }, { 255, 255, 255 } }, 725 SHADER_ARG_INOUT); 726 const struct uchar3 expected[] = { 727 { 8 + 0, 8 * 0, (8 + 0) + (8 * 0) }, 728 { 16 + 1, 16 * 1, (16 + 1) + (16 * 1) }, 729 { 64 + 2, 64 * 2, (64 + 2) + (64 * 2) }, 730 { (uint8_t)(255 + 3), (uint8_t)(255 * 3), (uint8_t)((255 + 3) + (255 * 3)) } 731 }; 732 run_shader(kernel_source, inout.size(), 1, 1, inout); 733 for (int i = 0; i < inout.size(); ++i) { 734 EXPECT_EQ(inout[i].x, expected[i].x); 735 EXPECT_EQ(inout[i].y, expected[i].y); 736 EXPECT_EQ(inout[i].z, expected[i].z); 737 } 738} 739 740TEST_F(ComputeTest, complex_types_constant_uchar3) 741{ 742 struct uchar3 { uint8_t x; uint8_t y; uint8_t z; uint8_t pad; }; 743 const char *kernel_source = 744 "__kernel void main_test(__global uchar3 *out, __constant uchar3 *in)\n\ 745 {\n\ 746 uint id = get_global_id(0);\n\ 747 out[id].x = in[id].x + id;\n\ 748 out[id].y = in[id].y * id;\n\ 749 out[id].z = out[id].y + out[id].x;\n\ 750 }\n"; 751 auto in = ShaderArg<struct uchar3>({ { 8, 8, 8 }, { 16, 16, 16 }, { 64, 64, 64 }, { 255, 255, 255 } }, 752 SHADER_ARG_INPUT); 753 auto out = ShaderArg<struct uchar3>(std::vector<struct uchar3>(4, { 0xff, 0xff, 0xff }), 754 SHADER_ARG_OUTPUT); 755 const struct uchar3 expected[] = { 756 { 8 + 0, 8 * 0, (8 + 0) + (8 * 0) }, 757 { 16 + 1, 16 * 1, (16 + 1) + (16 * 1) }, 758 { 64 + 2, 64 * 2, (64 + 2) + (64 * 2) }, 759 { (uint8_t)(255 + 3), (uint8_t)(255 * 3), (uint8_t)((255 + 3) + (255 * 3)) } 760 }; 761 run_shader(kernel_source, out.size(), 1, 1, out, in); 762 for (int i = 0; i < out.size(); ++i) { 763 EXPECT_EQ(out[i].x, expected[i].x); 764 EXPECT_EQ(out[i].y, expected[i].y); 765 EXPECT_EQ(out[i].z, expected[i].z); 766 } 767} 768 769TEST_F(ComputeTest, complex_types_global_uint8) 770{ 771 struct uint8 { 772 uint32_t s0; uint32_t s1; uint32_t s2; uint32_t s3; 773 uint32_t s4; uint32_t s5; uint32_t s6; uint32_t s7; 774 }; 775 const char *kernel_source = 776 "__kernel void main_test(__global uint8 *inout)\n\ 777 {\n\ 778 uint id = get_global_id(0);\n\ 779 inout[id].s01234567 = inout[id].s01234567 * 2;\n\ 780 }\n"; 781 auto inout = ShaderArg<struct uint8>({ { 1, 2, 3, 4, 5, 6, 7, 8 } }, 782 SHADER_ARG_INOUT); 783 const struct uint8 expected[] = { 784 { 2, 4, 6, 8, 10, 12, 14, 16 } 785 }; 786 run_shader(kernel_source, inout.size(), 1, 1, inout); 787 for (int i = 0; i < inout.size(); ++i) { 788 EXPECT_EQ(inout[i].s0, expected[i].s0); 789 EXPECT_EQ(inout[i].s1, expected[i].s1); 790 EXPECT_EQ(inout[i].s2, expected[i].s2); 791 EXPECT_EQ(inout[i].s3, expected[i].s3); 792 EXPECT_EQ(inout[i].s4, expected[i].s4); 793 EXPECT_EQ(inout[i].s5, expected[i].s5); 794 EXPECT_EQ(inout[i].s6, expected[i].s6); 795 EXPECT_EQ(inout[i].s7, expected[i].s7); 796 } 797} 798 799TEST_F(ComputeTest, complex_types_local_ulong16) 800{ 801 struct ulong16 { 802 uint64_t values[16]; 803 }; 804 const char *kernel_source = 805 R"(__kernel void main_test(__global ulong16 *inout) 806 { 807 __local ulong16 local_array[2]; 808 uint id = get_global_id(0); 809 local_array[id] = inout[id]; 810 barrier(CLK_LOCAL_MEM_FENCE); 811 inout[id] = local_array[0] * 2; 812 })"; 813 auto inout = ShaderArg<struct ulong16>({ { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } }, 814 SHADER_ARG_INOUT); 815 const struct ulong16 expected[] = { 816 { 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 } 817 }; 818 run_shader(kernel_source, inout.size(), 1, 1, inout); 819 for (int i = 0; i < inout.size(); ++i) { 820 for (int j = 0; j < 16; ++j) { 821 EXPECT_EQ(inout[i].values[j], expected[i].values[j]); 822 } 823 } 824} 825 826TEST_F(ComputeTest, complex_types_constant_uint8) 827{ 828 struct uint8 { 829 uint32_t s0; uint32_t s1; uint32_t s2; uint32_t s3; 830 uint32_t s4; uint32_t s5; uint32_t s6; uint32_t s7; 831 }; 832 const char *kernel_source = 833 "__kernel void main_test(__global uint8 *out, __constant uint8 *in)\n\ 834 {\n\ 835 uint id = get_global_id(0);\n\ 836 out[id].s01234567 = in[id].s01234567 * 2;\n\ 837 }\n"; 838 auto in = ShaderArg<struct uint8>({ { 1, 2, 3, 4, 5, 6, 7, 8 } }, 839 SHADER_ARG_INPUT); 840 auto out = ShaderArg<struct uint8>({ { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff } }, 841 SHADER_ARG_INOUT); 842 const struct uint8 expected[] = { 843 { 2, 4, 6, 8, 10, 12, 14, 16 } 844 }; 845 run_shader(kernel_source, out.size(), 1, 1, out, in); 846 for (int i = 0; i < out.size(); ++i) { 847 EXPECT_EQ(out[i].s0, expected[i].s0); 848 EXPECT_EQ(out[i].s1, expected[i].s1); 849 EXPECT_EQ(out[i].s2, expected[i].s2); 850 EXPECT_EQ(out[i].s3, expected[i].s3); 851 EXPECT_EQ(out[i].s4, expected[i].s4); 852 EXPECT_EQ(out[i].s5, expected[i].s5); 853 EXPECT_EQ(out[i].s6, expected[i].s6); 854 EXPECT_EQ(out[i].s7, expected[i].s7); 855 } 856} 857 858TEST_F(ComputeTest, DISABLED_complex_types_const_array) 859{ 860 /* DISABLED because current release versions of WARP either return 861 * rubbish from reads or crash: they are not prepared to handle 862 * non-float global constants */ 863 const char *kernel_source = 864 "__kernel void main_test(__global uint *output)\n\ 865 {\n\ 866 const uint foo[] = { 100, 101, 102, 103 };\n\ 867 output[get_global_id(0)] = foo[get_global_id(0) % 4];\n\ 868 }\n"; 869 auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef), 870 SHADER_ARG_OUTPUT); 871 const uint32_t expected[] = { 872 100, 101, 102, 103 873 }; 874 run_shader(kernel_source, output.size(), 1, 1, output); 875 for (int i = 0; i < output.size(); ++i) 876 EXPECT_EQ(output[i], expected[i]); 877} 878 879TEST_F(ComputeTest, mem_access_load_store_ordering) 880{ 881 const char *kernel_source = 882 "__kernel void main_test(__global uint *output)\n\ 883 {\n\ 884 uint foo[4];\n\ 885 foo[0] = 0x11111111;\n\ 886 foo[1] = 0x22222222;\n\ 887 foo[2] = 0x44444444;\n\ 888 foo[3] = 0x88888888;\n\ 889 foo[get_global_id(1)] -= 0x11111111; // foo[0] = 0 \n\ 890 foo[0] += get_global_id(0); // foo[0] = tid\n\ 891 foo[foo[get_global_id(1)]] = get_global_id(0); // foo[tid] = tid\n\ 892 output[get_global_id(0)] = foo[get_global_id(0)]; // output[tid] = tid\n\ 893 }\n"; 894 auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef), 895 SHADER_ARG_OUTPUT); 896 const uint16_t expected[] = { 897 0, 1, 2, 3 898 }; 899 run_shader(kernel_source, output.size(), 1, 1, output); 900 for (int i = 0; i < output.size(); ++i) 901 EXPECT_EQ(output[i], expected[i]); 902} 903 904TEST_F(ComputeTest, DISABLED_two_const_arrays) 905{ 906 /* DISABLED because current release versions of WARP either return 907 * rubbish from reads or crash: they are not prepared to handle 908 * non-float global constants */ 909 const char *kernel_source = 910 "__kernel void main_test(__global uint *output)\n\ 911 {\n\ 912 uint id = get_global_id(0);\n\ 913 uint foo[4] = {100, 101, 102, 103};\n\ 914 uint bar[4] = {1, 2, 3, 4};\n\ 915 output[id] = foo[id] * bar[id];\n\ 916 }\n"; 917 auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef), 918 SHADER_ARG_OUTPUT); 919 const uint32_t expected[] = { 920 100, 202, 306, 412 921 }; 922 run_shader(kernel_source, output.size(), 1, 1, output); 923 for (int i = 0; i < output.size(); ++i) 924 EXPECT_EQ(output[i], expected[i]); 925} 926 927TEST_F(ComputeTest, imod_pos) 928{ 929 const char *kernel_source = 930 "__kernel void main_test(__global int *inout)\n\ 931 {\n\ 932 inout[get_global_id(0)] = inout[get_global_id(0)] % 3;\n\ 933 }\n"; 934 auto inout = ShaderArg<int32_t>({ -4, -3, -2, -1, 0, 1, 2, 3, 4 }, 935 SHADER_ARG_INOUT); 936 const int32_t expected[] = { 937 -1, 0, -2, -1, 0, 1, 2, 0, 1 938 }; 939 run_shader(kernel_source, inout.size(), 1, 1, inout); 940 for (int i = 0; i < inout.size(); ++i) 941 EXPECT_EQ(inout[i], expected[i]); 942} 943 944TEST_F(ComputeTest, imod_neg) 945{ 946 const char *kernel_source = 947 "__kernel void main_test(__global int *inout)\n\ 948 {\n\ 949 inout[get_global_id(0)] = inout[get_global_id(0)] % -3;\n\ 950 }\n"; 951 auto inout = ShaderArg<int32_t>({ -4, -3, -2, -1, 0, 1, 2, 3, 4 }, 952 SHADER_ARG_INOUT); 953 const int32_t expected[] = { 954 -1, 0, -2, -1, 0, 1, 2, 0, 1 955 }; 956 run_shader(kernel_source, inout.size(), 1, 1, inout); 957 for (int i = 0; i < inout.size(); ++i) 958 EXPECT_EQ(inout[i], expected[i]); 959} 960 961TEST_F(ComputeTest, umod) 962{ 963 const char *kernel_source = 964 "__kernel void main_test(__global uint *inout)\n\ 965 {\n\ 966 inout[get_global_id(0)] = inout[get_global_id(0)] % 0xfffffffc;\n\ 967 }\n"; 968 auto inout = ShaderArg<uint32_t>({ 0xfffffffa, 0xfffffffb, 0xfffffffc, 0xfffffffd, 0xfffffffe }, 969 SHADER_ARG_INOUT); 970 const uint32_t expected[] = { 971 0xfffffffa, 0xfffffffb, 0, 1, 2 972 }; 973 run_shader(kernel_source, inout.size(), 1, 1, inout); 974 for (int i = 0; i < inout.size(); ++i) 975 EXPECT_EQ(inout[i], expected[i]); 976} 977 978TEST_F(ComputeTest, rotate) 979{ 980 const char *kernel_source = 981 "__kernel void main_test(__global uint *inout)\n\ 982 {\n\ 983 inout[get_global_id(0)] = rotate(inout[get_global_id(0)], (uint)get_global_id(0) * 4);\n\ 984 }\n"; 985 auto inout = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef), 986 SHADER_ARG_INOUT); 987 const uint32_t expected[] = { 988 0xdeadbeef, 0xeadbeefd, 0xadbeefde, 0xdbeefdea 989 }; 990 run_shader(kernel_source, inout.size(), 1, 1, inout); 991 for (int i = 0; i < inout.size(); ++i) 992 EXPECT_EQ(inout[i], expected[i]); 993} 994 995TEST_F(ComputeTest, popcount) 996{ 997 const char *kernel_source = 998 "__kernel void main_test(__global uint *inout)\n\ 999 {\n\ 1000 inout[get_global_id(0)] = popcount(inout[get_global_id(0)]);\n\ 1001 }\n"; 1002 auto inout = ShaderArg<uint32_t>({ 0, 0x1, 0x3, 0x101, 0x110011, ~0u }, 1003 SHADER_ARG_INOUT); 1004 const uint32_t expected[] = { 1005 0, 1, 2, 2, 4, 32 1006 }; 1007 run_shader(kernel_source, inout.size(), 1, 1, inout); 1008 for (int i = 0; i < inout.size(); ++i) 1009 EXPECT_EQ(inout[i], expected[i]); 1010} 1011 1012TEST_F(ComputeTest, hadd) 1013{ 1014 const char *kernel_source = 1015 "__kernel void main_test(__global uint *inout)\n\ 1016 {\n\ 1017 inout[get_global_id(0)] = hadd(inout[get_global_id(0)], 1u << 31);\n\ 1018 }\n"; 1019 auto inout = ShaderArg<uint32_t>({ 0, 1, 2, 3, 0xfffffffc, 0xfffffffd, 0xfffffffe, 0xffffffff }, 1020 SHADER_ARG_INOUT); 1021 const uint32_t expected[] = { 1022 (1u << 31) >> 1, 1023 ((1u << 31) + 1) >> 1, 1024 ((1u << 31) + 2) >> 1, 1025 ((1u << 31) + 3) >> 1, 1026 ((1ull << 31) + 0xfffffffc) >> 1, 1027 ((1ull << 31) + 0xfffffffd) >> 1, 1028 ((1ull << 31) + 0xfffffffe) >> 1, 1029 ((1ull << 31) + 0xffffffff) >> 1, 1030 }; 1031 run_shader(kernel_source, inout.size(), 1, 1, inout); 1032 for (int i = 0; i < inout.size(); ++i) 1033 EXPECT_EQ(inout[i], expected[i]); 1034} 1035 1036TEST_F(ComputeTest, rhadd) 1037{ 1038 const char *kernel_source = 1039 "__kernel void main_test(__global uint *inout)\n\ 1040 {\n\ 1041 inout[get_global_id(0)] = rhadd(inout[get_global_id(0)], 1u << 31);\n\ 1042 }\n"; 1043 auto inout = ShaderArg<uint32_t>({ 0, 1, 2, 3, 0xfffffffc, 0xfffffffd, 0xfffffffe, 0xffffffff }, 1044 SHADER_ARG_INOUT); 1045 const uint32_t expected[] = { 1046 ((1u << 31) + 1) >> 1, 1047 ((1u << 31) + 2) >> 1, 1048 ((1u << 31) + 3) >> 1, 1049 ((1u << 31) + 4) >> 1, 1050 ((1ull << 31) + 0xfffffffd) >> 1, 1051 ((1ull << 31) + 0xfffffffe) >> 1, 1052 ((1ull << 31) + 0xffffffff) >> 1, 1053 ((1ull << 31) + (1ull << 32)) >> 1, 1054 }; 1055 run_shader(kernel_source, inout.size(), 1, 1, inout); 1056 for (int i = 0; i < inout.size(); ++i) 1057 EXPECT_EQ(inout[i], expected[i]); 1058} 1059 1060TEST_F(ComputeTest, add_sat) 1061{ 1062 const char *kernel_source = 1063 "__kernel void main_test(__global uint *inout)\n\ 1064 {\n\ 1065 inout[get_global_id(0)] = add_sat(inout[get_global_id(0)], 2u);\n\ 1066 }\n"; 1067 auto inout = ShaderArg<uint32_t>({ 0xffffffff - 3, 0xffffffff - 2, 0xffffffff - 1, 0xffffffff }, 1068 SHADER_ARG_INOUT); 1069 const uint32_t expected[] = { 1070 0xffffffff - 1, 0xffffffff, 0xffffffff, 0xffffffff 1071 }; 1072 run_shader(kernel_source, inout.size(), 1, 1, inout); 1073 for (int i = 0; i < inout.size(); ++i) 1074 EXPECT_EQ(inout[i], expected[i]); 1075} 1076 1077TEST_F(ComputeTest, sub_sat) 1078{ 1079 const char *kernel_source = 1080 "__kernel void main_test(__global uint *inout)\n\ 1081 {\n\ 1082 inout[get_global_id(0)] = sub_sat(inout[get_global_id(0)], 2u);\n\ 1083 }\n"; 1084 auto inout = ShaderArg<uint32_t>({ 0, 1, 2, 3 }, SHADER_ARG_INOUT); 1085 const uint32_t expected[] = { 1086 0, 0, 0, 1 1087 }; 1088 run_shader(kernel_source, inout.size(), 1, 1, inout); 1089 for (int i = 0; i < inout.size(); ++i) 1090 EXPECT_EQ(inout[i], expected[i]); 1091} 1092 1093TEST_F(ComputeTest, mul_hi) 1094{ 1095 const char *kernel_source = 1096 "__kernel void main_test(__global uint *inout)\n\ 1097 {\n\ 1098 inout[get_global_id(0)] = mul_hi(inout[get_global_id(0)], 1u << 31);\n\ 1099 }\n"; 1100 auto inout = ShaderArg<uint32_t>({ 0, 1, 2, 3, (1u << 31) }, SHADER_ARG_INOUT); 1101 const uint32_t expected[] = { 1102 0, 0, 1, 1, (1u << 30) 1103 }; 1104 run_shader(kernel_source, inout.size(), 1, 1, inout); 1105 for (int i = 0; i < inout.size(); ++i) 1106 EXPECT_EQ(inout[i], expected[i]); 1107} 1108 1109TEST_F(ComputeTest, ldexp_x) 1110{ 1111 const char *kernel_source = 1112 "__kernel void main_test(__global float *inout)\n\ 1113 {\n\ 1114 inout[get_global_id(0)] = ldexp(inout[get_global_id(0)], 5);\n\ 1115 }\n"; 1116 auto inout = ShaderArg<float>({ 0.0f, 0.5f, 1.0f, 2.0f }, SHADER_ARG_INOUT); 1117 const float expected[] = { 1118 ldexp(0.0f, 5), ldexp(0.5f, 5), ldexp(1.0f, 5), ldexp(2.0f, 5) 1119 }; 1120 run_shader(kernel_source, inout.size(), 1, 1, inout); 1121 for (int i = 0; i < inout.size(); ++i) 1122 EXPECT_FLOAT_EQ(inout[i], expected[i]); 1123} 1124 1125TEST_F(ComputeTest, ldexp_y) 1126{ 1127 const char *kernel_source = 1128 "__kernel void main_test(__global float *inout)\n\ 1129 {\n\ 1130 inout[get_global_id(0)] = ldexp(inout[get_global_id(0)], get_global_id(0));\n\ 1131 }\n"; 1132 auto inout = ShaderArg<float>({ 0.25f, 0.5f, 0.75f, 1.0f }, SHADER_ARG_INOUT); 1133 const float expected[] = { 1134 ldexp(0.25f, 0), ldexp(0.5f, 1), ldexp(0.75f, 2), ldexp(1.0f, 3) 1135 }; 1136 run_shader(kernel_source, inout.size(), 1, 1, inout); 1137 for (int i = 0; i < inout.size(); ++i) 1138 EXPECT_FLOAT_EQ(inout[i], expected[i]); 1139} 1140 1141TEST_F(ComputeTest, frexp_ret) 1142{ 1143 const char *kernel_source = 1144 "__kernel void main_test(__global float *inout)\n\ 1145 {\n\ 1146 int exp;\n\ 1147 inout[get_global_id(0)] = frexp(inout[get_global_id(0)], &exp);\n\ 1148 }\n"; 1149 auto inout = ShaderArg<float>({ 0.0f, 0.5f, 1.0f, 3.0f }, SHADER_ARG_INOUT); 1150 const float expected[] = { 1151 0.0f, 0.5f, 0.5f, 0.75f 1152 }; 1153 run_shader(kernel_source, inout.size(), 1, 1, inout); 1154 for (int i = 0; i < inout.size(); ++i) 1155 EXPECT_FLOAT_EQ(inout[i], expected[i]); 1156} 1157 1158TEST_F(ComputeTest, frexp_exp) 1159{ 1160 const char *kernel_source = 1161 "__kernel void main_test(__global float *inout)\n\ 1162 {\n\ 1163 int exp;\n\ 1164 frexp(inout[get_global_id(0)], &exp);\n\ 1165 inout[get_global_id(0)] = (float)exp;\n\ 1166 }\n"; 1167 auto inout = ShaderArg<float>({ 0.0f, 0.5f, 1.0f, 3.0f }, SHADER_ARG_INOUT); 1168 const float expected[] = { 1169 0.0f, 0.0f, 1.0f, 2.0f 1170 }; 1171 run_shader(kernel_source, inout.size(), 1, 1, inout); 1172 for (int i = 0; i < inout.size(); ++i) 1173 EXPECT_FLOAT_EQ(inout[i], expected[i]); 1174} 1175 1176TEST_F(ComputeTest, clz) 1177{ 1178 const char *kernel_source = 1179 "__kernel void main_test(__global uint *inout)\n\ 1180 {\n\ 1181 inout[get_global_id(0)] = clz(inout[get_global_id(0)]);\n\ 1182 }\n"; 1183 auto inout = ShaderArg<uint32_t>({ 0, 1, 0xffff, (1u << 30), (1u << 31) }, SHADER_ARG_INOUT); 1184 const uint32_t expected[] = { 1185 32, 31, 16, 1, 0 1186 }; 1187 run_shader(kernel_source, inout.size(), 1, 1, inout); 1188 for (int i = 0; i < inout.size(); ++i) 1189 EXPECT_FLOAT_EQ(inout[i], expected[i]); 1190} 1191 1192TEST_F(ComputeTest, sin) 1193{ 1194 struct sin_vals { float in; float clc; float native; }; 1195 const char *kernel_source = 1196 "struct sin_vals { float in; float clc; float native; };\n\ 1197 __kernel void main_test(__global struct sin_vals *inout)\n\ 1198 {\n\ 1199 inout[get_global_id(0)].clc = sin(inout[get_global_id(0)].in);\n\ 1200 inout[get_global_id(0)].native = native_sin(inout[get_global_id(0)].in);\n\ 1201 }\n"; 1202 const vector<sin_vals> input = { 1203 { 0.0f, 0.0f, 0.0f }, 1204 { 1.0f, 0.0f, 0.0f }, 1205 { 2.0f, 0.0f, 0.0f }, 1206 { 3.0f, 0.0f, 0.0f }, 1207 }; 1208 auto inout = ShaderArg<sin_vals>(input, SHADER_ARG_INOUT); 1209 const struct sin_vals expected[] = { 1210 { 0.0f, 0.0f, 0.0f }, 1211 { 1.0f, sin(1.0f), sin(1.0f) }, 1212 { 2.0f, sin(2.0f), sin(2.0f) }, 1213 { 3.0f, sin(3.0f), sin(3.0f) }, 1214 }; 1215 run_shader(kernel_source, inout.size(), 1, 1, inout); 1216 for (int i = 0; i < inout.size(); ++i) { 1217 EXPECT_FLOAT_EQ(inout[i].in, inout[i].in); 1218 EXPECT_FLOAT_EQ(inout[i].clc, inout[i].clc); 1219 EXPECT_NEAR(inout[i].clc, inout[i].native, 0.008f); // range from DXIL spec 1220 } 1221} 1222 1223TEST_F(ComputeTest, DISABLED_cosh) 1224{ 1225 /* Disabled because of WARP failures, where we fetch incorrect results when 1226 * sourcing from non-float ICBs */ 1227 const char *kernel_source = 1228 "__kernel void main_test(__global float *inout)\n\ 1229 {\n\ 1230 inout[get_global_id(0)] = cosh(inout[get_global_id(0)]);\n\ 1231 }\n"; 1232 auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT); 1233 const float expected[] = { 1234 cosh(0.0f), cosh(1.0f), cosh(2.0f), cosh(3.0f) 1235 }; 1236 run_shader(kernel_source, inout.size(), 1, 1, inout); 1237 for (int i = 0; i < inout.size(); ++i) 1238 EXPECT_FLOAT_EQ(inout[i], expected[i]); 1239} 1240 1241TEST_F(ComputeTest, exp) 1242{ 1243 const char *kernel_source = 1244 "__kernel void main_test(__global float *inout)\n\ 1245 {\n\ 1246 inout[get_global_id(0)] = native_exp(inout[get_global_id(0)]);\n\ 1247 }\n"; 1248 auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT); 1249 const float expected[] = { 1250 exp(0.0f), exp(1.0f), exp(2.0f), exp(3.0f) 1251 }; 1252 run_shader(kernel_source, inout.size(), 1, 1, inout); 1253 for (int i = 0; i < inout.size(); ++i) 1254 EXPECT_FLOAT_EQ(inout[i], expected[i]); 1255} 1256 1257TEST_F(ComputeTest, exp10) 1258{ 1259 const char *kernel_source = 1260 "__kernel void main_test(__global float *inout)\n\ 1261 {\n\ 1262 inout[get_global_id(0)] = native_exp10(inout[get_global_id(0)]);\n\ 1263 }\n"; 1264 auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT); 1265 const float expected[] = { 1266 pow(10.0f, 0.0f), pow(10.0f, 1.0f), pow(10.0f, 2.0f), pow(10.0f, 3.0f) 1267 }; 1268 run_shader(kernel_source, inout.size(), 1, 1, inout); 1269 for (int i = 0; i < inout.size(); ++i) 1270 EXPECT_FLOAT_EQ(inout[i], expected[i]); 1271} 1272 1273TEST_F(ComputeTest, exp2) 1274{ 1275 const char *kernel_source = 1276 "__kernel void main_test(__global float *inout)\n\ 1277 {\n\ 1278 inout[get_global_id(0)] = native_exp2(inout[get_global_id(0)]);\n\ 1279 }\n"; 1280 auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT); 1281 const float expected[] = { 1282 pow(2.0f, 0.0f), pow(2.0f, 1.0f), pow(2.0f, 2.0f), pow(2.0f, 3.0f) 1283 }; 1284 run_shader(kernel_source, inout.size(), 1, 1, inout); 1285 for (int i = 0; i < inout.size(); ++i) 1286 EXPECT_FLOAT_EQ(inout[i], expected[i]); 1287} 1288 1289TEST_F(ComputeTest, log) 1290{ 1291 const char *kernel_source = 1292 "__kernel void main_test(__global float *inout)\n\ 1293 {\n\ 1294 inout[get_global_id(0)] = native_log(inout[get_global_id(0)]);\n\ 1295 }\n"; 1296 auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT); 1297 const float expected[] = { 1298 log(0.0f), log(1.0f), log(2.0f), log(3.0f) 1299 }; 1300 run_shader(kernel_source, inout.size(), 1, 1, inout); 1301 for (int i = 0; i < inout.size(); ++i) 1302 EXPECT_FLOAT_EQ(inout[i], expected[i]); 1303} 1304 1305TEST_F(ComputeTest, log10) 1306{ 1307 const char *kernel_source = 1308 "__kernel void main_test(__global float *inout)\n\ 1309 {\n\ 1310 inout[get_global_id(0)] = native_log10(inout[get_global_id(0)]);\n\ 1311 }\n"; 1312 auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT); 1313 const float expected[] = { 1314 log10(0.0f), log10(1.0f), log10(2.0f), log10(3.0f) 1315 }; 1316 run_shader(kernel_source, inout.size(), 1, 1, inout); 1317 for (int i = 0; i < inout.size(); ++i) 1318 EXPECT_FLOAT_EQ(inout[i], expected[i]); 1319} 1320 1321TEST_F(ComputeTest, log2) 1322{ 1323 const char *kernel_source = 1324 "__kernel void main_test(__global float *inout)\n\ 1325 {\n\ 1326 inout[get_global_id(0)] = native_log2(inout[get_global_id(0)]);\n\ 1327 }\n"; 1328 auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT); 1329 const float expected[] = { 1330 log(0.0f) / log(2), log(1.0f) / log(2), log(2.0f) / log(2), log(3.0f) / log(2) 1331 }; 1332 run_shader(kernel_source, inout.size(), 1, 1, inout); 1333 for (int i = 0; i < inout.size(); ++i) 1334 EXPECT_FLOAT_EQ(inout[i], expected[i]); 1335} 1336 1337TEST_F(ComputeTest, rint) 1338{ 1339 const char *kernel_source = 1340 "__kernel void main_test(__global float *inout)\n\ 1341 {\n\ 1342 inout[get_global_id(0)] = rint(inout[get_global_id(0)]);\n\ 1343 }\n"; 1344 1345 auto inout = ShaderArg<float>({ 0.5f, 1.5f, -0.5f, -1.5f, 1.4f }, SHADER_ARG_INOUT); 1346 const float expected[] = { 1347 0.0f, 2.0f, 0.0f, -2.0f, 1.0f, 1348 }; 1349 run_shader(kernel_source, inout.size(), 1, 1, inout); 1350 for (int i = 0; i < inout.size(); ++i) 1351 EXPECT_FLOAT_EQ(inout[i], expected[i]); 1352} 1353 1354TEST_F(ComputeTest, round) 1355{ 1356 const char *kernel_source = 1357 "__kernel void main_test(__global float *inout)\n\ 1358 {\n\ 1359 inout[get_global_id(0)] = round(inout[get_global_id(0)]);\n\ 1360 }\n"; 1361 auto inout = ShaderArg<float>({ 0, 0.3f, -0.3f, 0.5f, -0.5f, 1.1f, -1.1f }, 1362 SHADER_ARG_INOUT); 1363 const float expected[] = { 1364 0.0f, 0.0f, -0.0f, 1.0f, -1.0f, 1.0f, -1.0f 1365 }; 1366 run_shader(kernel_source, inout.size(), 1, 1, inout); 1367 for (int i = 0; i < inout.size(); ++i) 1368 EXPECT_FLOAT_EQ(inout[i], expected[i]); 1369} 1370 1371TEST_F(ComputeTest, arg_by_val) 1372{ 1373 const char *kernel_source = 1374 "__kernel void main_test(__global float *inout, float mul)\n\ 1375 {\n\ 1376 inout[get_global_id(0)] = inout[get_global_id(0)] * mul;\n\ 1377 }\n"; 1378 auto inout = ShaderArg<float>({ 0, 0.3f, -0.3f, 0.5f, -0.5f, 1.1f, -1.1f }, 1379 SHADER_ARG_INOUT); 1380 auto mul = ShaderArg<float>(10.0f, SHADER_ARG_INPUT); 1381 const float expected[] = { 1382 0.0f, 3.0f, -3.0f, 5.0f, -5.0f, 11.0f, -11.0f 1383 }; 1384 run_shader(kernel_source, inout.size(), 1, 1, inout, mul); 1385 for (int i = 0; i < inout.size(); ++i) 1386 EXPECT_FLOAT_EQ(inout[i], expected[i]); 1387} 1388 1389TEST_F(ComputeTest, uint8_by_val) 1390{ 1391 struct uint8 { 1392 uint32_t s0; uint32_t s1; uint32_t s2; uint32_t s3; 1393 uint32_t s4; uint32_t s5; uint32_t s6; uint32_t s7; 1394 }; 1395 const char *kernel_source = 1396 "__kernel void main_test(__global uint *out, uint8 val)\n\ 1397 {\n\ 1398 out[get_global_id(0)] = val.s0 + val.s1 + val.s2 + val.s3 +\n\ 1399 val.s4 + val.s5 + val.s6 + val.s7;\n\ 1400 }\n"; 1401 auto out = ShaderArg<uint32_t>({ 0 }, SHADER_ARG_OUTPUT); 1402 auto val = ShaderArg<struct uint8>({ {0, 1, 2, 3, 4, 5, 6, 7 }}, SHADER_ARG_INPUT); 1403 const uint32_t expected[] = { 0 + 1 + 2 + 3 + 4 + 5 + 6 + 7 }; 1404 run_shader(kernel_source, out.size(), 1, 1, out, val); 1405 for (int i = 0; i < out.size(); ++i) 1406 EXPECT_EQ(out[i], expected[i]); 1407} 1408 1409TEST_F(ComputeTest, link) 1410{ 1411 const char *foo_src = 1412 "float foo(float in)\n\ 1413 {\n\ 1414 return in * in;\n\ 1415 }\n"; 1416 const char *kernel_source = 1417 "float foo(float in);\n\ 1418 __kernel void main_test(__global float *inout)\n\ 1419 {\n\ 1420 inout[get_global_id(0)] = foo(inout[get_global_id(0)]);\n\ 1421 }\n"; 1422 std::vector<const char *> srcs = { foo_src, kernel_source }; 1423 auto inout = ShaderArg<float>({ 2.0f }, SHADER_ARG_INOUT); 1424 const float expected[] = { 1425 4.0f, 1426 }; 1427 run_shader(srcs, inout.size(), 1, 1, inout); 1428 for (int i = 0; i < inout.size(); ++i) 1429 EXPECT_EQ(inout[i], expected[i]); 1430} 1431 1432TEST_F(ComputeTest, link_library) 1433{ 1434 const char *bar_src = 1435 "float bar(float in)\n\ 1436 {\n\ 1437 return in * 5;\n\ 1438 }\n"; 1439 const char *foo_src = 1440 "float bar(float in);\n\ 1441 float foo(float in)\n\ 1442 {\n\ 1443 return in * bar(in);\n\ 1444 }\n"; 1445 const char *kernel_source = 1446 "float foo(float in);\n\ 1447 __kernel void main_test(__global float *inout)\n\ 1448 {\n\ 1449 inout[get_global_id(0)] = foo(inout[get_global_id(0)]);\n\ 1450 }\n"; 1451 std::vector<Shader> libraries = { 1452 compile({ bar_src, kernel_source }, {}, true), 1453 compile({ foo_src }, {}, true) 1454 }; 1455 Shader exe = link(libraries); 1456 auto inout = ShaderArg<float>({ 2.0f }, SHADER_ARG_INOUT); 1457 const float expected[] = { 1458 20.0f, 1459 }; 1460 run_shader(exe, { (unsigned)inout.size(), 1, 1 }, inout); 1461 for (int i = 0; i < inout.size(); ++i) 1462 EXPECT_EQ(inout[i], expected[i]); 1463} 1464 1465TEST_F(ComputeTest, localvar) 1466{ 1467 const char *kernel_source = 1468 "__kernel __attribute__((reqd_work_group_size(2, 1, 1)))\n\ 1469 void main_test(__global float *inout)\n\ 1470 {\n\ 1471 __local float2 tmp[2];\n\ 1472 tmp[get_local_id(0)].x = inout[get_global_id(0)] + 1;\n\ 1473 tmp[get_local_id(0)].y = inout[get_global_id(0)] - 1;\n\ 1474 barrier(CLK_LOCAL_MEM_FENCE);\n\ 1475 inout[get_global_id(0)] = tmp[get_local_id(0) % 2].x * tmp[(get_local_id(0) + 1) % 2].y;\n\ 1476 }\n"; 1477 1478 auto inout = ShaderArg<float>({ 2.0f, 4.0f }, SHADER_ARG_INOUT); 1479 const float expected[] = { 1480 9.0f, 5.0f 1481 }; 1482 run_shader(kernel_source, inout.size(), 1, 1, inout); 1483 for (int i = 0; i < inout.size(); ++i) 1484 EXPECT_EQ(inout[i], expected[i]); 1485} 1486 1487TEST_F(ComputeTest, localvar_uchar2) 1488{ 1489 const char *kernel_source = 1490 "__attribute__((reqd_work_group_size(2, 1, 1)))\n\ 1491 __kernel void main_test(__global uchar *inout)\n\ 1492 {\n\ 1493 __local uchar2 tmp[2];\n\ 1494 tmp[get_local_id(0)].x = inout[get_global_id(0)] + 1;\n\ 1495 tmp[get_local_id(0)].y = inout[get_global_id(0)] - 1;\n\ 1496 barrier(CLK_LOCAL_MEM_FENCE);\n\ 1497 inout[get_global_id(0)] = tmp[get_local_id(0) % 2].x * tmp[(get_local_id(0) + 1) % 2].y;\n\ 1498 }\n"; 1499 1500 auto inout = ShaderArg<uint8_t>({ 2, 4 }, SHADER_ARG_INOUT); 1501 const uint8_t expected[] = { 9, 5 }; 1502 run_shader(kernel_source, inout.size(), 1, 1, inout); 1503 for (int i = 0; i < inout.size(); ++i) 1504 EXPECT_EQ(inout[i], expected[i]); 1505} 1506 1507TEST_F(ComputeTest, work_group_size_hint) 1508{ 1509 const char *kernel_source = 1510 "__attribute__((work_group_size_hint(2, 1, 1)))\n\ 1511 __kernel void main_test(__global uint *output)\n\ 1512 {\n\ 1513 output[get_global_id(0)] = get_local_id(0);\n\ 1514 }\n"; 1515 auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef), 1516 SHADER_ARG_OUTPUT); 1517 const uint32_t expected[] = { 1518 0, 1, 2, 3 1519 }; 1520 run_shader(kernel_source, output.size(), 1, 1, output); 1521 for (int i = 0; i < output.size(); ++i) 1522 EXPECT_EQ(output[i], expected[i]); 1523} 1524 1525TEST_F(ComputeTest, reqd_work_group_size) 1526{ 1527 const char *kernel_source = 1528 "__attribute__((reqd_work_group_size(2, 1, 1)))\n\ 1529 __kernel void main_test(__global uint *output)\n\ 1530 {\n\ 1531 output[get_global_id(0)] = get_local_id(0);\n\ 1532 }\n"; 1533 auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef), 1534 SHADER_ARG_OUTPUT); 1535 const uint32_t expected[] = { 1536 0, 1, 0, 1 1537 }; 1538 run_shader(kernel_source, output.size(), 1, 1, output); 1539 for (int i = 0; i < output.size(); ++i) 1540 EXPECT_EQ(output[i], expected[i]); 1541} 1542 1543TEST_F(ComputeTest, image) 1544{ 1545 const char* kernel_source = 1546 "__kernel void main_test(read_only image2d_t input, write_only image2d_t output)\n\ 1547 {\n\ 1548 int2 coords = (int2)(get_global_id(0), get_global_id(1));\n\ 1549 write_imagef(output, coords, read_imagef(input, coords));\n\ 1550 }\n"; 1551 Shader shader = compile(std::vector<const char*>({ kernel_source })); 1552 validate(shader); 1553} 1554 1555TEST_F(ComputeTest, image_two_reads) 1556{ 1557 const char* kernel_source = 1558 "__kernel void main_test(image2d_t image, int is_float, __global float* output)\n\ 1559 {\n\ 1560 if (is_float)\n\ 1561 output[get_global_id(0)] = read_imagef(image, (int2)(0, 0)).x;\n\ 1562 else \n\ 1563 output[get_global_id(0)] = (float)read_imagei(image, (int2)(0, 0)).x;\n\ 1564 }\n"; 1565 Shader shader = compile(std::vector<const char*>({ kernel_source })); 1566 validate(shader); 1567} 1568 1569TEST_F(ComputeTest, image_read_write) 1570{ 1571 const char *kernel_source = 1572 R"(__kernel void main_test(read_write image2d_t image) 1573 { 1574 int2 coords = (int2)(get_global_id(0), get_global_id(1)); 1575 write_imagef(image, coords, read_imagef(image, coords) + (float4)(1.0f, 1.0f, 1.0f, 1.0f)); 1576 })"; 1577 Shader shader = compile(std::vector<const char*>({ kernel_source }), { "-cl-std=cl3.0" }); 1578 validate(shader); 1579} 1580 1581TEST_F(ComputeTest, sampler) 1582{ 1583 const char* kernel_source = 1584 "__kernel void main_test(image2d_t image, sampler_t sampler, __global float* output)\n\ 1585 {\n\ 1586 output[get_global_id(0)] = read_imagef(image, sampler, (int2)(0, 0)).x;\n\ 1587 }\n"; 1588 Shader shader = compile(std::vector<const char*>({ kernel_source })); 1589 validate(shader); 1590} 1591 1592TEST_F(ComputeTest, image_dims) 1593{ 1594 const char* kernel_source = 1595 "__kernel void main_test(image2d_t roimage, write_only image2d_t woimage, __global uint* output)\n\ 1596 {\n\ 1597 output[get_global_id(0)] = get_image_width(roimage);\n\ 1598 output[get_global_id(0) + 1] = get_image_width(woimage);\n\ 1599 }\n"; 1600 Shader shader = compile(std::vector<const char*>({ kernel_source })); 1601 validate(shader); 1602} 1603 1604TEST_F(ComputeTest, image_format) 1605{ 1606 const char* kernel_source = 1607 "__kernel void main_test(image2d_t roimage, write_only image2d_t woimage, __global uint* output)\n\ 1608 {\n\ 1609 output[get_global_id(0)] = get_image_channel_data_type(roimage);\n\ 1610 output[get_global_id(0) + 1] = get_image_channel_order(woimage);\n\ 1611 }\n"; 1612 Shader shader = compile(std::vector<const char*>({ kernel_source })); 1613 validate(shader); 1614} 1615 1616TEST_F(ComputeTest, image1d_buffer_t) 1617{ 1618 const char* kernel_source = 1619 "__kernel void main_test(read_only image1d_buffer_t input, write_only image1d_buffer_t output)\n\ 1620 {\n\ 1621 write_imageui(output, get_global_id(0), read_imageui(input, get_global_id(0)));\n\ 1622 }\n"; 1623 Shader shader = compile(std::vector<const char*>({ kernel_source })); 1624 validate(shader); 1625} 1626 1627TEST_F(ComputeTest, local_ptr) 1628{ 1629 struct uint2 { uint32_t x, y; }; 1630 const char *kernel_source = 1631 "__kernel void main_test(__global uint *inout, __local uint2 *tmp)\n\ 1632 {\n\ 1633 tmp[get_local_id(0)].x = inout[get_global_id(0)] + 1;\n\ 1634 tmp[get_local_id(0)].y = inout[get_global_id(0)] - 1;\n\ 1635 barrier(CLK_LOCAL_MEM_FENCE);\n\ 1636 inout[get_global_id(0)] = tmp[get_local_id(0) % 2].x * tmp[(get_local_id(0) + 1) % 2].y;\n\ 1637 }\n"; 1638 auto inout = ShaderArg<uint32_t>({ 2, 4 }, SHADER_ARG_INOUT); 1639 auto tmp = ShaderArg<struct uint2>(std::vector<struct uint2>(4096), SHADER_ARG_INPUT); 1640 const uint8_t expected[] = { 9, 5 }; 1641 run_shader(kernel_source, inout.size(), 1, 1, inout, tmp); 1642 for (int i = 0; i < inout.size(); ++i) 1643 EXPECT_EQ(inout[i], expected[i]); 1644} 1645 1646TEST_F(ComputeTest, two_local_ptrs) 1647{ 1648 struct uint2 { uint32_t x, y; }; 1649 const char *kernel_source = 1650 "__kernel void main_test(__global uint *inout, __local uint2 *tmp, __local uint *tmp2)\n\ 1651 {\n\ 1652 tmp[get_local_id(0)].x = inout[get_global_id(0)] + 1;\n\ 1653 tmp[get_local_id(0)].y = inout[get_global_id(0)] - 1;\n\ 1654 tmp2[get_local_id(0)] = get_global_id(0);\n\ 1655 barrier(CLK_LOCAL_MEM_FENCE);\n\ 1656 inout[get_global_id(0)] = tmp[get_local_id(0) % 2].x * tmp[(get_local_id(0) + 1) % 2].y + tmp2[get_local_id(0) % 2];\n\ 1657 }\n"; 1658 auto inout = ShaderArg<uint32_t>({ 2, 4 }, SHADER_ARG_INOUT); 1659 auto tmp = ShaderArg<struct uint2>(std::vector<struct uint2>(1024), SHADER_ARG_INPUT); 1660 auto tmp2 = ShaderArg<uint32_t>(std::vector<uint32_t>(1024), SHADER_ARG_INPUT); 1661 const uint8_t expected[] = { 9, 6 }; 1662 run_shader(kernel_source, inout.size(), 1, 1, inout, tmp, tmp2); 1663 for (int i = 0; i < inout.size(); ++i) 1664 EXPECT_EQ(inout[i], expected[i]); 1665} 1666 1667TEST_F(ComputeTest, int8_to_float) 1668{ 1669 const char *kernel_source = 1670 "__kernel void main_test(__global char* in, __global float* out)\n\ 1671 {\n\ 1672 uint pos = get_global_id(0);\n\ 1673 out[pos] = in[pos] / 100.0f;\n\ 1674 }"; 1675 auto in = ShaderArg<char>({ 10, 20, 30, 40 }, SHADER_ARG_INPUT); 1676 auto out = ShaderArg<float>(std::vector<float>(4, std::numeric_limits<float>::infinity()), SHADER_ARG_OUTPUT); 1677 const float expected[] = { 0.1f, 0.2f, 0.3f, 0.4f }; 1678 run_shader(kernel_source, in.size(), 1, 1, in, out); 1679 for (int i = 0; i < in.size(); ++i) 1680 EXPECT_FLOAT_EQ(out[i], expected[i]); 1681} 1682 1683TEST_F(ComputeTest, vec_hint_float4) 1684{ 1685 const char *kernel_source = 1686 "__kernel __attribute__((vec_type_hint(float4))) void main_test(__global float *inout)\n\ 1687 {\n\ 1688 inout[get_global_id(0)] *= inout[get_global_id(1)];\n\ 1689 }"; 1690 Shader shader = compile({ kernel_source }); 1691 EXPECT_EQ(shader.metadata->kernels[0].vec_hint_size, 4); 1692 EXPECT_EQ(shader.metadata->kernels[0].vec_hint_type, CLC_VEC_HINT_TYPE_FLOAT); 1693} 1694 1695TEST_F(ComputeTest, vec_hint_uchar2) 1696{ 1697 const char *kernel_source = 1698 "__kernel __attribute__((vec_type_hint(uchar2))) void main_test(__global float *inout)\n\ 1699 {\n\ 1700 inout[get_global_id(0)] *= inout[get_global_id(1)];\n\ 1701 }"; 1702 Shader shader = compile({ kernel_source }); 1703 EXPECT_EQ(shader.metadata->kernels[0].vec_hint_size, 2); 1704 EXPECT_EQ(shader.metadata->kernels[0].vec_hint_type, CLC_VEC_HINT_TYPE_CHAR); 1705} 1706 1707TEST_F(ComputeTest, vec_hint_none) 1708{ 1709 const char *kernel_source = 1710 "__kernel void main_test(__global float *inout)\n\ 1711 {\n\ 1712 inout[get_global_id(0)] *= inout[get_global_id(1)];\n\ 1713 }"; 1714 Shader shader = compile({ kernel_source }); 1715 EXPECT_EQ(shader.metadata->kernels[0].vec_hint_size, 0); 1716} 1717 1718TEST_F(ComputeTest, DISABLED_debug_layer_failure) 1719{ 1720 const char *kernel_source = 1721 "__kernel void main_test(__global float *inout, float mul)\n\ 1722 {\n\ 1723 inout[get_global_id(0)] = inout[get_global_id(0)] * mul;\n\ 1724 }\n"; 1725 auto inout = ShaderArg<float>({ 0, 0.3f, -0.3f, 0.5f, -0.5f, 1.1f, -1.1f }, 1726 SHADER_ARG_INOUT); 1727 auto mul = ShaderArg<float>(10.0f, SHADER_ARG_INPUT); 1728 const float expected[] = { 1729 0.0f, 3.0f, -3.0f, 5.0f, -5.0f, 11.0f, -11.0f 1730 }; 1731 ComPtr<ID3D12InfoQueue> info_queue; 1732 dev->QueryInterface(info_queue.ReleaseAndGetAddressOf()); 1733 if (!info_queue) { 1734 GTEST_SKIP() << "No info queue"; 1735 return; 1736 } 1737 1738 info_queue->AddApplicationMessage(D3D12_MESSAGE_SEVERITY_ERROR, "This should cause the test to fail"); 1739 run_shader(kernel_source, inout.size(), 1, 1, inout, mul); 1740 for (int i = 0; i < inout.size(); ++i) 1741 EXPECT_FLOAT_EQ(inout[i], expected[i]); 1742} 1743 1744TEST_F(ComputeTest, compiler_defines) 1745{ 1746 const char *kernel_source = 1747 "__kernel void main_test(__global int* out)\n\ 1748 {\n\ 1749 out[0] = OUT_VAL0;\n\ 1750 out[1] = __OPENCL_C_VERSION__;\n\ 1751 }"; 1752 auto out = ShaderArg<int>(std::vector<int>(2, 0), SHADER_ARG_OUTPUT); 1753 CompileArgs compile_args = { 1, 1, 1 }; 1754 compile_args.compiler_command_line = { "-DOUT_VAL0=5", "-cl-std=cl" }; 1755 std::vector<RawShaderArg *> raw_args = { &out }; 1756 run_shader({ kernel_source }, compile_args, out); 1757 EXPECT_EQ(out[0], 5); 1758 EXPECT_EQ(out[1], 100); 1759} 1760 1761/* There's a bug in WARP turning atomic_add(ptr, x) into 1762 * atomic_add(ptr, x * 4). Works fine on intel HW. 1763 */ 1764TEST_F(ComputeTest, DISABLED_global_atomic_add) 1765{ 1766 const char *kernel_source = 1767 "__kernel void main_test(__global int *inout, __global int *old)\n\ 1768 {\n\ 1769 old[get_global_id(0)] = atomic_add(inout + get_global_id(0), 3);\n\ 1770 }\n"; 1771 auto inout = ShaderArg<int32_t>({ 2, 4 }, SHADER_ARG_INOUT); 1772 auto old = ShaderArg<int32_t>(std::vector<int32_t>(2, 0xdeadbeef), SHADER_ARG_OUTPUT); 1773 const int32_t expected_inout[] = { 5, 7 }; 1774 const int32_t expected_old[] = { 2, 4 }; 1775 run_shader(kernel_source, inout.size(), 1, 1, inout, old); 1776 for (int i = 0; i < inout.size(); ++i) { 1777 EXPECT_EQ(inout[i], expected_inout[i]); 1778 EXPECT_EQ(old[i], expected_old[i]); 1779 } 1780} 1781 1782TEST_F(ComputeTest, global_atomic_imin) 1783{ 1784 const char *kernel_source = 1785 "__kernel void main_test(__global int *inout, __global int *old)\n\ 1786 {\n\ 1787 old[get_global_id(0)] = atomic_min(inout + get_global_id(0), 1);\n\ 1788 }\n"; 1789 auto inout = ShaderArg<int32_t>({ 0, 2, -1 }, SHADER_ARG_INOUT); 1790 auto old = ShaderArg<int32_t>(std::vector<int32_t>(3, 0xdeadbeef), SHADER_ARG_OUTPUT); 1791 const int32_t expected_inout[] = { 0, 1, -1 }; 1792 const int32_t expected_old[] = { 0, 2, -1 }; 1793 run_shader(kernel_source, inout.size(), 1, 1, inout, old); 1794 for (int i = 0; i < inout.size(); ++i) { 1795 EXPECT_EQ(inout[i], expected_inout[i]); 1796 EXPECT_EQ(old[i], expected_old[i]); 1797 } 1798} 1799 1800TEST_F(ComputeTest, global_atomic_and_or) 1801{ 1802 const char *kernel_source = 1803 "__attribute__((reqd_work_group_size(3, 1, 1)))\n\ 1804 __kernel void main_test(__global int *inout)\n\ 1805 {\n\ 1806 atomic_and(inout, ~(1 << get_global_id(0)));\n\ 1807 atomic_or(inout, (1 << (get_global_id(0) + 4)));\n\ 1808 }\n"; 1809 auto inout = ShaderArg<int32_t>(0xf, SHADER_ARG_INOUT); 1810 const int32_t expected[] = { 0x78 }; 1811 run_shader(kernel_source, 3, 1, 1, inout); 1812 for (int i = 0; i < inout.size(); ++i) 1813 EXPECT_EQ(inout[i], expected[i]); 1814} 1815 1816TEST_F(ComputeTest, global_atomic_cmpxchg) 1817{ 1818 const char *kernel_source = 1819 "__attribute__((reqd_work_group_size(2, 1, 1)))\n\ 1820 __kernel void main_test(__global int *inout)\n\ 1821 {\n\ 1822 while (atomic_cmpxchg(inout, get_global_id(0), get_global_id(0) + 1) != get_global_id(0))\n\ 1823 ;\n\ 1824 }\n"; 1825 auto inout = ShaderArg<int32_t>(0, SHADER_ARG_INOUT); 1826 const int32_t expected_inout[] = { 2 }; 1827 run_shader(kernel_source, 2, 1, 1, inout); 1828 for (int i = 0; i < inout.size(); ++i) 1829 EXPECT_EQ(inout[i], expected_inout[i]); 1830} 1831 1832TEST_F(ComputeTest, local_atomic_and_or) 1833{ 1834 const char *kernel_source = 1835 "__attribute__((reqd_work_group_size(2, 1, 1)))\n\ 1836 __kernel void main_test(__global ushort *inout)\n\ 1837 {\n\ 1838 __local ushort tmp;\n\ 1839 atomic_and(&tmp, ~(0xff << (get_global_id(0) * 8)));\n\ 1840 atomic_or(&tmp, inout[get_global_id(0)] << (get_global_id(0) * 8));\n\ 1841 barrier(CLK_LOCAL_MEM_FENCE);\n\ 1842 inout[get_global_id(0)] = tmp;\n\ 1843 }\n"; 1844 auto inout = ShaderArg<uint16_t>({ 2, 4 }, SHADER_ARG_INOUT); 1845 const uint16_t expected[] = { 0x402, 0x402 }; 1846 run_shader(kernel_source, inout.size(), 1, 1, inout); 1847 for (int i = 0; i < inout.size(); ++i) 1848 EXPECT_EQ(inout[i], expected[i]); 1849} 1850 1851TEST_F(ComputeTest, local_atomic_cmpxchg) 1852{ 1853 const char *kernel_source = 1854 "__attribute__((reqd_work_group_size(2, 1, 1)))\n\ 1855 __kernel void main_test(__global int *out)\n\ 1856 {\n\ 1857 __local uint tmp;\n\ 1858 tmp = 0;\n\ 1859 barrier(CLK_LOCAL_MEM_FENCE);\n\ 1860 while (atomic_cmpxchg(&tmp, get_global_id(0), get_global_id(0) + 1) != get_global_id(0))\n\ 1861 ;\n\ 1862 barrier(CLK_LOCAL_MEM_FENCE);\n\ 1863 out[0] = tmp;\n\ 1864 }\n"; 1865 1866 auto out = ShaderArg<uint32_t>(0xdeadbeef, SHADER_ARG_OUTPUT); 1867 const uint16_t expected[] = { 2 }; 1868 run_shader(kernel_source, 2, 1, 1, out); 1869 for (int i = 0; i < out.size(); ++i) 1870 EXPECT_EQ(out[i], expected[i]); 1871} 1872 1873TEST_F(ComputeTest, constant_sampler) 1874{ 1875 const char* kernel_source = 1876 "__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_LINEAR;\n\ 1877 __kernel void main_test(read_only image2d_t input, write_only image2d_t output)\n\ 1878 {\n\ 1879 int2 coordsi = (int2)(get_global_id(0), get_global_id(1));\n\ 1880 float2 coordsf = (float2)((float)coordsi.x / get_image_width(input), (float)coordsi.y / get_image_height(input));\n\ 1881 write_imagef(output, coordsi, \n\ 1882 read_imagef(input, sampler, coordsf) + \n\ 1883 read_imagef(input, sampler, coordsf + (float2)(0.1, 0.1)));\n\ 1884 }\n"; 1885 Shader shader = compile(std::vector<const char*>({ kernel_source })); 1886 validate(shader); 1887 EXPECT_EQ(shader.dxil->metadata.num_const_samplers, 1); 1888} 1889 1890TEST_F(ComputeTest, hi) 1891{ 1892 const char *kernel_source = R"( 1893 __kernel void main_test(__global char3 *srcA, __global char2 *dst) 1894 { 1895 int tid = get_global_id(0); 1896 1897 char2 tmp = srcA[tid].hi; 1898 dst[tid] = tmp; 1899 })"; 1900 Shader shader = compile(std::vector<const char*>({ kernel_source })); 1901 validate(shader); 1902} 1903 1904TEST_F(ComputeTest, system_values) 1905{ 1906 const char *kernel_source = 1907 "__kernel void main_test(__global uint* outputs)\n\ 1908 {\n\ 1909 outputs[0] = get_work_dim();\n\ 1910 outputs[1] = get_global_size(0);\n\ 1911 outputs[2] = get_local_size(0);\n\ 1912 outputs[3] = get_num_groups(0);\n\ 1913 outputs[4] = get_group_id(0);\n\ 1914 outputs[5] = get_global_offset(0);\n\ 1915 outputs[6] = get_global_id(0);\n\ 1916 }\n"; 1917 auto out = ShaderArg<uint32_t>(std::vector<uint32_t>(6, 0xdeadbeef), SHADER_ARG_OUTPUT); 1918 const uint16_t expected[] = { 3, 1, 1, 1, 0, 0, 0, }; 1919 CompileArgs args = { 1, 1, 1 }; 1920 Shader shader = compile({ kernel_source }); 1921 run_shader(shader, args, out); 1922 for (int i = 0; i < out.size(); ++i) 1923 EXPECT_EQ(out[i], expected[i]); 1924 1925 args.work_props.work_dim = 2; 1926 args.work_props.global_offset_x = 100; 1927 args.work_props.group_id_offset_x = 2; 1928 args.work_props.group_count_total_x = 5; 1929 const uint32_t expected_withoffsets[] = { 2, 5, 1, 5, 2, 100, 102 }; 1930 run_shader(shader, args, out); 1931 for (int i = 0; i < out.size(); ++i) 1932 EXPECT_EQ(out[i], expected_withoffsets[i]); 1933} 1934 1935TEST_F(ComputeTest, convert_round_sat) 1936{ 1937 const char *kernel_source = 1938 "__kernel void main_test(__global float *f, __global uchar *u)\n\ 1939 {\n\ 1940 uint idx = get_global_id(0);\n\ 1941 u[idx] = convert_uchar_sat_rtp(f[idx]);\n\ 1942 }\n"; 1943 auto f = ShaderArg<float>({ -1.0f, 1.1f, 20.0f, 255.5f }, SHADER_ARG_INPUT); 1944 auto u = ShaderArg<uint8_t>({ 255, 0, 0, 0 }, SHADER_ARG_OUTPUT); 1945 const uint8_t expected[] = { 1946 0, 2, 20, 255 1947 }; 1948 1949 run_shader(kernel_source, f.size(), 1, 1, f, u); 1950 for (int i = 0; i < u.size(); ++i) 1951 EXPECT_EQ(u[i], expected[i]); 1952} 1953 1954TEST_F(ComputeTest, convert_round_sat_vec) 1955{ 1956 const char *kernel_source = 1957 "__kernel void main_test(__global float16 *f, __global uchar16 *u)\n\ 1958 {\n\ 1959 uint idx = get_global_id(0);\n\ 1960 u[idx] = convert_uchar16_sat_rtp(f[idx]);\n\ 1961 }\n"; 1962 auto f = ShaderArg<float>({ 1963 -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f, 1964 -0.5f, 1.9f, 20.0f, 254.5f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f, 1965 0.0f, 1.3f, 20.0f, 255.1f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f, 1966 -0.0f, 1.5555f, 20.0f, 254.9f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f, 1967 }, SHADER_ARG_INPUT); 1968 auto u = ShaderArg<uint8_t>({ 1969 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 1970 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 1971 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 1972 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 1973 }, SHADER_ARG_OUTPUT); 1974 const uint8_t expected[] = { 1975 0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255, 1976 0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255, 1977 0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255, 1978 0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255, 1979 }; 1980 1981 run_shader(kernel_source, 4, 1, 1, f, u); 1982 for (int i = 0; i < u.size(); ++i) 1983 EXPECT_EQ(u[i], expected[i]); 1984} 1985 1986TEST_F(ComputeTest, convert_char2_uchar2) 1987{ 1988 const char *kernel_source = 1989 "__kernel void main_test( __global char2 *src, __global uchar2 *dest )\n\ 1990 {\n\ 1991 size_t i = get_global_id(0);\n\ 1992 dest[i] = convert_uchar2_sat( src[i] );\n\ 1993 }\n"; 1994 1995 auto c = ShaderArg<int8_t>({ -127, -4, 0, 4, 126, 127, 16, 32 }, SHADER_ARG_INPUT); 1996 auto u = ShaderArg<uint8_t>({ 99, 99, 99, 99, 99, 99, 99, 99 }, SHADER_ARG_OUTPUT); 1997 const uint8_t expected[] = { 0, 0, 0, 4, 126, 127, 16, 32 }; 1998 run_shader(kernel_source, 4, 1, 1, c, u); 1999 for (int i = 0; i < u.size(); i++) 2000 EXPECT_EQ(u[i], expected[i]); 2001} 2002 2003TEST_F(ComputeTest, async_copy) 2004{ 2005 const char *kernel_source = R"( 2006 __kernel void main_test( const __global char *src, __global char *dst, __local char *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem ) 2007 { 2008 int i; 2009 for(i=0; i<copiesPerWorkItem; i++) 2010 localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ] = (char)(char)0; 2011 barrier( CLK_LOCAL_MEM_FENCE ); 2012 event_t event; 2013 event = async_work_group_copy( (__local char*)localBuffer, (__global const char*)(src+copiesPerWorkgroup*get_group_id(0)), (size_t)copiesPerWorkgroup, 0 ); 2014 wait_group_events( 1, &event ); 2015 for(i=0; i<copiesPerWorkItem; i++) 2016 dst[ get_global_id( 0 )*copiesPerWorkItem+i ] = localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ]; 2017 })"; 2018 Shader shader = compile({ kernel_source }); 2019 validate(shader); 2020} 2021 2022TEST_F(ComputeTest, packed_struct_global) 2023{ 2024#pragma pack(push, 1) 2025 struct s { uint8_t uc; uint64_t ul; uint16_t us; }; 2026#pragma pack(pop) 2027 2028 const char *kernel_source = 2029 "struct __attribute__((packed)) s {uchar uc; ulong ul; ushort us; };\n\ 2030 __kernel void main_test(__global struct s *inout, global uint *size)\n\ 2031 {\n\ 2032 uint idx = get_global_id(0);\n\ 2033 inout[idx].uc = idx + 1;\n\ 2034 inout[idx].ul = ((ulong)(idx + 1 + 0xfbfcfdfe) << 32) | 0x12345678;\n\ 2035 inout[idx].us = ((ulong)(idx + 1 + 0xa0) << 8) | 0x12;\n\ 2036 *size = sizeof(struct s);\n\ 2037 }\n"; 2038 auto inout = ShaderArg<struct s>({0, 0, 0}, SHADER_ARG_OUTPUT); 2039 auto size = ShaderArg<uint32_t>(0, SHADER_ARG_OUTPUT); 2040 const struct s expected[] = { 2041 { 1, 0xfbfcfdff12345678, 0xa112 } 2042 }; 2043 2044 run_shader(kernel_source, inout.size(), 1, 1, inout, size); 2045 for (int i = 0; i < inout.size(); ++i) { 2046 EXPECT_EQ(inout[i].uc, expected[i].uc); 2047 EXPECT_EQ(inout[i].ul, expected[i].ul); 2048 EXPECT_EQ(inout[i].us, expected[i].us); 2049 } 2050 EXPECT_EQ(size, sizeof(struct s)); 2051} 2052 2053TEST_F(ComputeTest, packed_struct_arg) 2054{ 2055#pragma pack(push, 1) 2056 struct s { uint8_t uc; uint64_t ul; uint16_t us; }; 2057#pragma pack(pop) 2058 2059 const char *kernel_source = 2060 "struct __attribute__((packed)) s {uchar uc; ulong ul; ushort us; };\n\ 2061 __kernel void main_test(__global struct s *out, struct s in)\n\ 2062 {\n\ 2063 uint idx = get_global_id(0);\n\ 2064 out[idx].uc = in.uc + 0x12;\n\ 2065 out[idx].ul = in.ul + 0x123456789abcdef;\n\ 2066 out[idx].us = in.us + 0x1234;\n\ 2067 }\n"; 2068 auto out = ShaderArg<struct s>({0, 0, 0}, SHADER_ARG_OUTPUT); 2069 auto in = ShaderArg<struct s>({1, 2, 3}, SHADER_ARG_INPUT); 2070 const struct s expected[] = { 2071 { 0x12 + 1, 0x123456789abcdef + 2, 0x1234 + 3 } 2072 }; 2073 2074 run_shader(kernel_source, out.size(), 1, 1, out, in); 2075 for (int i = 0; i < out.size(); ++i) { 2076 EXPECT_EQ(out[i].uc, expected[i].uc); 2077 EXPECT_EQ(out[i].ul, expected[i].ul); 2078 EXPECT_EQ(out[i].us, expected[i].us); 2079 } 2080} 2081 2082TEST_F(ComputeTest, packed_struct_local) 2083{ 2084#pragma pack(push, 1) 2085 struct s { uint8_t uc; uint64_t ul; uint16_t us; }; 2086#pragma pack(pop) 2087 2088 const char *kernel_source = 2089 "struct __attribute__((packed)) s {uchar uc; ulong ul; ushort us; };\n\ 2090 __kernel void main_test(__global struct s *out, __constant struct s *in)\n\ 2091 {\n\ 2092 uint idx = get_global_id(0);\n\ 2093 __local struct s tmp[2];\n\ 2094 tmp[get_local_id(0)] = in[idx];\n\ 2095 barrier(CLK_LOCAL_MEM_FENCE);\n\ 2096 out[idx] = tmp[(get_local_id(0) + 1) % 2];\n\ 2097 }\n"; 2098 auto out = ShaderArg<struct s>({{0, 0, 0}, {0, 0, 0}}, SHADER_ARG_OUTPUT); 2099 auto in = ShaderArg<struct s>({{1, 2, 3}, {0x12, 0x123456789abcdef, 0x1234} }, SHADER_ARG_INPUT); 2100 const struct s expected[] = { 2101 { 0x12, 0x123456789abcdef, 0x1234 }, 2102 { 1, 2, 3 }, 2103 }; 2104 2105 run_shader(kernel_source, out.size(), 1, 1, out, in); 2106 for (int i = 0; i < out.size(); ++i) { 2107 EXPECT_EQ(out[i].uc, expected[i].uc); 2108 EXPECT_EQ(out[i].ul, expected[i].ul); 2109 EXPECT_EQ(out[i].us, expected[i].us); 2110 } 2111} 2112 2113/* DISABLED because current release versions of WARP either return 2114 * rubbish from reads or crash: they are not prepared to handle 2115 * non-float global constants */ 2116TEST_F(ComputeTest, DISABLED_packed_struct_const) 2117{ 2118#pragma pack(push, 1) 2119 struct s { uint8_t uc; uint64_t ul; uint16_t us; }; 2120#pragma pack(pop) 2121 2122 const char *kernel_source = 2123 "struct __attribute__((packed)) s {uchar uc; ulong ul; ushort us; };\n\ 2124 __kernel void main_test(__global struct s *out, struct s in)\n\ 2125 {\n\ 2126 __constant struct s base[] = {\n\ 2127 {0x12, 0x123456789abcdef, 0x1234},\n\ 2128 {0x11, 0x123456789abcdee, 0x1233},\n\ 2129 };\n\ 2130 uint idx = get_global_id(0);\n\ 2131 out[idx].uc = base[idx % 2].uc + in.uc;\n\ 2132 out[idx].ul = base[idx % 2].ul + in.ul;\n\ 2133 out[idx].us = base[idx % 2].us + in.us;\n\ 2134 }\n"; 2135 auto out = ShaderArg<struct s>(std::vector<struct s>(2, {0, 0, 0}), SHADER_ARG_OUTPUT); 2136 auto in = ShaderArg<struct s>({1, 2, 3}, SHADER_ARG_INPUT); 2137 const struct s expected[] = { 2138 { 0x12 + 1, 0x123456789abcdef + 2, 0x1234 + 3 }, 2139 { 0x11 + 1, 0x123456789abcdee + 2, 0x1233 + 3 }, 2140 }; 2141 2142 run_shader(kernel_source, out.size(), 1, 1, out, in); 2143 for (int i = 0; i < out.size(); ++i) { 2144 EXPECT_EQ(out[i].uc, expected[i].uc); 2145 EXPECT_EQ(out[i].ul, expected[i].ul); 2146 EXPECT_EQ(out[i].us, expected[i].us); 2147 } 2148} 2149 2150TEST_F(ComputeTest, DISABLED_printf) 2151{ 2152 const char *kernel_source = R"( 2153 __kernel void main_test(__global float *src, __global uint *dest) 2154 { 2155 __constant char *format_str = "%s: %f"; 2156 __constant char *str_val = "Test"; 2157 *dest = printf(format_str, str_val, src[0]); 2158 })"; 2159 2160 auto src = ShaderArg<float>({ 1.0f }, SHADER_ARG_INPUT); 2161 auto dest = ShaderArg<uint32_t>({ 0xdeadbeef }, SHADER_ARG_OUTPUT); 2162 run_shader(kernel_source, 1, 1, 1, src, dest); 2163 EXPECT_EQ(dest[0], 0); 2164} 2165 2166TEST_F(ComputeTest, vload_half) 2167{ 2168 const char *kernel_source = R"( 2169 __kernel void main_test(__global half *src, __global float4 *dest) 2170 { 2171 int offset = get_global_id(0); 2172 dest[offset] = vload_half4(offset, src); 2173 })"; 2174 auto src = ShaderArg<uint16_t>({ 0x3c00, 0x4000, 0x4200, 0x4400, 2175 0x4500, 0x4600, 0x4700, 0x4800 }, SHADER_ARG_INPUT); 2176 auto dest = ShaderArg<float>({ FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, 2177 FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX }, SHADER_ARG_OUTPUT); 2178 run_shader(kernel_source, 2, 1, 1, src, dest); 2179 for (unsigned i = 0; i < 8; ++i) 2180 EXPECT_FLOAT_EQ(dest[i], (float)(i + 1)); 2181} 2182 2183TEST_F(ComputeTest, vstore_half) 2184{ 2185 const char *kernel_source = R"( 2186 __kernel void main_test(__global half *dst, __global float4 *src) 2187 { 2188 int offset = get_global_id(0); 2189 vstore_half4(src[offset], offset, dst); 2190 })"; 2191 auto dest = ShaderArg<uint16_t>({0xdead, 0xdead, 0xdead, 0xdead, 2192 0xdead, 0xdead, 0xdead, 0xdead}, SHADER_ARG_OUTPUT); 2193 auto src = ShaderArg<float>({ 1.0, 2.0, 3.0, 4.0, 2194 5.0, 6.0, 7.0, 8.0 }, SHADER_ARG_INPUT); 2195 run_shader(kernel_source, 2, 1, 1, dest, src); 2196 const uint16_t expected[] = { 0x3c00, 0x4000, 0x4200, 0x4400, 2197 0x4500, 0x4600, 0x4700, 0x4800 }; 2198 for (unsigned i = 0; i < 8; ++i) 2199 EXPECT_EQ(dest[i], expected[i]); 2200} 2201 2202TEST_F(ComputeTest, inline_function) 2203{ 2204 const char *kernel_source = R"( 2205 inline float helper(float foo) 2206 { 2207 return foo * 2; 2208 } 2209 2210 __kernel void main_test(__global float *dst, __global float *src) 2211 { 2212 *dst = helper(*src); 2213 })"; 2214 auto dest = ShaderArg<float>({ NAN }, SHADER_ARG_OUTPUT); 2215 auto src = ShaderArg<float>({ 1.0f }, SHADER_ARG_INPUT); 2216 run_shader(kernel_source, 1, 1, 1, dest, src); 2217 EXPECT_EQ(dest[0], 2.0f); 2218} 2219 2220TEST_F(ComputeTest, unused_arg) 2221{ 2222 const char *kernel_source = R"( 2223 __kernel void main_test(__global int *dst, __global int *unused, __global int *src) 2224 { 2225 int i = get_global_id(0); 2226 dst[i] = src[i]; 2227 })"; 2228 auto dest = ShaderArg<int>({ -1, -1, -1, -1 }, SHADER_ARG_OUTPUT); 2229 auto src = ShaderArg<int>({ 1, 2, 3, 4 }, SHADER_ARG_INPUT); 2230 auto unused = ShaderArg<int>({ -1, -1, -1, -1 }, SHADER_ARG_INPUT); 2231 run_shader(kernel_source, 4, 1, 1, dest, unused, src); 2232 for (int i = 0; i < 4; ++i) 2233 EXPECT_EQ(dest[i], i + 1); 2234} 2235 2236TEST_F(ComputeTest, spec_constant) 2237{ 2238 const char *spirv_asm = R"( 2239 OpCapability Addresses 2240 OpCapability Kernel 2241 OpCapability Int64 2242 %1 = OpExtInstImport "OpenCL.std" 2243 OpMemoryModel Physical64 OpenCL 2244 OpEntryPoint Kernel %2 "main_test" %__spirv_BuiltInGlobalInvocationId 2245 %4 = OpString "kernel_arg_type.main_test.uint*," 2246 OpSource OpenCL_C 102000 2247 OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId" 2248 OpName %output "output" 2249 OpName %entry "entry" 2250 OpName %output_addr "output.addr" 2251 OpName %id "id" 2252 OpName %call "call" 2253 OpName %conv "conv" 2254 OpName %idxprom "idxprom" 2255 OpName %arrayidx "arrayidx" 2256 OpName %add "add" 2257 OpName %mul "mul" 2258 OpName %idxprom1 "idxprom1" 2259 OpName %arrayidx2 "arrayidx2" 2260 OpDecorate %__spirv_BuiltInGlobalInvocationId BuiltIn GlobalInvocationId 2261 OpDecorate %__spirv_BuiltInGlobalInvocationId Constant 2262 OpDecorate %id Alignment 4 2263 OpDecorate %output_addr Alignment 8 2264 OpDecorate %uint_1 SpecId 1 2265 %ulong = OpTypeInt 64 0 2266 %uint = OpTypeInt 32 0 2267 %uint_1 = OpSpecConstant %uint 1 2268 %v3ulong = OpTypeVector %ulong 3 2269%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong 2270 %void = OpTypeVoid 2271%_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint 2272 %24 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint 2273%_ptr_Function__ptr_CrossWorkgroup_uint = OpTypePointer Function %_ptr_CrossWorkgroup_uint 2274%_ptr_Function_uint = OpTypePointer Function %uint 2275%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_Input_v3ulong Input 2276 %2 = OpFunction %void DontInline %24 2277 %output = OpFunctionParameter %_ptr_CrossWorkgroup_uint 2278 %entry = OpLabel 2279%output_addr = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uint Function 2280 %id = OpVariable %_ptr_Function_uint Function 2281 OpStore %output_addr %output Aligned 8 2282 %27 = OpLoad %v3ulong %__spirv_BuiltInGlobalInvocationId Aligned 32 2283 %call = OpCompositeExtract %ulong %27 0 2284 %conv = OpUConvert %uint %call 2285 OpStore %id %conv Aligned 4 2286 %28 = OpLoad %_ptr_CrossWorkgroup_uint %output_addr Aligned 8 2287 %29 = OpLoad %uint %id Aligned 4 2288 %idxprom = OpUConvert %ulong %29 2289 %arrayidx = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uint %28 %idxprom 2290 %30 = OpLoad %uint %arrayidx Aligned 4 2291 %31 = OpLoad %uint %id Aligned 4 2292 %add = OpIAdd %uint %31 %uint_1 2293 %mul = OpIMul %uint %30 %add 2294 %32 = OpLoad %_ptr_CrossWorkgroup_uint %output_addr Aligned 8 2295 %33 = OpLoad %uint %id Aligned 4 2296 %idxprom1 = OpUConvert %ulong %33 2297 %arrayidx2 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uint %32 %idxprom1 2298 OpStore %arrayidx2 %mul Aligned 4 2299 OpReturn 2300 OpFunctionEnd)"; 2301 Shader shader = assemble(spirv_asm); 2302 Shader spec_shader = specialize(shader, 1, 5); 2303 2304 auto inout = ShaderArg<uint32_t>({ 0x00000001, 0x10000001, 0x00020002, 0x04010203 }, 2305 SHADER_ARG_INOUT); 2306 const uint32_t expected[] = { 2307 0x00000005, 0x60000006, 0x000e000e, 0x20081018 2308 }; 2309 CompileArgs args = { inout.size(), 1, 1 }; 2310 run_shader(spec_shader, args, inout); 2311 for (int i = 0; i < inout.size(); ++i) 2312 EXPECT_EQ(inout[i], expected[i]); 2313} 2314