compute.c revision 848b8605
1/* 2 * Copyright (C) 2011 Francisco Jerez. 3 * All Rights Reserved. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining 6 * a copy of this software and associated documentation files (the 7 * "Software"), to deal in the Software without restriction, including 8 * without limitation the rights to use, copy, modify, merge, publish, 9 * distribute, sublicense, and/or sell copies of the Software, and to 10 * permit persons to whom the Software is furnished to do so, subject to 11 * the following conditions: 12 * 13 * The above copyright notice and this permission notice (including the 14 * next paragraph) shall be included in all copies or substantial 15 * portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 20 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 21 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 22 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 23 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 24 * 25 */ 26 27#include <fcntl.h> 28#include <stdio.h> 29#include <sys/stat.h> 30#include <inttypes.h> 31#include "pipe/p_state.h" 32#include "pipe/p_context.h" 33#include "pipe/p_screen.h" 34#include "pipe/p_defines.h" 35#include "pipe/p_shader_tokens.h" 36#include "util/u_memory.h" 37#include "util/u_inlines.h" 38#include "util/u_sampler.h" 39#include "util/u_format.h" 40#include "tgsi/tgsi_text.h" 41#include "pipe-loader/pipe_loader.h" 42 43#define MAX_RESOURCES 4 44 45struct context { 46 struct pipe_loader_device *dev; 47 struct pipe_screen *screen; 48 struct pipe_context *pipe; 49 void *hwcs; 50 void *hwsmp[MAX_RESOURCES]; 51 struct pipe_resource *tex[MAX_RESOURCES]; 52 bool tex_rw[MAX_RESOURCES]; 53 struct pipe_sampler_view *view[MAX_RESOURCES]; 54 struct pipe_surface *surf[MAX_RESOURCES]; 55}; 56 57#define DUMP_COMPUTE_PARAM(p, c) do { \ 58 uint64_t __v[4]; \ 59 int __i, __n; \ 60 \ 61 __n = ctx->screen->get_compute_param(ctx->screen, c, __v); \ 62 printf("%s: {", #c); \ 63 \ 64 for (__i = 0; __i < __n / sizeof(*__v); ++__i) \ 65 printf(" %"PRIu64, __v[__i]); \ 66 \ 67 printf(" }\n"); \ 68 } while (0) 69 70static void init_ctx(struct context *ctx) 71{ 72 int ret; 73 74 ret = pipe_loader_probe(&ctx->dev, 1); 75 assert(ret); 76 77 ctx->screen = pipe_loader_create_screen(ctx->dev, PIPE_SEARCH_DIR); 78 assert(ctx->screen); 79 80 ctx->pipe = ctx->screen->context_create(ctx->screen, NULL); 81 assert(ctx->pipe); 82 83 DUMP_COMPUTE_PARAM(p, PIPE_COMPUTE_CAP_GRID_DIMENSION); 84 DUMP_COMPUTE_PARAM(p, PIPE_COMPUTE_CAP_MAX_GRID_SIZE); 85 DUMP_COMPUTE_PARAM(p, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE); 86} 87 88static void destroy_ctx(struct context *ctx) 89{ 90 ctx->pipe->destroy(ctx->pipe); 91 ctx->screen->destroy(ctx->screen); 92 pipe_loader_release(&ctx->dev, 1); 93 FREE(ctx); 94} 95 96static char * 97preprocess_prog(struct context *ctx, const char *src, const char *defs) 98{ 99 const char header[] = 100 "#define RGLOBAL RES[32767]\n" 101 "#define RLOCAL RES[32766]\n" 102 "#define RPRIVATE RES[32765]\n" 103 "#define RINPUT RES[32764]\n"; 104 char cmd[512]; 105 char tmp[] = "/tmp/test-compute.tgsi-XXXXXX"; 106 char *buf; 107 int fd, ret; 108 struct stat st; 109 FILE *p; 110 111 /* Open a temporary file */ 112 fd = mkstemp(tmp); 113 assert(fd >= 0); 114 snprintf(cmd, sizeof(cmd), "cpp -P -nostdinc -undef %s > %s", 115 defs ? defs : "", tmp); 116 117 /* Preprocess */ 118 p = popen(cmd, "w"); 119 fwrite(header, strlen(header), 1, p); 120 fwrite(src, strlen(src), 1, p); 121 ret = pclose(p); 122 assert(!ret); 123 124 /* Read back */ 125 ret = fstat(fd, &st); 126 assert(!ret); 127 128 buf = malloc(st.st_size + 1); 129 ret = read(fd, buf, st.st_size); 130 assert(ret == st.st_size); 131 buf[ret] = 0; 132 133 /* Clean up */ 134 close(fd); 135 unlink(tmp); 136 137 return buf; 138} 139 140static void init_prog(struct context *ctx, unsigned local_sz, 141 unsigned private_sz, unsigned input_sz, 142 const char *src, const char *defs) 143{ 144 struct pipe_context *pipe = ctx->pipe; 145 struct tgsi_token prog[1024]; 146 struct pipe_compute_state cs = { 147 .prog = prog, 148 .req_local_mem = local_sz, 149 .req_private_mem = private_sz, 150 .req_input_mem = input_sz 151 }; 152 char *psrc = preprocess_prog(ctx, src, defs); 153 int ret; 154 155 ret = tgsi_text_translate(psrc, prog, Elements(prog)); 156 assert(ret); 157 free(psrc); 158 159 ctx->hwcs = pipe->create_compute_state(pipe, &cs); 160 assert(ctx->hwcs); 161 162 pipe->bind_compute_state(pipe, ctx->hwcs); 163} 164 165static void destroy_prog(struct context *ctx) 166{ 167 struct pipe_context *pipe = ctx->pipe; 168 169 pipe->delete_compute_state(pipe, ctx->hwcs); 170 ctx->hwcs = NULL; 171} 172 173static void init_tex(struct context *ctx, int slot, 174 enum pipe_texture_target target, bool rw, 175 enum pipe_format format, int w, int h, 176 void (*init)(void *, int, int, int)) 177{ 178 struct pipe_context *pipe = ctx->pipe; 179 struct pipe_resource **tex = &ctx->tex[slot]; 180 struct pipe_resource ttex = { 181 .target = target, 182 .format = format, 183 .width0 = w, 184 .height0 = h, 185 .depth0 = 1, 186 .array_size = 1, 187 .bind = (PIPE_BIND_SAMPLER_VIEW | 188 PIPE_BIND_COMPUTE_RESOURCE | 189 PIPE_BIND_GLOBAL) 190 }; 191 int dx = util_format_get_blocksize(format); 192 int dy = util_format_get_stride(format, w); 193 int nx = (target == PIPE_BUFFER ? (w / dx) : 194 util_format_get_nblocksx(format, w)); 195 int ny = (target == PIPE_BUFFER ? 1 : 196 util_format_get_nblocksy(format, h)); 197 struct pipe_transfer *xfer; 198 char *map; 199 int x, y; 200 201 *tex = ctx->screen->resource_create(ctx->screen, &ttex); 202 assert(*tex); 203 204 map = pipe->transfer_map(pipe, *tex, 0, PIPE_TRANSFER_WRITE, 205 &(struct pipe_box) { .width = w, 206 .height = h, 207 .depth = 1 }, &xfer); 208 assert(xfer); 209 assert(map); 210 211 for (y = 0; y < ny; ++y) { 212 for (x = 0; x < nx; ++x) { 213 init(map + y * dy + x * dx, slot, x, y); 214 } 215 } 216 217 pipe->transfer_unmap(pipe, xfer); 218 219 ctx->tex_rw[slot] = rw; 220} 221 222static bool default_check(void *x, void *y, int sz) { 223 return !memcmp(x, y, sz); 224} 225 226static void check_tex(struct context *ctx, int slot, 227 void (*expect)(void *, int, int, int), 228 bool (*check)(void *, void *, int)) 229{ 230 struct pipe_context *pipe = ctx->pipe; 231 struct pipe_resource *tex = ctx->tex[slot]; 232 int dx = util_format_get_blocksize(tex->format); 233 int dy = util_format_get_stride(tex->format, tex->width0); 234 int nx = (tex->target == PIPE_BUFFER ? (tex->width0 / dx) : 235 util_format_get_nblocksx(tex->format, tex->width0)); 236 int ny = (tex->target == PIPE_BUFFER ? 1 : 237 util_format_get_nblocksy(tex->format, tex->height0)); 238 struct pipe_transfer *xfer; 239 char *map; 240 int x, y, i; 241 int err = 0; 242 243 if (!check) 244 check = default_check; 245 246 map = pipe->transfer_map(pipe, tex, 0, PIPE_TRANSFER_READ, 247 &(struct pipe_box) { .width = tex->width0, 248 .height = tex->height0, 249 .depth = 1 }, &xfer); 250 assert(xfer); 251 assert(map); 252 253 for (y = 0; y < ny; ++y) { 254 for (x = 0; x < nx; ++x) { 255 uint32_t exp[4]; 256 uint32_t *res = (uint32_t *)(map + y * dy + x * dx); 257 258 expect(exp, slot, x, y); 259 if (check(res, exp, dx) || (++err) > 20) 260 continue; 261 262 if (dx < 4) { 263 uint32_t u = 0, v = 0; 264 265 for (i = 0; i < dx; i++) { 266 u |= ((uint8_t *)exp)[i] << (8 * i); 267 v |= ((uint8_t *)res)[i] << (8 * i); 268 } 269 printf("(%d, %d): got 0x%x, expected 0x%x\n", 270 x, y, v, u); 271 } else { 272 for (i = 0; i < dx / 4; i++) { 273 printf("(%d, %d)[%d]: got 0x%x/%f," 274 " expected 0x%x/%f\n", x, y, i, 275 res[i], ((float *)res)[i], 276 exp[i], ((float *)exp)[i]); 277 } 278 } 279 } 280 } 281 282 pipe->transfer_unmap(pipe, xfer); 283 284 if (err) 285 printf("(%d, %d): \x1b[31mFAIL\x1b[0m (%d)\n", x, y, err); 286 else 287 printf("(%d, %d): \x1b[32mOK\x1b[0m\n", x, y); 288} 289 290static void destroy_tex(struct context *ctx) 291{ 292 int i; 293 294 for (i = 0; i < MAX_RESOURCES; ++i) { 295 if (ctx->tex[i]) 296 pipe_resource_reference(&ctx->tex[i], NULL); 297 } 298} 299 300static void init_sampler_views(struct context *ctx, const int *slots) 301{ 302 struct pipe_context *pipe = ctx->pipe; 303 struct pipe_sampler_view tview; 304 int i; 305 306 for (i = 0; *slots >= 0; ++i, ++slots) { 307 u_sampler_view_default_template(&tview, ctx->tex[*slots], 308 ctx->tex[*slots]->format); 309 310 ctx->view[i] = pipe->create_sampler_view(pipe, ctx->tex[*slots], 311 &tview); 312 assert(ctx->view[i]); 313 } 314 315 pipe->set_sampler_views(pipe, PIPE_SHADER_COMPUTE, 0, i, ctx->view); 316} 317 318static void destroy_sampler_views(struct context *ctx) 319{ 320 struct pipe_context *pipe = ctx->pipe; 321 int i; 322 323 pipe->set_sampler_views(pipe, PIPE_SHADER_COMPUTE, 0, MAX_RESOURCES, NULL); 324 325 for (i = 0; i < MAX_RESOURCES; ++i) { 326 if (ctx->view[i]) { 327 pipe->sampler_view_destroy(pipe, ctx->view[i]); 328 ctx->view[i] = NULL; 329 } 330 } 331} 332 333static void init_compute_resources(struct context *ctx, const int *slots) 334{ 335 struct pipe_context *pipe = ctx->pipe; 336 int i; 337 338 for (i = 0; *slots >= 0; ++i, ++slots) { 339 struct pipe_surface tsurf = { 340 .format = ctx->tex[*slots]->format, 341 .writable = ctx->tex_rw[*slots] 342 }; 343 344 if (ctx->tex[*slots]->target == PIPE_BUFFER) 345 tsurf.u.buf.last_element = ctx->tex[*slots]->width0 - 1; 346 347 ctx->surf[i] = pipe->create_surface(pipe, ctx->tex[*slots], 348 &tsurf); 349 assert(ctx->surf[i]); 350 } 351 352 pipe->set_compute_resources(pipe, 0, i, ctx->surf); 353} 354 355static void destroy_compute_resources(struct context *ctx) 356{ 357 struct pipe_context *pipe = ctx->pipe; 358 int i; 359 360 pipe->set_compute_resources(pipe, 0, MAX_RESOURCES, NULL); 361 362 for (i = 0; i < MAX_RESOURCES; ++i) { 363 if (ctx->surf[i]) { 364 pipe->surface_destroy(pipe, ctx->surf[i]); 365 ctx->surf[i] = NULL; 366 } 367 } 368} 369 370static void init_sampler_states(struct context *ctx, int n) 371{ 372 struct pipe_context *pipe = ctx->pipe; 373 struct pipe_sampler_state smp = { 374 .normalized_coords = 1, 375 }; 376 int i; 377 378 for (i = 0; i < n; ++i) { 379 ctx->hwsmp[i] = pipe->create_sampler_state(pipe, &smp); 380 assert(ctx->hwsmp[i]); 381 } 382 383 pipe->bind_sampler_states(pipe, PIPE_SHADER_COMPUTE, 0, i, ctx->hwsmp); 384} 385 386static void destroy_sampler_states(struct context *ctx) 387{ 388 struct pipe_context *pipe = ctx->pipe; 389 int i; 390 391 pipe->bind_sampler_states(pipe, PIPE_SHADER_COMPUTE, 392 0, MAX_RESOURCES, NULL); 393 394 for (i = 0; i < MAX_RESOURCES; ++i) { 395 if (ctx->hwsmp[i]) { 396 pipe->delete_sampler_state(pipe, ctx->hwsmp[i]); 397 ctx->hwsmp[i] = NULL; 398 } 399 } 400} 401 402static void init_globals(struct context *ctx, const int *slots, 403 uint32_t **handles) 404{ 405 struct pipe_context *pipe = ctx->pipe; 406 struct pipe_resource *res[MAX_RESOURCES]; 407 int i; 408 409 for (i = 0; *slots >= 0; ++i, ++slots) 410 res[i] = ctx->tex[*slots]; 411 412 pipe->set_global_binding(pipe, 0, i, res, handles); 413} 414 415static void destroy_globals(struct context *ctx) 416{ 417 struct pipe_context *pipe = ctx->pipe; 418 419 pipe->set_global_binding(pipe, 0, MAX_RESOURCES, NULL, NULL); 420} 421 422static void launch_grid(struct context *ctx, const uint *block_layout, 423 const uint *grid_layout, uint32_t pc, 424 const void *input) 425{ 426 struct pipe_context *pipe = ctx->pipe; 427 428 pipe->launch_grid(pipe, block_layout, grid_layout, pc, input); 429} 430 431static void test_system_values(struct context *ctx) 432{ 433 const char *src = "COMP\n" 434 "DCL RES[0], BUFFER, RAW, WR\n" 435 "DCL SV[0], BLOCK_ID[0]\n" 436 "DCL SV[1], BLOCK_SIZE[0]\n" 437 "DCL SV[2], GRID_SIZE[0]\n" 438 "DCL SV[3], THREAD_ID[0]\n" 439 "DCL TEMP[0], LOCAL\n" 440 "DCL TEMP[1], LOCAL\n" 441 "IMM UINT32 { 64, 0, 0, 0 }\n" 442 "IMM UINT32 { 16, 0, 0, 0 }\n" 443 "IMM UINT32 { 0, 0, 0, 0 }\n" 444 "\n" 445 "BGNSUB" 446 " UMUL TEMP[0], SV[0], SV[1]\n" 447 " UADD TEMP[0], TEMP[0], SV[3]\n" 448 " UMUL TEMP[1], SV[1], SV[2]\n" 449 " UMUL TEMP[0].w, TEMP[0], TEMP[1].zzzz\n" 450 " UMUL TEMP[0].zw, TEMP[0], TEMP[1].yyyy\n" 451 " UMUL TEMP[0].yzw, TEMP[0], TEMP[1].xxxx\n" 452 " UADD TEMP[0].xy, TEMP[0].xyxy, TEMP[0].zwzw\n" 453 " UADD TEMP[0].x, TEMP[0].xxxx, TEMP[0].yyyy\n" 454 " UMUL TEMP[0].x, TEMP[0], IMM[0]\n" 455 " STORE RES[0].xyzw, TEMP[0], SV[0]\n" 456 " UADD TEMP[0].x, TEMP[0], IMM[1]\n" 457 " STORE RES[0].xyzw, TEMP[0], SV[1]\n" 458 " UADD TEMP[0].x, TEMP[0], IMM[1]\n" 459 " STORE RES[0].xyzw, TEMP[0], SV[2]\n" 460 " UADD TEMP[0].x, TEMP[0], IMM[1]\n" 461 " STORE RES[0].xyzw, TEMP[0], SV[3]\n" 462 " RET\n" 463 "ENDSUB\n"; 464 void init(void *p, int s, int x, int y) { 465 *(uint32_t *)p = 0xdeadbeef; 466 } 467 void expect(void *p, int s, int x, int y) { 468 int id = x / 16, sv = (x % 16) / 4, c = x % 4; 469 int tid[] = { id % 20, (id % 240) / 20, id / 240, 0 }; 470 int bsz[] = { 4, 3, 5, 1}; 471 int gsz[] = { 5, 4, 1, 1}; 472 473 switch (sv) { 474 case 0: 475 *(uint32_t *)p = tid[c] / bsz[c]; 476 break; 477 case 1: 478 *(uint32_t *)p = bsz[c]; 479 break; 480 case 2: 481 *(uint32_t *)p = gsz[c]; 482 break; 483 case 3: 484 *(uint32_t *)p = tid[c] % bsz[c]; 485 break; 486 } 487 } 488 489 printf("- %s\n", __func__); 490 491 init_prog(ctx, 0, 0, 0, src, NULL); 492 init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 493 76800, 0, init); 494 init_compute_resources(ctx, (int []) { 0, -1 }); 495 launch_grid(ctx, (uint []){4, 3, 5}, (uint []){5, 4, 1}, 0, NULL); 496 check_tex(ctx, 0, expect, NULL); 497 destroy_compute_resources(ctx); 498 destroy_tex(ctx); 499 destroy_prog(ctx); 500} 501 502static void test_resource_access(struct context *ctx) 503{ 504 const char *src = "COMP\n" 505 "DCL RES[0], BUFFER, RAW, WR\n" 506 "DCL RES[1], 2D, RAW, WR\n" 507 "DCL SV[0], BLOCK_ID[0]\n" 508 "DCL TEMP[0], LOCAL\n" 509 "DCL TEMP[1], LOCAL\n" 510 "IMM UINT32 { 15, 0, 0, 0 }\n" 511 "IMM UINT32 { 16, 1, 0, 0 }\n" 512 "\n" 513 " BGNSUB\n" 514 " UADD TEMP[0].x, SV[0].xxxx, SV[0].yyyy\n" 515 " AND TEMP[0].x, TEMP[0], IMM[0]\n" 516 " UMUL TEMP[0].x, TEMP[0], IMM[1]\n" 517 " LOAD TEMP[0].xyzw, RES[0], TEMP[0]\n" 518 " UMUL TEMP[1], SV[0], IMM[1]\n" 519 " STORE RES[1].xyzw, TEMP[1], TEMP[0]\n" 520 " RET\n" 521 " ENDSUB\n"; 522 void init0(void *p, int s, int x, int y) { 523 *(float *)p = 8.0 - (float)x; 524 } 525 void init1(void *p, int s, int x, int y) { 526 *(uint32_t *)p = 0xdeadbeef; 527 } 528 void expect(void *p, int s, int x, int y) { 529 *(float *)p = 8.0 - (float)((x + 4*y) & 0x3f); 530 } 531 532 printf("- %s\n", __func__); 533 534 init_prog(ctx, 0, 0, 0, src, NULL); 535 init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 536 256, 0, init0); 537 init_tex(ctx, 1, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT, 538 60, 12, init1); 539 init_compute_resources(ctx, (int []) { 0, 1, -1 }); 540 launch_grid(ctx, (uint []){1, 1, 1}, (uint []){15, 12, 1}, 0, NULL); 541 check_tex(ctx, 1, expect, NULL); 542 destroy_compute_resources(ctx); 543 destroy_tex(ctx); 544 destroy_prog(ctx); 545} 546 547static void test_function_calls(struct context *ctx) 548{ 549 const char *src = "COMP\n" 550 "DCL RES[0], 2D, RAW, WR\n" 551 "DCL SV[0], BLOCK_ID[0]\n" 552 "DCL SV[1], BLOCK_SIZE[0]\n" 553 "DCL SV[2], GRID_SIZE[0]\n" 554 "DCL SV[3], THREAD_ID[0]\n" 555 "DCL TEMP[0]\n" 556 "DCL TEMP[1]\n" 557 "DCL TEMP[2], LOCAL\n" 558 "IMM UINT32 { 0, 11, 22, 33 }\n" 559 "IMM FLT32 { 11, 33, 55, 99 }\n" 560 "IMM UINT32 { 4, 1, 0, 0 }\n" 561 "IMM UINT32 { 12, 0, 0, 0 }\n" 562 "\n" 563 "00: BGNSUB\n" 564 "01: UMUL TEMP[0].x, TEMP[0], TEMP[0]\n" 565 "02: UADD TEMP[1].x, TEMP[1], IMM[2].yyyy\n" 566 "03: USLT TEMP[0].x, TEMP[0], IMM[0]\n" 567 "04: RET\n" 568 "05: ENDSUB\n" 569 "06: BGNSUB\n" 570 "07: UMUL TEMP[0].x, TEMP[0], TEMP[0]\n" 571 "08: UADD TEMP[1].x, TEMP[1], IMM[2].yyyy\n" 572 "09: USLT TEMP[0].x, TEMP[0], IMM[0].yyyy\n" 573 "10: IF TEMP[0].xxxx\n" 574 "11: CAL :0\n" 575 "12: ENDIF\n" 576 "13: RET\n" 577 "14: ENDSUB\n" 578 "15: BGNSUB\n" 579 "16: UMUL TEMP[2], SV[0], SV[1]\n" 580 "17: UADD TEMP[2], TEMP[2], SV[3]\n" 581 "18: UMUL TEMP[2], TEMP[2], IMM[2]\n" 582 "00: MOV TEMP[1].x, IMM[2].wwww\n" 583 "19: LOAD TEMP[0].x, RES[0].xxxx, TEMP[2]\n" 584 "20: CAL :6\n" 585 "21: STORE RES[0].x, TEMP[2], TEMP[1].xxxx\n" 586 "22: RET\n" 587 "23: ENDSUB\n"; 588 void init(void *p, int s, int x, int y) { 589 *(uint32_t *)p = 15 * y + x; 590 } 591 void expect(void *p, int s, int x, int y) { 592 *(uint32_t *)p = (15 * y + x) < 4 ? 2 : 1 ; 593 } 594 595 printf("- %s\n", __func__); 596 597 init_prog(ctx, 0, 0, 0, src, NULL); 598 init_tex(ctx, 0, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT, 599 15, 12, init); 600 init_compute_resources(ctx, (int []) { 0, -1 }); 601 launch_grid(ctx, (uint []){3, 3, 3}, (uint []){5, 4, 1}, 15, NULL); 602 check_tex(ctx, 0, expect, NULL); 603 destroy_compute_resources(ctx); 604 destroy_tex(ctx); 605 destroy_prog(ctx); 606} 607 608static void test_input_global(struct context *ctx) 609{ 610 const char *src = "COMP\n" 611 "DCL SV[0], THREAD_ID[0]\n" 612 "DCL TEMP[0], LOCAL\n" 613 "DCL TEMP[1], LOCAL\n" 614 "IMM UINT32 { 8, 0, 0, 0 }\n" 615 "\n" 616 " BGNSUB\n" 617 " UMUL TEMP[0], SV[0], IMM[0]\n" 618 " LOAD TEMP[1].xy, RINPUT, TEMP[0]\n" 619 " LOAD TEMP[0].x, RGLOBAL, TEMP[1].yyyy\n" 620 " UADD TEMP[1].x, TEMP[0], -TEMP[1]\n" 621 " STORE RGLOBAL.x, TEMP[1].yyyy, TEMP[1]\n" 622 " RET\n" 623 " ENDSUB\n"; 624 void init(void *p, int s, int x, int y) { 625 *(uint32_t *)p = 0xdeadbeef; 626 } 627 void expect(void *p, int s, int x, int y) { 628 *(uint32_t *)p = 0xdeadbeef - (x == 0 ? 0x10001 + 2 * s : 0); 629 } 630 uint32_t input[8] = { 0x10001, 0x10002, 0x10003, 0x10004, 631 0x10005, 0x10006, 0x10007, 0x10008 }; 632 633 printf("- %s\n", __func__); 634 635 init_prog(ctx, 0, 0, 32, src, NULL); 636 init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 32, 0, init); 637 init_tex(ctx, 1, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 32, 0, init); 638 init_tex(ctx, 2, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 32, 0, init); 639 init_tex(ctx, 3, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 32, 0, init); 640 init_globals(ctx, (int []){ 0, 1, 2, 3, -1 }, 641 (uint32_t *[]){ &input[1], &input[3], 642 &input[5], &input[7] }); 643 launch_grid(ctx, (uint []){4, 1, 1}, (uint []){1, 1, 1}, 0, input); 644 check_tex(ctx, 0, expect, NULL); 645 check_tex(ctx, 1, expect, NULL); 646 check_tex(ctx, 2, expect, NULL); 647 check_tex(ctx, 3, expect, NULL); 648 destroy_globals(ctx); 649 destroy_tex(ctx); 650 destroy_prog(ctx); 651} 652 653static void test_private(struct context *ctx) 654{ 655 const char *src = "COMP\n" 656 "DCL RES[0], BUFFER, RAW, WR\n" 657 "DCL SV[0], BLOCK_ID[0]\n" 658 "DCL SV[1], BLOCK_SIZE[0]\n" 659 "DCL SV[2], THREAD_ID[0]\n" 660 "DCL TEMP[0], LOCAL\n" 661 "DCL TEMP[1], LOCAL\n" 662 "DCL TEMP[2], LOCAL\n" 663 "IMM UINT32 { 128, 0, 0, 0 }\n" 664 "IMM UINT32 { 4, 0, 0, 0 }\n" 665 "\n" 666 " BGNSUB\n" 667 " UMUL TEMP[0].x, SV[0], SV[1]\n" 668 " UADD TEMP[0].x, TEMP[0], SV[2]\n" 669 " MOV TEMP[1].x, IMM[0].wwww\n" 670 " BGNLOOP\n" 671 " USEQ TEMP[2].x, TEMP[1], IMM[0]\n" 672 " IF TEMP[2]\n" 673 " BRK\n" 674 " ENDIF\n" 675 " UDIV TEMP[2].x, TEMP[1], IMM[1]\n" 676 " UADD TEMP[2].x, TEMP[2], TEMP[0]\n" 677 " STORE RPRIVATE.x, TEMP[1], TEMP[2]\n" 678 " UADD TEMP[1].x, TEMP[1], IMM[1]\n" 679 " ENDLOOP\n" 680 " MOV TEMP[1].x, IMM[0].wwww\n" 681 " UMUL TEMP[0].x, TEMP[0], IMM[0]\n" 682 " BGNLOOP\n" 683 " USEQ TEMP[2].x, TEMP[1], IMM[0]\n" 684 " IF TEMP[2]\n" 685 " BRK\n" 686 " ENDIF\n" 687 " LOAD TEMP[2].x, RPRIVATE, TEMP[1]\n" 688 " STORE RES[0].x, TEMP[0], TEMP[2]\n" 689 " UADD TEMP[0].x, TEMP[0], IMM[1]\n" 690 " UADD TEMP[1].x, TEMP[1], IMM[1]\n" 691 " ENDLOOP\n" 692 " RET\n" 693 " ENDSUB\n"; 694 void init(void *p, int s, int x, int y) { 695 *(uint32_t *)p = 0xdeadbeef; 696 } 697 void expect(void *p, int s, int x, int y) { 698 *(uint32_t *)p = (x / 32) + x % 32; 699 } 700 701 printf("- %s\n", __func__); 702 703 init_prog(ctx, 0, 128, 0, src, NULL); 704 init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 705 32768, 0, init); 706 init_compute_resources(ctx, (int []) { 0, -1 }); 707 launch_grid(ctx, (uint []){16, 1, 1}, (uint []){16, 1, 1}, 0, NULL); 708 check_tex(ctx, 0, expect, NULL); 709 destroy_compute_resources(ctx); 710 destroy_tex(ctx); 711 destroy_prog(ctx); 712} 713 714static void test_local(struct context *ctx) 715{ 716 const char *src = "COMP\n" 717 "DCL RES[0], BUFFER, RAW, WR\n" 718 "DCL SV[0], BLOCK_ID[0]\n" 719 "DCL SV[1], BLOCK_SIZE[0]\n" 720 "DCL SV[2], THREAD_ID[0]\n" 721 "DCL TEMP[0], LOCAL\n" 722 "DCL TEMP[1], LOCAL\n" 723 "DCL TEMP[2], LOCAL\n" 724 "IMM UINT32 { 1, 0, 0, 0 }\n" 725 "IMM UINT32 { 2, 0, 0, 0 }\n" 726 "IMM UINT32 { 4, 0, 0, 0 }\n" 727 "IMM UINT32 { 32, 0, 0, 0 }\n" 728 "IMM UINT32 { 128, 0, 0, 0 }\n" 729 "\n" 730 " BGNSUB\n" 731 " UMUL TEMP[0].x, SV[2], IMM[2]\n" 732 " STORE RLOCAL.x, TEMP[0], IMM[0].wwww\n" 733 " MFENCE RLOCAL\n" 734 " USLT TEMP[1].x, SV[2], IMM[3]\n" 735 " IF TEMP[1]\n" 736 " UADD TEMP[1].x, TEMP[0], IMM[4]\n" 737 " BGNLOOP\n" 738 " LOAD TEMP[2].x, RLOCAL, TEMP[1]\n" 739 " USEQ TEMP[2].x, TEMP[2], IMM[0]\n" 740 " IF TEMP[2]\n" 741 " BRK\n" 742 " ENDIF\n" 743 " ENDLOOP\n" 744 " STORE RLOCAL.x, TEMP[0], IMM[0]\n" 745 " MFENCE RLOCAL\n" 746 " BGNLOOP\n" 747 " LOAD TEMP[2].x, RLOCAL, TEMP[1]\n" 748 " USEQ TEMP[2].x, TEMP[2], IMM[1]\n" 749 " IF TEMP[2]\n" 750 " BRK\n" 751 " ENDIF\n" 752 " ENDLOOP\n" 753 " ELSE\n" 754 " UADD TEMP[1].x, TEMP[0], -IMM[4]\n" 755 " BGNLOOP\n" 756 " LOAD TEMP[2].x, RLOCAL, TEMP[1]\n" 757 " USEQ TEMP[2].x, TEMP[2], IMM[0].wwww\n" 758 " IF TEMP[2]\n" 759 " BRK\n" 760 " ENDIF\n" 761 " ENDLOOP\n" 762 " STORE RLOCAL.x, TEMP[0], IMM[0]\n" 763 " MFENCE RLOCAL\n" 764 " BGNLOOP\n" 765 " LOAD TEMP[2].x, RLOCAL, TEMP[1]\n" 766 " USEQ TEMP[2].x, TEMP[2], IMM[0]\n" 767 " IF TEMP[2]\n" 768 " BRK\n" 769 " ENDIF\n" 770 " ENDLOOP\n" 771 " STORE RLOCAL.x, TEMP[0], IMM[1]\n" 772 " MFENCE RLOCAL\n" 773 " ENDIF\n" 774 " UMUL TEMP[1].x, SV[0], SV[1]\n" 775 " UMUL TEMP[1].x, TEMP[1], IMM[2]\n" 776 " UADD TEMP[1].x, TEMP[1], TEMP[0]\n" 777 " LOAD TEMP[0].x, RLOCAL, TEMP[0]\n" 778 " STORE RES[0].x, TEMP[1], TEMP[0]\n" 779 " RET\n" 780 " ENDSUB\n"; 781 void init(void *p, int s, int x, int y) { 782 *(uint32_t *)p = 0xdeadbeef; 783 } 784 void expect(void *p, int s, int x, int y) { 785 *(uint32_t *)p = x & 0x20 ? 2 : 1; 786 } 787 788 printf("- %s\n", __func__); 789 790 init_prog(ctx, 256, 0, 0, src, NULL); 791 init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 792 4096, 0, init); 793 init_compute_resources(ctx, (int []) { 0, -1 }); 794 launch_grid(ctx, (uint []){64, 1, 1}, (uint []){16, 1, 1}, 0, NULL); 795 check_tex(ctx, 0, expect, NULL); 796 destroy_compute_resources(ctx); 797 destroy_tex(ctx); 798 destroy_prog(ctx); 799} 800 801static void test_sample(struct context *ctx) 802{ 803 const char *src = "COMP\n" 804 "DCL SVIEW[0], 2D, FLOAT\n" 805 "DCL RES[0], 2D, RAW, WR\n" 806 "DCL SAMP[0]\n" 807 "DCL SV[0], BLOCK_ID[0]\n" 808 "DCL TEMP[0], LOCAL\n" 809 "DCL TEMP[1], LOCAL\n" 810 "IMM UINT32 { 16, 1, 0, 0 }\n" 811 "IMM FLT32 { 128, 32, 0, 0 }\n" 812 "\n" 813 " BGNSUB\n" 814 " I2F TEMP[1], SV[0]\n" 815 " DIV TEMP[1], TEMP[1], IMM[1]\n" 816 " SAMPLE TEMP[1], TEMP[1], SVIEW[0], SAMP[0]\n" 817 " UMUL TEMP[0], SV[0], IMM[0]\n" 818 " STORE RES[0].xyzw, TEMP[0], TEMP[1]\n" 819 " RET\n" 820 " ENDSUB\n"; 821 void init(void *p, int s, int x, int y) { 822 *(float *)p = s ? 1 : x * y; 823 } 824 void expect(void *p, int s, int x, int y) { 825 switch (x % 4) { 826 case 0: 827 *(float *)p = x / 4 * y; 828 break; 829 case 1: 830 case 2: 831 *(float *)p = 0; 832 break; 833 case 3: 834 *(float *)p = 1; 835 break; 836 } 837 } 838 839 printf("- %s\n", __func__); 840 841 init_prog(ctx, 0, 0, 0, src, NULL); 842 init_tex(ctx, 0, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT, 843 128, 32, init); 844 init_tex(ctx, 1, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT, 845 512, 32, init); 846 init_compute_resources(ctx, (int []) { 1, -1 }); 847 init_sampler_views(ctx, (int []) { 0, -1 }); 848 init_sampler_states(ctx, 2); 849 launch_grid(ctx, (uint []){1, 1, 1}, (uint []){128, 32, 1}, 0, NULL); 850 check_tex(ctx, 1, expect, NULL); 851 destroy_sampler_states(ctx); 852 destroy_sampler_views(ctx); 853 destroy_compute_resources(ctx); 854 destroy_tex(ctx); 855 destroy_prog(ctx); 856} 857 858static void test_many_kern(struct context *ctx) 859{ 860 const char *src = "COMP\n" 861 "DCL RES[0], BUFFER, RAW, WR\n" 862 "DCL TEMP[0], LOCAL\n" 863 "IMM UINT32 { 0, 1, 2, 3 }\n" 864 "IMM UINT32 { 4, 0, 0, 0 }\n" 865 "\n" 866 " BGNSUB\n" 867 " UMUL TEMP[0].x, IMM[0].xxxx, IMM[1].xxxx\n" 868 " STORE RES[0].x, TEMP[0], IMM[0].xxxx\n" 869 " RET\n" 870 " ENDSUB\n" 871 " BGNSUB\n" 872 " UMUL TEMP[0].x, IMM[0].yyyy, IMM[1].xxxx\n" 873 " STORE RES[0].x, TEMP[0], IMM[0].yyyy\n" 874 " RET\n" 875 " ENDSUB\n" 876 " BGNSUB\n" 877 " UMUL TEMP[0].x, IMM[0].zzzz, IMM[1].xxxx\n" 878 " STORE RES[0].x, TEMP[0], IMM[0].zzzz\n" 879 " RET\n" 880 " ENDSUB\n" 881 " BGNSUB\n" 882 " UMUL TEMP[0].x, IMM[0].wwww, IMM[1].xxxx\n" 883 " STORE RES[0].x, TEMP[0], IMM[0].wwww\n" 884 " RET\n" 885 " ENDSUB\n"; 886 void init(void *p, int s, int x, int y) { 887 *(uint32_t *)p = 0xdeadbeef; 888 } 889 void expect(void *p, int s, int x, int y) { 890 *(uint32_t *)p = x; 891 } 892 893 printf("- %s\n", __func__); 894 895 init_prog(ctx, 0, 0, 0, src, NULL); 896 init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 897 16, 0, init); 898 init_compute_resources(ctx, (int []) { 0, -1 }); 899 launch_grid(ctx, (uint []){1, 1, 1}, (uint []){1, 1, 1}, 0, NULL); 900 launch_grid(ctx, (uint []){1, 1, 1}, (uint []){1, 1, 1}, 5, NULL); 901 launch_grid(ctx, (uint []){1, 1, 1}, (uint []){1, 1, 1}, 10, NULL); 902 launch_grid(ctx, (uint []){1, 1, 1}, (uint []){1, 1, 1}, 15, NULL); 903 check_tex(ctx, 0, expect, NULL); 904 destroy_compute_resources(ctx); 905 destroy_tex(ctx); 906 destroy_prog(ctx); 907} 908 909static void test_constant(struct context *ctx) 910{ 911 const char *src = "COMP\n" 912 "DCL RES[0], BUFFER, RAW\n" 913 "DCL RES[1], BUFFER, RAW, WR\n" 914 "DCL SV[0], BLOCK_ID[0]\n" 915 "DCL TEMP[0], LOCAL\n" 916 "DCL TEMP[1], LOCAL\n" 917 "IMM UINT32 { 4, 0, 0, 0 }\n" 918 "\n" 919 " BGNSUB\n" 920 " UMUL TEMP[0].x, SV[0], IMM[0]\n" 921 " LOAD TEMP[1].x, RES[0], TEMP[0]\n" 922 " STORE RES[1].x, TEMP[0], TEMP[1]\n" 923 " RET\n" 924 " ENDSUB\n"; 925 void init(void *p, int s, int x, int y) { 926 *(float *)p = s ? 0xdeadbeef : 8.0 - (float)x; 927 } 928 void expect(void *p, int s, int x, int y) { 929 *(float *)p = 8.0 - (float)x; 930 } 931 932 printf("- %s\n", __func__); 933 934 init_prog(ctx, 0, 0, 0, src, NULL); 935 init_tex(ctx, 0, PIPE_BUFFER, false, PIPE_FORMAT_R32_FLOAT, 936 256, 0, init); 937 init_tex(ctx, 1, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 938 256, 0, init); 939 init_compute_resources(ctx, (int []) { 0, 1, -1 }); 940 launch_grid(ctx, (uint []){1, 1, 1}, (uint []){64, 1, 1}, 0, NULL); 941 check_tex(ctx, 1, expect, NULL); 942 destroy_compute_resources(ctx); 943 destroy_tex(ctx); 944 destroy_prog(ctx); 945} 946 947static void test_resource_indirect(struct context *ctx) 948{ 949 const char *src = "COMP\n" 950 "DCL RES[0], BUFFER, RAW, WR\n" 951 "DCL RES[1..3], BUFFER, RAW\n" 952 "DCL SV[0], BLOCK_ID[0]\n" 953 "DCL TEMP[0], LOCAL\n" 954 "DCL TEMP[1], LOCAL\n" 955 "IMM UINT32 { 4, 0, 0, 0 }\n" 956 "\n" 957 " BGNSUB\n" 958 " UMUL TEMP[0].x, SV[0], IMM[0]\n" 959 " LOAD TEMP[1].x, RES[1], TEMP[0]\n" 960 " LOAD TEMP[1].x, RES[TEMP[1].x+2], TEMP[0]\n" 961 " STORE RES[0].x, TEMP[0], TEMP[1]\n" 962 " RET\n" 963 " ENDSUB\n"; 964 void init(void *p, int s, int x, int y) { 965 *(uint32_t *)p = s == 0 ? 0xdeadbeef : 966 s == 1 ? x % 2 : 967 s == 2 ? 2 * x : 968 2 * x + 1; 969 } 970 void expect(void *p, int s, int x, int y) { 971 *(uint32_t *)p = 2 * x + (x % 2 ? 1 : 0); 972 } 973 974 printf("- %s\n", __func__); 975 976 init_prog(ctx, 0, 0, 0, src, NULL); 977 init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 978 256, 0, init); 979 init_tex(ctx, 1, PIPE_BUFFER, false, PIPE_FORMAT_R32_FLOAT, 980 256, 0, init); 981 init_tex(ctx, 2, PIPE_BUFFER, false, PIPE_FORMAT_R32_FLOAT, 982 256, 0, init); 983 init_tex(ctx, 3, PIPE_BUFFER, false, PIPE_FORMAT_R32_FLOAT, 984 256, 0, init); 985 init_compute_resources(ctx, (int []) { 0, 1, 2, 3, -1 }); 986 launch_grid(ctx, (uint []){1, 1, 1}, (uint []){64, 1, 1}, 0, NULL); 987 check_tex(ctx, 0, expect, NULL); 988 destroy_compute_resources(ctx); 989 destroy_tex(ctx); 990 destroy_prog(ctx); 991} 992 993enum pipe_format surface_fmts[] = { 994 PIPE_FORMAT_B8G8R8A8_UNORM, 995 PIPE_FORMAT_B8G8R8X8_UNORM, 996 PIPE_FORMAT_A8R8G8B8_UNORM, 997 PIPE_FORMAT_X8R8G8B8_UNORM, 998 PIPE_FORMAT_X8R8G8B8_UNORM, 999 PIPE_FORMAT_L8_UNORM, 1000 PIPE_FORMAT_A8_UNORM, 1001 PIPE_FORMAT_I8_UNORM, 1002 PIPE_FORMAT_L8A8_UNORM, 1003 PIPE_FORMAT_R32_FLOAT, 1004 PIPE_FORMAT_R32G32_FLOAT, 1005 PIPE_FORMAT_R32G32B32A32_FLOAT, 1006 PIPE_FORMAT_R32_UNORM, 1007 PIPE_FORMAT_R32G32_UNORM, 1008 PIPE_FORMAT_R32G32B32A32_UNORM, 1009 PIPE_FORMAT_R32_SNORM, 1010 PIPE_FORMAT_R32G32_SNORM, 1011 PIPE_FORMAT_R32G32B32A32_SNORM, 1012 PIPE_FORMAT_R8_UINT, 1013 PIPE_FORMAT_R8G8_UINT, 1014 PIPE_FORMAT_R8G8B8A8_UINT, 1015 PIPE_FORMAT_R8_SINT, 1016 PIPE_FORMAT_R8G8_SINT, 1017 PIPE_FORMAT_R8G8B8A8_SINT, 1018 PIPE_FORMAT_R32_UINT, 1019 PIPE_FORMAT_R32G32_UINT, 1020 PIPE_FORMAT_R32G32B32A32_UINT, 1021 PIPE_FORMAT_R32_SINT, 1022 PIPE_FORMAT_R32G32_SINT, 1023 PIPE_FORMAT_R32G32B32A32_SINT 1024}; 1025 1026static void test_surface_ld(struct context *ctx) 1027{ 1028 const char *src = "COMP\n" 1029 "DCL RES[0], 2D\n" 1030 "DCL RES[1], 2D, RAW, WR\n" 1031 "DCL SV[0], BLOCK_ID[0]\n" 1032 "DCL TEMP[0], LOCAL\n" 1033 "DCL TEMP[1], LOCAL\n" 1034 "IMM UINT32 { 16, 1, 0, 0 }\n" 1035 "\n" 1036 " BGNSUB\n" 1037 " LOAD TEMP[1], RES[0], SV[0]\n" 1038 " UMUL TEMP[0], SV[0], IMM[0]\n" 1039 " STORE RES[1].xyzw, TEMP[0], TEMP[1]\n" 1040 " RET\n" 1041 " ENDSUB\n"; 1042 int i = 0; 1043 void init0f(void *p, int s, int x, int y) { 1044 float v[] = { 1.0, -.75, .50, -.25 }; 1045 util_format_write_4f(surface_fmts[i], v, 0, 1046 p, 0, 0, 0, 1, 1); 1047 } 1048 void init0i(void *p, int s, int x, int y) { 1049 int v[] = { 0xffffffff, 0xffff, 0xff, 0xf }; 1050 util_format_write_4i(surface_fmts[i], v, 0, 1051 p, 0, 0, 0, 1, 1); 1052 } 1053 void init1(void *p, int s, int x, int y) { 1054 *(uint32_t *)p = 0xdeadbeef; 1055 } 1056 void expectf(void *p, int s, int x, int y) { 1057 float v[4], w[4]; 1058 init0f(v, s, x / 4, y); 1059 util_format_read_4f(surface_fmts[i], w, 0, 1060 v, 0, 0, 0, 1, 1); 1061 *(float *)p = w[x % 4]; 1062 } 1063 void expecti(void *p, int s, int x, int y) { 1064 int32_t v[4], w[4]; 1065 init0i(v, s, x / 4, y); 1066 util_format_read_4i(surface_fmts[i], w, 0, 1067 v, 0, 0, 0, 1, 1); 1068 *(uint32_t *)p = w[x % 4]; 1069 } 1070 1071 printf("- %s\n", __func__); 1072 1073 init_prog(ctx, 0, 0, 0, src, NULL); 1074 1075 for (i = 0; i < Elements(surface_fmts); i++) { 1076 bool is_int = util_format_is_pure_integer(surface_fmts[i]); 1077 1078 printf(" - %s\n", util_format_name(surface_fmts[i])); 1079 1080 if (!ctx->screen->is_format_supported(ctx->screen, 1081 surface_fmts[i], PIPE_TEXTURE_2D, 1, 1082 PIPE_BIND_COMPUTE_RESOURCE)) { 1083 printf("(unsupported)\n"); 1084 continue; 1085 } 1086 1087 init_tex(ctx, 0, PIPE_TEXTURE_2D, true, surface_fmts[i], 1088 128, 32, (is_int ? init0i : init0f)); 1089 init_tex(ctx, 1, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT, 1090 512, 32, init1); 1091 init_compute_resources(ctx, (int []) { 0, 1, -1 }); 1092 init_sampler_states(ctx, 2); 1093 launch_grid(ctx, (uint []){1, 1, 1}, (uint []){128, 32, 1}, 0, 1094 NULL); 1095 check_tex(ctx, 1, (is_int ? expecti : expectf), NULL); 1096 destroy_sampler_states(ctx); 1097 destroy_compute_resources(ctx); 1098 destroy_tex(ctx); 1099 } 1100 1101 destroy_prog(ctx); 1102} 1103 1104static void test_surface_st(struct context *ctx) 1105{ 1106 const char *src = "COMP\n" 1107 "DCL RES[0], 2D, RAW\n" 1108 "DCL RES[1], 2D, WR\n" 1109 "DCL SV[0], BLOCK_ID[0]\n" 1110 "DCL TEMP[0], LOCAL\n" 1111 "DCL TEMP[1], LOCAL\n" 1112 "IMM UINT32 { 16, 1, 0, 0 }\n" 1113 "\n" 1114 " BGNSUB\n" 1115 " UMUL TEMP[0], SV[0], IMM[0]\n" 1116 " LOAD TEMP[1], RES[0], TEMP[0]\n" 1117 " STORE RES[1], SV[0], TEMP[1]\n" 1118 " RET\n" 1119 " ENDSUB\n"; 1120 int i = 0; 1121 void init0f(void *p, int s, int x, int y) { 1122 float v[] = { 1.0, -.75, 0.5, -.25 }; 1123 *(float *)p = v[x % 4]; 1124 } 1125 void init0i(void *p, int s, int x, int y) { 1126 int v[] = { 0xffffffff, 0xffff, 0xff, 0xf }; 1127 *(int32_t *)p = v[x % 4]; 1128 } 1129 void init1(void *p, int s, int x, int y) { 1130 memset(p, 1, util_format_get_blocksize(surface_fmts[i])); 1131 } 1132 void expectf(void *p, int s, int x, int y) { 1133 float vf[4]; 1134 int j; 1135 1136 for (j = 0; j < 4; j++) 1137 init0f(&vf[j], s, 4 * x + j, y); 1138 util_format_write_4f(surface_fmts[i], vf, 0, 1139 p, 0, 0, 0, 1, 1); 1140 } 1141 void expects(void *p, int s, int x, int y) { 1142 int32_t v[4]; 1143 int j; 1144 1145 for (j = 0; j < 4; j++) 1146 init0i(&v[j], s, 4 * x + j, y); 1147 util_format_write_4i(surface_fmts[i], v, 0, 1148 p, 0, 0, 0, 1, 1); 1149 } 1150 void expectu(void *p, int s, int x, int y) { 1151 uint32_t v[4]; 1152 int j; 1153 1154 for (j = 0; j < 4; j++) 1155 init0i(&v[j], s, 4 * x + j, y); 1156 util_format_write_4ui(surface_fmts[i], v, 0, 1157 p, 0, 0, 0, 1, 1); 1158 } 1159 bool check(void *x, void *y, int sz) { 1160 int j; 1161 1162 if (util_format_is_float(surface_fmts[i])) { 1163 return fabs(*(float *)x - *(float *)y) < 3.92156863e-3; 1164 1165 } else if ((sz % 4) == 0) { 1166 for (j = 0; j < sz / 4; j++) 1167 if (abs(((uint32_t *)x)[j] - 1168 ((uint32_t *)y)[j]) > 1) 1169 return false; 1170 return true; 1171 } else { 1172 return !memcmp(x, y, sz); 1173 } 1174 } 1175 1176 printf("- %s\n", __func__); 1177 1178 init_prog(ctx, 0, 0, 0, src, NULL); 1179 1180 for (i = 0; i < Elements(surface_fmts); i++) { 1181 bool is_signed = (util_format_description(surface_fmts[i]) 1182 ->channel[0].type == UTIL_FORMAT_TYPE_SIGNED); 1183 bool is_int = util_format_is_pure_integer(surface_fmts[i]); 1184 1185 printf(" - %s\n", util_format_name(surface_fmts[i])); 1186 1187 if (!ctx->screen->is_format_supported(ctx->screen, 1188 surface_fmts[i], PIPE_TEXTURE_2D, 1, 1189 PIPE_BIND_COMPUTE_RESOURCE)) { 1190 printf("(unsupported)\n"); 1191 continue; 1192 } 1193 1194 init_tex(ctx, 0, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT, 1195 512, 32, (is_int ? init0i : init0f)); 1196 init_tex(ctx, 1, PIPE_TEXTURE_2D, true, surface_fmts[i], 1197 128, 32, init1); 1198 init_compute_resources(ctx, (int []) { 0, 1, -1 }); 1199 init_sampler_states(ctx, 2); 1200 launch_grid(ctx, (uint []){1, 1, 1}, (uint []){128, 32, 1}, 0, 1201 NULL); 1202 check_tex(ctx, 1, (is_int && is_signed ? expects : 1203 is_int && !is_signed ? expectu : 1204 expectf), check); 1205 destroy_sampler_states(ctx); 1206 destroy_compute_resources(ctx); 1207 destroy_tex(ctx); 1208 } 1209 1210 destroy_prog(ctx); 1211} 1212 1213static void test_barrier(struct context *ctx) 1214{ 1215 const char *src = "COMP\n" 1216 "DCL RES[0], BUFFER, RAW, WR\n" 1217 "DCL SV[0], BLOCK_ID[0]\n" 1218 "DCL SV[1], BLOCK_SIZE[0]\n" 1219 "DCL SV[2], THREAD_ID[0]\n" 1220 "DCL TEMP[0], LOCAL\n" 1221 "DCL TEMP[1], LOCAL\n" 1222 "DCL TEMP[2], LOCAL\n" 1223 "DCL TEMP[3], LOCAL\n" 1224 "IMM UINT32 { 1, 0, 0, 0 }\n" 1225 "IMM UINT32 { 4, 0, 0, 0 }\n" 1226 "IMM UINT32 { 32, 0, 0, 0 }\n" 1227 "\n" 1228 " BGNSUB\n" 1229 " UMUL TEMP[0].x, SV[2], IMM[1]\n" 1230 " MOV TEMP[1].x, IMM[0].wwww\n" 1231 " BGNLOOP\n" 1232 " BARRIER\n" 1233 " STORE RLOCAL.x, TEMP[0], TEMP[1]\n" 1234 " BARRIER\n" 1235 " MOV TEMP[2].x, IMM[0].wwww\n" 1236 " BGNLOOP\n" 1237 " UMUL TEMP[3].x, TEMP[2], IMM[1]\n" 1238 " LOAD TEMP[3].x, RLOCAL, TEMP[3]\n" 1239 " USNE TEMP[3].x, TEMP[3], TEMP[1]\n" 1240 " IF TEMP[3]\n" 1241 " END\n" 1242 " ENDIF\n" 1243 " UADD TEMP[2].x, TEMP[2], IMM[0]\n" 1244 " USEQ TEMP[3].x, TEMP[2], SV[1]\n" 1245 " IF TEMP[3]\n" 1246 " BRK\n" 1247 " ENDIF\n" 1248 " ENDLOOP\n" 1249 " UADD TEMP[1].x, TEMP[1], IMM[0]\n" 1250 " USEQ TEMP[2].x, TEMP[1], IMM[2]\n" 1251 " IF TEMP[2]\n" 1252 " BRK\n" 1253 " ENDIF\n" 1254 " ENDLOOP\n" 1255 " UMUL TEMP[1].x, SV[0], SV[1]\n" 1256 " UMUL TEMP[1].x, TEMP[1], IMM[1]\n" 1257 " UADD TEMP[1].x, TEMP[1], TEMP[0]\n" 1258 " LOAD TEMP[0].x, RLOCAL, TEMP[0]\n" 1259 " STORE RES[0].x, TEMP[1], TEMP[0]\n" 1260 " RET\n" 1261 " ENDSUB\n"; 1262 void init(void *p, int s, int x, int y) { 1263 *(uint32_t *)p = 0xdeadbeef; 1264 } 1265 void expect(void *p, int s, int x, int y) { 1266 *(uint32_t *)p = 31; 1267 } 1268 1269 printf("- %s\n", __func__); 1270 1271 init_prog(ctx, 256, 0, 0, src, NULL); 1272 init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 1273 4096, 0, init); 1274 init_compute_resources(ctx, (int []) { 0, -1 }); 1275 launch_grid(ctx, (uint []){64, 1, 1}, (uint []){16, 1, 1}, 0, NULL); 1276 check_tex(ctx, 0, expect, NULL); 1277 destroy_compute_resources(ctx); 1278 destroy_tex(ctx); 1279 destroy_prog(ctx); 1280} 1281 1282static void test_atom_ops(struct context *ctx, bool global) 1283{ 1284 const char *src = "COMP\n" 1285 "#ifdef TARGET_GLOBAL\n" 1286 "#define target RES[0]\n" 1287 "#else\n" 1288 "#define target RLOCAL\n" 1289 "#endif\n" 1290 "" 1291 "DCL RES[0], BUFFER, RAW, WR\n" 1292 "#define threadid SV[0]\n" 1293 "DCL threadid, THREAD_ID[0]\n" 1294 "" 1295 "#define offset TEMP[0]\n" 1296 "DCL offset, LOCAL\n" 1297 "#define tmp TEMP[1]\n" 1298 "DCL tmp, LOCAL\n" 1299 "" 1300 "#define k0 IMM[0]\n" 1301 "IMM UINT32 { 0, 0, 0, 0 }\n" 1302 "#define k1 IMM[1]\n" 1303 "IMM UINT32 { 1, 0, 0, 0 }\n" 1304 "#define k2 IMM[2]\n" 1305 "IMM UINT32 { 2, 0, 0, 0 }\n" 1306 "#define k3 IMM[3]\n" 1307 "IMM UINT32 { 3, 0, 0, 0 }\n" 1308 "#define k4 IMM[4]\n" 1309 "IMM UINT32 { 4, 0, 0, 0 }\n" 1310 "#define k5 IMM[5]\n" 1311 "IMM UINT32 { 5, 0, 0, 0 }\n" 1312 "#define k6 IMM[6]\n" 1313 "IMM UINT32 { 6, 0, 0, 0 }\n" 1314 "#define k7 IMM[7]\n" 1315 "IMM UINT32 { 7, 0, 0, 0 }\n" 1316 "#define k8 IMM[8]\n" 1317 "IMM UINT32 { 8, 0, 0, 0 }\n" 1318 "#define k9 IMM[9]\n" 1319 "IMM UINT32 { 9, 0, 0, 0 }\n" 1320 "#define korig IMM[10].xxxx\n" 1321 "#define karg IMM[10].yyyy\n" 1322 "IMM UINT32 { 3735928559, 286331153, 0, 0 }\n" 1323 "\n" 1324 " BGNSUB\n" 1325 " UMUL offset.x, threadid, k4\n" 1326 " STORE target.x, offset, korig\n" 1327 " USEQ tmp.x, threadid, k0\n" 1328 " IF tmp\n" 1329 " ATOMUADD tmp.x, target, offset, karg\n" 1330 " ATOMUADD tmp.x, target, offset, tmp\n" 1331 " ENDIF\n" 1332 " USEQ tmp.x, threadid, k1\n" 1333 " IF tmp\n" 1334 " ATOMXCHG tmp.x, target, offset, karg\n" 1335 " ATOMXCHG tmp.x, target, offset, tmp\n" 1336 " ENDIF\n" 1337 " USEQ tmp.x, threadid, k2\n" 1338 " IF tmp\n" 1339 " ATOMCAS tmp.x, target, offset, korig, karg\n" 1340 " ATOMCAS tmp.x, target, offset, tmp, k0\n" 1341 " ENDIF\n" 1342 " USEQ tmp.x, threadid, k3\n" 1343 " IF tmp\n" 1344 " ATOMAND tmp.x, target, offset, karg\n" 1345 " ATOMAND tmp.x, target, offset, tmp\n" 1346 " ENDIF\n" 1347 " USEQ tmp.x, threadid, k4\n" 1348 " IF tmp\n" 1349 " ATOMOR tmp.x, target, offset, karg\n" 1350 " ATOMOR tmp.x, target, offset, tmp\n" 1351 " ENDIF\n" 1352 " USEQ tmp.x, threadid, k5\n" 1353 " IF tmp\n" 1354 " ATOMXOR tmp.x, target, offset, karg\n" 1355 " ATOMXOR tmp.x, target, offset, tmp\n" 1356 " ENDIF\n" 1357 " USEQ tmp.x, threadid, k6\n" 1358 " IF tmp\n" 1359 " ATOMUMIN tmp.x, target, offset, karg\n" 1360 " ATOMUMIN tmp.x, target, offset, tmp\n" 1361 " ENDIF\n" 1362 " USEQ tmp.x, threadid, k7\n" 1363 " IF tmp\n" 1364 " ATOMUMAX tmp.x, target, offset, karg\n" 1365 " ATOMUMAX tmp.x, target, offset, tmp\n" 1366 " ENDIF\n" 1367 " USEQ tmp.x, threadid, k8\n" 1368 " IF tmp\n" 1369 " ATOMIMIN tmp.x, target, offset, karg\n" 1370 " ATOMIMIN tmp.x, target, offset, tmp\n" 1371 " ENDIF\n" 1372 " USEQ tmp.x, threadid, k9\n" 1373 " IF tmp\n" 1374 " ATOMIMAX tmp.x, target, offset, karg\n" 1375 " ATOMIMAX tmp.x, target, offset, tmp\n" 1376 " ENDIF\n" 1377 "#ifdef TARGET_LOCAL\n" 1378 " LOAD tmp.x, RLOCAL, offset\n" 1379 " STORE RES[0].x, offset, tmp\n" 1380 "#endif\n" 1381 " RET\n" 1382 " ENDSUB\n"; 1383 1384 void init(void *p, int s, int x, int y) { 1385 *(uint32_t *)p = 0xbad; 1386 } 1387 void expect(void *p, int s, int x, int y) { 1388 switch (x) { 1389 case 0: 1390 *(uint32_t *)p = 0xce6c8eef; 1391 break; 1392 case 1: 1393 *(uint32_t *)p = 0xdeadbeef; 1394 break; 1395 case 2: 1396 *(uint32_t *)p = 0x11111111; 1397 break; 1398 case 3: 1399 *(uint32_t *)p = 0x10011001; 1400 break; 1401 case 4: 1402 *(uint32_t *)p = 0xdfbdbfff; 1403 break; 1404 case 5: 1405 *(uint32_t *)p = 0x11111111; 1406 break; 1407 case 6: 1408 *(uint32_t *)p = 0x11111111; 1409 break; 1410 case 7: 1411 *(uint32_t *)p = 0xdeadbeef; 1412 break; 1413 case 8: 1414 *(uint32_t *)p = 0xdeadbeef; 1415 break; 1416 case 9: 1417 *(uint32_t *)p = 0x11111111; 1418 break; 1419 } 1420 } 1421 1422 printf("- %s (%s)\n", __func__, global ? "global" : "local"); 1423 1424 init_prog(ctx, 40, 0, 0, src, 1425 (global ? "-DTARGET_GLOBAL" : "-DTARGET_LOCAL")); 1426 init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 1427 40, 0, init); 1428 init_compute_resources(ctx, (int []) { 0, -1 }); 1429 launch_grid(ctx, (uint []){10, 1, 1}, (uint []){1, 1, 1}, 0, NULL); 1430 check_tex(ctx, 0, expect, NULL); 1431 destroy_compute_resources(ctx); 1432 destroy_tex(ctx); 1433 destroy_prog(ctx); 1434} 1435 1436static void test_atom_race(struct context *ctx, bool global) 1437{ 1438 const char *src = "COMP\n" 1439 "#ifdef TARGET_GLOBAL\n" 1440 "#define target RES[0]\n" 1441 "#else\n" 1442 "#define target RLOCAL\n" 1443 "#endif\n" 1444 "" 1445 "DCL RES[0], BUFFER, RAW, WR\n" 1446 "" 1447 "#define blockid SV[0]\n" 1448 "DCL blockid, BLOCK_ID[0]\n" 1449 "#define blocksz SV[1]\n" 1450 "DCL blocksz, BLOCK_SIZE[0]\n" 1451 "#define threadid SV[2]\n" 1452 "DCL threadid, THREAD_ID[0]\n" 1453 "" 1454 "#define offset TEMP[0]\n" 1455 "DCL offset, LOCAL\n" 1456 "#define arg TEMP[1]\n" 1457 "DCL arg, LOCAL\n" 1458 "#define count TEMP[2]\n" 1459 "DCL count, LOCAL\n" 1460 "#define vlocal TEMP[3]\n" 1461 "DCL vlocal, LOCAL\n" 1462 "#define vshared TEMP[4]\n" 1463 "DCL vshared, LOCAL\n" 1464 "#define last TEMP[5]\n" 1465 "DCL last, LOCAL\n" 1466 "#define tmp0 TEMP[6]\n" 1467 "DCL tmp0, LOCAL\n" 1468 "#define tmp1 TEMP[7]\n" 1469 "DCL tmp1, LOCAL\n" 1470 "" 1471 "#define k0 IMM[0]\n" 1472 "IMM UINT32 { 0, 0, 0, 0 }\n" 1473 "#define k1 IMM[1]\n" 1474 "IMM UINT32 { 1, 0, 0, 0 }\n" 1475 "#define k4 IMM[2]\n" 1476 "IMM UINT32 { 4, 0, 0, 0 }\n" 1477 "#define k32 IMM[3]\n" 1478 "IMM UINT32 { 32, 0, 0, 0 }\n" 1479 "#define k128 IMM[4]\n" 1480 "IMM UINT32 { 128, 0, 0, 0 }\n" 1481 "#define kdeadcafe IMM[5]\n" 1482 "IMM UINT32 { 3735931646, 0, 0, 0 }\n" 1483 "#define kallowed_set IMM[6]\n" 1484 "IMM UINT32 { 559035650, 0, 0, 0 }\n" 1485 "#define k11111111 IMM[7]\n" 1486 "IMM UINT32 { 286331153, 0, 0, 0 }\n" 1487 "\n" 1488 " BGNSUB\n" 1489 " MOV offset.x, threadid\n" 1490 "#ifdef TARGET_GLOBAL\n" 1491 " UMUL tmp0.x, blockid, blocksz\n" 1492 " UADD offset.x, offset, tmp0\n" 1493 "#endif\n" 1494 " UMUL offset.x, offset, k4\n" 1495 " USLT tmp0.x, threadid, k32\n" 1496 " STORE target.x, offset, k0\n" 1497 " BARRIER\n" 1498 " IF tmp0\n" 1499 " MOV vlocal.x, k0\n" 1500 " MOV arg.x, kdeadcafe\n" 1501 " BGNLOOP\n" 1502 " INEG arg.x, arg\n" 1503 " ATOMUADD vshared.x, target, offset, arg\n" 1504 " SFENCE target\n" 1505 " USNE tmp0.x, vshared, vlocal\n" 1506 " IF tmp0\n" 1507 " BRK\n" 1508 " ENDIF\n" 1509 " UADD vlocal.x, vlocal, arg\n" 1510 " ENDLOOP\n" 1511 " UADD vlocal.x, vshared, arg\n" 1512 " LOAD vshared.x, target, offset\n" 1513 " USEQ tmp0.x, vshared, vlocal\n" 1514 " STORE target.x, offset, tmp0\n" 1515 " ELSE\n" 1516 " UADD offset.x, offset, -k128\n" 1517 " MOV count.x, k0\n" 1518 " MOV last.x, k0\n" 1519 " BGNLOOP\n" 1520 " LOAD vshared.x, target, offset\n" 1521 " USEQ tmp0.x, vshared, kallowed_set.xxxx\n" 1522 " USEQ tmp1.x, vshared, kallowed_set.yyyy\n" 1523 " OR tmp0.x, tmp0, tmp1\n" 1524 " IF tmp0\n" 1525 " USEQ tmp0.x, vshared, last\n" 1526 " IF tmp0\n" 1527 " CONT\n" 1528 " ENDIF\n" 1529 " MOV last.x, vshared\n" 1530 " ELSE\n" 1531 " END\n" 1532 " ENDIF\n" 1533 " UADD count.x, count, k1\n" 1534 " USEQ tmp0.x, count, k128\n" 1535 " IF tmp0\n" 1536 " BRK\n" 1537 " ENDIF\n" 1538 " ENDLOOP\n" 1539 " ATOMXCHG tmp0.x, target, offset, k11111111\n" 1540 " UADD offset.x, offset, k128\n" 1541 " ATOMXCHG tmp0.x, target, offset, k11111111\n" 1542 " SFENCE target\n" 1543 " ENDIF\n" 1544 "#ifdef TARGET_LOCAL\n" 1545 " LOAD tmp0.x, RLOCAL, offset\n" 1546 " UMUL tmp1.x, blockid, blocksz\n" 1547 " UMUL tmp1.x, tmp1, k4\n" 1548 " UADD offset.x, offset, tmp1\n" 1549 " STORE RES[0].x, offset, tmp0\n" 1550 "#endif\n" 1551 " RET\n" 1552 " ENDSUB\n"; 1553 1554 void init(void *p, int s, int x, int y) { 1555 *(uint32_t *)p = 0xdeadbeef; 1556 } 1557 void expect(void *p, int s, int x, int y) { 1558 *(uint32_t *)p = x & 0x20 ? 0x11111111 : 0xffffffff; 1559 } 1560 1561 printf("- %s (%s)\n", __func__, global ? "global" : "local"); 1562 1563 init_prog(ctx, 256, 0, 0, src, 1564 (global ? "-DTARGET_GLOBAL" : "-DTARGET_LOCAL")); 1565 init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 1566 4096, 0, init); 1567 init_compute_resources(ctx, (int []) { 0, -1 }); 1568 launch_grid(ctx, (uint []){64, 1, 1}, (uint []){16, 1, 1}, 0, NULL); 1569 check_tex(ctx, 0, expect, NULL); 1570 destroy_compute_resources(ctx); 1571 destroy_tex(ctx); 1572 destroy_prog(ctx); 1573} 1574 1575int main(int argc, char *argv[]) 1576{ 1577 struct context *ctx = CALLOC_STRUCT(context); 1578 1579 unsigned tests = (argc > 1) ? strtoul(argv[1], NULL, 0) : ~0; 1580 1581 init_ctx(ctx); 1582 1583 if (tests & (1 << 0)) 1584 test_system_values(ctx); 1585 if (tests & (1 << 1)) 1586 test_resource_access(ctx); 1587 if (tests & (1 << 2)) 1588 test_function_calls(ctx); 1589 if (tests & (1 << 3)) 1590 test_input_global(ctx); 1591 if (tests & (1 << 4)) 1592 test_private(ctx); 1593 if (tests & (1 << 5)) 1594 test_local(ctx); 1595 if (tests & (1 << 6)) 1596 test_sample(ctx); 1597 if (tests & (1 << 7)) 1598 test_many_kern(ctx); 1599 if (tests & (1 << 8)) 1600 test_constant(ctx); 1601 if (tests & (1 << 9)) 1602 test_resource_indirect(ctx); 1603 if (tests & (1 << 10)) 1604 test_surface_ld(ctx); 1605 if (tests & (1 << 11)) 1606 test_surface_st(ctx); 1607 if (tests & (1 << 12)) 1608 test_barrier(ctx); 1609 if (tests & (1 << 13)) 1610 test_atom_ops(ctx, true); 1611 if (tests & (1 << 14)) 1612 test_atom_race(ctx, true); 1613 if (tests & (1 << 15)) 1614 test_atom_ops(ctx, false); 1615 if (tests & (1 << 16)) 1616 test_atom_race(ctx, false); 1617 1618 destroy_ctx(ctx); 1619 1620 return 0; 1621} 1622