1af69d88dSmrg/* 2af69d88dSmrg * Copyright (C) 2011 Francisco Jerez. 3af69d88dSmrg * All Rights Reserved. 4af69d88dSmrg * 5af69d88dSmrg * Permission is hereby granted, free of charge, to any person obtaining 6af69d88dSmrg * a copy of this software and associated documentation files (the 7af69d88dSmrg * "Software"), to deal in the Software without restriction, including 8af69d88dSmrg * without limitation the rights to use, copy, modify, merge, publish, 9af69d88dSmrg * distribute, sublicense, and/or sell copies of the Software, and to 10af69d88dSmrg * permit persons to whom the Software is furnished to do so, subject to 11af69d88dSmrg * the following conditions: 12af69d88dSmrg * 13af69d88dSmrg * The above copyright notice and this permission notice (including the 14af69d88dSmrg * next paragraph) shall be included in all copies or substantial 15af69d88dSmrg * portions of the Software. 16af69d88dSmrg * 17af69d88dSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18af69d88dSmrg * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19af69d88dSmrg * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 20af69d88dSmrg * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 21af69d88dSmrg * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 22af69d88dSmrg * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 23af69d88dSmrg * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
24af69d88dSmrg * 25af69d88dSmrg */ 26af69d88dSmrg 27af69d88dSmrg#include <fcntl.h> 28af69d88dSmrg#include <stdio.h> 29af69d88dSmrg#include <sys/stat.h> 30af69d88dSmrg#include <inttypes.h> 31af69d88dSmrg#include "pipe/p_state.h" 32af69d88dSmrg#include "pipe/p_context.h" 33af69d88dSmrg#include "pipe/p_screen.h" 34af69d88dSmrg#include "pipe/p_defines.h" 35af69d88dSmrg#include "pipe/p_shader_tokens.h" 36af69d88dSmrg#include "util/u_memory.h" 37af69d88dSmrg#include "util/u_inlines.h" 38af69d88dSmrg#include "util/u_sampler.h" 397ec681f3Smrg#include "util/format/u_format.h" 40af69d88dSmrg#include "tgsi/tgsi_text.h" 41af69d88dSmrg#include "pipe-loader/pipe_loader.h" 42af69d88dSmrg 43af69d88dSmrg#define MAX_RESOURCES 4 44af69d88dSmrg 45af69d88dSmrgstruct context { 46af69d88dSmrg struct pipe_loader_device *dev; 47af69d88dSmrg struct pipe_screen *screen; 48af69d88dSmrg struct pipe_context *pipe; 49af69d88dSmrg void *hwcs; 50af69d88dSmrg void *hwsmp[MAX_RESOURCES]; 51af69d88dSmrg struct pipe_resource *tex[MAX_RESOURCES]; 52af69d88dSmrg bool tex_rw[MAX_RESOURCES]; 53af69d88dSmrg struct pipe_sampler_view *view[MAX_RESOURCES]; 54af69d88dSmrg struct pipe_surface *surf[MAX_RESOURCES]; 55af69d88dSmrg}; 56af69d88dSmrg 57af69d88dSmrg#define DUMP_COMPUTE_PARAM(p, c) do { \ 58af69d88dSmrg uint64_t __v[4]; \ 59af69d88dSmrg int __i, __n; \ 60af69d88dSmrg \ 6101e04c3fSmrg __n = ctx->screen->get_compute_param(ctx->screen, \ 6201e04c3fSmrg PIPE_SHADER_IR_TGSI, \ 6301e04c3fSmrg c, __v); \ 64af69d88dSmrg printf("%s: {", #c); \ 65af69d88dSmrg \ 66af69d88dSmrg for (__i = 0; __i < __n / sizeof(*__v); ++__i) \ 67af69d88dSmrg printf(" %"PRIu64, __v[__i]); \ 68af69d88dSmrg \ 69af69d88dSmrg printf(" }\n"); \ 70af69d88dSmrg } while (0) 71af69d88dSmrg 72af69d88dSmrgstatic void init_ctx(struct context *ctx) 73af69d88dSmrg{ 747ec681f3Smrg ASSERTED int ret; 75af69d88dSmrg 76af69d88dSmrg ret = pipe_loader_probe(&ctx->dev, 1); 77af69d88dSmrg assert(ret); 78af69d88dSmrg 7901e04c3fSmrg ctx->screen = 
pipe_loader_create_screen(ctx->dev); 80af69d88dSmrg assert(ctx->screen); 81af69d88dSmrg 8201e04c3fSmrg ctx->pipe = ctx->screen->context_create(ctx->screen, NULL, 0); 83af69d88dSmrg assert(ctx->pipe); 84af69d88dSmrg 85af69d88dSmrg DUMP_COMPUTE_PARAM(p, PIPE_COMPUTE_CAP_GRID_DIMENSION); 86af69d88dSmrg DUMP_COMPUTE_PARAM(p, PIPE_COMPUTE_CAP_MAX_GRID_SIZE); 87af69d88dSmrg DUMP_COMPUTE_PARAM(p, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE); 88af69d88dSmrg} 89af69d88dSmrg 90af69d88dSmrgstatic void destroy_ctx(struct context *ctx) 91af69d88dSmrg{ 92af69d88dSmrg ctx->pipe->destroy(ctx->pipe); 93af69d88dSmrg ctx->screen->destroy(ctx->screen); 94af69d88dSmrg pipe_loader_release(&ctx->dev, 1); 95af69d88dSmrg FREE(ctx); 96af69d88dSmrg} 97af69d88dSmrg 98af69d88dSmrgstatic char * 99af69d88dSmrgpreprocess_prog(struct context *ctx, const char *src, const char *defs) 100af69d88dSmrg{ 101af69d88dSmrg const char header[] = 102af69d88dSmrg "#define RGLOBAL RES[32767]\n" 103af69d88dSmrg "#define RLOCAL RES[32766]\n" 104af69d88dSmrg "#define RPRIVATE RES[32765]\n" 105af69d88dSmrg "#define RINPUT RES[32764]\n"; 106af69d88dSmrg char cmd[512]; 107af69d88dSmrg char tmp[] = "/tmp/test-compute.tgsi-XXXXXX"; 108af69d88dSmrg char *buf; 109af69d88dSmrg int fd, ret; 110af69d88dSmrg struct stat st; 111af69d88dSmrg FILE *p; 112af69d88dSmrg 113af69d88dSmrg /* Open a temporary file */ 114af69d88dSmrg fd = mkstemp(tmp); 115af69d88dSmrg assert(fd >= 0); 116af69d88dSmrg snprintf(cmd, sizeof(cmd), "cpp -P -nostdinc -undef %s > %s", 117af69d88dSmrg defs ? 
defs : "", tmp); 118af69d88dSmrg 119af69d88dSmrg /* Preprocess */ 120af69d88dSmrg p = popen(cmd, "w"); 121af69d88dSmrg fwrite(header, strlen(header), 1, p); 122af69d88dSmrg fwrite(src, strlen(src), 1, p); 123af69d88dSmrg ret = pclose(p); 124af69d88dSmrg assert(!ret); 125af69d88dSmrg 126af69d88dSmrg /* Read back */ 127af69d88dSmrg ret = fstat(fd, &st); 128af69d88dSmrg assert(!ret); 129af69d88dSmrg 130af69d88dSmrg buf = malloc(st.st_size + 1); 131af69d88dSmrg ret = read(fd, buf, st.st_size); 132af69d88dSmrg assert(ret == st.st_size); 133af69d88dSmrg buf[ret] = 0; 134af69d88dSmrg 135af69d88dSmrg /* Clean up */ 136af69d88dSmrg close(fd); 137af69d88dSmrg unlink(tmp); 138af69d88dSmrg 139af69d88dSmrg return buf; 140af69d88dSmrg} 141af69d88dSmrg 142af69d88dSmrgstatic void init_prog(struct context *ctx, unsigned local_sz, 143af69d88dSmrg unsigned private_sz, unsigned input_sz, 144af69d88dSmrg const char *src, const char *defs) 145af69d88dSmrg{ 146af69d88dSmrg struct pipe_context *pipe = ctx->pipe; 147af69d88dSmrg struct tgsi_token prog[1024]; 148af69d88dSmrg struct pipe_compute_state cs = { 14901e04c3fSmrg .ir_type = PIPE_SHADER_IR_TGSI, 150af69d88dSmrg .prog = prog, 151af69d88dSmrg .req_local_mem = local_sz, 152af69d88dSmrg .req_private_mem = private_sz, 153af69d88dSmrg .req_input_mem = input_sz 154af69d88dSmrg }; 155af69d88dSmrg char *psrc = preprocess_prog(ctx, src, defs); 1567ec681f3Smrg ASSERTED int ret; 157af69d88dSmrg 15801e04c3fSmrg ret = tgsi_text_translate(psrc, prog, ARRAY_SIZE(prog)); 159af69d88dSmrg assert(ret); 160af69d88dSmrg free(psrc); 161af69d88dSmrg 162af69d88dSmrg ctx->hwcs = pipe->create_compute_state(pipe, &cs); 163af69d88dSmrg assert(ctx->hwcs); 164af69d88dSmrg 165af69d88dSmrg pipe->bind_compute_state(pipe, ctx->hwcs); 166af69d88dSmrg} 167af69d88dSmrg 168af69d88dSmrgstatic void destroy_prog(struct context *ctx) 169af69d88dSmrg{ 170af69d88dSmrg struct pipe_context *pipe = ctx->pipe; 171af69d88dSmrg 172af69d88dSmrg pipe->delete_compute_state(pipe, 
ctx->hwcs); 173af69d88dSmrg ctx->hwcs = NULL; 174af69d88dSmrg} 175af69d88dSmrg 176af69d88dSmrgstatic void init_tex(struct context *ctx, int slot, 177af69d88dSmrg enum pipe_texture_target target, bool rw, 178af69d88dSmrg enum pipe_format format, int w, int h, 179af69d88dSmrg void (*init)(void *, int, int, int)) 180af69d88dSmrg{ 181af69d88dSmrg struct pipe_context *pipe = ctx->pipe; 182af69d88dSmrg struct pipe_resource **tex = &ctx->tex[slot]; 183af69d88dSmrg struct pipe_resource ttex = { 184af69d88dSmrg .target = target, 185af69d88dSmrg .format = format, 186af69d88dSmrg .width0 = w, 187af69d88dSmrg .height0 = h, 188af69d88dSmrg .depth0 = 1, 189af69d88dSmrg .array_size = 1, 190af69d88dSmrg .bind = (PIPE_BIND_SAMPLER_VIEW | 191af69d88dSmrg PIPE_BIND_COMPUTE_RESOURCE | 192af69d88dSmrg PIPE_BIND_GLOBAL) 193af69d88dSmrg }; 194af69d88dSmrg int dx = util_format_get_blocksize(format); 195af69d88dSmrg int dy = util_format_get_stride(format, w); 196af69d88dSmrg int nx = (target == PIPE_BUFFER ? (w / dx) : 197af69d88dSmrg util_format_get_nblocksx(format, w)); 198af69d88dSmrg int ny = (target == PIPE_BUFFER ? 
1 : 199af69d88dSmrg util_format_get_nblocksy(format, h)); 200af69d88dSmrg struct pipe_transfer *xfer; 201af69d88dSmrg char *map; 202af69d88dSmrg int x, y; 203af69d88dSmrg 204af69d88dSmrg *tex = ctx->screen->resource_create(ctx->screen, &ttex); 205af69d88dSmrg assert(*tex); 206af69d88dSmrg 2077ec681f3Smrg map = pipe->texture_map(pipe, *tex, 0, PIPE_MAP_WRITE, 208af69d88dSmrg &(struct pipe_box) { .width = w, 209af69d88dSmrg .height = h, 210af69d88dSmrg .depth = 1 }, &xfer); 211af69d88dSmrg assert(xfer); 212af69d88dSmrg assert(map); 213af69d88dSmrg 214af69d88dSmrg for (y = 0; y < ny; ++y) { 215af69d88dSmrg for (x = 0; x < nx; ++x) { 216af69d88dSmrg init(map + y * dy + x * dx, slot, x, y); 217af69d88dSmrg } 218af69d88dSmrg } 219af69d88dSmrg 2207ec681f3Smrg pipe->texture_unmap(pipe, xfer); 221af69d88dSmrg 222af69d88dSmrg ctx->tex_rw[slot] = rw; 223af69d88dSmrg} 224af69d88dSmrg 225af69d88dSmrgstatic bool default_check(void *x, void *y, int sz) { 226af69d88dSmrg return !memcmp(x, y, sz); 227af69d88dSmrg} 228af69d88dSmrg 229af69d88dSmrgstatic void check_tex(struct context *ctx, int slot, 230af69d88dSmrg void (*expect)(void *, int, int, int), 231af69d88dSmrg bool (*check)(void *, void *, int)) 232af69d88dSmrg{ 233af69d88dSmrg struct pipe_context *pipe = ctx->pipe; 234af69d88dSmrg struct pipe_resource *tex = ctx->tex[slot]; 235af69d88dSmrg int dx = util_format_get_blocksize(tex->format); 236af69d88dSmrg int dy = util_format_get_stride(tex->format, tex->width0); 237af69d88dSmrg int nx = (tex->target == PIPE_BUFFER ? (tex->width0 / dx) : 238af69d88dSmrg util_format_get_nblocksx(tex->format, tex->width0)); 239af69d88dSmrg int ny = (tex->target == PIPE_BUFFER ? 
1 : 240af69d88dSmrg util_format_get_nblocksy(tex->format, tex->height0)); 241af69d88dSmrg struct pipe_transfer *xfer; 242af69d88dSmrg char *map; 2439f464c52Smaya int x = 0, y, i; 244af69d88dSmrg int err = 0; 245af69d88dSmrg 246af69d88dSmrg if (!check) 247af69d88dSmrg check = default_check; 248af69d88dSmrg 2497ec681f3Smrg map = pipe->texture_map(pipe, tex, 0, PIPE_MAP_READ, 250af69d88dSmrg &(struct pipe_box) { .width = tex->width0, 251af69d88dSmrg .height = tex->height0, 252af69d88dSmrg .depth = 1 }, &xfer); 253af69d88dSmrg assert(xfer); 254af69d88dSmrg assert(map); 255af69d88dSmrg 256af69d88dSmrg for (y = 0; y < ny; ++y) { 257af69d88dSmrg for (x = 0; x < nx; ++x) { 258af69d88dSmrg uint32_t exp[4]; 259af69d88dSmrg uint32_t *res = (uint32_t *)(map + y * dy + x * dx); 260af69d88dSmrg 261af69d88dSmrg expect(exp, slot, x, y); 262af69d88dSmrg if (check(res, exp, dx) || (++err) > 20) 263af69d88dSmrg continue; 264af69d88dSmrg 265af69d88dSmrg if (dx < 4) { 266af69d88dSmrg uint32_t u = 0, v = 0; 267af69d88dSmrg 268af69d88dSmrg for (i = 0; i < dx; i++) { 269af69d88dSmrg u |= ((uint8_t *)exp)[i] << (8 * i); 270af69d88dSmrg v |= ((uint8_t *)res)[i] << (8 * i); 271af69d88dSmrg } 272af69d88dSmrg printf("(%d, %d): got 0x%x, expected 0x%x\n", 273af69d88dSmrg x, y, v, u); 274af69d88dSmrg } else { 275af69d88dSmrg for (i = 0; i < dx / 4; i++) { 276af69d88dSmrg printf("(%d, %d)[%d]: got 0x%x/%f," 277af69d88dSmrg " expected 0x%x/%f\n", x, y, i, 278af69d88dSmrg res[i], ((float *)res)[i], 279af69d88dSmrg exp[i], ((float *)exp)[i]); 280af69d88dSmrg } 281af69d88dSmrg } 282af69d88dSmrg } 283af69d88dSmrg } 284af69d88dSmrg 2857ec681f3Smrg pipe->texture_unmap(pipe, xfer); 286af69d88dSmrg 287af69d88dSmrg if (err) 288af69d88dSmrg printf("(%d, %d): \x1b[31mFAIL\x1b[0m (%d)\n", x, y, err); 289af69d88dSmrg else 290af69d88dSmrg printf("(%d, %d): \x1b[32mOK\x1b[0m\n", x, y); 291af69d88dSmrg} 292af69d88dSmrg 293af69d88dSmrgstatic void destroy_tex(struct context *ctx) 294af69d88dSmrg{ 295af69d88dSmrg 
int i; 296af69d88dSmrg 297af69d88dSmrg for (i = 0; i < MAX_RESOURCES; ++i) { 298af69d88dSmrg if (ctx->tex[i]) 299af69d88dSmrg pipe_resource_reference(&ctx->tex[i], NULL); 300af69d88dSmrg } 301af69d88dSmrg} 302af69d88dSmrg 303af69d88dSmrgstatic void init_sampler_views(struct context *ctx, const int *slots) 304af69d88dSmrg{ 305af69d88dSmrg struct pipe_context *pipe = ctx->pipe; 306af69d88dSmrg struct pipe_sampler_view tview; 307af69d88dSmrg int i; 308af69d88dSmrg 309af69d88dSmrg for (i = 0; *slots >= 0; ++i, ++slots) { 310af69d88dSmrg u_sampler_view_default_template(&tview, ctx->tex[*slots], 311af69d88dSmrg ctx->tex[*slots]->format); 312af69d88dSmrg 313af69d88dSmrg ctx->view[i] = pipe->create_sampler_view(pipe, ctx->tex[*slots], 314af69d88dSmrg &tview); 315af69d88dSmrg assert(ctx->view[i]); 316af69d88dSmrg } 317af69d88dSmrg 3187ec681f3Smrg pipe->set_sampler_views(pipe, PIPE_SHADER_COMPUTE, 0, i, 0, false, ctx->view); 319af69d88dSmrg} 320af69d88dSmrg 321af69d88dSmrgstatic void destroy_sampler_views(struct context *ctx) 322af69d88dSmrg{ 323af69d88dSmrg struct pipe_context *pipe = ctx->pipe; 324af69d88dSmrg int i; 325af69d88dSmrg 3267ec681f3Smrg pipe->set_sampler_views(pipe, PIPE_SHADER_COMPUTE, 0, 0, MAX_RESOURCES, false, NULL); 327af69d88dSmrg 328af69d88dSmrg for (i = 0; i < MAX_RESOURCES; ++i) { 329af69d88dSmrg if (ctx->view[i]) { 330af69d88dSmrg pipe->sampler_view_destroy(pipe, ctx->view[i]); 331af69d88dSmrg ctx->view[i] = NULL; 332af69d88dSmrg } 333af69d88dSmrg } 334af69d88dSmrg} 335af69d88dSmrg 336af69d88dSmrgstatic void init_compute_resources(struct context *ctx, const int *slots) 337af69d88dSmrg{ 338af69d88dSmrg struct pipe_context *pipe = ctx->pipe; 339af69d88dSmrg int i; 340af69d88dSmrg 341af69d88dSmrg for (i = 0; *slots >= 0; ++i, ++slots) { 342af69d88dSmrg struct pipe_surface tsurf = { 343af69d88dSmrg .format = ctx->tex[*slots]->format, 344af69d88dSmrg .writable = ctx->tex_rw[*slots] 345af69d88dSmrg }; 346af69d88dSmrg 347af69d88dSmrg if 
(ctx->tex[*slots]->target == PIPE_BUFFER) 348af69d88dSmrg tsurf.u.buf.last_element = ctx->tex[*slots]->width0 - 1; 349af69d88dSmrg 350af69d88dSmrg ctx->surf[i] = pipe->create_surface(pipe, ctx->tex[*slots], 351af69d88dSmrg &tsurf); 352af69d88dSmrg assert(ctx->surf[i]); 353af69d88dSmrg } 354af69d88dSmrg 355af69d88dSmrg pipe->set_compute_resources(pipe, 0, i, ctx->surf); 356af69d88dSmrg} 357af69d88dSmrg 358af69d88dSmrgstatic void destroy_compute_resources(struct context *ctx) 359af69d88dSmrg{ 360af69d88dSmrg struct pipe_context *pipe = ctx->pipe; 361af69d88dSmrg int i; 362af69d88dSmrg 363af69d88dSmrg pipe->set_compute_resources(pipe, 0, MAX_RESOURCES, NULL); 364af69d88dSmrg 365af69d88dSmrg for (i = 0; i < MAX_RESOURCES; ++i) { 366af69d88dSmrg if (ctx->surf[i]) { 367af69d88dSmrg pipe->surface_destroy(pipe, ctx->surf[i]); 368af69d88dSmrg ctx->surf[i] = NULL; 369af69d88dSmrg } 370af69d88dSmrg } 371af69d88dSmrg} 372af69d88dSmrg 373af69d88dSmrgstatic void init_sampler_states(struct context *ctx, int n) 374af69d88dSmrg{ 375af69d88dSmrg struct pipe_context *pipe = ctx->pipe; 376af69d88dSmrg struct pipe_sampler_state smp = { 377af69d88dSmrg .normalized_coords = 1, 378af69d88dSmrg }; 379af69d88dSmrg int i; 380af69d88dSmrg 381af69d88dSmrg for (i = 0; i < n; ++i) { 382af69d88dSmrg ctx->hwsmp[i] = pipe->create_sampler_state(pipe, &smp); 383af69d88dSmrg assert(ctx->hwsmp[i]); 384af69d88dSmrg } 385af69d88dSmrg 386af69d88dSmrg pipe->bind_sampler_states(pipe, PIPE_SHADER_COMPUTE, 0, i, ctx->hwsmp); 387af69d88dSmrg} 388af69d88dSmrg 389af69d88dSmrgstatic void destroy_sampler_states(struct context *ctx) 390af69d88dSmrg{ 391af69d88dSmrg struct pipe_context *pipe = ctx->pipe; 392af69d88dSmrg int i; 393af69d88dSmrg 394af69d88dSmrg pipe->bind_sampler_states(pipe, PIPE_SHADER_COMPUTE, 395af69d88dSmrg 0, MAX_RESOURCES, NULL); 396af69d88dSmrg 397af69d88dSmrg for (i = 0; i < MAX_RESOURCES; ++i) { 398af69d88dSmrg if (ctx->hwsmp[i]) { 399af69d88dSmrg pipe->delete_sampler_state(pipe, 
ctx->hwsmp[i]); 400af69d88dSmrg ctx->hwsmp[i] = NULL; 401af69d88dSmrg } 402af69d88dSmrg } 403af69d88dSmrg} 404af69d88dSmrg 405af69d88dSmrgstatic void init_globals(struct context *ctx, const int *slots, 406af69d88dSmrg uint32_t **handles) 407af69d88dSmrg{ 408af69d88dSmrg struct pipe_context *pipe = ctx->pipe; 409af69d88dSmrg struct pipe_resource *res[MAX_RESOURCES]; 410af69d88dSmrg int i; 411af69d88dSmrg 412af69d88dSmrg for (i = 0; *slots >= 0; ++i, ++slots) 413af69d88dSmrg res[i] = ctx->tex[*slots]; 414af69d88dSmrg 415af69d88dSmrg pipe->set_global_binding(pipe, 0, i, res, handles); 416af69d88dSmrg} 417af69d88dSmrg 418af69d88dSmrgstatic void destroy_globals(struct context *ctx) 419af69d88dSmrg{ 420af69d88dSmrg struct pipe_context *pipe = ctx->pipe; 421af69d88dSmrg 422af69d88dSmrg pipe->set_global_binding(pipe, 0, MAX_RESOURCES, NULL, NULL); 423af69d88dSmrg} 424af69d88dSmrg 425af69d88dSmrgstatic void launch_grid(struct context *ctx, const uint *block_layout, 426af69d88dSmrg const uint *grid_layout, uint32_t pc, 42701e04c3fSmrg void *input) 428af69d88dSmrg{ 429af69d88dSmrg struct pipe_context *pipe = ctx->pipe; 43001e04c3fSmrg struct pipe_grid_info info; 43101e04c3fSmrg int i; 43201e04c3fSmrg 43301e04c3fSmrg for (i = 0; i < 3; i++) { 43401e04c3fSmrg info.block[i] = block_layout[i]; 43501e04c3fSmrg info.grid[i] = grid_layout[i]; 43601e04c3fSmrg } 43701e04c3fSmrg info.pc = pc; 43801e04c3fSmrg info.input = input; 439af69d88dSmrg 44001e04c3fSmrg pipe->launch_grid(pipe, &info); 44101e04c3fSmrg} 44201e04c3fSmrg 44301e04c3fSmrgstatic void test_default_init(void *p, int s, int x, int y) 44401e04c3fSmrg{ 44501e04c3fSmrg *(uint32_t *)p = 0xdeadbeef; 44601e04c3fSmrg} 44701e04c3fSmrg 44801e04c3fSmrg/* test_system_values */ 44901e04c3fSmrgstatic void test_system_values_expect(void *p, int s, int x, int y) 45001e04c3fSmrg{ 45101e04c3fSmrg int id = x / 16, sv = (x % 16) / 4, c = x % 4; 45201e04c3fSmrg int tid[] = { id % 20, (id % 240) / 20, id / 240, 0 }; 45301e04c3fSmrg int bsz[] = 
{ 4, 3, 5, 1}; 45401e04c3fSmrg int gsz[] = { 5, 4, 1, 1}; 45501e04c3fSmrg 45601e04c3fSmrg switch (sv) { 45701e04c3fSmrg case 0: 45801e04c3fSmrg *(uint32_t *)p = tid[c] / bsz[c]; 45901e04c3fSmrg break; 46001e04c3fSmrg case 1: 46101e04c3fSmrg *(uint32_t *)p = bsz[c]; 46201e04c3fSmrg break; 46301e04c3fSmrg case 2: 46401e04c3fSmrg *(uint32_t *)p = gsz[c]; 46501e04c3fSmrg break; 46601e04c3fSmrg case 3: 46701e04c3fSmrg *(uint32_t *)p = tid[c] % bsz[c]; 46801e04c3fSmrg break; 46901e04c3fSmrg } 470af69d88dSmrg} 471af69d88dSmrg 472af69d88dSmrgstatic void test_system_values(struct context *ctx) 473af69d88dSmrg{ 474af69d88dSmrg const char *src = "COMP\n" 475af69d88dSmrg "DCL RES[0], BUFFER, RAW, WR\n" 476af69d88dSmrg "DCL SV[0], BLOCK_ID[0]\n" 477af69d88dSmrg "DCL SV[1], BLOCK_SIZE[0]\n" 478af69d88dSmrg "DCL SV[2], GRID_SIZE[0]\n" 479af69d88dSmrg "DCL SV[3], THREAD_ID[0]\n" 480af69d88dSmrg "DCL TEMP[0], LOCAL\n" 481af69d88dSmrg "DCL TEMP[1], LOCAL\n" 482af69d88dSmrg "IMM UINT32 { 64, 0, 0, 0 }\n" 483af69d88dSmrg "IMM UINT32 { 16, 0, 0, 0 }\n" 484af69d88dSmrg "IMM UINT32 { 0, 0, 0, 0 }\n" 485af69d88dSmrg "\n" 486af69d88dSmrg "BGNSUB" 487af69d88dSmrg " UMUL TEMP[0], SV[0], SV[1]\n" 488af69d88dSmrg " UADD TEMP[0], TEMP[0], SV[3]\n" 489af69d88dSmrg " UMUL TEMP[1], SV[1], SV[2]\n" 490af69d88dSmrg " UMUL TEMP[0].w, TEMP[0], TEMP[1].zzzz\n" 491af69d88dSmrg " UMUL TEMP[0].zw, TEMP[0], TEMP[1].yyyy\n" 492af69d88dSmrg " UMUL TEMP[0].yzw, TEMP[0], TEMP[1].xxxx\n" 493af69d88dSmrg " UADD TEMP[0].xy, TEMP[0].xyxy, TEMP[0].zwzw\n" 494af69d88dSmrg " UADD TEMP[0].x, TEMP[0].xxxx, TEMP[0].yyyy\n" 495af69d88dSmrg " UMUL TEMP[0].x, TEMP[0], IMM[0]\n" 496af69d88dSmrg " STORE RES[0].xyzw, TEMP[0], SV[0]\n" 497af69d88dSmrg " UADD TEMP[0].x, TEMP[0], IMM[1]\n" 498af69d88dSmrg " STORE RES[0].xyzw, TEMP[0], SV[1]\n" 499af69d88dSmrg " UADD TEMP[0].x, TEMP[0], IMM[1]\n" 500af69d88dSmrg " STORE RES[0].xyzw, TEMP[0], SV[2]\n" 501af69d88dSmrg " UADD TEMP[0].x, TEMP[0], IMM[1]\n" 502af69d88dSmrg " STORE 
RES[0].xyzw, TEMP[0], SV[3]\n" 503af69d88dSmrg " RET\n" 504af69d88dSmrg "ENDSUB\n"; 505af69d88dSmrg 506af69d88dSmrg printf("- %s\n", __func__); 507af69d88dSmrg 508af69d88dSmrg init_prog(ctx, 0, 0, 0, src, NULL); 509af69d88dSmrg init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 51001e04c3fSmrg 76800, 0, test_default_init); 511af69d88dSmrg init_compute_resources(ctx, (int []) { 0, -1 }); 512af69d88dSmrg launch_grid(ctx, (uint []){4, 3, 5}, (uint []){5, 4, 1}, 0, NULL); 51301e04c3fSmrg check_tex(ctx, 0, test_system_values_expect, NULL); 514af69d88dSmrg destroy_compute_resources(ctx); 515af69d88dSmrg destroy_tex(ctx); 516af69d88dSmrg destroy_prog(ctx); 517af69d88dSmrg} 518af69d88dSmrg 51901e04c3fSmrg/* test_resource_access */ 52001e04c3fSmrgstatic void test_resource_access_init0(void *p, int s, int x, int y) 52101e04c3fSmrg{ 52201e04c3fSmrg *(float *)p = 8.0 - (float)x; 52301e04c3fSmrg} 52401e04c3fSmrg 52501e04c3fSmrgstatic void test_resource_access_expect(void *p, int s, int x, int y) 52601e04c3fSmrg{ 52701e04c3fSmrg *(float *)p = 8.0 - (float)((x + 4 * y) & 0x3f); 52801e04c3fSmrg} 52901e04c3fSmrg 530af69d88dSmrgstatic void test_resource_access(struct context *ctx) 531af69d88dSmrg{ 532af69d88dSmrg const char *src = "COMP\n" 533af69d88dSmrg "DCL RES[0], BUFFER, RAW, WR\n" 534af69d88dSmrg "DCL RES[1], 2D, RAW, WR\n" 535af69d88dSmrg "DCL SV[0], BLOCK_ID[0]\n" 536af69d88dSmrg "DCL TEMP[0], LOCAL\n" 537af69d88dSmrg "DCL TEMP[1], LOCAL\n" 538af69d88dSmrg "IMM UINT32 { 15, 0, 0, 0 }\n" 539af69d88dSmrg "IMM UINT32 { 16, 1, 0, 0 }\n" 540af69d88dSmrg "\n" 541af69d88dSmrg " BGNSUB\n" 542af69d88dSmrg " UADD TEMP[0].x, SV[0].xxxx, SV[0].yyyy\n" 543af69d88dSmrg " AND TEMP[0].x, TEMP[0], IMM[0]\n" 544af69d88dSmrg " UMUL TEMP[0].x, TEMP[0], IMM[1]\n" 545af69d88dSmrg " LOAD TEMP[0].xyzw, RES[0], TEMP[0]\n" 546af69d88dSmrg " UMUL TEMP[1], SV[0], IMM[1]\n" 547af69d88dSmrg " STORE RES[1].xyzw, TEMP[1], TEMP[0]\n" 548af69d88dSmrg " RET\n" 549af69d88dSmrg " ENDSUB\n"; 
550af69d88dSmrg 551af69d88dSmrg printf("- %s\n", __func__); 552af69d88dSmrg 553af69d88dSmrg init_prog(ctx, 0, 0, 0, src, NULL); 554af69d88dSmrg init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 55501e04c3fSmrg 256, 0, test_resource_access_init0); 556af69d88dSmrg init_tex(ctx, 1, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT, 55701e04c3fSmrg 60, 12, test_default_init); 558af69d88dSmrg init_compute_resources(ctx, (int []) { 0, 1, -1 }); 559af69d88dSmrg launch_grid(ctx, (uint []){1, 1, 1}, (uint []){15, 12, 1}, 0, NULL); 56001e04c3fSmrg check_tex(ctx, 1, test_resource_access_expect, NULL); 561af69d88dSmrg destroy_compute_resources(ctx); 562af69d88dSmrg destroy_tex(ctx); 563af69d88dSmrg destroy_prog(ctx); 564af69d88dSmrg} 565af69d88dSmrg 56601e04c3fSmrg/* test_function_calls */ 56701e04c3fSmrgstatic void test_function_calls_init(void *p, int s, int x, int y) 56801e04c3fSmrg{ 56901e04c3fSmrg *(uint32_t *)p = 15 * y + x; 57001e04c3fSmrg} 57101e04c3fSmrg 57201e04c3fSmrgstatic void test_function_calls_expect(void *p, int s, int x, int y) 57301e04c3fSmrg{ 57401e04c3fSmrg *(uint32_t *)p = (15 * y + x) < 4 ? 
2 : 1 ; 57501e04c3fSmrg} 57601e04c3fSmrg 577af69d88dSmrgstatic void test_function_calls(struct context *ctx) 578af69d88dSmrg{ 579af69d88dSmrg const char *src = "COMP\n" 580af69d88dSmrg "DCL RES[0], 2D, RAW, WR\n" 581af69d88dSmrg "DCL SV[0], BLOCK_ID[0]\n" 582af69d88dSmrg "DCL SV[1], BLOCK_SIZE[0]\n" 583af69d88dSmrg "DCL SV[2], GRID_SIZE[0]\n" 584af69d88dSmrg "DCL SV[3], THREAD_ID[0]\n" 585af69d88dSmrg "DCL TEMP[0]\n" 586af69d88dSmrg "DCL TEMP[1]\n" 587af69d88dSmrg "DCL TEMP[2], LOCAL\n" 588af69d88dSmrg "IMM UINT32 { 0, 11, 22, 33 }\n" 589af69d88dSmrg "IMM FLT32 { 11, 33, 55, 99 }\n" 590af69d88dSmrg "IMM UINT32 { 4, 1, 0, 0 }\n" 591af69d88dSmrg "IMM UINT32 { 12, 0, 0, 0 }\n" 592af69d88dSmrg "\n" 593af69d88dSmrg "00: BGNSUB\n" 594af69d88dSmrg "01: UMUL TEMP[0].x, TEMP[0], TEMP[0]\n" 595af69d88dSmrg "02: UADD TEMP[1].x, TEMP[1], IMM[2].yyyy\n" 596af69d88dSmrg "03: USLT TEMP[0].x, TEMP[0], IMM[0]\n" 597af69d88dSmrg "04: RET\n" 598af69d88dSmrg "05: ENDSUB\n" 599af69d88dSmrg "06: BGNSUB\n" 600af69d88dSmrg "07: UMUL TEMP[0].x, TEMP[0], TEMP[0]\n" 601af69d88dSmrg "08: UADD TEMP[1].x, TEMP[1], IMM[2].yyyy\n" 602af69d88dSmrg "09: USLT TEMP[0].x, TEMP[0], IMM[0].yyyy\n" 603af69d88dSmrg "10: IF TEMP[0].xxxx\n" 604af69d88dSmrg "11: CAL :0\n" 605af69d88dSmrg "12: ENDIF\n" 606af69d88dSmrg "13: RET\n" 607af69d88dSmrg "14: ENDSUB\n" 608af69d88dSmrg "15: BGNSUB\n" 609af69d88dSmrg "16: UMUL TEMP[2], SV[0], SV[1]\n" 610af69d88dSmrg "17: UADD TEMP[2], TEMP[2], SV[3]\n" 611af69d88dSmrg "18: UMUL TEMP[2], TEMP[2], IMM[2]\n" 612af69d88dSmrg "00: MOV TEMP[1].x, IMM[2].wwww\n" 613af69d88dSmrg "19: LOAD TEMP[0].x, RES[0].xxxx, TEMP[2]\n" 614af69d88dSmrg "20: CAL :6\n" 615af69d88dSmrg "21: STORE RES[0].x, TEMP[2], TEMP[1].xxxx\n" 616af69d88dSmrg "22: RET\n" 617af69d88dSmrg "23: ENDSUB\n"; 618af69d88dSmrg 619af69d88dSmrg printf("- %s\n", __func__); 620af69d88dSmrg 621af69d88dSmrg init_prog(ctx, 0, 0, 0, src, NULL); 622af69d88dSmrg init_tex(ctx, 0, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT, 
62301e04c3fSmrg 15, 12, test_function_calls_init); 624af69d88dSmrg init_compute_resources(ctx, (int []) { 0, -1 }); 625af69d88dSmrg launch_grid(ctx, (uint []){3, 3, 3}, (uint []){5, 4, 1}, 15, NULL); 62601e04c3fSmrg check_tex(ctx, 0, test_function_calls_expect, NULL); 627af69d88dSmrg destroy_compute_resources(ctx); 628af69d88dSmrg destroy_tex(ctx); 629af69d88dSmrg destroy_prog(ctx); 630af69d88dSmrg} 631af69d88dSmrg 63201e04c3fSmrg/* test_input_global */ 63301e04c3fSmrgstatic void test_input_global_expect(void *p, int s, int x, int y) 63401e04c3fSmrg{ 63501e04c3fSmrg *(uint32_t *)p = 0xdeadbeef - (x == 0 ? 0x10001 + 2 * s : 0); 63601e04c3fSmrg} 63701e04c3fSmrg 638af69d88dSmrgstatic void test_input_global(struct context *ctx) 639af69d88dSmrg{ 640af69d88dSmrg const char *src = "COMP\n" 641af69d88dSmrg "DCL SV[0], THREAD_ID[0]\n" 642af69d88dSmrg "DCL TEMP[0], LOCAL\n" 643af69d88dSmrg "DCL TEMP[1], LOCAL\n" 644af69d88dSmrg "IMM UINT32 { 8, 0, 0, 0 }\n" 645af69d88dSmrg "\n" 646af69d88dSmrg " BGNSUB\n" 647af69d88dSmrg " UMUL TEMP[0], SV[0], IMM[0]\n" 648af69d88dSmrg " LOAD TEMP[1].xy, RINPUT, TEMP[0]\n" 649af69d88dSmrg " LOAD TEMP[0].x, RGLOBAL, TEMP[1].yyyy\n" 650af69d88dSmrg " UADD TEMP[1].x, TEMP[0], -TEMP[1]\n" 651af69d88dSmrg " STORE RGLOBAL.x, TEMP[1].yyyy, TEMP[1]\n" 652af69d88dSmrg " RET\n" 653af69d88dSmrg " ENDSUB\n"; 654af69d88dSmrg uint32_t input[8] = { 0x10001, 0x10002, 0x10003, 0x10004, 655af69d88dSmrg 0x10005, 0x10006, 0x10007, 0x10008 }; 656af69d88dSmrg 657af69d88dSmrg printf("- %s\n", __func__); 658af69d88dSmrg 659af69d88dSmrg init_prog(ctx, 0, 0, 32, src, NULL); 66001e04c3fSmrg init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 32, 0, 66101e04c3fSmrg test_default_init); 66201e04c3fSmrg init_tex(ctx, 1, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 32, 0, 66301e04c3fSmrg test_default_init); 66401e04c3fSmrg init_tex(ctx, 2, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 32, 0, 66501e04c3fSmrg test_default_init); 66601e04c3fSmrg init_tex(ctx, 3, PIPE_BUFFER, 
true, PIPE_FORMAT_R32_FLOAT, 32, 0, 66701e04c3fSmrg test_default_init); 668af69d88dSmrg init_globals(ctx, (int []){ 0, 1, 2, 3, -1 }, 669af69d88dSmrg (uint32_t *[]){ &input[1], &input[3], 670af69d88dSmrg &input[5], &input[7] }); 671af69d88dSmrg launch_grid(ctx, (uint []){4, 1, 1}, (uint []){1, 1, 1}, 0, input); 67201e04c3fSmrg check_tex(ctx, 0, test_input_global_expect, NULL); 67301e04c3fSmrg check_tex(ctx, 1, test_input_global_expect, NULL); 67401e04c3fSmrg check_tex(ctx, 2, test_input_global_expect, NULL); 67501e04c3fSmrg check_tex(ctx, 3, test_input_global_expect, NULL); 676af69d88dSmrg destroy_globals(ctx); 677af69d88dSmrg destroy_tex(ctx); 678af69d88dSmrg destroy_prog(ctx); 679af69d88dSmrg} 680af69d88dSmrg 68101e04c3fSmrg/* test_private */ 68201e04c3fSmrgstatic void test_private_expect(void *p, int s, int x, int y) 68301e04c3fSmrg{ 68401e04c3fSmrg *(uint32_t *)p = (x / 32) + x % 32; 68501e04c3fSmrg} 68601e04c3fSmrg 687af69d88dSmrgstatic void test_private(struct context *ctx) 688af69d88dSmrg{ 689af69d88dSmrg const char *src = "COMP\n" 690af69d88dSmrg "DCL RES[0], BUFFER, RAW, WR\n" 691af69d88dSmrg "DCL SV[0], BLOCK_ID[0]\n" 692af69d88dSmrg "DCL SV[1], BLOCK_SIZE[0]\n" 693af69d88dSmrg "DCL SV[2], THREAD_ID[0]\n" 694af69d88dSmrg "DCL TEMP[0], LOCAL\n" 695af69d88dSmrg "DCL TEMP[1], LOCAL\n" 696af69d88dSmrg "DCL TEMP[2], LOCAL\n" 697af69d88dSmrg "IMM UINT32 { 128, 0, 0, 0 }\n" 698af69d88dSmrg "IMM UINT32 { 4, 0, 0, 0 }\n" 699af69d88dSmrg "\n" 700af69d88dSmrg " BGNSUB\n" 701af69d88dSmrg " UMUL TEMP[0].x, SV[0], SV[1]\n" 702af69d88dSmrg " UADD TEMP[0].x, TEMP[0], SV[2]\n" 703af69d88dSmrg " MOV TEMP[1].x, IMM[0].wwww\n" 704af69d88dSmrg " BGNLOOP\n" 705af69d88dSmrg " USEQ TEMP[2].x, TEMP[1], IMM[0]\n" 706af69d88dSmrg " IF TEMP[2]\n" 707af69d88dSmrg " BRK\n" 708af69d88dSmrg " ENDIF\n" 709af69d88dSmrg " UDIV TEMP[2].x, TEMP[1], IMM[1]\n" 710af69d88dSmrg " UADD TEMP[2].x, TEMP[2], TEMP[0]\n" 711af69d88dSmrg " STORE RPRIVATE.x, TEMP[1], TEMP[2]\n" 712af69d88dSmrg " UADD 
TEMP[1].x, TEMP[1], IMM[1]\n" 713af69d88dSmrg " ENDLOOP\n" 714af69d88dSmrg " MOV TEMP[1].x, IMM[0].wwww\n" 715af69d88dSmrg " UMUL TEMP[0].x, TEMP[0], IMM[0]\n" 716af69d88dSmrg " BGNLOOP\n" 717af69d88dSmrg " USEQ TEMP[2].x, TEMP[1], IMM[0]\n" 718af69d88dSmrg " IF TEMP[2]\n" 719af69d88dSmrg " BRK\n" 720af69d88dSmrg " ENDIF\n" 721af69d88dSmrg " LOAD TEMP[2].x, RPRIVATE, TEMP[1]\n" 722af69d88dSmrg " STORE RES[0].x, TEMP[0], TEMP[2]\n" 723af69d88dSmrg " UADD TEMP[0].x, TEMP[0], IMM[1]\n" 724af69d88dSmrg " UADD TEMP[1].x, TEMP[1], IMM[1]\n" 725af69d88dSmrg " ENDLOOP\n" 726af69d88dSmrg " RET\n" 727af69d88dSmrg " ENDSUB\n"; 728af69d88dSmrg 729af69d88dSmrg printf("- %s\n", __func__); 730af69d88dSmrg 731af69d88dSmrg init_prog(ctx, 0, 128, 0, src, NULL); 732af69d88dSmrg init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 73301e04c3fSmrg 32768, 0, test_default_init); 734af69d88dSmrg init_compute_resources(ctx, (int []) { 0, -1 }); 735af69d88dSmrg launch_grid(ctx, (uint []){16, 1, 1}, (uint []){16, 1, 1}, 0, NULL); 73601e04c3fSmrg check_tex(ctx, 0, test_private_expect, NULL); 737af69d88dSmrg destroy_compute_resources(ctx); 738af69d88dSmrg destroy_tex(ctx); 739af69d88dSmrg destroy_prog(ctx); 740af69d88dSmrg} 741af69d88dSmrg 74201e04c3fSmrg/* test_local */ 74301e04c3fSmrgstatic void test_local_expect(void *p, int s, int x, int y) 74401e04c3fSmrg{ 74501e04c3fSmrg *(uint32_t *)p = x & 0x20 ? 
2 : 1; 74601e04c3fSmrg} 74701e04c3fSmrg 748af69d88dSmrgstatic void test_local(struct context *ctx) 749af69d88dSmrg{ 750af69d88dSmrg const char *src = "COMP\n" 751af69d88dSmrg "DCL RES[0], BUFFER, RAW, WR\n" 752af69d88dSmrg "DCL SV[0], BLOCK_ID[0]\n" 753af69d88dSmrg "DCL SV[1], BLOCK_SIZE[0]\n" 754af69d88dSmrg "DCL SV[2], THREAD_ID[0]\n" 755af69d88dSmrg "DCL TEMP[0], LOCAL\n" 756af69d88dSmrg "DCL TEMP[1], LOCAL\n" 757af69d88dSmrg "DCL TEMP[2], LOCAL\n" 758af69d88dSmrg "IMM UINT32 { 1, 0, 0, 0 }\n" 759af69d88dSmrg "IMM UINT32 { 2, 0, 0, 0 }\n" 760af69d88dSmrg "IMM UINT32 { 4, 0, 0, 0 }\n" 761af69d88dSmrg "IMM UINT32 { 32, 0, 0, 0 }\n" 762af69d88dSmrg "IMM UINT32 { 128, 0, 0, 0 }\n" 763af69d88dSmrg "\n" 764af69d88dSmrg " BGNSUB\n" 765af69d88dSmrg " UMUL TEMP[0].x, SV[2], IMM[2]\n" 766af69d88dSmrg " STORE RLOCAL.x, TEMP[0], IMM[0].wwww\n" 767af69d88dSmrg " MFENCE RLOCAL\n" 768af69d88dSmrg " USLT TEMP[1].x, SV[2], IMM[3]\n" 769af69d88dSmrg " IF TEMP[1]\n" 770af69d88dSmrg " UADD TEMP[1].x, TEMP[0], IMM[4]\n" 771af69d88dSmrg " BGNLOOP\n" 772af69d88dSmrg " LOAD TEMP[2].x, RLOCAL, TEMP[1]\n" 773af69d88dSmrg " USEQ TEMP[2].x, TEMP[2], IMM[0]\n" 774af69d88dSmrg " IF TEMP[2]\n" 775af69d88dSmrg " BRK\n" 776af69d88dSmrg " ENDIF\n" 777af69d88dSmrg " ENDLOOP\n" 778af69d88dSmrg " STORE RLOCAL.x, TEMP[0], IMM[0]\n" 779af69d88dSmrg " MFENCE RLOCAL\n" 780af69d88dSmrg " BGNLOOP\n" 781af69d88dSmrg " LOAD TEMP[2].x, RLOCAL, TEMP[1]\n" 782af69d88dSmrg " USEQ TEMP[2].x, TEMP[2], IMM[1]\n" 783af69d88dSmrg " IF TEMP[2]\n" 784af69d88dSmrg " BRK\n" 785af69d88dSmrg " ENDIF\n" 786af69d88dSmrg " ENDLOOP\n" 787af69d88dSmrg " ELSE\n" 788af69d88dSmrg " UADD TEMP[1].x, TEMP[0], -IMM[4]\n" 789af69d88dSmrg " BGNLOOP\n" 790af69d88dSmrg " LOAD TEMP[2].x, RLOCAL, TEMP[1]\n" 791af69d88dSmrg " USEQ TEMP[2].x, TEMP[2], IMM[0].wwww\n" 792af69d88dSmrg " IF TEMP[2]\n" 793af69d88dSmrg " BRK\n" 794af69d88dSmrg " ENDIF\n" 795af69d88dSmrg " ENDLOOP\n" 796af69d88dSmrg " STORE RLOCAL.x, TEMP[0], IMM[0]\n" 
797af69d88dSmrg " MFENCE RLOCAL\n" 798af69d88dSmrg " BGNLOOP\n" 799af69d88dSmrg " LOAD TEMP[2].x, RLOCAL, TEMP[1]\n" 800af69d88dSmrg " USEQ TEMP[2].x, TEMP[2], IMM[0]\n" 801af69d88dSmrg " IF TEMP[2]\n" 802af69d88dSmrg " BRK\n" 803af69d88dSmrg " ENDIF\n" 804af69d88dSmrg " ENDLOOP\n" 805af69d88dSmrg " STORE RLOCAL.x, TEMP[0], IMM[1]\n" 806af69d88dSmrg " MFENCE RLOCAL\n" 807af69d88dSmrg " ENDIF\n" 808af69d88dSmrg " UMUL TEMP[1].x, SV[0], SV[1]\n" 809af69d88dSmrg " UMUL TEMP[1].x, TEMP[1], IMM[2]\n" 810af69d88dSmrg " UADD TEMP[1].x, TEMP[1], TEMP[0]\n" 811af69d88dSmrg " LOAD TEMP[0].x, RLOCAL, TEMP[0]\n" 812af69d88dSmrg " STORE RES[0].x, TEMP[1], TEMP[0]\n" 813af69d88dSmrg " RET\n" 814af69d88dSmrg " ENDSUB\n"; 815af69d88dSmrg 816af69d88dSmrg printf("- %s\n", __func__); 817af69d88dSmrg 818af69d88dSmrg init_prog(ctx, 256, 0, 0, src, NULL); 819af69d88dSmrg init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 82001e04c3fSmrg 4096, 0, test_default_init); 821af69d88dSmrg init_compute_resources(ctx, (int []) { 0, -1 }); 822af69d88dSmrg launch_grid(ctx, (uint []){64, 1, 1}, (uint []){16, 1, 1}, 0, NULL); 82301e04c3fSmrg check_tex(ctx, 0, test_local_expect, NULL); 824af69d88dSmrg destroy_compute_resources(ctx); 825af69d88dSmrg destroy_tex(ctx); 826af69d88dSmrg destroy_prog(ctx); 827af69d88dSmrg} 828af69d88dSmrg 82901e04c3fSmrg/* test_sample */ 83001e04c3fSmrgstatic void test_sample_init(void *p, int s, int x, int y) 83101e04c3fSmrg{ 83201e04c3fSmrg *(float *)p = s ? 
1 : x * y; 83301e04c3fSmrg} 83401e04c3fSmrg 83501e04c3fSmrgstatic void test_sample_expect(void *p, int s, int x, int y) 83601e04c3fSmrg{ 83701e04c3fSmrg switch (x % 4) { 83801e04c3fSmrg case 0: 83901e04c3fSmrg *(float *)p = x / 4 * y; 84001e04c3fSmrg break; 84101e04c3fSmrg case 1: 84201e04c3fSmrg case 2: 84301e04c3fSmrg *(float *)p = 0; 84401e04c3fSmrg break; 84501e04c3fSmrg case 3: 84601e04c3fSmrg *(float *)p = 1; 84701e04c3fSmrg break; 84801e04c3fSmrg } 84901e04c3fSmrg} 85001e04c3fSmrg 851af69d88dSmrgstatic void test_sample(struct context *ctx) 852af69d88dSmrg{ 853af69d88dSmrg const char *src = "COMP\n" 854af69d88dSmrg "DCL SVIEW[0], 2D, FLOAT\n" 855af69d88dSmrg "DCL RES[0], 2D, RAW, WR\n" 856af69d88dSmrg "DCL SAMP[0]\n" 857af69d88dSmrg "DCL SV[0], BLOCK_ID[0]\n" 858af69d88dSmrg "DCL TEMP[0], LOCAL\n" 859af69d88dSmrg "DCL TEMP[1], LOCAL\n" 860af69d88dSmrg "IMM UINT32 { 16, 1, 0, 0 }\n" 861af69d88dSmrg "IMM FLT32 { 128, 32, 0, 0 }\n" 862af69d88dSmrg "\n" 863af69d88dSmrg " BGNSUB\n" 864af69d88dSmrg " I2F TEMP[1], SV[0]\n" 865af69d88dSmrg " DIV TEMP[1], TEMP[1], IMM[1]\n" 866af69d88dSmrg " SAMPLE TEMP[1], TEMP[1], SVIEW[0], SAMP[0]\n" 867af69d88dSmrg " UMUL TEMP[0], SV[0], IMM[0]\n" 868af69d88dSmrg " STORE RES[0].xyzw, TEMP[0], TEMP[1]\n" 869af69d88dSmrg " RET\n" 870af69d88dSmrg " ENDSUB\n"; 871af69d88dSmrg 872af69d88dSmrg printf("- %s\n", __func__); 873af69d88dSmrg 874af69d88dSmrg init_prog(ctx, 0, 0, 0, src, NULL); 875af69d88dSmrg init_tex(ctx, 0, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT, 87601e04c3fSmrg 128, 32, test_sample_init); 877af69d88dSmrg init_tex(ctx, 1, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT, 87801e04c3fSmrg 512, 32, test_sample_init); 879af69d88dSmrg init_compute_resources(ctx, (int []) { 1, -1 }); 880af69d88dSmrg init_sampler_views(ctx, (int []) { 0, -1 }); 881af69d88dSmrg init_sampler_states(ctx, 2); 882af69d88dSmrg launch_grid(ctx, (uint []){1, 1, 1}, (uint []){128, 32, 1}, 0, NULL); 88301e04c3fSmrg check_tex(ctx, 1, test_sample_expect, 
NULL); 884af69d88dSmrg destroy_sampler_states(ctx); 885af69d88dSmrg destroy_sampler_views(ctx); 886af69d88dSmrg destroy_compute_resources(ctx); 887af69d88dSmrg destroy_tex(ctx); 888af69d88dSmrg destroy_prog(ctx); 889af69d88dSmrg} 890af69d88dSmrg 89101e04c3fSmrg/* test_many_kern */ 89201e04c3fSmrgstatic void test_many_kern_expect(void *p, int s, int x, int y) 89301e04c3fSmrg{ 89401e04c3fSmrg *(uint32_t *)p = x; 89501e04c3fSmrg} 89601e04c3fSmrg 897af69d88dSmrgstatic void test_many_kern(struct context *ctx) 898af69d88dSmrg{ 899af69d88dSmrg const char *src = "COMP\n" 900af69d88dSmrg "DCL RES[0], BUFFER, RAW, WR\n" 901af69d88dSmrg "DCL TEMP[0], LOCAL\n" 902af69d88dSmrg "IMM UINT32 { 0, 1, 2, 3 }\n" 903af69d88dSmrg "IMM UINT32 { 4, 0, 0, 0 }\n" 904af69d88dSmrg "\n" 905af69d88dSmrg " BGNSUB\n" 906af69d88dSmrg " UMUL TEMP[0].x, IMM[0].xxxx, IMM[1].xxxx\n" 907af69d88dSmrg " STORE RES[0].x, TEMP[0], IMM[0].xxxx\n" 908af69d88dSmrg " RET\n" 909af69d88dSmrg " ENDSUB\n" 910af69d88dSmrg " BGNSUB\n" 911af69d88dSmrg " UMUL TEMP[0].x, IMM[0].yyyy, IMM[1].xxxx\n" 912af69d88dSmrg " STORE RES[0].x, TEMP[0], IMM[0].yyyy\n" 913af69d88dSmrg " RET\n" 914af69d88dSmrg " ENDSUB\n" 915af69d88dSmrg " BGNSUB\n" 916af69d88dSmrg " UMUL TEMP[0].x, IMM[0].zzzz, IMM[1].xxxx\n" 917af69d88dSmrg " STORE RES[0].x, TEMP[0], IMM[0].zzzz\n" 918af69d88dSmrg " RET\n" 919af69d88dSmrg " ENDSUB\n" 920af69d88dSmrg " BGNSUB\n" 921af69d88dSmrg " UMUL TEMP[0].x, IMM[0].wwww, IMM[1].xxxx\n" 922af69d88dSmrg " STORE RES[0].x, TEMP[0], IMM[0].wwww\n" 923af69d88dSmrg " RET\n" 924af69d88dSmrg " ENDSUB\n"; 925af69d88dSmrg 926af69d88dSmrg printf("- %s\n", __func__); 927af69d88dSmrg 928af69d88dSmrg init_prog(ctx, 0, 0, 0, src, NULL); 929af69d88dSmrg init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 93001e04c3fSmrg 16, 0, test_default_init); 931af69d88dSmrg init_compute_resources(ctx, (int []) { 0, -1 }); 932af69d88dSmrg launch_grid(ctx, (uint []){1, 1, 1}, (uint []){1, 1, 1}, 0, NULL); 933af69d88dSmrg 
launch_grid(ctx, (uint []){1, 1, 1}, (uint []){1, 1, 1}, 5, NULL); 934af69d88dSmrg launch_grid(ctx, (uint []){1, 1, 1}, (uint []){1, 1, 1}, 10, NULL); 935af69d88dSmrg launch_grid(ctx, (uint []){1, 1, 1}, (uint []){1, 1, 1}, 15, NULL); 93601e04c3fSmrg check_tex(ctx, 0, test_many_kern_expect, NULL); 937af69d88dSmrg destroy_compute_resources(ctx); 938af69d88dSmrg destroy_tex(ctx); 939af69d88dSmrg destroy_prog(ctx); 940af69d88dSmrg} 941af69d88dSmrg 94201e04c3fSmrg/* test_constant */ 94301e04c3fSmrgstatic void test_constant_init(void *p, int s, int x, int y) 94401e04c3fSmrg{ 94501e04c3fSmrg *(float *)p = s ? 0xdeadbeef : 8.0 - (float)x; 94601e04c3fSmrg} 94701e04c3fSmrg 94801e04c3fSmrgstatic void test_constant_expect(void *p, int s, int x, int y) 94901e04c3fSmrg{ 95001e04c3fSmrg *(float *)p = 8.0 - (float)x; 95101e04c3fSmrg} 95201e04c3fSmrg 953af69d88dSmrgstatic void test_constant(struct context *ctx) 954af69d88dSmrg{ 955af69d88dSmrg const char *src = "COMP\n" 956af69d88dSmrg "DCL RES[0], BUFFER, RAW\n" 957af69d88dSmrg "DCL RES[1], BUFFER, RAW, WR\n" 958af69d88dSmrg "DCL SV[0], BLOCK_ID[0]\n" 959af69d88dSmrg "DCL TEMP[0], LOCAL\n" 960af69d88dSmrg "DCL TEMP[1], LOCAL\n" 961af69d88dSmrg "IMM UINT32 { 4, 0, 0, 0 }\n" 962af69d88dSmrg "\n" 963af69d88dSmrg " BGNSUB\n" 964af69d88dSmrg " UMUL TEMP[0].x, SV[0], IMM[0]\n" 965af69d88dSmrg " LOAD TEMP[1].x, RES[0], TEMP[0]\n" 966af69d88dSmrg " STORE RES[1].x, TEMP[0], TEMP[1]\n" 967af69d88dSmrg " RET\n" 968af69d88dSmrg " ENDSUB\n"; 969af69d88dSmrg 970af69d88dSmrg printf("- %s\n", __func__); 971af69d88dSmrg 972af69d88dSmrg init_prog(ctx, 0, 0, 0, src, NULL); 973af69d88dSmrg init_tex(ctx, 0, PIPE_BUFFER, false, PIPE_FORMAT_R32_FLOAT, 97401e04c3fSmrg 256, 0, test_constant_init); 975af69d88dSmrg init_tex(ctx, 1, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 97601e04c3fSmrg 256, 0, test_constant_init); 977af69d88dSmrg init_compute_resources(ctx, (int []) { 0, 1, -1 }); 978af69d88dSmrg launch_grid(ctx, (uint []){1, 1, 1}, (uint []){64, 1, 1}, 
0, NULL); 97901e04c3fSmrg check_tex(ctx, 1, test_constant_expect, NULL); 980af69d88dSmrg destroy_compute_resources(ctx); 981af69d88dSmrg destroy_tex(ctx); 982af69d88dSmrg destroy_prog(ctx); 983af69d88dSmrg} 984af69d88dSmrg 98501e04c3fSmrg/* test_resource_indirect */ 98601e04c3fSmrgstatic void test_resource_indirect_init(void *p, int s, int x, int y) 98701e04c3fSmrg{ 98801e04c3fSmrg *(uint32_t *)p = s == 0 ? 0xdeadbeef : 98901e04c3fSmrg s == 1 ? x % 2 : 99001e04c3fSmrg s == 2 ? 2 * x : 99101e04c3fSmrg 2 * x + 1; 99201e04c3fSmrg} 99301e04c3fSmrg 99401e04c3fSmrgstatic void test_resource_indirect_expect(void *p, int s, int x, int y) 99501e04c3fSmrg{ 99601e04c3fSmrg *(uint32_t *)p = 2 * x + (x % 2 ? 1 : 0); 99701e04c3fSmrg} 99801e04c3fSmrg 999af69d88dSmrgstatic void test_resource_indirect(struct context *ctx) 1000af69d88dSmrg{ 1001af69d88dSmrg const char *src = "COMP\n" 1002af69d88dSmrg "DCL RES[0], BUFFER, RAW, WR\n" 1003af69d88dSmrg "DCL RES[1..3], BUFFER, RAW\n" 1004af69d88dSmrg "DCL SV[0], BLOCK_ID[0]\n" 1005af69d88dSmrg "DCL TEMP[0], LOCAL\n" 1006af69d88dSmrg "DCL TEMP[1], LOCAL\n" 1007af69d88dSmrg "IMM UINT32 { 4, 0, 0, 0 }\n" 1008af69d88dSmrg "\n" 1009af69d88dSmrg " BGNSUB\n" 1010af69d88dSmrg " UMUL TEMP[0].x, SV[0], IMM[0]\n" 1011af69d88dSmrg " LOAD TEMP[1].x, RES[1], TEMP[0]\n" 1012af69d88dSmrg " LOAD TEMP[1].x, RES[TEMP[1].x+2], TEMP[0]\n" 1013af69d88dSmrg " STORE RES[0].x, TEMP[0], TEMP[1]\n" 1014af69d88dSmrg " RET\n" 1015af69d88dSmrg " ENDSUB\n"; 1016af69d88dSmrg 1017af69d88dSmrg printf("- %s\n", __func__); 1018af69d88dSmrg 1019af69d88dSmrg init_prog(ctx, 0, 0, 0, src, NULL); 1020af69d88dSmrg init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 102101e04c3fSmrg 256, 0, test_resource_indirect_init); 1022af69d88dSmrg init_tex(ctx, 1, PIPE_BUFFER, false, PIPE_FORMAT_R32_FLOAT, 102301e04c3fSmrg 256, 0, test_resource_indirect_init); 1024af69d88dSmrg init_tex(ctx, 2, PIPE_BUFFER, false, PIPE_FORMAT_R32_FLOAT, 102501e04c3fSmrg 256, 0, 
test_resource_indirect_init); 1026af69d88dSmrg init_tex(ctx, 3, PIPE_BUFFER, false, PIPE_FORMAT_R32_FLOAT, 102701e04c3fSmrg 256, 0, test_resource_indirect_init); 1028af69d88dSmrg init_compute_resources(ctx, (int []) { 0, 1, 2, 3, -1 }); 1029af69d88dSmrg launch_grid(ctx, (uint []){1, 1, 1}, (uint []){64, 1, 1}, 0, NULL); 103001e04c3fSmrg check_tex(ctx, 0, test_resource_indirect_expect, NULL); 1031af69d88dSmrg destroy_compute_resources(ctx); 1032af69d88dSmrg destroy_tex(ctx); 1033af69d88dSmrg destroy_prog(ctx); 1034af69d88dSmrg} 1035af69d88dSmrg 103601e04c3fSmrg/* test_surface_ld */ 1037af69d88dSmrgenum pipe_format surface_fmts[] = { 1038af69d88dSmrg PIPE_FORMAT_B8G8R8A8_UNORM, 1039af69d88dSmrg PIPE_FORMAT_B8G8R8X8_UNORM, 1040af69d88dSmrg PIPE_FORMAT_A8R8G8B8_UNORM, 1041af69d88dSmrg PIPE_FORMAT_X8R8G8B8_UNORM, 1042af69d88dSmrg PIPE_FORMAT_X8R8G8B8_UNORM, 1043af69d88dSmrg PIPE_FORMAT_L8_UNORM, 1044af69d88dSmrg PIPE_FORMAT_A8_UNORM, 1045af69d88dSmrg PIPE_FORMAT_I8_UNORM, 1046af69d88dSmrg PIPE_FORMAT_L8A8_UNORM, 1047af69d88dSmrg PIPE_FORMAT_R32_FLOAT, 1048af69d88dSmrg PIPE_FORMAT_R32G32_FLOAT, 1049af69d88dSmrg PIPE_FORMAT_R32G32B32A32_FLOAT, 1050af69d88dSmrg PIPE_FORMAT_R32_UNORM, 1051af69d88dSmrg PIPE_FORMAT_R32G32_UNORM, 1052af69d88dSmrg PIPE_FORMAT_R32G32B32A32_UNORM, 1053af69d88dSmrg PIPE_FORMAT_R32_SNORM, 1054af69d88dSmrg PIPE_FORMAT_R32G32_SNORM, 1055af69d88dSmrg PIPE_FORMAT_R32G32B32A32_SNORM, 1056af69d88dSmrg PIPE_FORMAT_R8_UINT, 1057af69d88dSmrg PIPE_FORMAT_R8G8_UINT, 1058af69d88dSmrg PIPE_FORMAT_R8G8B8A8_UINT, 1059af69d88dSmrg PIPE_FORMAT_R8_SINT, 1060af69d88dSmrg PIPE_FORMAT_R8G8_SINT, 1061af69d88dSmrg PIPE_FORMAT_R8G8B8A8_SINT, 1062af69d88dSmrg PIPE_FORMAT_R32_UINT, 1063af69d88dSmrg PIPE_FORMAT_R32G32_UINT, 1064af69d88dSmrg PIPE_FORMAT_R32G32B32A32_UINT, 1065af69d88dSmrg PIPE_FORMAT_R32_SINT, 1066af69d88dSmrg PIPE_FORMAT_R32G32_SINT, 1067af69d88dSmrg PIPE_FORMAT_R32G32B32A32_SINT 1068af69d88dSmrg}; 1069af69d88dSmrg 107001e04c3fSmrgstatic void 
test_surface_ld_init0f(void *p, int s, int x, int y) 107101e04c3fSmrg{ 107201e04c3fSmrg float v[] = { 1.0, -.75, .50, -.25 }; 107301e04c3fSmrg int i = 0; 107401e04c3fSmrg 10757ec681f3Smrg util_format_pack_rgba(surface_fmts[i], p, v, 1); 107601e04c3fSmrg} 107701e04c3fSmrg 107801e04c3fSmrgstatic void test_surface_ld_init0i(void *p, int s, int x, int y) 107901e04c3fSmrg{ 108001e04c3fSmrg int v[] = { 0xffffffff, 0xffff, 0xff, 0xf }; 108101e04c3fSmrg int i = 0; 108201e04c3fSmrg 10837ec681f3Smrg util_format_pack_rgba(surface_fmts[i], p, v, 1); 108401e04c3fSmrg} 108501e04c3fSmrg 108601e04c3fSmrgstatic void test_surface_ld_expectf(void *p, int s, int x, int y) 108701e04c3fSmrg{ 108801e04c3fSmrg float v[4], w[4]; 108901e04c3fSmrg int i = 0; 109001e04c3fSmrg 109101e04c3fSmrg test_surface_ld_init0f(v, s, x / 4, y); 10927ec681f3Smrg util_format_unpack_rgba(surface_fmts[i], w, v, 1); 109301e04c3fSmrg *(float *)p = w[x % 4]; 109401e04c3fSmrg} 109501e04c3fSmrg 109601e04c3fSmrgstatic void test_surface_ld_expecti(void *p, int s, int x, int y) 109701e04c3fSmrg{ 109801e04c3fSmrg int32_t v[4], w[4]; 109901e04c3fSmrg int i = 0; 110001e04c3fSmrg 110101e04c3fSmrg test_surface_ld_init0i(v, s, x / 4, y); 11027ec681f3Smrg util_format_unpack_rgba(surface_fmts[i], w, v, 1); 110301e04c3fSmrg *(uint32_t *)p = w[x % 4]; 110401e04c3fSmrg} 110501e04c3fSmrg 1106af69d88dSmrgstatic void test_surface_ld(struct context *ctx) 1107af69d88dSmrg{ 1108af69d88dSmrg const char *src = "COMP\n" 1109af69d88dSmrg "DCL RES[0], 2D\n" 1110af69d88dSmrg "DCL RES[1], 2D, RAW, WR\n" 1111af69d88dSmrg "DCL SV[0], BLOCK_ID[0]\n" 1112af69d88dSmrg "DCL TEMP[0], LOCAL\n" 1113af69d88dSmrg "DCL TEMP[1], LOCAL\n" 1114af69d88dSmrg "IMM UINT32 { 16, 1, 0, 0 }\n" 1115af69d88dSmrg "\n" 1116af69d88dSmrg " BGNSUB\n" 1117af69d88dSmrg " LOAD TEMP[1], RES[0], SV[0]\n" 1118af69d88dSmrg " UMUL TEMP[0], SV[0], IMM[0]\n" 1119af69d88dSmrg " STORE RES[1].xyzw, TEMP[0], TEMP[1]\n" 1120af69d88dSmrg " RET\n" 1121af69d88dSmrg " ENDSUB\n"; 
1122af69d88dSmrg int i = 0; 1123af69d88dSmrg 1124af69d88dSmrg printf("- %s\n", __func__); 1125af69d88dSmrg 1126af69d88dSmrg init_prog(ctx, 0, 0, 0, src, NULL); 1127af69d88dSmrg 112801e04c3fSmrg for (i = 0; i < ARRAY_SIZE(surface_fmts); i++) { 1129af69d88dSmrg bool is_int = util_format_is_pure_integer(surface_fmts[i]); 1130af69d88dSmrg 1131af69d88dSmrg printf(" - %s\n", util_format_name(surface_fmts[i])); 1132af69d88dSmrg 1133af69d88dSmrg if (!ctx->screen->is_format_supported(ctx->screen, 113401e04c3fSmrg surface_fmts[i], PIPE_TEXTURE_2D, 1, 1, 1135af69d88dSmrg PIPE_BIND_COMPUTE_RESOURCE)) { 1136af69d88dSmrg printf("(unsupported)\n"); 1137af69d88dSmrg continue; 1138af69d88dSmrg } 1139af69d88dSmrg 1140af69d88dSmrg init_tex(ctx, 0, PIPE_TEXTURE_2D, true, surface_fmts[i], 114101e04c3fSmrg 128, 32, (is_int ? test_surface_ld_init0i : test_surface_ld_init0f)); 1142af69d88dSmrg init_tex(ctx, 1, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT, 114301e04c3fSmrg 512, 32, test_default_init); 1144af69d88dSmrg init_compute_resources(ctx, (int []) { 0, 1, -1 }); 1145af69d88dSmrg init_sampler_states(ctx, 2); 1146af69d88dSmrg launch_grid(ctx, (uint []){1, 1, 1}, (uint []){128, 32, 1}, 0, 1147af69d88dSmrg NULL); 114801e04c3fSmrg check_tex(ctx, 1, (is_int ? 
test_surface_ld_expecti : test_surface_ld_expectf), NULL); 1149af69d88dSmrg destroy_sampler_states(ctx); 1150af69d88dSmrg destroy_compute_resources(ctx); 1151af69d88dSmrg destroy_tex(ctx); 1152af69d88dSmrg } 1153af69d88dSmrg 1154af69d88dSmrg destroy_prog(ctx); 1155af69d88dSmrg} 1156af69d88dSmrg 115701e04c3fSmrg/* test_surface_st */ 115801e04c3fSmrgstatic void test_surface_st_init0f(void *p, int s, int x, int y) 115901e04c3fSmrg{ 116001e04c3fSmrg float v[] = { 1.0, -.75, 0.5, -.25 }; 116101e04c3fSmrg *(float *)p = v[x % 4]; 116201e04c3fSmrg} 116301e04c3fSmrg 116401e04c3fSmrgstatic void test_surface_st_init0i(void *p, int s, int x, int y) 116501e04c3fSmrg{ 116601e04c3fSmrg int v[] = { 0xffffffff, 0xffff, 0xff, 0xf }; 116701e04c3fSmrg *(int32_t *)p = v[x % 4]; 116801e04c3fSmrg} 116901e04c3fSmrg 117001e04c3fSmrgstatic void test_surface_st_init1(void *p, int s, int x, int y) 117101e04c3fSmrg{ 117201e04c3fSmrg int i = 0; 117301e04c3fSmrg memset(p, 1, util_format_get_blocksize(surface_fmts[i])); 117401e04c3fSmrg} 117501e04c3fSmrg 117601e04c3fSmrgstatic void test_surface_st_expectf(void *p, int s, int x, int y) 117701e04c3fSmrg{ 117801e04c3fSmrg float vf[4]; 117901e04c3fSmrg int i = 0, j; 118001e04c3fSmrg 118101e04c3fSmrg for (j = 0; j < 4; j++) 118201e04c3fSmrg test_surface_st_init0f(&vf[j], s, 4 * x + j, y); 11837ec681f3Smrg util_format_pack_rgba(surface_fmts[i], p, vf, 1); 118401e04c3fSmrg} 118501e04c3fSmrg 118601e04c3fSmrgstatic void test_surface_st_expects(void *p, int s, int x, int y) 118701e04c3fSmrg{ 118801e04c3fSmrg int32_t v[4]; 118901e04c3fSmrg int i = 0, j; 119001e04c3fSmrg 119101e04c3fSmrg for (j = 0; j < 4; j++) 119201e04c3fSmrg test_surface_st_init0i(&v[j], s, 4 * x + j, y); 11937ec681f3Smrg util_format_pack_rgba(surface_fmts[i], p, v, 1); 119401e04c3fSmrg} 119501e04c3fSmrg 119601e04c3fSmrgstatic void test_surface_st_expectu(void *p, int s, int x, int y) 119701e04c3fSmrg{ 119801e04c3fSmrg uint32_t v[4]; 119901e04c3fSmrg int i = 0, j; 120001e04c3fSmrg 
120101e04c3fSmrg for (j = 0; j < 4; j++) 120201e04c3fSmrg test_surface_st_init0i(&v[j], s, 4 * x + j, y); 12037ec681f3Smrg util_format_pack_rgba(surface_fmts[i], p, v, 1); 12047ec681f3Smrg} 12057ec681f3Smrg 12067ec681f3Smrgstatic unsigned absdiff(uint32_t a, uint32_t b) 12077ec681f3Smrg{ 12087ec681f3Smrg return (a > b) ? (a - b) : (b - a); 120901e04c3fSmrg} 121001e04c3fSmrg 121101e04c3fSmrgstatic bool test_surface_st_check(void *x, void *y, int sz) 121201e04c3fSmrg{ 121301e04c3fSmrg int i = 0, j; 121401e04c3fSmrg 121501e04c3fSmrg if (util_format_is_float(surface_fmts[i])) { 121601e04c3fSmrg return fabs(*(float *)x - *(float *)y) < 3.92156863e-3; 121701e04c3fSmrg 121801e04c3fSmrg } else if ((sz % 4) == 0) { 121901e04c3fSmrg for (j = 0; j < sz / 4; j++) 12207ec681f3Smrg if (absdiff(((uint32_t *)x)[j], 12217ec681f3Smrg ((uint32_t *)y)[j]) > 1) 122201e04c3fSmrg return false; 122301e04c3fSmrg return true; 122401e04c3fSmrg } else { 122501e04c3fSmrg return !memcmp(x, y, sz); 122601e04c3fSmrg } 122701e04c3fSmrg} 122801e04c3fSmrg 1229af69d88dSmrgstatic void test_surface_st(struct context *ctx) 1230af69d88dSmrg{ 1231af69d88dSmrg const char *src = "COMP\n" 1232af69d88dSmrg "DCL RES[0], 2D, RAW\n" 1233af69d88dSmrg "DCL RES[1], 2D, WR\n" 1234af69d88dSmrg "DCL SV[0], BLOCK_ID[0]\n" 1235af69d88dSmrg "DCL TEMP[0], LOCAL\n" 1236af69d88dSmrg "DCL TEMP[1], LOCAL\n" 1237af69d88dSmrg "IMM UINT32 { 16, 1, 0, 0 }\n" 1238af69d88dSmrg "\n" 1239af69d88dSmrg " BGNSUB\n" 1240af69d88dSmrg " UMUL TEMP[0], SV[0], IMM[0]\n" 1241af69d88dSmrg " LOAD TEMP[1], RES[0], TEMP[0]\n" 1242af69d88dSmrg " STORE RES[1], SV[0], TEMP[1]\n" 1243af69d88dSmrg " RET\n" 1244af69d88dSmrg " ENDSUB\n"; 1245af69d88dSmrg int i = 0; 1246af69d88dSmrg 1247af69d88dSmrg printf("- %s\n", __func__); 1248af69d88dSmrg 1249af69d88dSmrg init_prog(ctx, 0, 0, 0, src, NULL); 1250af69d88dSmrg 125101e04c3fSmrg for (i = 0; i < ARRAY_SIZE(surface_fmts); i++) { 1252af69d88dSmrg bool is_signed = (util_format_description(surface_fmts[i]) 
1253af69d88dSmrg ->channel[0].type == UTIL_FORMAT_TYPE_SIGNED); 1254af69d88dSmrg bool is_int = util_format_is_pure_integer(surface_fmts[i]); 1255af69d88dSmrg 1256af69d88dSmrg printf(" - %s\n", util_format_name(surface_fmts[i])); 1257af69d88dSmrg 1258af69d88dSmrg if (!ctx->screen->is_format_supported(ctx->screen, 125901e04c3fSmrg surface_fmts[i], PIPE_TEXTURE_2D, 1, 1, 1260af69d88dSmrg PIPE_BIND_COMPUTE_RESOURCE)) { 1261af69d88dSmrg printf("(unsupported)\n"); 1262af69d88dSmrg continue; 1263af69d88dSmrg } 1264af69d88dSmrg 1265af69d88dSmrg init_tex(ctx, 0, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT, 126601e04c3fSmrg 512, 32, (is_int ? test_surface_st_init0i : test_surface_st_init0f)); 1267af69d88dSmrg init_tex(ctx, 1, PIPE_TEXTURE_2D, true, surface_fmts[i], 126801e04c3fSmrg 128, 32, test_surface_st_init1); 1269af69d88dSmrg init_compute_resources(ctx, (int []) { 0, 1, -1 }); 1270af69d88dSmrg init_sampler_states(ctx, 2); 1271af69d88dSmrg launch_grid(ctx, (uint []){1, 1, 1}, (uint []){128, 32, 1}, 0, 1272af69d88dSmrg NULL); 127301e04c3fSmrg check_tex(ctx, 1, (is_int && is_signed ? test_surface_st_expects : 127401e04c3fSmrg is_int && !is_signed ? 
test_surface_st_expectu : 127501e04c3fSmrg test_surface_st_expectf), test_surface_st_check); 1276af69d88dSmrg destroy_sampler_states(ctx); 1277af69d88dSmrg destroy_compute_resources(ctx); 1278af69d88dSmrg destroy_tex(ctx); 1279af69d88dSmrg } 1280af69d88dSmrg 1281af69d88dSmrg destroy_prog(ctx); 1282af69d88dSmrg} 1283af69d88dSmrg 128401e04c3fSmrg/* test_barrier */ 128501e04c3fSmrgstatic void test_barrier_expect(void *p, int s, int x, int y) 128601e04c3fSmrg{ 128701e04c3fSmrg *(uint32_t *)p = 31; 128801e04c3fSmrg} 128901e04c3fSmrg 1290af69d88dSmrgstatic void test_barrier(struct context *ctx) 1291af69d88dSmrg{ 1292af69d88dSmrg const char *src = "COMP\n" 1293af69d88dSmrg "DCL RES[0], BUFFER, RAW, WR\n" 1294af69d88dSmrg "DCL SV[0], BLOCK_ID[0]\n" 1295af69d88dSmrg "DCL SV[1], BLOCK_SIZE[0]\n" 1296af69d88dSmrg "DCL SV[2], THREAD_ID[0]\n" 1297af69d88dSmrg "DCL TEMP[0], LOCAL\n" 1298af69d88dSmrg "DCL TEMP[1], LOCAL\n" 1299af69d88dSmrg "DCL TEMP[2], LOCAL\n" 1300af69d88dSmrg "DCL TEMP[3], LOCAL\n" 1301af69d88dSmrg "IMM UINT32 { 1, 0, 0, 0 }\n" 1302af69d88dSmrg "IMM UINT32 { 4, 0, 0, 0 }\n" 1303af69d88dSmrg "IMM UINT32 { 32, 0, 0, 0 }\n" 1304af69d88dSmrg "\n" 1305af69d88dSmrg " BGNSUB\n" 1306af69d88dSmrg " UMUL TEMP[0].x, SV[2], IMM[1]\n" 1307af69d88dSmrg " MOV TEMP[1].x, IMM[0].wwww\n" 1308af69d88dSmrg " BGNLOOP\n" 1309af69d88dSmrg " BARRIER\n" 1310af69d88dSmrg " STORE RLOCAL.x, TEMP[0], TEMP[1]\n" 1311af69d88dSmrg " BARRIER\n" 1312af69d88dSmrg " MOV TEMP[2].x, IMM[0].wwww\n" 1313af69d88dSmrg " BGNLOOP\n" 1314af69d88dSmrg " UMUL TEMP[3].x, TEMP[2], IMM[1]\n" 1315af69d88dSmrg " LOAD TEMP[3].x, RLOCAL, TEMP[3]\n" 1316af69d88dSmrg " USNE TEMP[3].x, TEMP[3], TEMP[1]\n" 1317af69d88dSmrg " IF TEMP[3]\n" 1318af69d88dSmrg " END\n" 1319af69d88dSmrg " ENDIF\n" 1320af69d88dSmrg " UADD TEMP[2].x, TEMP[2], IMM[0]\n" 1321af69d88dSmrg " USEQ TEMP[3].x, TEMP[2], SV[1]\n" 1322af69d88dSmrg " IF TEMP[3]\n" 1323af69d88dSmrg " BRK\n" 1324af69d88dSmrg " ENDIF\n" 1325af69d88dSmrg " ENDLOOP\n" 
1326af69d88dSmrg " UADD TEMP[1].x, TEMP[1], IMM[0]\n" 1327af69d88dSmrg " USEQ TEMP[2].x, TEMP[1], IMM[2]\n" 1328af69d88dSmrg " IF TEMP[2]\n" 1329af69d88dSmrg " BRK\n" 1330af69d88dSmrg " ENDIF\n" 1331af69d88dSmrg " ENDLOOP\n" 1332af69d88dSmrg " UMUL TEMP[1].x, SV[0], SV[1]\n" 1333af69d88dSmrg " UMUL TEMP[1].x, TEMP[1], IMM[1]\n" 1334af69d88dSmrg " UADD TEMP[1].x, TEMP[1], TEMP[0]\n" 1335af69d88dSmrg " LOAD TEMP[0].x, RLOCAL, TEMP[0]\n" 1336af69d88dSmrg " STORE RES[0].x, TEMP[1], TEMP[0]\n" 1337af69d88dSmrg " RET\n" 1338af69d88dSmrg " ENDSUB\n"; 1339af69d88dSmrg 1340af69d88dSmrg printf("- %s\n", __func__); 1341af69d88dSmrg 1342af69d88dSmrg init_prog(ctx, 256, 0, 0, src, NULL); 1343af69d88dSmrg init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 134401e04c3fSmrg 4096, 0, test_default_init); 1345af69d88dSmrg init_compute_resources(ctx, (int []) { 0, -1 }); 1346af69d88dSmrg launch_grid(ctx, (uint []){64, 1, 1}, (uint []){16, 1, 1}, 0, NULL); 134701e04c3fSmrg check_tex(ctx, 0, test_barrier_expect, NULL); 1348af69d88dSmrg destroy_compute_resources(ctx); 1349af69d88dSmrg destroy_tex(ctx); 1350af69d88dSmrg destroy_prog(ctx); 1351af69d88dSmrg} 1352af69d88dSmrg 135301e04c3fSmrg/* test_atom_ops */ 135401e04c3fSmrgstatic void test_atom_ops_init(void *p, int s, int x, int y) 135501e04c3fSmrg{ 135601e04c3fSmrg *(uint32_t *)p = 0xbad; 135701e04c3fSmrg} 135801e04c3fSmrg 135901e04c3fSmrgstatic void test_atom_ops_expect(void *p, int s, int x, int y) 136001e04c3fSmrg{ 136101e04c3fSmrg switch (x) { 136201e04c3fSmrg case 0: 136301e04c3fSmrg *(uint32_t *)p = 0xce6c8eef; 136401e04c3fSmrg break; 136501e04c3fSmrg case 1: 136601e04c3fSmrg *(uint32_t *)p = 0xdeadbeef; 136701e04c3fSmrg break; 136801e04c3fSmrg case 2: 136901e04c3fSmrg *(uint32_t *)p = 0x11111111; 137001e04c3fSmrg break; 137101e04c3fSmrg case 3: 137201e04c3fSmrg *(uint32_t *)p = 0x10011001; 137301e04c3fSmrg break; 137401e04c3fSmrg case 4: 137501e04c3fSmrg *(uint32_t *)p = 0xdfbdbfff; 137601e04c3fSmrg break; 
137701e04c3fSmrg case 5: 137801e04c3fSmrg *(uint32_t *)p = 0x11111111; 137901e04c3fSmrg break; 138001e04c3fSmrg case 6: 138101e04c3fSmrg *(uint32_t *)p = 0x11111111; 138201e04c3fSmrg break; 138301e04c3fSmrg case 7: 138401e04c3fSmrg *(uint32_t *)p = 0xdeadbeef; 138501e04c3fSmrg break; 138601e04c3fSmrg case 8: 138701e04c3fSmrg *(uint32_t *)p = 0xdeadbeef; 138801e04c3fSmrg break; 138901e04c3fSmrg case 9: 139001e04c3fSmrg *(uint32_t *)p = 0x11111111; 139101e04c3fSmrg break; 139201e04c3fSmrg } 139301e04c3fSmrg} 139401e04c3fSmrg 1395af69d88dSmrgstatic void test_atom_ops(struct context *ctx, bool global) 1396af69d88dSmrg{ 1397af69d88dSmrg const char *src = "COMP\n" 1398af69d88dSmrg "#ifdef TARGET_GLOBAL\n" 1399af69d88dSmrg "#define target RES[0]\n" 1400af69d88dSmrg "#else\n" 1401af69d88dSmrg "#define target RLOCAL\n" 1402af69d88dSmrg "#endif\n" 1403af69d88dSmrg "" 1404af69d88dSmrg "DCL RES[0], BUFFER, RAW, WR\n" 1405af69d88dSmrg "#define threadid SV[0]\n" 1406af69d88dSmrg "DCL threadid, THREAD_ID[0]\n" 1407af69d88dSmrg "" 1408af69d88dSmrg "#define offset TEMP[0]\n" 1409af69d88dSmrg "DCL offset, LOCAL\n" 1410af69d88dSmrg "#define tmp TEMP[1]\n" 1411af69d88dSmrg "DCL tmp, LOCAL\n" 1412af69d88dSmrg "" 1413af69d88dSmrg "#define k0 IMM[0]\n" 1414af69d88dSmrg "IMM UINT32 { 0, 0, 0, 0 }\n" 1415af69d88dSmrg "#define k1 IMM[1]\n" 1416af69d88dSmrg "IMM UINT32 { 1, 0, 0, 0 }\n" 1417af69d88dSmrg "#define k2 IMM[2]\n" 1418af69d88dSmrg "IMM UINT32 { 2, 0, 0, 0 }\n" 1419af69d88dSmrg "#define k3 IMM[3]\n" 1420af69d88dSmrg "IMM UINT32 { 3, 0, 0, 0 }\n" 1421af69d88dSmrg "#define k4 IMM[4]\n" 1422af69d88dSmrg "IMM UINT32 { 4, 0, 0, 0 }\n" 1423af69d88dSmrg "#define k5 IMM[5]\n" 1424af69d88dSmrg "IMM UINT32 { 5, 0, 0, 0 }\n" 1425af69d88dSmrg "#define k6 IMM[6]\n" 1426af69d88dSmrg "IMM UINT32 { 6, 0, 0, 0 }\n" 1427af69d88dSmrg "#define k7 IMM[7]\n" 1428af69d88dSmrg "IMM UINT32 { 7, 0, 0, 0 }\n" 1429af69d88dSmrg "#define k8 IMM[8]\n" 1430af69d88dSmrg "IMM UINT32 { 8, 0, 0, 0 }\n" 
1431af69d88dSmrg "#define k9 IMM[9]\n" 1432af69d88dSmrg "IMM UINT32 { 9, 0, 0, 0 }\n" 1433af69d88dSmrg "#define korig IMM[10].xxxx\n" 1434af69d88dSmrg "#define karg IMM[10].yyyy\n" 1435af69d88dSmrg "IMM UINT32 { 3735928559, 286331153, 0, 0 }\n" 1436af69d88dSmrg "\n" 1437af69d88dSmrg " BGNSUB\n" 1438af69d88dSmrg " UMUL offset.x, threadid, k4\n" 1439af69d88dSmrg " STORE target.x, offset, korig\n" 1440af69d88dSmrg " USEQ tmp.x, threadid, k0\n" 1441af69d88dSmrg " IF tmp\n" 1442af69d88dSmrg " ATOMUADD tmp.x, target, offset, karg\n" 1443af69d88dSmrg " ATOMUADD tmp.x, target, offset, tmp\n" 1444af69d88dSmrg " ENDIF\n" 1445af69d88dSmrg " USEQ tmp.x, threadid, k1\n" 1446af69d88dSmrg " IF tmp\n" 1447af69d88dSmrg " ATOMXCHG tmp.x, target, offset, karg\n" 1448af69d88dSmrg " ATOMXCHG tmp.x, target, offset, tmp\n" 1449af69d88dSmrg " ENDIF\n" 1450af69d88dSmrg " USEQ tmp.x, threadid, k2\n" 1451af69d88dSmrg " IF tmp\n" 1452af69d88dSmrg " ATOMCAS tmp.x, target, offset, korig, karg\n" 1453af69d88dSmrg " ATOMCAS tmp.x, target, offset, tmp, k0\n" 1454af69d88dSmrg " ENDIF\n" 1455af69d88dSmrg " USEQ tmp.x, threadid, k3\n" 1456af69d88dSmrg " IF tmp\n" 1457af69d88dSmrg " ATOMAND tmp.x, target, offset, karg\n" 1458af69d88dSmrg " ATOMAND tmp.x, target, offset, tmp\n" 1459af69d88dSmrg " ENDIF\n" 1460af69d88dSmrg " USEQ tmp.x, threadid, k4\n" 1461af69d88dSmrg " IF tmp\n" 1462af69d88dSmrg " ATOMOR tmp.x, target, offset, karg\n" 1463af69d88dSmrg " ATOMOR tmp.x, target, offset, tmp\n" 1464af69d88dSmrg " ENDIF\n" 1465af69d88dSmrg " USEQ tmp.x, threadid, k5\n" 1466af69d88dSmrg " IF tmp\n" 1467af69d88dSmrg " ATOMXOR tmp.x, target, offset, karg\n" 1468af69d88dSmrg " ATOMXOR tmp.x, target, offset, tmp\n" 1469af69d88dSmrg " ENDIF\n" 1470af69d88dSmrg " USEQ tmp.x, threadid, k6\n" 1471af69d88dSmrg " IF tmp\n" 1472af69d88dSmrg " ATOMUMIN tmp.x, target, offset, karg\n" 1473af69d88dSmrg " ATOMUMIN tmp.x, target, offset, tmp\n" 1474af69d88dSmrg " ENDIF\n" 1475af69d88dSmrg " USEQ tmp.x, threadid, k7\n" 
1476af69d88dSmrg " IF tmp\n" 1477af69d88dSmrg " ATOMUMAX tmp.x, target, offset, karg\n" 1478af69d88dSmrg " ATOMUMAX tmp.x, target, offset, tmp\n" 1479af69d88dSmrg " ENDIF\n" 1480af69d88dSmrg " USEQ tmp.x, threadid, k8\n" 1481af69d88dSmrg " IF tmp\n" 1482af69d88dSmrg " ATOMIMIN tmp.x, target, offset, karg\n" 1483af69d88dSmrg " ATOMIMIN tmp.x, target, offset, tmp\n" 1484af69d88dSmrg " ENDIF\n" 1485af69d88dSmrg " USEQ tmp.x, threadid, k9\n" 1486af69d88dSmrg " IF tmp\n" 1487af69d88dSmrg " ATOMIMAX tmp.x, target, offset, karg\n" 1488af69d88dSmrg " ATOMIMAX tmp.x, target, offset, tmp\n" 1489af69d88dSmrg " ENDIF\n" 1490af69d88dSmrg "#ifdef TARGET_LOCAL\n" 1491af69d88dSmrg " LOAD tmp.x, RLOCAL, offset\n" 1492af69d88dSmrg " STORE RES[0].x, offset, tmp\n" 1493af69d88dSmrg "#endif\n" 1494af69d88dSmrg " RET\n" 1495af69d88dSmrg " ENDSUB\n"; 1496af69d88dSmrg 1497af69d88dSmrg printf("- %s (%s)\n", __func__, global ? "global" : "local"); 1498af69d88dSmrg 1499af69d88dSmrg init_prog(ctx, 40, 0, 0, src, 1500af69d88dSmrg (global ? "-DTARGET_GLOBAL" : "-DTARGET_LOCAL")); 1501af69d88dSmrg init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 150201e04c3fSmrg 40, 0, test_atom_ops_init); 1503af69d88dSmrg init_compute_resources(ctx, (int []) { 0, -1 }); 1504af69d88dSmrg launch_grid(ctx, (uint []){10, 1, 1}, (uint []){1, 1, 1}, 0, NULL); 150501e04c3fSmrg check_tex(ctx, 0, test_atom_ops_expect, NULL); 1506af69d88dSmrg destroy_compute_resources(ctx); 1507af69d88dSmrg destroy_tex(ctx); 1508af69d88dSmrg destroy_prog(ctx); 1509af69d88dSmrg} 1510af69d88dSmrg 151101e04c3fSmrg/* test_atom_race */ 151201e04c3fSmrgstatic void test_atom_race_expect(void *p, int s, int x, int y) 151301e04c3fSmrg{ 151401e04c3fSmrg *(uint32_t *)p = x & 0x20 ? 
0x11111111 : 0xffffffff; 151501e04c3fSmrg} 151601e04c3fSmrg 1517af69d88dSmrgstatic void test_atom_race(struct context *ctx, bool global) 1518af69d88dSmrg{ 1519af69d88dSmrg const char *src = "COMP\n" 1520af69d88dSmrg "#ifdef TARGET_GLOBAL\n" 1521af69d88dSmrg "#define target RES[0]\n" 1522af69d88dSmrg "#else\n" 1523af69d88dSmrg "#define target RLOCAL\n" 1524af69d88dSmrg "#endif\n" 1525af69d88dSmrg "" 1526af69d88dSmrg "DCL RES[0], BUFFER, RAW, WR\n" 1527af69d88dSmrg "" 1528af69d88dSmrg "#define blockid SV[0]\n" 1529af69d88dSmrg "DCL blockid, BLOCK_ID[0]\n" 1530af69d88dSmrg "#define blocksz SV[1]\n" 1531af69d88dSmrg "DCL blocksz, BLOCK_SIZE[0]\n" 1532af69d88dSmrg "#define threadid SV[2]\n" 1533af69d88dSmrg "DCL threadid, THREAD_ID[0]\n" 1534af69d88dSmrg "" 1535af69d88dSmrg "#define offset TEMP[0]\n" 1536af69d88dSmrg "DCL offset, LOCAL\n" 1537af69d88dSmrg "#define arg TEMP[1]\n" 1538af69d88dSmrg "DCL arg, LOCAL\n" 1539af69d88dSmrg "#define count TEMP[2]\n" 1540af69d88dSmrg "DCL count, LOCAL\n" 1541af69d88dSmrg "#define vlocal TEMP[3]\n" 1542af69d88dSmrg "DCL vlocal, LOCAL\n" 1543af69d88dSmrg "#define vshared TEMP[4]\n" 1544af69d88dSmrg "DCL vshared, LOCAL\n" 1545af69d88dSmrg "#define last TEMP[5]\n" 1546af69d88dSmrg "DCL last, LOCAL\n" 1547af69d88dSmrg "#define tmp0 TEMP[6]\n" 1548af69d88dSmrg "DCL tmp0, LOCAL\n" 1549af69d88dSmrg "#define tmp1 TEMP[7]\n" 1550af69d88dSmrg "DCL tmp1, LOCAL\n" 1551af69d88dSmrg "" 1552af69d88dSmrg "#define k0 IMM[0]\n" 1553af69d88dSmrg "IMM UINT32 { 0, 0, 0, 0 }\n" 1554af69d88dSmrg "#define k1 IMM[1]\n" 1555af69d88dSmrg "IMM UINT32 { 1, 0, 0, 0 }\n" 1556af69d88dSmrg "#define k4 IMM[2]\n" 1557af69d88dSmrg "IMM UINT32 { 4, 0, 0, 0 }\n" 1558af69d88dSmrg "#define k32 IMM[3]\n" 1559af69d88dSmrg "IMM UINT32 { 32, 0, 0, 0 }\n" 1560af69d88dSmrg "#define k128 IMM[4]\n" 1561af69d88dSmrg "IMM UINT32 { 128, 0, 0, 0 }\n" 1562af69d88dSmrg "#define kdeadcafe IMM[5]\n" 1563af69d88dSmrg "IMM UINT32 { 3735931646, 0, 0, 0 }\n" 1564af69d88dSmrg "#define 
kallowed_set IMM[6]\n" 1565af69d88dSmrg "IMM UINT32 { 559035650, 0, 0, 0 }\n" 1566af69d88dSmrg "#define k11111111 IMM[7]\n" 1567af69d88dSmrg "IMM UINT32 { 286331153, 0, 0, 0 }\n" 1568af69d88dSmrg "\n" 1569af69d88dSmrg " BGNSUB\n" 1570af69d88dSmrg " MOV offset.x, threadid\n" 1571af69d88dSmrg "#ifdef TARGET_GLOBAL\n" 1572af69d88dSmrg " UMUL tmp0.x, blockid, blocksz\n" 1573af69d88dSmrg " UADD offset.x, offset, tmp0\n" 1574af69d88dSmrg "#endif\n" 1575af69d88dSmrg " UMUL offset.x, offset, k4\n" 1576af69d88dSmrg " USLT tmp0.x, threadid, k32\n" 1577af69d88dSmrg " STORE target.x, offset, k0\n" 1578af69d88dSmrg " BARRIER\n" 1579af69d88dSmrg " IF tmp0\n" 1580af69d88dSmrg " MOV vlocal.x, k0\n" 1581af69d88dSmrg " MOV arg.x, kdeadcafe\n" 1582af69d88dSmrg " BGNLOOP\n" 1583af69d88dSmrg " INEG arg.x, arg\n" 1584af69d88dSmrg " ATOMUADD vshared.x, target, offset, arg\n" 1585af69d88dSmrg " SFENCE target\n" 1586af69d88dSmrg " USNE tmp0.x, vshared, vlocal\n" 1587af69d88dSmrg " IF tmp0\n" 1588af69d88dSmrg " BRK\n" 1589af69d88dSmrg " ENDIF\n" 1590af69d88dSmrg " UADD vlocal.x, vlocal, arg\n" 1591af69d88dSmrg " ENDLOOP\n" 1592af69d88dSmrg " UADD vlocal.x, vshared, arg\n" 1593af69d88dSmrg " LOAD vshared.x, target, offset\n" 1594af69d88dSmrg " USEQ tmp0.x, vshared, vlocal\n" 1595af69d88dSmrg " STORE target.x, offset, tmp0\n" 1596af69d88dSmrg " ELSE\n" 1597af69d88dSmrg " UADD offset.x, offset, -k128\n" 1598af69d88dSmrg " MOV count.x, k0\n" 1599af69d88dSmrg " MOV last.x, k0\n" 1600af69d88dSmrg " BGNLOOP\n" 1601af69d88dSmrg " LOAD vshared.x, target, offset\n" 1602af69d88dSmrg " USEQ tmp0.x, vshared, kallowed_set.xxxx\n" 1603af69d88dSmrg " USEQ tmp1.x, vshared, kallowed_set.yyyy\n" 1604af69d88dSmrg " OR tmp0.x, tmp0, tmp1\n" 1605af69d88dSmrg " IF tmp0\n" 1606af69d88dSmrg " USEQ tmp0.x, vshared, last\n" 1607af69d88dSmrg " IF tmp0\n" 1608af69d88dSmrg " CONT\n" 1609af69d88dSmrg " ENDIF\n" 1610af69d88dSmrg " MOV last.x, vshared\n" 1611af69d88dSmrg " ELSE\n" 1612af69d88dSmrg " END\n" 1613af69d88dSmrg 
" ENDIF\n" 1614af69d88dSmrg " UADD count.x, count, k1\n" 1615af69d88dSmrg " USEQ tmp0.x, count, k128\n" 1616af69d88dSmrg " IF tmp0\n" 1617af69d88dSmrg " BRK\n" 1618af69d88dSmrg " ENDIF\n" 1619af69d88dSmrg " ENDLOOP\n" 1620af69d88dSmrg " ATOMXCHG tmp0.x, target, offset, k11111111\n" 1621af69d88dSmrg " UADD offset.x, offset, k128\n" 1622af69d88dSmrg " ATOMXCHG tmp0.x, target, offset, k11111111\n" 1623af69d88dSmrg " SFENCE target\n" 1624af69d88dSmrg " ENDIF\n" 1625af69d88dSmrg "#ifdef TARGET_LOCAL\n" 1626af69d88dSmrg " LOAD tmp0.x, RLOCAL, offset\n" 1627af69d88dSmrg " UMUL tmp1.x, blockid, blocksz\n" 1628af69d88dSmrg " UMUL tmp1.x, tmp1, k4\n" 1629af69d88dSmrg " UADD offset.x, offset, tmp1\n" 1630af69d88dSmrg " STORE RES[0].x, offset, tmp0\n" 1631af69d88dSmrg "#endif\n" 1632af69d88dSmrg " RET\n" 1633af69d88dSmrg " ENDSUB\n"; 1634af69d88dSmrg 1635af69d88dSmrg printf("- %s (%s)\n", __func__, global ? "global" : "local"); 1636af69d88dSmrg 1637af69d88dSmrg init_prog(ctx, 256, 0, 0, src, 1638af69d88dSmrg (global ? "-DTARGET_GLOBAL" : "-DTARGET_LOCAL")); 1639af69d88dSmrg init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 164001e04c3fSmrg 4096, 0, test_default_init); 1641af69d88dSmrg init_compute_resources(ctx, (int []) { 0, -1 }); 1642af69d88dSmrg launch_grid(ctx, (uint []){64, 1, 1}, (uint []){16, 1, 1}, 0, NULL); 164301e04c3fSmrg check_tex(ctx, 0, test_atom_race_expect, NULL); 1644af69d88dSmrg destroy_compute_resources(ctx); 1645af69d88dSmrg destroy_tex(ctx); 1646af69d88dSmrg destroy_prog(ctx); 1647af69d88dSmrg} 1648af69d88dSmrg

/**
 * Program entry point: run the compute tests selected by a bitmask.
 *
 * argv[1], when present, is parsed by strtoul() with base 0 (so decimal,
 * 0x-prefixed hexadecimal and 0-prefixed octal are accepted); bit N of the
 * result enables the N-th test in the sequence below.  When no argument is
 * given, every bit is set and all tests run.
 *
 * Returns 0 after the selected tests have been invoked, or 1 if the test
 * context could not be allocated.
 */
int main(int argc, char *argv[])
{
        struct context *ctx = CALLOC_STRUCT(context);
        unsigned tests = (argc > 1) ? strtoul(argv[1], NULL, 0) : ~0u;

        /* init_ctx() stores into *ctx right away, so a failed allocation
         * must be caught here rather than dereferenced. */
        if (!ctx) {
                fprintf(stderr, "failed to allocate test context\n");
                return 1;
        }

        init_ctx(ctx);

        if (tests & (1u << 0))
                test_system_values(ctx);
        if (tests & (1u << 1))
                test_resource_access(ctx);
        if (tests & (1u << 2))
                test_function_calls(ctx);
        if (tests & (1u << 3))
                test_input_global(ctx);
        if (tests & (1u << 4))
                test_private(ctx);
        if (tests & (1u << 5))
                test_local(ctx);
        if (tests & (1u << 6))
                test_sample(ctx);
        if (tests & (1u << 7))
                test_many_kern(ctx);
        if (tests & (1u << 8))
                test_constant(ctx);
        if (tests & (1u << 9))
                test_resource_indirect(ctx);
        if (tests & (1u << 10))
                test_surface_ld(ctx);
        if (tests & (1u << 11))
                test_surface_st(ctx);
        if (tests & (1u << 12))
                test_barrier(ctx);

        /* The atomic tests run twice: bits 13/14 pass true and bits 15/16
         * pass false as the second argument (test_atom_race names that
         * parameter "global" and labels the runs "global" / "local"). */
        if (tests & (1u << 13))
                test_atom_ops(ctx, true);
        if (tests & (1u << 14))
                test_atom_race(ctx, true);
        if (tests & (1u << 15))
                test_atom_ops(ctx, false);
        if (tests & (1u << 16))
                test_atom_race(ctx, false);

        destroy_ctx(ctx);

        return 0;
}