1af69d88dSmrg/*
2af69d88dSmrg * Copyright (C) 2011 Francisco Jerez.
3af69d88dSmrg * All Rights Reserved.
4af69d88dSmrg *
5af69d88dSmrg * Permission is hereby granted, free of charge, to any person obtaining
6af69d88dSmrg * a copy of this software and associated documentation files (the
7af69d88dSmrg * "Software"), to deal in the Software without restriction, including
8af69d88dSmrg * without limitation the rights to use, copy, modify, merge, publish,
9af69d88dSmrg * distribute, sublicense, and/or sell copies of the Software, and to
10af69d88dSmrg * permit persons to whom the Software is furnished to do so, subject to
11af69d88dSmrg * the following conditions:
12af69d88dSmrg *
13af69d88dSmrg * The above copyright notice and this permission notice (including the
14af69d88dSmrg * next paragraph) shall be included in all copies or substantial
15af69d88dSmrg * portions of the Software.
16af69d88dSmrg *
17af69d88dSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18af69d88dSmrg * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19af69d88dSmrg * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
20af69d88dSmrg * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
21af69d88dSmrg * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22af69d88dSmrg * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23af69d88dSmrg * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24af69d88dSmrg *
25af69d88dSmrg */
26af69d88dSmrg
27af69d88dSmrg#include <fcntl.h>
28af69d88dSmrg#include <stdio.h>
29af69d88dSmrg#include <sys/stat.h>
30af69d88dSmrg#include <inttypes.h>
31af69d88dSmrg#include "pipe/p_state.h"
32af69d88dSmrg#include "pipe/p_context.h"
33af69d88dSmrg#include "pipe/p_screen.h"
34af69d88dSmrg#include "pipe/p_defines.h"
35af69d88dSmrg#include "pipe/p_shader_tokens.h"
36af69d88dSmrg#include "util/u_memory.h"
37af69d88dSmrg#include "util/u_inlines.h"
38af69d88dSmrg#include "util/u_sampler.h"
397ec681f3Smrg#include "util/format/u_format.h"
40af69d88dSmrg#include "tgsi/tgsi_text.h"
41af69d88dSmrg#include "pipe-loader/pipe_loader.h"
42af69d88dSmrg
/* Number of resource slots; sizes the per-slot arrays in struct context. */
#define MAX_RESOURCES 4
44af69d88dSmrg
/* State shared by the helpers and tests in this file. */
struct context {
        /* Device handle filled in by pipe_loader_probe() in init_ctx(). */
        struct pipe_loader_device *dev;
        /* Screen created from dev by pipe_loader_create_screen(). */
        struct pipe_screen *screen;
        /* Context created from the screen by context_create(). */
        struct pipe_context *pipe;
        /* Compute state object created and bound by init_prog(). */
        void *hwcs;
        /* Sampler state objects created by init_sampler_states(). */
        void *hwsmp[MAX_RESOURCES];
        /* Resources created by init_tex(), indexed by slot. */
        struct pipe_resource *tex[MAX_RESOURCES];
        /* Per-slot flag recorded by init_tex(); copied to pipe_surface.writable. */
        bool tex_rw[MAX_RESOURCES];
        /* Sampler views created by init_sampler_views(). */
        struct pipe_sampler_view *view[MAX_RESOURCES];
        /* Surfaces created by init_compute_resources(). */
        struct pipe_surface *surf[MAX_RESOURCES];
};
56af69d88dSmrg
/*
 * Query compute capability `c` from ctx->screen and print the values it
 * returns as a brace-enclosed list of unsigned 64-bit integers.
 *
 * A variable named `ctx` (pointer to struct context) must be in scope at
 * the expansion site.  The first argument `p` is not used by the
 * expansion; it is kept so existing invocations continue to compile.
 *
 * The value returned by get_compute_param() is treated as a byte count.
 * The number of values printed is clamped to the capacity of the local
 * buffer, and a zero or negative return prints an empty list.
 */
#define DUMP_COMPUTE_PARAM(p, c) do {                                   \
                uint64_t vals_[4] = { 0 };                              \
                int idx_, cnt_, sz_;                                    \
                                                                        \
                sz_ = ctx->screen->get_compute_param(ctx->screen,       \
                                                     PIPE_SHADER_IR_TGSI, \
                                                     c, vals_);         \
                printf("%s: {", #c);                                    \
                                                                        \
                cnt_ = sz_ > 0 ? (int)(sz_ / sizeof(*vals_)) : 0;       \
                if (cnt_ > (int)(sizeof(vals_) / sizeof(*vals_)))       \
                        cnt_ = (int)(sizeof(vals_) / sizeof(*vals_));   \
                                                                        \
                for (idx_ = 0; idx_ < cnt_; ++idx_)                     \
                        printf(" %"PRIu64, vals_[idx_]);                \
                                                                        \
                printf(" }\n");                                         \
        } while (0)
71af69d88dSmrg
72af69d88dSmrgstatic void init_ctx(struct context *ctx)
73af69d88dSmrg{
747ec681f3Smrg        ASSERTED int ret;
75af69d88dSmrg
76af69d88dSmrg        ret = pipe_loader_probe(&ctx->dev, 1);
77af69d88dSmrg        assert(ret);
78af69d88dSmrg
7901e04c3fSmrg        ctx->screen = pipe_loader_create_screen(ctx->dev);
80af69d88dSmrg        assert(ctx->screen);
81af69d88dSmrg
8201e04c3fSmrg        ctx->pipe = ctx->screen->context_create(ctx->screen, NULL, 0);
83af69d88dSmrg        assert(ctx->pipe);
84af69d88dSmrg
85af69d88dSmrg        DUMP_COMPUTE_PARAM(p, PIPE_COMPUTE_CAP_GRID_DIMENSION);
86af69d88dSmrg        DUMP_COMPUTE_PARAM(p, PIPE_COMPUTE_CAP_MAX_GRID_SIZE);
87af69d88dSmrg        DUMP_COMPUTE_PARAM(p, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE);
88af69d88dSmrg}
89af69d88dSmrg
90af69d88dSmrgstatic void destroy_ctx(struct context *ctx)
91af69d88dSmrg{
92af69d88dSmrg        ctx->pipe->destroy(ctx->pipe);
93af69d88dSmrg        ctx->screen->destroy(ctx->screen);
94af69d88dSmrg        pipe_loader_release(&ctx->dev, 1);
95af69d88dSmrg        FREE(ctx);
96af69d88dSmrg}
97af69d88dSmrg
/* Print a fatal preprocess_prog() error naming the failed step and abort. */
static void preprocess_fail(const char *what)
{
        fprintf(stderr, "preprocess_prog: %s failed\n", what);
        abort();
}

/*
 * Run `src` through the C preprocessor and return the resulting text.
 *
 * A fixed header defining RGLOBAL, RLOCAL, RPRIVATE and RINPUT is fed to
 * cpp ahead of `src`, so those names can be used in the program text.
 *
 * ctx:  unused.
 * src:  NUL-terminated program text.
 * defs: extra command-line arguments for cpp, inserted verbatim into the
 *       shell command; may be NULL.
 *
 * Returns a NUL-terminated malloc()ed buffer that the caller must free().
 * Every failure is fatal: a message is printed and the process aborts,
 * whether or not NDEBUG is defined.
 */
static char *
preprocess_prog(struct context *ctx, const char *src, const char *defs)
{
        const char header[] =
                "#define RGLOBAL        RES[32767]\n"
                "#define RLOCAL         RES[32766]\n"
                "#define RPRIVATE       RES[32765]\n"
                "#define RINPUT         RES[32764]\n";
        char cmd[512];
        char tmp[] = "/tmp/test-compute.tgsi-XXXXXX";
        char *buf;
        size_t src_len = strlen(src);
        size_t len, off;
        ssize_t n;
        int fd, ret;
        struct stat st;
        FILE *p;

        /* Open a temporary file */
        fd = mkstemp(tmp);
        if (fd < 0)
                preprocess_fail("mkstemp");

        ret = snprintf(cmd, sizeof(cmd), "cpp -P -nostdinc -undef %s > %s",
                       defs ? defs : "", tmp);
        /* A truncated command line would run something unintended. */
        if (ret < 0 || (size_t)ret >= sizeof(cmd))
                preprocess_fail("snprintf");

        /* Preprocess */
        p = popen(cmd, "w");
        if (!p)
                preprocess_fail("popen");
        if (fwrite(header, strlen(header), 1, p) != 1)
                preprocess_fail("fwrite");
        /* fwrite() of a zero-sized item returns 0, so skip an empty src. */
        if (src_len && fwrite(src, src_len, 1, p) != 1)
                preprocess_fail("fwrite");
        if (pclose(p))
                preprocess_fail("cpp");

        /* Read back */
        if (fstat(fd, &st))
                preprocess_fail("fstat");

        len = (size_t)st.st_size;
        buf = malloc(len + 1);
        if (!buf)
                preprocess_fail("malloc");

        /* read() may return fewer bytes than requested; keep going. */
        for (off = 0; off < len; off += (size_t)n) {
                n = read(fd, buf + off, len - off);
                if (n <= 0)
                        preprocess_fail("read");
        }
        buf[len] = 0;

        /* Clean up */
        close(fd);
        unlink(tmp);

        return buf;
}
141af69d88dSmrg
142af69d88dSmrgstatic void init_prog(struct context *ctx, unsigned local_sz,
143af69d88dSmrg                      unsigned private_sz, unsigned input_sz,
144af69d88dSmrg                      const char *src, const char *defs)
145af69d88dSmrg{
146af69d88dSmrg        struct pipe_context *pipe = ctx->pipe;
147af69d88dSmrg        struct tgsi_token prog[1024];
148af69d88dSmrg        struct pipe_compute_state cs = {
14901e04c3fSmrg                .ir_type = PIPE_SHADER_IR_TGSI,
150af69d88dSmrg                .prog = prog,
151af69d88dSmrg                .req_local_mem = local_sz,
152af69d88dSmrg                .req_private_mem = private_sz,
153af69d88dSmrg                .req_input_mem = input_sz
154af69d88dSmrg        };
155af69d88dSmrg        char *psrc = preprocess_prog(ctx, src, defs);
1567ec681f3Smrg        ASSERTED int ret;
157af69d88dSmrg
15801e04c3fSmrg        ret = tgsi_text_translate(psrc, prog, ARRAY_SIZE(prog));
159af69d88dSmrg        assert(ret);
160af69d88dSmrg        free(psrc);
161af69d88dSmrg
162af69d88dSmrg        ctx->hwcs = pipe->create_compute_state(pipe, &cs);
163af69d88dSmrg        assert(ctx->hwcs);
164af69d88dSmrg
165af69d88dSmrg        pipe->bind_compute_state(pipe, ctx->hwcs);
166af69d88dSmrg}
167af69d88dSmrg
168af69d88dSmrgstatic void destroy_prog(struct context *ctx)
169af69d88dSmrg{
170af69d88dSmrg        struct pipe_context *pipe = ctx->pipe;
171af69d88dSmrg
172af69d88dSmrg        pipe->delete_compute_state(pipe, ctx->hwcs);
173af69d88dSmrg        ctx->hwcs = NULL;
174af69d88dSmrg}
175af69d88dSmrg
176af69d88dSmrgstatic void init_tex(struct context *ctx, int slot,
177af69d88dSmrg                     enum pipe_texture_target target, bool rw,
178af69d88dSmrg                     enum pipe_format format, int w, int h,
179af69d88dSmrg                     void (*init)(void *, int, int, int))
180af69d88dSmrg{
181af69d88dSmrg        struct pipe_context *pipe = ctx->pipe;
182af69d88dSmrg        struct pipe_resource **tex = &ctx->tex[slot];
183af69d88dSmrg        struct pipe_resource ttex = {
184af69d88dSmrg                .target = target,
185af69d88dSmrg                .format = format,
186af69d88dSmrg                .width0 = w,
187af69d88dSmrg                .height0 = h,
188af69d88dSmrg                .depth0 = 1,
189af69d88dSmrg                .array_size = 1,
190af69d88dSmrg                .bind = (PIPE_BIND_SAMPLER_VIEW |
191af69d88dSmrg                         PIPE_BIND_COMPUTE_RESOURCE |
192af69d88dSmrg                         PIPE_BIND_GLOBAL)
193af69d88dSmrg        };
194af69d88dSmrg        int dx = util_format_get_blocksize(format);
195af69d88dSmrg        int dy = util_format_get_stride(format, w);
196af69d88dSmrg        int nx = (target == PIPE_BUFFER ? (w / dx) :
197af69d88dSmrg                  util_format_get_nblocksx(format, w));
198af69d88dSmrg        int ny = (target == PIPE_BUFFER ? 1 :
199af69d88dSmrg                  util_format_get_nblocksy(format, h));
200af69d88dSmrg        struct pipe_transfer *xfer;
201af69d88dSmrg        char *map;
202af69d88dSmrg        int x, y;
203af69d88dSmrg
204af69d88dSmrg        *tex = ctx->screen->resource_create(ctx->screen, &ttex);
205af69d88dSmrg        assert(*tex);
206af69d88dSmrg
2077ec681f3Smrg        map = pipe->texture_map(pipe, *tex, 0, PIPE_MAP_WRITE,
208af69d88dSmrg                                  &(struct pipe_box) { .width = w,
209af69d88dSmrg                                                  .height = h,
210af69d88dSmrg                                                  .depth = 1 }, &xfer);
211af69d88dSmrg        assert(xfer);
212af69d88dSmrg        assert(map);
213af69d88dSmrg
214af69d88dSmrg        for (y = 0; y < ny; ++y) {
215af69d88dSmrg                for (x = 0; x < nx; ++x) {
216af69d88dSmrg                        init(map + y * dy + x * dx, slot, x, y);
217af69d88dSmrg                }
218af69d88dSmrg        }
219af69d88dSmrg
2207ec681f3Smrg        pipe->texture_unmap(pipe, xfer);
221af69d88dSmrg
222af69d88dSmrg        ctx->tex_rw[slot] = rw;
223af69d88dSmrg}
224af69d88dSmrg
/*
 * Comparison used by check_tex() when no callback is supplied: true when
 * the first `sz` bytes at `x` and `y` are identical.
 */
static bool default_check(void *x, void *y, int sz) {
        const int diff = memcmp(x, y, sz);

        return diff == 0;
}
228af69d88dSmrg
229af69d88dSmrgstatic void check_tex(struct context *ctx, int slot,
230af69d88dSmrg                      void (*expect)(void *, int, int, int),
231af69d88dSmrg                      bool (*check)(void *, void *, int))
232af69d88dSmrg{
233af69d88dSmrg        struct pipe_context *pipe = ctx->pipe;
234af69d88dSmrg        struct pipe_resource *tex = ctx->tex[slot];
235af69d88dSmrg        int dx = util_format_get_blocksize(tex->format);
236af69d88dSmrg        int dy = util_format_get_stride(tex->format, tex->width0);
237af69d88dSmrg        int nx = (tex->target == PIPE_BUFFER ? (tex->width0 / dx) :
238af69d88dSmrg                  util_format_get_nblocksx(tex->format, tex->width0));
239af69d88dSmrg        int ny = (tex->target == PIPE_BUFFER ? 1 :
240af69d88dSmrg                  util_format_get_nblocksy(tex->format, tex->height0));
241af69d88dSmrg        struct pipe_transfer *xfer;
242af69d88dSmrg        char *map;
2439f464c52Smaya        int x = 0, y, i;
244af69d88dSmrg        int err = 0;
245af69d88dSmrg
246af69d88dSmrg        if (!check)
247af69d88dSmrg                check = default_check;
248af69d88dSmrg
2497ec681f3Smrg        map = pipe->texture_map(pipe, tex, 0, PIPE_MAP_READ,
250af69d88dSmrg                                  &(struct pipe_box) { .width = tex->width0,
251af69d88dSmrg                                        .height = tex->height0,
252af69d88dSmrg                                        .depth = 1 }, &xfer);
253af69d88dSmrg        assert(xfer);
254af69d88dSmrg        assert(map);
255af69d88dSmrg
256af69d88dSmrg        for (y = 0; y < ny; ++y) {
257af69d88dSmrg                for (x = 0; x < nx; ++x) {
258af69d88dSmrg                        uint32_t exp[4];
259af69d88dSmrg                        uint32_t *res = (uint32_t *)(map + y * dy + x * dx);
260af69d88dSmrg
261af69d88dSmrg                        expect(exp, slot, x, y);
262af69d88dSmrg                        if (check(res, exp, dx) || (++err) > 20)
263af69d88dSmrg                                continue;
264af69d88dSmrg
265af69d88dSmrg                        if (dx < 4) {
266af69d88dSmrg                                uint32_t u = 0, v = 0;
267af69d88dSmrg
268af69d88dSmrg                                for (i = 0; i < dx; i++) {
269af69d88dSmrg                                        u |= ((uint8_t *)exp)[i] << (8 * i);
270af69d88dSmrg                                        v |= ((uint8_t *)res)[i] << (8 * i);
271af69d88dSmrg                                }
272af69d88dSmrg                                printf("(%d, %d): got 0x%x, expected 0x%x\n",
273af69d88dSmrg                                       x, y, v, u);
274af69d88dSmrg                        } else {
275af69d88dSmrg                                for (i = 0; i < dx / 4; i++) {
276af69d88dSmrg                                        printf("(%d, %d)[%d]: got 0x%x/%f,"
277af69d88dSmrg                                               " expected 0x%x/%f\n", x, y, i,
278af69d88dSmrg                                               res[i], ((float *)res)[i],
279af69d88dSmrg                                               exp[i], ((float *)exp)[i]);
280af69d88dSmrg                                }
281af69d88dSmrg                        }
282af69d88dSmrg                }
283af69d88dSmrg        }
284af69d88dSmrg
2857ec681f3Smrg        pipe->texture_unmap(pipe, xfer);
286af69d88dSmrg
287af69d88dSmrg        if (err)
288af69d88dSmrg                printf("(%d, %d): \x1b[31mFAIL\x1b[0m (%d)\n", x, y, err);
289af69d88dSmrg        else
290af69d88dSmrg                printf("(%d, %d): \x1b[32mOK\x1b[0m\n", x, y);
291af69d88dSmrg}
292af69d88dSmrg
293af69d88dSmrgstatic void destroy_tex(struct context *ctx)
294af69d88dSmrg{
295af69d88dSmrg        int i;
296af69d88dSmrg
297af69d88dSmrg        for (i = 0; i < MAX_RESOURCES; ++i) {
298af69d88dSmrg                if (ctx->tex[i])
299af69d88dSmrg                        pipe_resource_reference(&ctx->tex[i], NULL);
300af69d88dSmrg        }
301af69d88dSmrg}
302af69d88dSmrg
303af69d88dSmrgstatic void init_sampler_views(struct context *ctx, const int *slots)
304af69d88dSmrg{
305af69d88dSmrg        struct pipe_context *pipe = ctx->pipe;
306af69d88dSmrg        struct pipe_sampler_view tview;
307af69d88dSmrg        int i;
308af69d88dSmrg
309af69d88dSmrg        for (i = 0; *slots >= 0; ++i, ++slots) {
310af69d88dSmrg                u_sampler_view_default_template(&tview, ctx->tex[*slots],
311af69d88dSmrg                                                ctx->tex[*slots]->format);
312af69d88dSmrg
313af69d88dSmrg                ctx->view[i] = pipe->create_sampler_view(pipe, ctx->tex[*slots],
314af69d88dSmrg                                                         &tview);
315af69d88dSmrg                assert(ctx->view[i]);
316af69d88dSmrg        }
317af69d88dSmrg
3187ec681f3Smrg        pipe->set_sampler_views(pipe, PIPE_SHADER_COMPUTE, 0, i, 0, false, ctx->view);
319af69d88dSmrg}
320af69d88dSmrg
321af69d88dSmrgstatic void destroy_sampler_views(struct context *ctx)
322af69d88dSmrg{
323af69d88dSmrg        struct pipe_context *pipe = ctx->pipe;
324af69d88dSmrg        int i;
325af69d88dSmrg
3267ec681f3Smrg        pipe->set_sampler_views(pipe, PIPE_SHADER_COMPUTE, 0, 0, MAX_RESOURCES, false, NULL);
327af69d88dSmrg
328af69d88dSmrg        for (i = 0; i < MAX_RESOURCES; ++i) {
329af69d88dSmrg                if (ctx->view[i]) {
330af69d88dSmrg                        pipe->sampler_view_destroy(pipe, ctx->view[i]);
331af69d88dSmrg                        ctx->view[i] = NULL;
332af69d88dSmrg                }
333af69d88dSmrg        }
334af69d88dSmrg}
335af69d88dSmrg
336af69d88dSmrgstatic void init_compute_resources(struct context *ctx, const int *slots)
337af69d88dSmrg{
338af69d88dSmrg        struct pipe_context *pipe = ctx->pipe;
339af69d88dSmrg        int i;
340af69d88dSmrg
341af69d88dSmrg        for (i = 0; *slots >= 0; ++i, ++slots) {
342af69d88dSmrg                struct pipe_surface tsurf = {
343af69d88dSmrg                        .format = ctx->tex[*slots]->format,
344af69d88dSmrg                        .writable = ctx->tex_rw[*slots]
345af69d88dSmrg                };
346af69d88dSmrg
347af69d88dSmrg                if (ctx->tex[*slots]->target == PIPE_BUFFER)
348af69d88dSmrg                        tsurf.u.buf.last_element = ctx->tex[*slots]->width0 - 1;
349af69d88dSmrg
350af69d88dSmrg                ctx->surf[i] = pipe->create_surface(pipe, ctx->tex[*slots],
351af69d88dSmrg                                                    &tsurf);
352af69d88dSmrg                assert(ctx->surf[i]);
353af69d88dSmrg        }
354af69d88dSmrg
355af69d88dSmrg        pipe->set_compute_resources(pipe, 0, i, ctx->surf);
356af69d88dSmrg}
357af69d88dSmrg
358af69d88dSmrgstatic void destroy_compute_resources(struct context *ctx)
359af69d88dSmrg{
360af69d88dSmrg        struct pipe_context *pipe = ctx->pipe;
361af69d88dSmrg        int i;
362af69d88dSmrg
363af69d88dSmrg        pipe->set_compute_resources(pipe, 0, MAX_RESOURCES, NULL);
364af69d88dSmrg
365af69d88dSmrg        for (i = 0; i < MAX_RESOURCES; ++i) {
366af69d88dSmrg                if (ctx->surf[i]) {
367af69d88dSmrg                        pipe->surface_destroy(pipe, ctx->surf[i]);
368af69d88dSmrg                        ctx->surf[i] = NULL;
369af69d88dSmrg                }
370af69d88dSmrg        }
371af69d88dSmrg}
372af69d88dSmrg
373af69d88dSmrgstatic void init_sampler_states(struct context *ctx, int n)
374af69d88dSmrg{
375af69d88dSmrg        struct pipe_context *pipe = ctx->pipe;
376af69d88dSmrg        struct pipe_sampler_state smp = {
377af69d88dSmrg                .normalized_coords = 1,
378af69d88dSmrg        };
379af69d88dSmrg        int i;
380af69d88dSmrg
381af69d88dSmrg        for (i = 0; i < n; ++i) {
382af69d88dSmrg                ctx->hwsmp[i] = pipe->create_sampler_state(pipe, &smp);
383af69d88dSmrg                assert(ctx->hwsmp[i]);
384af69d88dSmrg        }
385af69d88dSmrg
386af69d88dSmrg        pipe->bind_sampler_states(pipe, PIPE_SHADER_COMPUTE, 0, i, ctx->hwsmp);
387af69d88dSmrg}
388af69d88dSmrg
389af69d88dSmrgstatic void destroy_sampler_states(struct context *ctx)
390af69d88dSmrg{
391af69d88dSmrg        struct pipe_context *pipe = ctx->pipe;
392af69d88dSmrg        int i;
393af69d88dSmrg
394af69d88dSmrg        pipe->bind_sampler_states(pipe, PIPE_SHADER_COMPUTE,
395af69d88dSmrg				  0, MAX_RESOURCES, NULL);
396af69d88dSmrg
397af69d88dSmrg        for (i = 0; i < MAX_RESOURCES; ++i) {
398af69d88dSmrg                if (ctx->hwsmp[i]) {
399af69d88dSmrg                        pipe->delete_sampler_state(pipe, ctx->hwsmp[i]);
400af69d88dSmrg                        ctx->hwsmp[i] = NULL;
401af69d88dSmrg                }
402af69d88dSmrg        }
403af69d88dSmrg}
404af69d88dSmrg
405af69d88dSmrgstatic void init_globals(struct context *ctx, const int *slots,
406af69d88dSmrg                         uint32_t **handles)
407af69d88dSmrg{
408af69d88dSmrg        struct pipe_context *pipe = ctx->pipe;
409af69d88dSmrg        struct pipe_resource *res[MAX_RESOURCES];
410af69d88dSmrg        int i;
411af69d88dSmrg
412af69d88dSmrg        for (i = 0; *slots >= 0; ++i, ++slots)
413af69d88dSmrg                res[i] = ctx->tex[*slots];
414af69d88dSmrg
415af69d88dSmrg        pipe->set_global_binding(pipe, 0, i, res, handles);
416af69d88dSmrg}
417af69d88dSmrg
418af69d88dSmrgstatic void destroy_globals(struct context *ctx)
419af69d88dSmrg{
420af69d88dSmrg        struct pipe_context *pipe = ctx->pipe;
421af69d88dSmrg
422af69d88dSmrg        pipe->set_global_binding(pipe, 0, MAX_RESOURCES, NULL, NULL);
423af69d88dSmrg}
424af69d88dSmrg
425af69d88dSmrgstatic void launch_grid(struct context *ctx, const uint *block_layout,
426af69d88dSmrg                        const uint *grid_layout, uint32_t pc,
42701e04c3fSmrg                        void *input)
428af69d88dSmrg{
429af69d88dSmrg        struct pipe_context *pipe = ctx->pipe;
43001e04c3fSmrg        struct pipe_grid_info info;
43101e04c3fSmrg        int i;
43201e04c3fSmrg
43301e04c3fSmrg        for (i = 0; i < 3; i++) {
43401e04c3fSmrg                info.block[i] = block_layout[i];
43501e04c3fSmrg                info.grid[i] = grid_layout[i];
43601e04c3fSmrg        }
43701e04c3fSmrg        info.pc = pc;
43801e04c3fSmrg        info.input = input;
439af69d88dSmrg
44001e04c3fSmrg        pipe->launch_grid(pipe, &info);
44101e04c3fSmrg}
44201e04c3fSmrg
/* Fill callback for init_tex(): stores the marker 0xdeadbeef at `p`. */
static void test_default_init(void *p, int s, int x, int y)
{
        uint32_t *out = p;

        *out = 0xdeadbeef;
}
44701e04c3fSmrg
44801e04c3fSmrg/* test_system_values */
/*
 * Expected-value callback for test_system_values.
 *
 * `x` is decomposed as 16 * id + 4 * sv + c.  `id` is split into a
 * three-component index over a 20 x 12 x 5 space, and `sv` selects what
 * is stored for component `c`:
 *   0: index / block size,  1: block size,
 *   2: grid size,           3: index % block size.
 * Nothing is written for any other `sv`.
 */
static void test_system_values_expect(void *p, int s, int x, int y)
{
        static const int block_size[4] = { 4, 3, 5, 1 };
        static const int grid_size[4] = { 5, 4, 1, 1 };
        const int id = x / 16;
        const int which = (x % 16) / 4;
        const int comp = x % 4;
        const int index[4] = { id % 20, (id % 240) / 20, id / 240, 0 };
        uint32_t *out = p;

        if (which == 0)
                *out = index[comp] / block_size[comp];
        else if (which == 1)
                *out = block_size[comp];
        else if (which == 2)
                *out = grid_size[comp];
        else if (which == 3)
                *out = index[comp] % block_size[comp];
}
471af69d88dSmrg
472af69d88dSmrgstatic void test_system_values(struct context *ctx)
473af69d88dSmrg{
474af69d88dSmrg        const char *src = "COMP\n"
475af69d88dSmrg                "DCL RES[0], BUFFER, RAW, WR\n"
476af69d88dSmrg                "DCL SV[0], BLOCK_ID[0]\n"
477af69d88dSmrg                "DCL SV[1], BLOCK_SIZE[0]\n"
478af69d88dSmrg                "DCL SV[2], GRID_SIZE[0]\n"
479af69d88dSmrg                "DCL SV[3], THREAD_ID[0]\n"
480af69d88dSmrg                "DCL TEMP[0], LOCAL\n"
481af69d88dSmrg                "DCL TEMP[1], LOCAL\n"
482af69d88dSmrg                "IMM UINT32 { 64, 0, 0, 0 }\n"
483af69d88dSmrg                "IMM UINT32 { 16, 0, 0, 0 }\n"
484af69d88dSmrg                "IMM UINT32 { 0, 0, 0, 0 }\n"
485af69d88dSmrg                "\n"
486af69d88dSmrg                "BGNSUB"
487af69d88dSmrg                "  UMUL TEMP[0], SV[0], SV[1]\n"
488af69d88dSmrg                "  UADD TEMP[0], TEMP[0], SV[3]\n"
489af69d88dSmrg                "  UMUL TEMP[1], SV[1], SV[2]\n"
490af69d88dSmrg                "  UMUL TEMP[0].w, TEMP[0], TEMP[1].zzzz\n"
491af69d88dSmrg                "  UMUL TEMP[0].zw, TEMP[0], TEMP[1].yyyy\n"
492af69d88dSmrg                "  UMUL TEMP[0].yzw, TEMP[0], TEMP[1].xxxx\n"
493af69d88dSmrg                "  UADD TEMP[0].xy, TEMP[0].xyxy, TEMP[0].zwzw\n"
494af69d88dSmrg                "  UADD TEMP[0].x, TEMP[0].xxxx, TEMP[0].yyyy\n"
495af69d88dSmrg                "  UMUL TEMP[0].x, TEMP[0], IMM[0]\n"
496af69d88dSmrg                "  STORE RES[0].xyzw, TEMP[0], SV[0]\n"
497af69d88dSmrg                "  UADD TEMP[0].x, TEMP[0], IMM[1]\n"
498af69d88dSmrg                "  STORE RES[0].xyzw, TEMP[0], SV[1]\n"
499af69d88dSmrg                "  UADD TEMP[0].x, TEMP[0], IMM[1]\n"
500af69d88dSmrg                "  STORE RES[0].xyzw, TEMP[0], SV[2]\n"
501af69d88dSmrg                "  UADD TEMP[0].x, TEMP[0], IMM[1]\n"
502af69d88dSmrg                "  STORE RES[0].xyzw, TEMP[0], SV[3]\n"
503af69d88dSmrg                "  RET\n"
504af69d88dSmrg                "ENDSUB\n";
505af69d88dSmrg
506af69d88dSmrg        printf("- %s\n", __func__);
507af69d88dSmrg
508af69d88dSmrg        init_prog(ctx, 0, 0, 0, src, NULL);
509af69d88dSmrg        init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
51001e04c3fSmrg                 76800, 0, test_default_init);
511af69d88dSmrg        init_compute_resources(ctx, (int []) { 0, -1 });
512af69d88dSmrg        launch_grid(ctx, (uint []){4, 3, 5}, (uint []){5, 4, 1}, 0, NULL);
51301e04c3fSmrg        check_tex(ctx, 0, test_system_values_expect, NULL);
514af69d88dSmrg        destroy_compute_resources(ctx);
515af69d88dSmrg        destroy_tex(ctx);
516af69d88dSmrg        destroy_prog(ctx);
517af69d88dSmrg}
518af69d88dSmrg
51901e04c3fSmrg/* test_resource_access */
/* Fill callback for slot 0 of test_resource_access: stores 8 - x as a float. */
static void test_resource_access_init0(void *p, int s, int x, int y)
{
        float *out = p;

        *out = 8.0 - (float)x;
}
52401e04c3fSmrg
/*
 * Expected-value callback for test_resource_access: block (x, y) should
 * hold 8 - ((x + 4 * y) & 0x3f) as a float.
 */
static void test_resource_access_expect(void *p, int s, int x, int y)
{
        const int idx = (x + 4 * y) & 0x3f;
        float *out = p;

        *out = 8.0 - (float)idx;
}
52901e04c3fSmrg
530af69d88dSmrgstatic void test_resource_access(struct context *ctx)
531af69d88dSmrg{
532af69d88dSmrg        const char *src = "COMP\n"
533af69d88dSmrg                "DCL RES[0], BUFFER, RAW, WR\n"
534af69d88dSmrg                "DCL RES[1], 2D, RAW, WR\n"
535af69d88dSmrg                "DCL SV[0], BLOCK_ID[0]\n"
536af69d88dSmrg                "DCL TEMP[0], LOCAL\n"
537af69d88dSmrg                "DCL TEMP[1], LOCAL\n"
538af69d88dSmrg                "IMM UINT32 { 15, 0, 0, 0 }\n"
539af69d88dSmrg                "IMM UINT32 { 16, 1, 0, 0 }\n"
540af69d88dSmrg                "\n"
541af69d88dSmrg                "    BGNSUB\n"
542af69d88dSmrg                "       UADD TEMP[0].x, SV[0].xxxx, SV[0].yyyy\n"
543af69d88dSmrg                "       AND TEMP[0].x, TEMP[0], IMM[0]\n"
544af69d88dSmrg                "       UMUL TEMP[0].x, TEMP[0], IMM[1]\n"
545af69d88dSmrg                "       LOAD TEMP[0].xyzw, RES[0], TEMP[0]\n"
546af69d88dSmrg                "       UMUL TEMP[1], SV[0], IMM[1]\n"
547af69d88dSmrg                "       STORE RES[1].xyzw, TEMP[1], TEMP[0]\n"
548af69d88dSmrg                "       RET\n"
549af69d88dSmrg                "    ENDSUB\n";
550af69d88dSmrg
551af69d88dSmrg        printf("- %s\n", __func__);
552af69d88dSmrg
553af69d88dSmrg        init_prog(ctx, 0, 0, 0, src, NULL);
554af69d88dSmrg        init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
55501e04c3fSmrg                 256, 0, test_resource_access_init0);
556af69d88dSmrg        init_tex(ctx, 1, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT,
55701e04c3fSmrg                 60, 12, test_default_init);
558af69d88dSmrg        init_compute_resources(ctx, (int []) { 0, 1, -1 });
559af69d88dSmrg        launch_grid(ctx, (uint []){1, 1, 1}, (uint []){15, 12, 1}, 0, NULL);
56001e04c3fSmrg        check_tex(ctx, 1, test_resource_access_expect, NULL);
561af69d88dSmrg        destroy_compute_resources(ctx);
562af69d88dSmrg        destroy_tex(ctx);
563af69d88dSmrg        destroy_prog(ctx);
564af69d88dSmrg}
565af69d88dSmrg
56601e04c3fSmrg/* test_function_calls */
/* Fill callback for test_function_calls: stores the index 15 * y + x. */
static void test_function_calls_init(void *p, int s, int x, int y)
{
        uint32_t *out = p;

        *out = 15 * y + x;
}
57101e04c3fSmrg
/*
 * Expected-value callback for test_function_calls: 2 where the index
 * 15 * y + x is below 4, and 1 everywhere else.
 */
static void test_function_calls_expect(void *p, int s, int x, int y)
{
        const int idx = 15 * y + x;
        uint32_t *out = p;

        if (idx < 4)
                *out = 2;
        else
                *out = 1;
}
57601e04c3fSmrg
577af69d88dSmrgstatic void test_function_calls(struct context *ctx)
578af69d88dSmrg{
579af69d88dSmrg        const char *src = "COMP\n"
580af69d88dSmrg                "DCL RES[0], 2D, RAW, WR\n"
581af69d88dSmrg                "DCL SV[0], BLOCK_ID[0]\n"
582af69d88dSmrg                "DCL SV[1], BLOCK_SIZE[0]\n"
583af69d88dSmrg                "DCL SV[2], GRID_SIZE[0]\n"
584af69d88dSmrg                "DCL SV[3], THREAD_ID[0]\n"
585af69d88dSmrg                "DCL TEMP[0]\n"
586af69d88dSmrg                "DCL TEMP[1]\n"
587af69d88dSmrg                "DCL TEMP[2], LOCAL\n"
588af69d88dSmrg                "IMM UINT32 { 0, 11, 22, 33 }\n"
589af69d88dSmrg                "IMM FLT32 { 11, 33, 55, 99 }\n"
590af69d88dSmrg                "IMM UINT32 { 4, 1, 0, 0 }\n"
591af69d88dSmrg                "IMM UINT32 { 12, 0, 0, 0 }\n"
592af69d88dSmrg                "\n"
593af69d88dSmrg                "00: BGNSUB\n"
594af69d88dSmrg                "01:  UMUL TEMP[0].x, TEMP[0], TEMP[0]\n"
595af69d88dSmrg                "02:  UADD TEMP[1].x, TEMP[1], IMM[2].yyyy\n"
596af69d88dSmrg                "03:  USLT TEMP[0].x, TEMP[0], IMM[0]\n"
597af69d88dSmrg                "04:  RET\n"
598af69d88dSmrg                "05: ENDSUB\n"
599af69d88dSmrg                "06: BGNSUB\n"
600af69d88dSmrg                "07:  UMUL TEMP[0].x, TEMP[0], TEMP[0]\n"
601af69d88dSmrg                "08:  UADD TEMP[1].x, TEMP[1], IMM[2].yyyy\n"
602af69d88dSmrg                "09:  USLT TEMP[0].x, TEMP[0], IMM[0].yyyy\n"
603af69d88dSmrg                "10:  IF TEMP[0].xxxx\n"
604af69d88dSmrg                "11:   CAL :0\n"
605af69d88dSmrg                "12:  ENDIF\n"
606af69d88dSmrg                "13:  RET\n"
607af69d88dSmrg                "14: ENDSUB\n"
608af69d88dSmrg                "15: BGNSUB\n"
609af69d88dSmrg                "16:  UMUL TEMP[2], SV[0], SV[1]\n"
610af69d88dSmrg                "17:  UADD TEMP[2], TEMP[2], SV[3]\n"
611af69d88dSmrg                "18:  UMUL TEMP[2], TEMP[2], IMM[2]\n"
612af69d88dSmrg                "00:  MOV TEMP[1].x, IMM[2].wwww\n"
613af69d88dSmrg                "19:  LOAD TEMP[0].x, RES[0].xxxx, TEMP[2]\n"
614af69d88dSmrg                "20:  CAL :6\n"
615af69d88dSmrg                "21:  STORE RES[0].x, TEMP[2], TEMP[1].xxxx\n"
616af69d88dSmrg                "22:  RET\n"
617af69d88dSmrg                "23: ENDSUB\n";
618af69d88dSmrg
619af69d88dSmrg        printf("- %s\n", __func__);
620af69d88dSmrg
621af69d88dSmrg        init_prog(ctx, 0, 0, 0, src, NULL);
622af69d88dSmrg        init_tex(ctx, 0, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT,
62301e04c3fSmrg                 15, 12, test_function_calls_init);
624af69d88dSmrg        init_compute_resources(ctx, (int []) { 0, -1 });
625af69d88dSmrg        launch_grid(ctx, (uint []){3, 3, 3}, (uint []){5, 4, 1}, 15, NULL);
62601e04c3fSmrg        check_tex(ctx, 0, test_function_calls_expect, NULL);
627af69d88dSmrg        destroy_compute_resources(ctx);
628af69d88dSmrg        destroy_tex(ctx);
629af69d88dSmrg        destroy_prog(ctx);
630af69d88dSmrg}
631af69d88dSmrg
63201e04c3fSmrg/* test_input_global */
/*
 * Expected-value callback for test_input_global: element 0 of slot `s`
 * should hold 0xdeadbeef - (0x10001 + 2 * s); every other element keeps
 * the 0xdeadbeef marker.
 */
static void test_input_global_expect(void *p, int s, int x, int y)
{
        uint32_t value = 0xdeadbeef;

        if (x == 0)
                value -= 0x10001 + 2 * s;

        *(uint32_t *)p = value;
}
63701e04c3fSmrg
638af69d88dSmrgstatic void test_input_global(struct context *ctx)
639af69d88dSmrg{
640af69d88dSmrg        const char *src = "COMP\n"
641af69d88dSmrg                "DCL SV[0], THREAD_ID[0]\n"
642af69d88dSmrg                "DCL TEMP[0], LOCAL\n"
643af69d88dSmrg                "DCL TEMP[1], LOCAL\n"
644af69d88dSmrg                "IMM UINT32 { 8, 0, 0, 0 }\n"
645af69d88dSmrg                "\n"
646af69d88dSmrg                "    BGNSUB\n"
647af69d88dSmrg                "       UMUL TEMP[0], SV[0], IMM[0]\n"
648af69d88dSmrg                "       LOAD TEMP[1].xy, RINPUT, TEMP[0]\n"
649af69d88dSmrg                "       LOAD TEMP[0].x, RGLOBAL, TEMP[1].yyyy\n"
650af69d88dSmrg                "       UADD TEMP[1].x, TEMP[0], -TEMP[1]\n"
651af69d88dSmrg                "       STORE RGLOBAL.x, TEMP[1].yyyy, TEMP[1]\n"
652af69d88dSmrg                "       RET\n"
653af69d88dSmrg                "    ENDSUB\n";
654af69d88dSmrg        uint32_t input[8] = { 0x10001, 0x10002, 0x10003, 0x10004,
655af69d88dSmrg                              0x10005, 0x10006, 0x10007, 0x10008 };
656af69d88dSmrg
657af69d88dSmrg        printf("- %s\n", __func__);
658af69d88dSmrg
659af69d88dSmrg        init_prog(ctx, 0, 0, 32, src, NULL);
66001e04c3fSmrg        init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 32, 0,
66101e04c3fSmrg                 test_default_init);
66201e04c3fSmrg        init_tex(ctx, 1, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 32, 0,
66301e04c3fSmrg                 test_default_init);
66401e04c3fSmrg        init_tex(ctx, 2, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 32, 0,
66501e04c3fSmrg                 test_default_init);
66601e04c3fSmrg        init_tex(ctx, 3, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 32, 0,
66701e04c3fSmrg                 test_default_init);
668af69d88dSmrg        init_globals(ctx, (int []){ 0, 1, 2, 3, -1 },
669af69d88dSmrg                     (uint32_t *[]){ &input[1], &input[3],
670af69d88dSmrg                                     &input[5], &input[7] });
671af69d88dSmrg        launch_grid(ctx, (uint []){4, 1, 1}, (uint []){1, 1, 1}, 0, input);
67201e04c3fSmrg        check_tex(ctx, 0, test_input_global_expect, NULL);
67301e04c3fSmrg        check_tex(ctx, 1, test_input_global_expect, NULL);
67401e04c3fSmrg        check_tex(ctx, 2, test_input_global_expect, NULL);
67501e04c3fSmrg        check_tex(ctx, 3, test_input_global_expect, NULL);
676af69d88dSmrg        destroy_globals(ctx);
677af69d88dSmrg        destroy_tex(ctx);
678af69d88dSmrg        destroy_prog(ctx);
679af69d88dSmrg}
680af69d88dSmrg
68101e04c3fSmrg/* test_private */
/*
 * Reference generator for test_private.
 *
 * The expected word at position x is the sum of x / 32 and x % 32, i.e.
 * the index of the 32-word group plus the position inside that group.
 * s and y are unused.
 */
static void test_private_expect(void *p, int s, int x, int y)
{
        uint32_t *out = p;
        const int group = x / 32;
        const int word = x % 32;

        *out = group + word;
}
68601e04c3fSmrg
687af69d88dSmrgstatic void test_private(struct context *ctx)
688af69d88dSmrg{
689af69d88dSmrg        const char *src = "COMP\n"
690af69d88dSmrg                "DCL RES[0], BUFFER, RAW, WR\n"
691af69d88dSmrg                "DCL SV[0], BLOCK_ID[0]\n"
692af69d88dSmrg                "DCL SV[1], BLOCK_SIZE[0]\n"
693af69d88dSmrg                "DCL SV[2], THREAD_ID[0]\n"
694af69d88dSmrg                "DCL TEMP[0], LOCAL\n"
695af69d88dSmrg                "DCL TEMP[1], LOCAL\n"
696af69d88dSmrg                "DCL TEMP[2], LOCAL\n"
697af69d88dSmrg                "IMM UINT32 { 128, 0, 0, 0 }\n"
698af69d88dSmrg                "IMM UINT32 { 4, 0, 0, 0 }\n"
699af69d88dSmrg                "\n"
700af69d88dSmrg                "    BGNSUB\n"
701af69d88dSmrg                "       UMUL TEMP[0].x, SV[0], SV[1]\n"
702af69d88dSmrg                "       UADD TEMP[0].x, TEMP[0], SV[2]\n"
703af69d88dSmrg                "       MOV TEMP[1].x, IMM[0].wwww\n"
704af69d88dSmrg                "       BGNLOOP\n"
705af69d88dSmrg                "               USEQ TEMP[2].x, TEMP[1], IMM[0]\n"
706af69d88dSmrg                "               IF TEMP[2]\n"
707af69d88dSmrg                "                       BRK\n"
708af69d88dSmrg                "               ENDIF\n"
709af69d88dSmrg                "               UDIV TEMP[2].x, TEMP[1], IMM[1]\n"
710af69d88dSmrg                "               UADD TEMP[2].x, TEMP[2], TEMP[0]\n"
711af69d88dSmrg                "               STORE RPRIVATE.x, TEMP[1], TEMP[2]\n"
712af69d88dSmrg                "               UADD TEMP[1].x, TEMP[1], IMM[1]\n"
713af69d88dSmrg                "       ENDLOOP\n"
714af69d88dSmrg                "       MOV TEMP[1].x, IMM[0].wwww\n"
715af69d88dSmrg                "       UMUL TEMP[0].x, TEMP[0], IMM[0]\n"
716af69d88dSmrg                "       BGNLOOP\n"
717af69d88dSmrg                "               USEQ TEMP[2].x, TEMP[1], IMM[0]\n"
718af69d88dSmrg                "               IF TEMP[2]\n"
719af69d88dSmrg                "                       BRK\n"
720af69d88dSmrg                "               ENDIF\n"
721af69d88dSmrg                "               LOAD TEMP[2].x, RPRIVATE, TEMP[1]\n"
722af69d88dSmrg                "               STORE RES[0].x, TEMP[0], TEMP[2]\n"
723af69d88dSmrg                "               UADD TEMP[0].x, TEMP[0], IMM[1]\n"
724af69d88dSmrg                "               UADD TEMP[1].x, TEMP[1], IMM[1]\n"
725af69d88dSmrg                "       ENDLOOP\n"
726af69d88dSmrg                "       RET\n"
727af69d88dSmrg                "    ENDSUB\n";
728af69d88dSmrg
729af69d88dSmrg        printf("- %s\n", __func__);
730af69d88dSmrg
731af69d88dSmrg        init_prog(ctx, 0, 128, 0, src, NULL);
732af69d88dSmrg        init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
73301e04c3fSmrg                 32768, 0, test_default_init);
734af69d88dSmrg        init_compute_resources(ctx, (int []) { 0, -1 });
735af69d88dSmrg        launch_grid(ctx, (uint []){16, 1, 1}, (uint []){16, 1, 1}, 0, NULL);
73601e04c3fSmrg        check_tex(ctx, 0, test_private_expect, NULL);
737af69d88dSmrg        destroy_compute_resources(ctx);
738af69d88dSmrg        destroy_tex(ctx);
739af69d88dSmrg        destroy_prog(ctx);
740af69d88dSmrg}
741af69d88dSmrg
74201e04c3fSmrg/* test_local */
/*
 * Reference generator for test_local.
 *
 * Positions whose index has bit 0x20 set are expected to contain 2, all
 * other positions 1.  s and y are unused.
 */
static void test_local_expect(void *p, int s, int x, int y)
{
        uint32_t *out = p;

        if (x & 0x20)
                *out = 2;
        else
                *out = 1;
}
74701e04c3fSmrg
748af69d88dSmrgstatic void test_local(struct context *ctx)
749af69d88dSmrg{
750af69d88dSmrg        const char *src = "COMP\n"
751af69d88dSmrg                "DCL RES[0], BUFFER, RAW, WR\n"
752af69d88dSmrg                "DCL SV[0], BLOCK_ID[0]\n"
753af69d88dSmrg                "DCL SV[1], BLOCK_SIZE[0]\n"
754af69d88dSmrg                "DCL SV[2], THREAD_ID[0]\n"
755af69d88dSmrg                "DCL TEMP[0], LOCAL\n"
756af69d88dSmrg                "DCL TEMP[1], LOCAL\n"
757af69d88dSmrg                "DCL TEMP[2], LOCAL\n"
758af69d88dSmrg                "IMM UINT32 { 1, 0, 0, 0 }\n"
759af69d88dSmrg                "IMM UINT32 { 2, 0, 0, 0 }\n"
760af69d88dSmrg                "IMM UINT32 { 4, 0, 0, 0 }\n"
761af69d88dSmrg                "IMM UINT32 { 32, 0, 0, 0 }\n"
762af69d88dSmrg                "IMM UINT32 { 128, 0, 0, 0 }\n"
763af69d88dSmrg                "\n"
764af69d88dSmrg                "    BGNSUB\n"
765af69d88dSmrg                "       UMUL TEMP[0].x, SV[2], IMM[2]\n"
766af69d88dSmrg                "       STORE RLOCAL.x, TEMP[0], IMM[0].wwww\n"
767af69d88dSmrg                "       MFENCE RLOCAL\n"
768af69d88dSmrg                "       USLT TEMP[1].x, SV[2], IMM[3]\n"
769af69d88dSmrg                "       IF TEMP[1]\n"
770af69d88dSmrg                "               UADD TEMP[1].x, TEMP[0], IMM[4]\n"
771af69d88dSmrg                "               BGNLOOP\n"
772af69d88dSmrg                "                       LOAD TEMP[2].x, RLOCAL, TEMP[1]\n"
773af69d88dSmrg                "                       USEQ TEMP[2].x, TEMP[2], IMM[0]\n"
774af69d88dSmrg                "                       IF TEMP[2]\n"
775af69d88dSmrg                "                               BRK\n"
776af69d88dSmrg                "                       ENDIF\n"
777af69d88dSmrg                "               ENDLOOP\n"
778af69d88dSmrg                "               STORE RLOCAL.x, TEMP[0], IMM[0]\n"
779af69d88dSmrg                "               MFENCE RLOCAL\n"
780af69d88dSmrg                "               BGNLOOP\n"
781af69d88dSmrg                "                       LOAD TEMP[2].x, RLOCAL, TEMP[1]\n"
782af69d88dSmrg                "                       USEQ TEMP[2].x, TEMP[2], IMM[1]\n"
783af69d88dSmrg                "                       IF TEMP[2]\n"
784af69d88dSmrg                "                               BRK\n"
785af69d88dSmrg                "                       ENDIF\n"
786af69d88dSmrg                "               ENDLOOP\n"
787af69d88dSmrg                "       ELSE\n"
788af69d88dSmrg                "               UADD TEMP[1].x, TEMP[0], -IMM[4]\n"
789af69d88dSmrg                "               BGNLOOP\n"
790af69d88dSmrg                "                       LOAD TEMP[2].x, RLOCAL, TEMP[1]\n"
791af69d88dSmrg                "                       USEQ TEMP[2].x, TEMP[2], IMM[0].wwww\n"
792af69d88dSmrg                "                       IF TEMP[2]\n"
793af69d88dSmrg                "                               BRK\n"
794af69d88dSmrg                "                       ENDIF\n"
795af69d88dSmrg                "               ENDLOOP\n"
796af69d88dSmrg                "               STORE RLOCAL.x, TEMP[0], IMM[0]\n"
797af69d88dSmrg                "               MFENCE RLOCAL\n"
798af69d88dSmrg                "               BGNLOOP\n"
799af69d88dSmrg                "                       LOAD TEMP[2].x, RLOCAL, TEMP[1]\n"
800af69d88dSmrg                "                       USEQ TEMP[2].x, TEMP[2], IMM[0]\n"
801af69d88dSmrg                "                       IF TEMP[2]\n"
802af69d88dSmrg                "                               BRK\n"
803af69d88dSmrg                "                       ENDIF\n"
804af69d88dSmrg                "               ENDLOOP\n"
805af69d88dSmrg                "               STORE RLOCAL.x, TEMP[0], IMM[1]\n"
806af69d88dSmrg                "               MFENCE RLOCAL\n"
807af69d88dSmrg                "       ENDIF\n"
808af69d88dSmrg                "       UMUL TEMP[1].x, SV[0], SV[1]\n"
809af69d88dSmrg                "       UMUL TEMP[1].x, TEMP[1], IMM[2]\n"
810af69d88dSmrg                "       UADD TEMP[1].x, TEMP[1], TEMP[0]\n"
811af69d88dSmrg                "       LOAD TEMP[0].x, RLOCAL, TEMP[0]\n"
812af69d88dSmrg                "       STORE RES[0].x, TEMP[1], TEMP[0]\n"
813af69d88dSmrg                "       RET\n"
814af69d88dSmrg                "    ENDSUB\n";
815af69d88dSmrg
816af69d88dSmrg        printf("- %s\n", __func__);
817af69d88dSmrg
818af69d88dSmrg        init_prog(ctx, 256, 0, 0, src, NULL);
819af69d88dSmrg        init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
82001e04c3fSmrg                 4096, 0, test_default_init);
821af69d88dSmrg        init_compute_resources(ctx, (int []) { 0, -1 });
822af69d88dSmrg        launch_grid(ctx, (uint []){64, 1, 1}, (uint []){16, 1, 1}, 0, NULL);
82301e04c3fSmrg        check_tex(ctx, 0, test_local_expect, NULL);
824af69d88dSmrg        destroy_compute_resources(ctx);
825af69d88dSmrg        destroy_tex(ctx);
826af69d88dSmrg        destroy_prog(ctx);
827af69d88dSmrg}
828af69d88dSmrg
82901e04c3fSmrg/* test_sample */
/*
 * Texel initializer for test_sample.
 *
 * Resource 0 (s == 0) is filled with the product x * y; any other
 * resource is filled with 1.  The value is stored as a float.
 */
static void test_sample_init(void *p, int s, int x, int y)
{
        float *texel = p;

        if (s)
                *texel = 1;
        else
                *texel = x * y;
}
83401e04c3fSmrg
/*
 * Reference generator for test_sample.
 *
 * The output is laid out in groups of four floats: the first float of a
 * group is (x / 4) * y, the second and third are 0 and the fourth is 1.
 * *p is left untouched if x % 4 falls outside 0..3 (negative x).
 */
static void test_sample_expect(void *p, int s, int x, int y)
{
        float *out = p;
        const int channel = x % 4;

        if (channel == 0)
                *out = x / 4 * y;
        else if (channel == 1 || channel == 2)
                *out = 0;
        else if (channel == 3)
                *out = 1;
}
85001e04c3fSmrg
851af69d88dSmrgstatic void test_sample(struct context *ctx)
852af69d88dSmrg{
853af69d88dSmrg        const char *src = "COMP\n"
854af69d88dSmrg                "DCL SVIEW[0], 2D, FLOAT\n"
855af69d88dSmrg                "DCL RES[0], 2D, RAW, WR\n"
856af69d88dSmrg                "DCL SAMP[0]\n"
857af69d88dSmrg                "DCL SV[0], BLOCK_ID[0]\n"
858af69d88dSmrg                "DCL TEMP[0], LOCAL\n"
859af69d88dSmrg                "DCL TEMP[1], LOCAL\n"
860af69d88dSmrg                "IMM UINT32 { 16, 1, 0, 0 }\n"
861af69d88dSmrg                "IMM FLT32 { 128, 32, 0, 0 }\n"
862af69d88dSmrg                "\n"
863af69d88dSmrg                "    BGNSUB\n"
864af69d88dSmrg                "       I2F TEMP[1], SV[0]\n"
865af69d88dSmrg                "       DIV TEMP[1], TEMP[1], IMM[1]\n"
866af69d88dSmrg                "       SAMPLE TEMP[1], TEMP[1], SVIEW[0], SAMP[0]\n"
867af69d88dSmrg                "       UMUL TEMP[0], SV[0], IMM[0]\n"
868af69d88dSmrg                "       STORE RES[0].xyzw, TEMP[0], TEMP[1]\n"
869af69d88dSmrg                "       RET\n"
870af69d88dSmrg                "    ENDSUB\n";
871af69d88dSmrg
872af69d88dSmrg        printf("- %s\n", __func__);
873af69d88dSmrg
874af69d88dSmrg        init_prog(ctx, 0, 0, 0, src, NULL);
875af69d88dSmrg        init_tex(ctx, 0, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT,
87601e04c3fSmrg                 128, 32, test_sample_init);
877af69d88dSmrg        init_tex(ctx, 1, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT,
87801e04c3fSmrg                 512, 32, test_sample_init);
879af69d88dSmrg        init_compute_resources(ctx, (int []) { 1, -1 });
880af69d88dSmrg        init_sampler_views(ctx, (int []) { 0, -1 });
881af69d88dSmrg        init_sampler_states(ctx, 2);
882af69d88dSmrg        launch_grid(ctx, (uint []){1, 1, 1}, (uint []){128, 32, 1}, 0, NULL);
88301e04c3fSmrg        check_tex(ctx, 1, test_sample_expect, NULL);
884af69d88dSmrg        destroy_sampler_states(ctx);
885af69d88dSmrg        destroy_sampler_views(ctx);
886af69d88dSmrg        destroy_compute_resources(ctx);
887af69d88dSmrg        destroy_tex(ctx);
888af69d88dSmrg        destroy_prog(ctx);
889af69d88dSmrg}
890af69d88dSmrg
89101e04c3fSmrg/* test_many_kern */
/*
 * Reference generator for test_many_kern: the word at position x is
 * expected to equal x.  s and y are unused.
 */
static void test_many_kern_expect(void *p, int s, int x, int y)
{
        uint32_t *out = p;

        *out = x;
}
89601e04c3fSmrg
897af69d88dSmrgstatic void test_many_kern(struct context *ctx)
898af69d88dSmrg{
899af69d88dSmrg        const char *src = "COMP\n"
900af69d88dSmrg                "DCL RES[0], BUFFER, RAW, WR\n"
901af69d88dSmrg                "DCL TEMP[0], LOCAL\n"
902af69d88dSmrg                "IMM UINT32 { 0, 1, 2, 3 }\n"
903af69d88dSmrg                "IMM UINT32 { 4, 0, 0, 0 }\n"
904af69d88dSmrg                "\n"
905af69d88dSmrg                "    BGNSUB\n"
906af69d88dSmrg                "       UMUL TEMP[0].x, IMM[0].xxxx, IMM[1].xxxx\n"
907af69d88dSmrg                "       STORE RES[0].x, TEMP[0], IMM[0].xxxx\n"
908af69d88dSmrg                "       RET\n"
909af69d88dSmrg                "    ENDSUB\n"
910af69d88dSmrg                "    BGNSUB\n"
911af69d88dSmrg                "       UMUL TEMP[0].x, IMM[0].yyyy, IMM[1].xxxx\n"
912af69d88dSmrg                "       STORE RES[0].x, TEMP[0], IMM[0].yyyy\n"
913af69d88dSmrg                "       RET\n"
914af69d88dSmrg                "    ENDSUB\n"
915af69d88dSmrg                "    BGNSUB\n"
916af69d88dSmrg                "       UMUL TEMP[0].x, IMM[0].zzzz, IMM[1].xxxx\n"
917af69d88dSmrg                "       STORE RES[0].x, TEMP[0], IMM[0].zzzz\n"
918af69d88dSmrg                "       RET\n"
919af69d88dSmrg                "    ENDSUB\n"
920af69d88dSmrg                "    BGNSUB\n"
921af69d88dSmrg                "       UMUL TEMP[0].x, IMM[0].wwww, IMM[1].xxxx\n"
922af69d88dSmrg                "       STORE RES[0].x, TEMP[0], IMM[0].wwww\n"
923af69d88dSmrg                "       RET\n"
924af69d88dSmrg                "    ENDSUB\n";
925af69d88dSmrg
926af69d88dSmrg        printf("- %s\n", __func__);
927af69d88dSmrg
928af69d88dSmrg        init_prog(ctx, 0, 0, 0, src, NULL);
929af69d88dSmrg        init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
93001e04c3fSmrg                 16, 0, test_default_init);
931af69d88dSmrg        init_compute_resources(ctx, (int []) { 0, -1 });
932af69d88dSmrg        launch_grid(ctx, (uint []){1, 1, 1}, (uint []){1, 1, 1}, 0, NULL);
933af69d88dSmrg        launch_grid(ctx, (uint []){1, 1, 1}, (uint []){1, 1, 1}, 5, NULL);
934af69d88dSmrg        launch_grid(ctx, (uint []){1, 1, 1}, (uint []){1, 1, 1}, 10, NULL);
935af69d88dSmrg        launch_grid(ctx, (uint []){1, 1, 1}, (uint []){1, 1, 1}, 15, NULL);
93601e04c3fSmrg        check_tex(ctx, 0, test_many_kern_expect, NULL);
937af69d88dSmrg        destroy_compute_resources(ctx);
938af69d88dSmrg        destroy_tex(ctx);
939af69d88dSmrg        destroy_prog(ctx);
940af69d88dSmrg}
941af69d88dSmrg
94201e04c3fSmrg/* test_constant */
/*
 * Texel initializer for test_constant.
 *
 * Resource 0 (s == 0) receives the ramp 8.0 - x; any other resource is
 * filled with the value 0xdeadbeef converted to float.  y is unused.
 */
static void test_constant_init(void *p, int s, int x, int y)
{
        float *texel = p;

        if (s)
                *texel = (double)0xdeadbeef;
        else
                *texel = 8.0 - (float)x;
}
94701e04c3fSmrg
/*
 * Reference generator for test_constant: position x is expected to hold
 * the float 8.0 - x, the same ramp test_constant_init writes to
 * resource 0.  s and y are unused.
 */
static void test_constant_expect(void *p, int s, int x, int y)
{
        float *out = p;
        const float pos = (float)x;

        *out = 8.0 - pos;
}
95201e04c3fSmrg
953af69d88dSmrgstatic void test_constant(struct context *ctx)
954af69d88dSmrg{
955af69d88dSmrg        const char *src = "COMP\n"
956af69d88dSmrg                "DCL RES[0], BUFFER, RAW\n"
957af69d88dSmrg                "DCL RES[1], BUFFER, RAW, WR\n"
958af69d88dSmrg                "DCL SV[0], BLOCK_ID[0]\n"
959af69d88dSmrg                "DCL TEMP[0], LOCAL\n"
960af69d88dSmrg                "DCL TEMP[1], LOCAL\n"
961af69d88dSmrg                "IMM UINT32 { 4, 0, 0, 0 }\n"
962af69d88dSmrg                "\n"
963af69d88dSmrg                "    BGNSUB\n"
964af69d88dSmrg                "       UMUL TEMP[0].x, SV[0], IMM[0]\n"
965af69d88dSmrg                "       LOAD TEMP[1].x, RES[0], TEMP[0]\n"
966af69d88dSmrg                "       STORE RES[1].x, TEMP[0], TEMP[1]\n"
967af69d88dSmrg                "       RET\n"
968af69d88dSmrg                "    ENDSUB\n";
969af69d88dSmrg
970af69d88dSmrg        printf("- %s\n", __func__);
971af69d88dSmrg
972af69d88dSmrg        init_prog(ctx, 0, 0, 0, src, NULL);
973af69d88dSmrg        init_tex(ctx, 0, PIPE_BUFFER, false, PIPE_FORMAT_R32_FLOAT,
97401e04c3fSmrg                 256, 0, test_constant_init);
975af69d88dSmrg        init_tex(ctx, 1, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
97601e04c3fSmrg                 256, 0, test_constant_init);
977af69d88dSmrg        init_compute_resources(ctx, (int []) { 0, 1, -1 });
978af69d88dSmrg        launch_grid(ctx, (uint []){1, 1, 1}, (uint []){64, 1, 1}, 0, NULL);
97901e04c3fSmrg        check_tex(ctx, 1, test_constant_expect, NULL);
980af69d88dSmrg        destroy_compute_resources(ctx);
981af69d88dSmrg        destroy_tex(ctx);
982af69d88dSmrg        destroy_prog(ctx);
983af69d88dSmrg}
984af69d88dSmrg
98501e04c3fSmrg/* test_resource_indirect */
/*
 * Texel initializer for test_resource_indirect.
 *
 * The contents depend on the resource index s:
 *   0     -> 0xdeadbeef
 *   1     -> x % 2
 *   2     -> 2 * x
 *   other -> 2 * x + 1
 * y is unused.
 */
static void test_resource_indirect_init(void *p, int s, int x, int y)
{
        uint32_t *texel = p;

        switch (s) {
        case 0:
                *texel = 0xdeadbeef;
                break;
        case 1:
                *texel = x % 2;
                break;
        case 2:
                *texel = 2 * x;
                break;
        default:
                *texel = 2 * x + 1;
                break;
        }
}
99301e04c3fSmrg
/*
 * Reference generator for test_resource_indirect: position x is expected
 * to hold 2 * x for even x and 2 * x + 1 for odd x.  s and y are unused.
 */
static void test_resource_indirect_expect(void *p, int s, int x, int y)
{
        uint32_t *out = p;
        const int odd = (x % 2 != 0);

        *out = 2 * x + odd;
}
99801e04c3fSmrg
999af69d88dSmrgstatic void test_resource_indirect(struct context *ctx)
1000af69d88dSmrg{
1001af69d88dSmrg        const char *src = "COMP\n"
1002af69d88dSmrg                "DCL RES[0], BUFFER, RAW, WR\n"
1003af69d88dSmrg                "DCL RES[1..3], BUFFER, RAW\n"
1004af69d88dSmrg                "DCL SV[0], BLOCK_ID[0]\n"
1005af69d88dSmrg                "DCL TEMP[0], LOCAL\n"
1006af69d88dSmrg                "DCL TEMP[1], LOCAL\n"
1007af69d88dSmrg                "IMM UINT32 { 4, 0, 0, 0 }\n"
1008af69d88dSmrg                "\n"
1009af69d88dSmrg                "    BGNSUB\n"
1010af69d88dSmrg                "       UMUL TEMP[0].x, SV[0], IMM[0]\n"
1011af69d88dSmrg                "       LOAD TEMP[1].x, RES[1], TEMP[0]\n"
1012af69d88dSmrg                "       LOAD TEMP[1].x, RES[TEMP[1].x+2], TEMP[0]\n"
1013af69d88dSmrg                "       STORE RES[0].x, TEMP[0], TEMP[1]\n"
1014af69d88dSmrg                "       RET\n"
1015af69d88dSmrg                "    ENDSUB\n";
1016af69d88dSmrg
1017af69d88dSmrg        printf("- %s\n", __func__);
1018af69d88dSmrg
1019af69d88dSmrg        init_prog(ctx, 0, 0, 0, src, NULL);
1020af69d88dSmrg        init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
102101e04c3fSmrg                 256, 0, test_resource_indirect_init);
1022af69d88dSmrg        init_tex(ctx, 1, PIPE_BUFFER, false, PIPE_FORMAT_R32_FLOAT,
102301e04c3fSmrg                 256, 0, test_resource_indirect_init);
1024af69d88dSmrg        init_tex(ctx, 2, PIPE_BUFFER, false, PIPE_FORMAT_R32_FLOAT,
102501e04c3fSmrg                 256, 0, test_resource_indirect_init);
1026af69d88dSmrg        init_tex(ctx, 3, PIPE_BUFFER, false, PIPE_FORMAT_R32_FLOAT,
102701e04c3fSmrg                 256, 0, test_resource_indirect_init);
1028af69d88dSmrg        init_compute_resources(ctx, (int []) { 0, 1, 2, 3, -1 });
1029af69d88dSmrg        launch_grid(ctx, (uint []){1, 1, 1}, (uint []){64, 1, 1}, 0, NULL);
103001e04c3fSmrg        check_tex(ctx, 0, test_resource_indirect_expect, NULL);
1031af69d88dSmrg        destroy_compute_resources(ctx);
1032af69d88dSmrg        destroy_tex(ctx);
1033af69d88dSmrg        destroy_prog(ctx);
1034af69d88dSmrg}
1035af69d88dSmrg
103601e04c3fSmrg/* test_surface_ld */
1037af69d88dSmrgenum pipe_format surface_fmts[] = {
1038af69d88dSmrg        PIPE_FORMAT_B8G8R8A8_UNORM,
1039af69d88dSmrg        PIPE_FORMAT_B8G8R8X8_UNORM,
1040af69d88dSmrg        PIPE_FORMAT_A8R8G8B8_UNORM,
1041af69d88dSmrg        PIPE_FORMAT_X8R8G8B8_UNORM,
1042af69d88dSmrg        PIPE_FORMAT_X8R8G8B8_UNORM,
1043af69d88dSmrg        PIPE_FORMAT_L8_UNORM,
1044af69d88dSmrg        PIPE_FORMAT_A8_UNORM,
1045af69d88dSmrg        PIPE_FORMAT_I8_UNORM,
1046af69d88dSmrg        PIPE_FORMAT_L8A8_UNORM,
1047af69d88dSmrg        PIPE_FORMAT_R32_FLOAT,
1048af69d88dSmrg        PIPE_FORMAT_R32G32_FLOAT,
1049af69d88dSmrg        PIPE_FORMAT_R32G32B32A32_FLOAT,
1050af69d88dSmrg        PIPE_FORMAT_R32_UNORM,
1051af69d88dSmrg        PIPE_FORMAT_R32G32_UNORM,
1052af69d88dSmrg        PIPE_FORMAT_R32G32B32A32_UNORM,
1053af69d88dSmrg        PIPE_FORMAT_R32_SNORM,
1054af69d88dSmrg        PIPE_FORMAT_R32G32_SNORM,
1055af69d88dSmrg        PIPE_FORMAT_R32G32B32A32_SNORM,
1056af69d88dSmrg        PIPE_FORMAT_R8_UINT,
1057af69d88dSmrg        PIPE_FORMAT_R8G8_UINT,
1058af69d88dSmrg        PIPE_FORMAT_R8G8B8A8_UINT,
1059af69d88dSmrg        PIPE_FORMAT_R8_SINT,
1060af69d88dSmrg        PIPE_FORMAT_R8G8_SINT,
1061af69d88dSmrg        PIPE_FORMAT_R8G8B8A8_SINT,
1062af69d88dSmrg        PIPE_FORMAT_R32_UINT,
1063af69d88dSmrg        PIPE_FORMAT_R32G32_UINT,
1064af69d88dSmrg        PIPE_FORMAT_R32G32B32A32_UINT,
1065af69d88dSmrg        PIPE_FORMAT_R32_SINT,
1066af69d88dSmrg        PIPE_FORMAT_R32G32_SINT,
1067af69d88dSmrg        PIPE_FORMAT_R32G32B32A32_SINT
1068af69d88dSmrg};
1069af69d88dSmrg
107001e04c3fSmrgstatic void test_surface_ld_init0f(void *p, int s, int x, int y)
107101e04c3fSmrg{
107201e04c3fSmrg        float v[] = { 1.0, -.75, .50, -.25 };
107301e04c3fSmrg        int i = 0;
107401e04c3fSmrg
10757ec681f3Smrg        util_format_pack_rgba(surface_fmts[i], p, v, 1);
107601e04c3fSmrg}
107701e04c3fSmrg
107801e04c3fSmrgstatic void test_surface_ld_init0i(void *p, int s, int x, int y)
107901e04c3fSmrg{
108001e04c3fSmrg        int v[] = { 0xffffffff, 0xffff, 0xff, 0xf };
108101e04c3fSmrg        int i = 0;
108201e04c3fSmrg
10837ec681f3Smrg        util_format_pack_rgba(surface_fmts[i], p, v, 1);
108401e04c3fSmrg}
108501e04c3fSmrg
108601e04c3fSmrgstatic void test_surface_ld_expectf(void *p, int s, int x, int y)
108701e04c3fSmrg{
108801e04c3fSmrg        float v[4], w[4];
108901e04c3fSmrg        int i = 0;
109001e04c3fSmrg
109101e04c3fSmrg        test_surface_ld_init0f(v, s, x / 4, y);
10927ec681f3Smrg        util_format_unpack_rgba(surface_fmts[i], w, v, 1);
109301e04c3fSmrg        *(float *)p = w[x % 4];
109401e04c3fSmrg}
109501e04c3fSmrg
109601e04c3fSmrgstatic void test_surface_ld_expecti(void *p, int s, int x, int y)
109701e04c3fSmrg{
109801e04c3fSmrg        int32_t v[4], w[4];
109901e04c3fSmrg        int i = 0;
110001e04c3fSmrg
110101e04c3fSmrg        test_surface_ld_init0i(v, s, x / 4, y);
11027ec681f3Smrg        util_format_unpack_rgba(surface_fmts[i], w, v, 1);
110301e04c3fSmrg        *(uint32_t *)p = w[x % 4];
110401e04c3fSmrg}
110501e04c3fSmrg
1106af69d88dSmrgstatic void test_surface_ld(struct context *ctx)
1107af69d88dSmrg{
1108af69d88dSmrg        const char *src = "COMP\n"
1109af69d88dSmrg                "DCL RES[0], 2D\n"
1110af69d88dSmrg                "DCL RES[1], 2D, RAW, WR\n"
1111af69d88dSmrg                "DCL SV[0], BLOCK_ID[0]\n"
1112af69d88dSmrg                "DCL TEMP[0], LOCAL\n"
1113af69d88dSmrg                "DCL TEMP[1], LOCAL\n"
1114af69d88dSmrg                "IMM UINT32 { 16, 1, 0, 0 }\n"
1115af69d88dSmrg                "\n"
1116af69d88dSmrg                "    BGNSUB\n"
1117af69d88dSmrg                "       LOAD TEMP[1], RES[0], SV[0]\n"
1118af69d88dSmrg                "       UMUL TEMP[0], SV[0], IMM[0]\n"
1119af69d88dSmrg                "       STORE RES[1].xyzw, TEMP[0], TEMP[1]\n"
1120af69d88dSmrg                "       RET\n"
1121af69d88dSmrg                "    ENDSUB\n";
1122af69d88dSmrg        int i = 0;
1123af69d88dSmrg
1124af69d88dSmrg        printf("- %s\n", __func__);
1125af69d88dSmrg
1126af69d88dSmrg        init_prog(ctx, 0, 0, 0, src, NULL);
1127af69d88dSmrg
112801e04c3fSmrg        for (i = 0; i < ARRAY_SIZE(surface_fmts); i++) {
1129af69d88dSmrg                bool is_int = util_format_is_pure_integer(surface_fmts[i]);
1130af69d88dSmrg
1131af69d88dSmrg                printf("   - %s\n", util_format_name(surface_fmts[i]));
1132af69d88dSmrg
1133af69d88dSmrg                if (!ctx->screen->is_format_supported(ctx->screen,
113401e04c3fSmrg                       surface_fmts[i], PIPE_TEXTURE_2D, 1, 1,
1135af69d88dSmrg                       PIPE_BIND_COMPUTE_RESOURCE)) {
1136af69d88dSmrg                   printf("(unsupported)\n");
1137af69d88dSmrg                   continue;
1138af69d88dSmrg                }
1139af69d88dSmrg
1140af69d88dSmrg                init_tex(ctx, 0, PIPE_TEXTURE_2D, true, surface_fmts[i],
114101e04c3fSmrg                         128, 32, (is_int ? test_surface_ld_init0i : test_surface_ld_init0f));
1142af69d88dSmrg                init_tex(ctx, 1, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT,
114301e04c3fSmrg                         512, 32, test_default_init);
1144af69d88dSmrg                init_compute_resources(ctx, (int []) { 0, 1, -1 });
1145af69d88dSmrg                init_sampler_states(ctx, 2);
1146af69d88dSmrg                launch_grid(ctx, (uint []){1, 1, 1}, (uint []){128, 32, 1}, 0,
1147af69d88dSmrg                            NULL);
114801e04c3fSmrg                check_tex(ctx, 1, (is_int ? test_surface_ld_expecti : test_surface_ld_expectf), NULL);
1149af69d88dSmrg                destroy_sampler_states(ctx);
1150af69d88dSmrg                destroy_compute_resources(ctx);
1151af69d88dSmrg                destroy_tex(ctx);
1152af69d88dSmrg        }
1153af69d88dSmrg
1154af69d88dSmrg        destroy_prog(ctx);
1155af69d88dSmrg}
1156af69d88dSmrg
115701e04c3fSmrg/* test_surface_st */
115801e04c3fSmrgstatic void test_surface_st_init0f(void *p, int s, int x, int y)
115901e04c3fSmrg{
116001e04c3fSmrg        float v[] = { 1.0, -.75, 0.5, -.25 };
116101e04c3fSmrg        *(float *)p = v[x % 4];
116201e04c3fSmrg}
116301e04c3fSmrg
116401e04c3fSmrgstatic void test_surface_st_init0i(void *p, int s, int x, int y)
116501e04c3fSmrg{
116601e04c3fSmrg        int v[] = { 0xffffffff, 0xffff, 0xff, 0xf };
116701e04c3fSmrg        *(int32_t *)p = v[x % 4];
116801e04c3fSmrg}
116901e04c3fSmrg
117001e04c3fSmrgstatic void test_surface_st_init1(void *p, int s, int x, int y)
117101e04c3fSmrg{
117201e04c3fSmrg        int i = 0;
117301e04c3fSmrg        memset(p, 1, util_format_get_blocksize(surface_fmts[i]));
117401e04c3fSmrg}
117501e04c3fSmrg
117601e04c3fSmrgstatic void test_surface_st_expectf(void *p, int s, int x, int y)
117701e04c3fSmrg{
117801e04c3fSmrg        float vf[4];
117901e04c3fSmrg        int i = 0, j;
118001e04c3fSmrg
118101e04c3fSmrg        for (j = 0; j < 4; j++)
118201e04c3fSmrg                test_surface_st_init0f(&vf[j], s, 4 * x + j, y);
11837ec681f3Smrg        util_format_pack_rgba(surface_fmts[i], p, vf, 1);
118401e04c3fSmrg}
118501e04c3fSmrg
118601e04c3fSmrgstatic void test_surface_st_expects(void *p, int s, int x, int y)
118701e04c3fSmrg{
118801e04c3fSmrg        int32_t v[4];
118901e04c3fSmrg        int i = 0, j;
119001e04c3fSmrg
119101e04c3fSmrg        for (j = 0; j < 4; j++)
119201e04c3fSmrg                test_surface_st_init0i(&v[j], s, 4 * x + j, y);
11937ec681f3Smrg        util_format_pack_rgba(surface_fmts[i], p, v, 1);
119401e04c3fSmrg}
119501e04c3fSmrg
119601e04c3fSmrgstatic void test_surface_st_expectu(void *p, int s, int x, int y)
119701e04c3fSmrg{
119801e04c3fSmrg        uint32_t v[4];
119901e04c3fSmrg        int i = 0, j;
120001e04c3fSmrg
120101e04c3fSmrg        for (j = 0; j < 4; j++)
120201e04c3fSmrg                test_surface_st_init0i(&v[j], s, 4 * x + j, y);
12037ec681f3Smrg        util_format_pack_rgba(surface_fmts[i], p, v, 1);
12047ec681f3Smrg}
12057ec681f3Smrg
12067ec681f3Smrgstatic unsigned absdiff(uint32_t a, uint32_t b)
12077ec681f3Smrg{
12087ec681f3Smrg        return (a > b) ? (a - b) : (b - a);
120901e04c3fSmrg}
121001e04c3fSmrg
121101e04c3fSmrgstatic bool test_surface_st_check(void *x, void *y, int sz)
121201e04c3fSmrg{
121301e04c3fSmrg        int i = 0, j;
121401e04c3fSmrg
121501e04c3fSmrg        if (util_format_is_float(surface_fmts[i])) {
121601e04c3fSmrg                return fabs(*(float *)x - *(float *)y) < 3.92156863e-3;
121701e04c3fSmrg
121801e04c3fSmrg        } else if ((sz % 4) == 0) {
121901e04c3fSmrg                for (j = 0; j < sz / 4; j++)
12207ec681f3Smrg                        if (absdiff(((uint32_t *)x)[j],
12217ec681f3Smrg                                    ((uint32_t *)y)[j]) > 1)
122201e04c3fSmrg                                return false;
122301e04c3fSmrg                return true;
122401e04c3fSmrg        } else {
122501e04c3fSmrg                return !memcmp(x, y, sz);
122601e04c3fSmrg        }
122701e04c3fSmrg}
122801e04c3fSmrg
1229af69d88dSmrgstatic void test_surface_st(struct context *ctx)
1230af69d88dSmrg{
1231af69d88dSmrg        const char *src = "COMP\n"
1232af69d88dSmrg                "DCL RES[0], 2D, RAW\n"
1233af69d88dSmrg                "DCL RES[1], 2D, WR\n"
1234af69d88dSmrg                "DCL SV[0], BLOCK_ID[0]\n"
1235af69d88dSmrg                "DCL TEMP[0], LOCAL\n"
1236af69d88dSmrg                "DCL TEMP[1], LOCAL\n"
1237af69d88dSmrg                "IMM UINT32 { 16, 1, 0, 0 }\n"
1238af69d88dSmrg                "\n"
1239af69d88dSmrg                "    BGNSUB\n"
1240af69d88dSmrg                "       UMUL TEMP[0], SV[0], IMM[0]\n"
1241af69d88dSmrg                "       LOAD TEMP[1], RES[0], TEMP[0]\n"
1242af69d88dSmrg                "       STORE RES[1], SV[0], TEMP[1]\n"
1243af69d88dSmrg                "       RET\n"
1244af69d88dSmrg                "    ENDSUB\n";
1245af69d88dSmrg        int i = 0;
1246af69d88dSmrg
1247af69d88dSmrg        printf("- %s\n", __func__);
1248af69d88dSmrg
1249af69d88dSmrg        init_prog(ctx, 0, 0, 0, src, NULL);
1250af69d88dSmrg
125101e04c3fSmrg        for (i = 0; i < ARRAY_SIZE(surface_fmts); i++) {
1252af69d88dSmrg                bool is_signed = (util_format_description(surface_fmts[i])
1253af69d88dSmrg                                  ->channel[0].type == UTIL_FORMAT_TYPE_SIGNED);
1254af69d88dSmrg                bool is_int = util_format_is_pure_integer(surface_fmts[i]);
1255af69d88dSmrg
1256af69d88dSmrg                printf("   - %s\n", util_format_name(surface_fmts[i]));
1257af69d88dSmrg
1258af69d88dSmrg                if (!ctx->screen->is_format_supported(ctx->screen,
125901e04c3fSmrg                       surface_fmts[i], PIPE_TEXTURE_2D, 1, 1,
1260af69d88dSmrg                       PIPE_BIND_COMPUTE_RESOURCE)) {
1261af69d88dSmrg                   printf("(unsupported)\n");
1262af69d88dSmrg                   continue;
1263af69d88dSmrg                }
1264af69d88dSmrg
1265af69d88dSmrg                init_tex(ctx, 0, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT,
126601e04c3fSmrg                         512, 32, (is_int ? test_surface_st_init0i : test_surface_st_init0f));
1267af69d88dSmrg                init_tex(ctx, 1, PIPE_TEXTURE_2D, true, surface_fmts[i],
126801e04c3fSmrg                         128, 32, test_surface_st_init1);
1269af69d88dSmrg                init_compute_resources(ctx, (int []) { 0, 1, -1 });
1270af69d88dSmrg                init_sampler_states(ctx, 2);
1271af69d88dSmrg                launch_grid(ctx, (uint []){1, 1, 1}, (uint []){128, 32, 1}, 0,
1272af69d88dSmrg                            NULL);
127301e04c3fSmrg                check_tex(ctx, 1, (is_int && is_signed ? test_surface_st_expects :
127401e04c3fSmrg                                   is_int && !is_signed ? test_surface_st_expectu :
127501e04c3fSmrg                                   test_surface_st_expectf), test_surface_st_check);
1276af69d88dSmrg                destroy_sampler_states(ctx);
1277af69d88dSmrg                destroy_compute_resources(ctx);
1278af69d88dSmrg                destroy_tex(ctx);
1279af69d88dSmrg        }
1280af69d88dSmrg
1281af69d88dSmrg        destroy_prog(ctx);
1282af69d88dSmrg}
1283af69d88dSmrg
128401e04c3fSmrg/* test_barrier */
/*
 * Reference-value callback handed to check_tex() by test_barrier():
 * writes the constant 31 into the 32-bit word at p.  The s, x and y
 * arguments are ignored.
 */
static void test_barrier_expect(void *p, int s, int x, int y)
{
        uint32_t *word = p;

        *word = 31;
}
128901e04c3fSmrg
1290af69d88dSmrgstatic void test_barrier(struct context *ctx)
1291af69d88dSmrg{
1292af69d88dSmrg        const char *src = "COMP\n"
1293af69d88dSmrg                "DCL RES[0], BUFFER, RAW, WR\n"
1294af69d88dSmrg                "DCL SV[0], BLOCK_ID[0]\n"
1295af69d88dSmrg                "DCL SV[1], BLOCK_SIZE[0]\n"
1296af69d88dSmrg                "DCL SV[2], THREAD_ID[0]\n"
1297af69d88dSmrg                "DCL TEMP[0], LOCAL\n"
1298af69d88dSmrg                "DCL TEMP[1], LOCAL\n"
1299af69d88dSmrg                "DCL TEMP[2], LOCAL\n"
1300af69d88dSmrg                "DCL TEMP[3], LOCAL\n"
1301af69d88dSmrg                "IMM UINT32 { 1, 0, 0, 0 }\n"
1302af69d88dSmrg                "IMM UINT32 { 4, 0, 0, 0 }\n"
1303af69d88dSmrg                "IMM UINT32 { 32, 0, 0, 0 }\n"
1304af69d88dSmrg                "\n"
1305af69d88dSmrg                "    BGNSUB\n"
1306af69d88dSmrg                "       UMUL TEMP[0].x, SV[2], IMM[1]\n"
1307af69d88dSmrg                "       MOV TEMP[1].x, IMM[0].wwww\n"
1308af69d88dSmrg                "       BGNLOOP\n"
1309af69d88dSmrg                "               BARRIER\n"
1310af69d88dSmrg                "               STORE RLOCAL.x, TEMP[0], TEMP[1]\n"
1311af69d88dSmrg                "               BARRIER\n"
1312af69d88dSmrg                "               MOV TEMP[2].x, IMM[0].wwww\n"
1313af69d88dSmrg                "               BGNLOOP\n"
1314af69d88dSmrg                "                       UMUL TEMP[3].x, TEMP[2], IMM[1]\n"
1315af69d88dSmrg                "                       LOAD TEMP[3].x, RLOCAL, TEMP[3]\n"
1316af69d88dSmrg                "                       USNE TEMP[3].x, TEMP[3], TEMP[1]\n"
1317af69d88dSmrg                "                       IF TEMP[3]\n"
1318af69d88dSmrg                "                               END\n"
1319af69d88dSmrg                "                       ENDIF\n"
1320af69d88dSmrg                "                       UADD TEMP[2].x, TEMP[2], IMM[0]\n"
1321af69d88dSmrg                "                       USEQ TEMP[3].x, TEMP[2], SV[1]\n"
1322af69d88dSmrg                "                       IF TEMP[3]\n"
1323af69d88dSmrg                "                               BRK\n"
1324af69d88dSmrg                "                       ENDIF\n"
1325af69d88dSmrg                "               ENDLOOP\n"
1326af69d88dSmrg                "               UADD TEMP[1].x, TEMP[1], IMM[0]\n"
1327af69d88dSmrg                "               USEQ TEMP[2].x, TEMP[1], IMM[2]\n"
1328af69d88dSmrg                "               IF TEMP[2]\n"
1329af69d88dSmrg                "                       BRK\n"
1330af69d88dSmrg                "               ENDIF\n"
1331af69d88dSmrg                "       ENDLOOP\n"
1332af69d88dSmrg                "       UMUL TEMP[1].x, SV[0], SV[1]\n"
1333af69d88dSmrg                "       UMUL TEMP[1].x, TEMP[1], IMM[1]\n"
1334af69d88dSmrg                "       UADD TEMP[1].x, TEMP[1], TEMP[0]\n"
1335af69d88dSmrg                "       LOAD TEMP[0].x, RLOCAL, TEMP[0]\n"
1336af69d88dSmrg                "       STORE RES[0].x, TEMP[1], TEMP[0]\n"
1337af69d88dSmrg                "       RET\n"
1338af69d88dSmrg                "    ENDSUB\n";
1339af69d88dSmrg
1340af69d88dSmrg        printf("- %s\n", __func__);
1341af69d88dSmrg
1342af69d88dSmrg        init_prog(ctx, 256, 0, 0, src, NULL);
1343af69d88dSmrg        init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
134401e04c3fSmrg                 4096, 0, test_default_init);
1345af69d88dSmrg        init_compute_resources(ctx, (int []) { 0, -1 });
1346af69d88dSmrg        launch_grid(ctx, (uint []){64, 1, 1}, (uint []){16, 1, 1}, 0, NULL);
134701e04c3fSmrg        check_tex(ctx, 0, test_barrier_expect, NULL);
1348af69d88dSmrg        destroy_compute_resources(ctx);
1349af69d88dSmrg        destroy_tex(ctx);
1350af69d88dSmrg        destroy_prog(ctx);
1351af69d88dSmrg}
1352af69d88dSmrg
135301e04c3fSmrg/* test_atom_ops */
/*
 * Initialisation callback handed to init_tex() by test_atom_ops():
 * writes the marker value 0xbad into the 32-bit word at p.  The s, x
 * and y arguments are ignored.
 */
static void test_atom_ops_init(void *p, int s, int x, int y)
{
        uint32_t *word = p;

        *word = 0xbad;
}
135801e04c3fSmrg
/*
 * Reference-value callback handed to check_tex() by test_atom_ops().
 *
 * For x in 0..9 the matching table entry is written to the 32-bit word
 * at p; for any other x the word is left untouched.  The s and y
 * arguments are ignored.
 *
 * The entry order follows the thread ids tested by the test_atom_ops
 * kernel (presumably x is the 32-bit element index -- confirm against
 * check_tex()).
 */
static void test_atom_ops_expect(void *p, int s, int x, int y)
{
        static const uint32_t expected[] = {
                0xce6c8eef, /* 0: ATOMUADD */
                0xdeadbeef, /* 1: ATOMXCHG */
                0x11111111, /* 2: ATOMCAS */
                0x10011001, /* 3: ATOMAND */
                0xdfbdbfff, /* 4: ATOMOR */
                0x11111111, /* 5: ATOMXOR */
                0x11111111, /* 6: ATOMUMIN */
                0xdeadbeef, /* 7: ATOMUMAX */
                0xdeadbeef, /* 8: ATOMIMIN */
                0x11111111, /* 9: ATOMIMAX */
        };
        const int count = (int)(sizeof(expected) / sizeof(expected[0]));

        /* Indices without a reference value leave *p unchanged. */
        if (x < 0 || x >= count)
                return;

        *(uint32_t *)p = expected[x];
}
139401e04c3fSmrg
1395af69d88dSmrgstatic void test_atom_ops(struct context *ctx, bool global)
1396af69d88dSmrg{
1397af69d88dSmrg        const char *src = "COMP\n"
1398af69d88dSmrg                "#ifdef TARGET_GLOBAL\n"
1399af69d88dSmrg                "#define target RES[0]\n"
1400af69d88dSmrg                "#else\n"
1401af69d88dSmrg                "#define target RLOCAL\n"
1402af69d88dSmrg                "#endif\n"
1403af69d88dSmrg                ""
1404af69d88dSmrg                "DCL RES[0], BUFFER, RAW, WR\n"
1405af69d88dSmrg                "#define threadid SV[0]\n"
1406af69d88dSmrg                "DCL threadid, THREAD_ID[0]\n"
1407af69d88dSmrg                ""
1408af69d88dSmrg                "#define offset TEMP[0]\n"
1409af69d88dSmrg                "DCL offset, LOCAL\n"
1410af69d88dSmrg                "#define tmp TEMP[1]\n"
1411af69d88dSmrg                "DCL tmp, LOCAL\n"
1412af69d88dSmrg                ""
1413af69d88dSmrg                "#define k0 IMM[0]\n"
1414af69d88dSmrg                "IMM UINT32 { 0, 0, 0, 0 }\n"
1415af69d88dSmrg                "#define k1 IMM[1]\n"
1416af69d88dSmrg                "IMM UINT32 { 1, 0, 0, 0 }\n"
1417af69d88dSmrg                "#define k2 IMM[2]\n"
1418af69d88dSmrg                "IMM UINT32 { 2, 0, 0, 0 }\n"
1419af69d88dSmrg                "#define k3 IMM[3]\n"
1420af69d88dSmrg                "IMM UINT32 { 3, 0, 0, 0 }\n"
1421af69d88dSmrg                "#define k4 IMM[4]\n"
1422af69d88dSmrg                "IMM UINT32 { 4, 0, 0, 0 }\n"
1423af69d88dSmrg                "#define k5 IMM[5]\n"
1424af69d88dSmrg                "IMM UINT32 { 5, 0, 0, 0 }\n"
1425af69d88dSmrg                "#define k6 IMM[6]\n"
1426af69d88dSmrg                "IMM UINT32 { 6, 0, 0, 0 }\n"
1427af69d88dSmrg                "#define k7 IMM[7]\n"
1428af69d88dSmrg                "IMM UINT32 { 7, 0, 0, 0 }\n"
1429af69d88dSmrg                "#define k8 IMM[8]\n"
1430af69d88dSmrg                "IMM UINT32 { 8, 0, 0, 0 }\n"
1431af69d88dSmrg                "#define k9 IMM[9]\n"
1432af69d88dSmrg                "IMM UINT32 { 9, 0, 0, 0 }\n"
1433af69d88dSmrg                "#define korig IMM[10].xxxx\n"
1434af69d88dSmrg                "#define karg IMM[10].yyyy\n"
1435af69d88dSmrg                "IMM UINT32 { 3735928559, 286331153, 0, 0 }\n"
1436af69d88dSmrg                "\n"
1437af69d88dSmrg                "    BGNSUB\n"
1438af69d88dSmrg                "       UMUL offset.x, threadid, k4\n"
1439af69d88dSmrg                "       STORE target.x, offset, korig\n"
1440af69d88dSmrg                "       USEQ tmp.x, threadid, k0\n"
1441af69d88dSmrg                "       IF tmp\n"
1442af69d88dSmrg                "               ATOMUADD tmp.x, target, offset, karg\n"
1443af69d88dSmrg                "               ATOMUADD tmp.x, target, offset, tmp\n"
1444af69d88dSmrg                "       ENDIF\n"
1445af69d88dSmrg                "       USEQ tmp.x, threadid, k1\n"
1446af69d88dSmrg                "       IF tmp\n"
1447af69d88dSmrg                "               ATOMXCHG tmp.x, target, offset, karg\n"
1448af69d88dSmrg                "               ATOMXCHG tmp.x, target, offset, tmp\n"
1449af69d88dSmrg                "       ENDIF\n"
1450af69d88dSmrg                "       USEQ tmp.x, threadid, k2\n"
1451af69d88dSmrg                "       IF tmp\n"
1452af69d88dSmrg                "               ATOMCAS tmp.x, target, offset, korig, karg\n"
1453af69d88dSmrg                "               ATOMCAS tmp.x, target, offset, tmp, k0\n"
1454af69d88dSmrg                "       ENDIF\n"
1455af69d88dSmrg                "       USEQ tmp.x, threadid, k3\n"
1456af69d88dSmrg                "       IF tmp\n"
1457af69d88dSmrg                "               ATOMAND tmp.x, target, offset, karg\n"
1458af69d88dSmrg                "               ATOMAND tmp.x, target, offset, tmp\n"
1459af69d88dSmrg                "       ENDIF\n"
1460af69d88dSmrg                "       USEQ tmp.x, threadid, k4\n"
1461af69d88dSmrg                "       IF tmp\n"
1462af69d88dSmrg                "               ATOMOR tmp.x, target, offset, karg\n"
1463af69d88dSmrg                "               ATOMOR tmp.x, target, offset, tmp\n"
1464af69d88dSmrg                "       ENDIF\n"
1465af69d88dSmrg                "       USEQ tmp.x, threadid, k5\n"
1466af69d88dSmrg                "       IF tmp\n"
1467af69d88dSmrg                "               ATOMXOR tmp.x, target, offset, karg\n"
1468af69d88dSmrg                "               ATOMXOR tmp.x, target, offset, tmp\n"
1469af69d88dSmrg                "       ENDIF\n"
1470af69d88dSmrg                "       USEQ tmp.x, threadid, k6\n"
1471af69d88dSmrg                "       IF tmp\n"
1472af69d88dSmrg                "               ATOMUMIN tmp.x, target, offset, karg\n"
1473af69d88dSmrg                "               ATOMUMIN tmp.x, target, offset, tmp\n"
1474af69d88dSmrg                "       ENDIF\n"
1475af69d88dSmrg                "       USEQ tmp.x, threadid, k7\n"
1476af69d88dSmrg                "       IF tmp\n"
1477af69d88dSmrg                "               ATOMUMAX tmp.x, target, offset, karg\n"
1478af69d88dSmrg                "               ATOMUMAX tmp.x, target, offset, tmp\n"
1479af69d88dSmrg                "       ENDIF\n"
1480af69d88dSmrg                "       USEQ tmp.x, threadid, k8\n"
1481af69d88dSmrg                "       IF tmp\n"
1482af69d88dSmrg                "               ATOMIMIN tmp.x, target, offset, karg\n"
1483af69d88dSmrg                "               ATOMIMIN tmp.x, target, offset, tmp\n"
1484af69d88dSmrg                "       ENDIF\n"
1485af69d88dSmrg                "       USEQ tmp.x, threadid, k9\n"
1486af69d88dSmrg                "       IF tmp\n"
1487af69d88dSmrg                "               ATOMIMAX tmp.x, target, offset, karg\n"
1488af69d88dSmrg                "               ATOMIMAX tmp.x, target, offset, tmp\n"
1489af69d88dSmrg                "       ENDIF\n"
1490af69d88dSmrg                "#ifdef TARGET_LOCAL\n"
1491af69d88dSmrg                "       LOAD tmp.x, RLOCAL, offset\n"
1492af69d88dSmrg                "       STORE RES[0].x, offset, tmp\n"
1493af69d88dSmrg                "#endif\n"
1494af69d88dSmrg                "       RET\n"
1495af69d88dSmrg                "    ENDSUB\n";
1496af69d88dSmrg
1497af69d88dSmrg        printf("- %s (%s)\n", __func__, global ? "global" : "local");
1498af69d88dSmrg
1499af69d88dSmrg        init_prog(ctx, 40, 0, 0, src,
1500af69d88dSmrg                  (global ? "-DTARGET_GLOBAL" : "-DTARGET_LOCAL"));
1501af69d88dSmrg        init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
150201e04c3fSmrg                 40, 0, test_atom_ops_init);
1503af69d88dSmrg        init_compute_resources(ctx, (int []) { 0, -1 });
1504af69d88dSmrg        launch_grid(ctx, (uint []){10, 1, 1}, (uint []){1, 1, 1}, 0, NULL);
150501e04c3fSmrg        check_tex(ctx, 0, test_atom_ops_expect, NULL);
1506af69d88dSmrg        destroy_compute_resources(ctx);
1507af69d88dSmrg        destroy_tex(ctx);
1508af69d88dSmrg        destroy_prog(ctx);
1509af69d88dSmrg}
1510af69d88dSmrg
151101e04c3fSmrg/* test_atom_race */
/*
 * Reference-value callback handed to check_tex() by test_atom_race():
 * writes 0x11111111 into the 32-bit word at p when bit 5 (0x20) of x is
 * set and 0xffffffff otherwise.  The s and y arguments are ignored.
 */
static void test_atom_race_expect(void *p, int s, int x, int y)
{
        uint32_t *word = p;

        if (x & 0x20)
                *word = 0x11111111;
        else
                *word = 0xffffffff;
}
151601e04c3fSmrg
1517af69d88dSmrgstatic void test_atom_race(struct context *ctx, bool global)
1518af69d88dSmrg{
1519af69d88dSmrg        const char *src = "COMP\n"
1520af69d88dSmrg                "#ifdef TARGET_GLOBAL\n"
1521af69d88dSmrg                "#define target RES[0]\n"
1522af69d88dSmrg                "#else\n"
1523af69d88dSmrg                "#define target RLOCAL\n"
1524af69d88dSmrg                "#endif\n"
1525af69d88dSmrg                ""
1526af69d88dSmrg                "DCL RES[0], BUFFER, RAW, WR\n"
1527af69d88dSmrg                ""
1528af69d88dSmrg                "#define blockid SV[0]\n"
1529af69d88dSmrg                "DCL blockid, BLOCK_ID[0]\n"
1530af69d88dSmrg                "#define blocksz SV[1]\n"
1531af69d88dSmrg                "DCL blocksz, BLOCK_SIZE[0]\n"
1532af69d88dSmrg                "#define threadid SV[2]\n"
1533af69d88dSmrg                "DCL threadid, THREAD_ID[0]\n"
1534af69d88dSmrg                ""
1535af69d88dSmrg                "#define offset TEMP[0]\n"
1536af69d88dSmrg                "DCL offset, LOCAL\n"
1537af69d88dSmrg                "#define arg TEMP[1]\n"
1538af69d88dSmrg                "DCL arg, LOCAL\n"
1539af69d88dSmrg                "#define count TEMP[2]\n"
1540af69d88dSmrg                "DCL count, LOCAL\n"
1541af69d88dSmrg                "#define vlocal TEMP[3]\n"
1542af69d88dSmrg                "DCL vlocal, LOCAL\n"
1543af69d88dSmrg                "#define vshared TEMP[4]\n"
1544af69d88dSmrg                "DCL vshared, LOCAL\n"
1545af69d88dSmrg                "#define last TEMP[5]\n"
1546af69d88dSmrg                "DCL last, LOCAL\n"
1547af69d88dSmrg                "#define tmp0 TEMP[6]\n"
1548af69d88dSmrg                "DCL tmp0, LOCAL\n"
1549af69d88dSmrg                "#define tmp1 TEMP[7]\n"
1550af69d88dSmrg                "DCL tmp1, LOCAL\n"
1551af69d88dSmrg                ""
1552af69d88dSmrg                "#define k0 IMM[0]\n"
1553af69d88dSmrg                "IMM UINT32 { 0, 0, 0, 0 }\n"
1554af69d88dSmrg                "#define k1 IMM[1]\n"
1555af69d88dSmrg                "IMM UINT32 { 1, 0, 0, 0 }\n"
1556af69d88dSmrg                "#define k4 IMM[2]\n"
1557af69d88dSmrg                "IMM UINT32 { 4, 0, 0, 0 }\n"
1558af69d88dSmrg                "#define k32 IMM[3]\n"
1559af69d88dSmrg                "IMM UINT32 { 32, 0, 0, 0 }\n"
1560af69d88dSmrg                "#define k128 IMM[4]\n"
1561af69d88dSmrg                "IMM UINT32 { 128, 0, 0, 0 }\n"
1562af69d88dSmrg                "#define kdeadcafe IMM[5]\n"
1563af69d88dSmrg                "IMM UINT32 { 3735931646, 0, 0, 0 }\n"
1564af69d88dSmrg                "#define kallowed_set IMM[6]\n"
1565af69d88dSmrg                "IMM UINT32 { 559035650, 0, 0, 0 }\n"
1566af69d88dSmrg                "#define k11111111 IMM[7]\n"
1567af69d88dSmrg                "IMM UINT32 { 286331153, 0, 0, 0 }\n"
1568af69d88dSmrg                "\n"
1569af69d88dSmrg                "    BGNSUB\n"
1570af69d88dSmrg                "       MOV offset.x, threadid\n"
1571af69d88dSmrg                "#ifdef TARGET_GLOBAL\n"
1572af69d88dSmrg                "       UMUL tmp0.x, blockid, blocksz\n"
1573af69d88dSmrg                "       UADD offset.x, offset, tmp0\n"
1574af69d88dSmrg                "#endif\n"
1575af69d88dSmrg                "       UMUL offset.x, offset, k4\n"
1576af69d88dSmrg                "       USLT tmp0.x, threadid, k32\n"
1577af69d88dSmrg                "       STORE target.x, offset, k0\n"
1578af69d88dSmrg                "       BARRIER\n"
1579af69d88dSmrg                "       IF tmp0\n"
1580af69d88dSmrg                "               MOV vlocal.x, k0\n"
1581af69d88dSmrg                "               MOV arg.x, kdeadcafe\n"
1582af69d88dSmrg                "               BGNLOOP\n"
1583af69d88dSmrg                "                       INEG arg.x, arg\n"
1584af69d88dSmrg                "                       ATOMUADD vshared.x, target, offset, arg\n"
1585af69d88dSmrg                "                       SFENCE target\n"
1586af69d88dSmrg                "                       USNE tmp0.x, vshared, vlocal\n"
1587af69d88dSmrg                "                       IF tmp0\n"
1588af69d88dSmrg                "                               BRK\n"
1589af69d88dSmrg                "                       ENDIF\n"
1590af69d88dSmrg                "                       UADD vlocal.x, vlocal, arg\n"
1591af69d88dSmrg                "               ENDLOOP\n"
1592af69d88dSmrg                "               UADD vlocal.x, vshared, arg\n"
1593af69d88dSmrg                "               LOAD vshared.x, target, offset\n"
1594af69d88dSmrg                "               USEQ tmp0.x, vshared, vlocal\n"
1595af69d88dSmrg                "               STORE target.x, offset, tmp0\n"
1596af69d88dSmrg                "       ELSE\n"
1597af69d88dSmrg                "               UADD offset.x, offset, -k128\n"
1598af69d88dSmrg                "               MOV count.x, k0\n"
1599af69d88dSmrg                "               MOV last.x, k0\n"
1600af69d88dSmrg                "               BGNLOOP\n"
1601af69d88dSmrg                "                       LOAD vshared.x, target, offset\n"
1602af69d88dSmrg                "                       USEQ tmp0.x, vshared, kallowed_set.xxxx\n"
1603af69d88dSmrg                "                       USEQ tmp1.x, vshared, kallowed_set.yyyy\n"
1604af69d88dSmrg                "                       OR tmp0.x, tmp0, tmp1\n"
1605af69d88dSmrg                "                       IF tmp0\n"
1606af69d88dSmrg                "                               USEQ tmp0.x, vshared, last\n"
1607af69d88dSmrg                "                               IF tmp0\n"
1608af69d88dSmrg                "                                       CONT\n"
1609af69d88dSmrg                "                               ENDIF\n"
1610af69d88dSmrg                "                               MOV last.x, vshared\n"
1611af69d88dSmrg                "                       ELSE\n"
1612af69d88dSmrg                "                               END\n"
1613af69d88dSmrg                "                       ENDIF\n"
1614af69d88dSmrg                "                       UADD count.x, count, k1\n"
1615af69d88dSmrg                "                       USEQ tmp0.x, count, k128\n"
1616af69d88dSmrg                "                       IF tmp0\n"
1617af69d88dSmrg                "                               BRK\n"
1618af69d88dSmrg                "                       ENDIF\n"
1619af69d88dSmrg                "               ENDLOOP\n"
1620af69d88dSmrg                "               ATOMXCHG tmp0.x, target, offset, k11111111\n"
1621af69d88dSmrg                "               UADD offset.x, offset, k128\n"
1622af69d88dSmrg                "               ATOMXCHG tmp0.x, target, offset, k11111111\n"
1623af69d88dSmrg                "               SFENCE target\n"
1624af69d88dSmrg                "       ENDIF\n"
1625af69d88dSmrg                "#ifdef TARGET_LOCAL\n"
1626af69d88dSmrg                "       LOAD tmp0.x, RLOCAL, offset\n"
1627af69d88dSmrg                "       UMUL tmp1.x, blockid, blocksz\n"
1628af69d88dSmrg                "       UMUL tmp1.x, tmp1, k4\n"
1629af69d88dSmrg                "       UADD offset.x, offset, tmp1\n"
1630af69d88dSmrg                "       STORE RES[0].x, offset, tmp0\n"
1631af69d88dSmrg                "#endif\n"
1632af69d88dSmrg                "       RET\n"
1633af69d88dSmrg                "    ENDSUB\n";
1634af69d88dSmrg
1635af69d88dSmrg        printf("- %s (%s)\n", __func__, global ? "global" : "local");
1636af69d88dSmrg
1637af69d88dSmrg        init_prog(ctx, 256, 0, 0, src,
1638af69d88dSmrg                  (global ? "-DTARGET_GLOBAL" : "-DTARGET_LOCAL"));
1639af69d88dSmrg        init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
164001e04c3fSmrg                 4096, 0, test_default_init);
1641af69d88dSmrg        init_compute_resources(ctx, (int []) { 0, -1 });
1642af69d88dSmrg        launch_grid(ctx, (uint []){64, 1, 1}, (uint []){16, 1, 1}, 0, NULL);
164301e04c3fSmrg        check_tex(ctx, 0, test_atom_race_expect, NULL);
1644af69d88dSmrg        destroy_compute_resources(ctx);
1645af69d88dSmrg        destroy_tex(ctx);
1646af69d88dSmrg        destroy_prog(ctx);
1647af69d88dSmrg}
1648af69d88dSmrg
1649af69d88dSmrgint main(int argc, char *argv[])
1650af69d88dSmrg{
1651af69d88dSmrg        struct context *ctx = CALLOC_STRUCT(context);
1652af69d88dSmrg
1653af69d88dSmrg        unsigned tests = (argc > 1) ? strtoul(argv[1], NULL, 0) : ~0;
1654af69d88dSmrg
1655af69d88dSmrg        init_ctx(ctx);
1656af69d88dSmrg
1657af69d88dSmrg        if (tests & (1 << 0))
1658af69d88dSmrg           test_system_values(ctx);
1659af69d88dSmrg        if (tests & (1 << 1))
1660af69d88dSmrg           test_resource_access(ctx);
1661af69d88dSmrg        if (tests & (1 << 2))
1662af69d88dSmrg           test_function_calls(ctx);
1663af69d88dSmrg        if (tests & (1 << 3))
1664af69d88dSmrg           test_input_global(ctx);
1665af69d88dSmrg        if (tests & (1 << 4))
1666af69d88dSmrg           test_private(ctx);
1667af69d88dSmrg        if (tests & (1 << 5))
1668af69d88dSmrg           test_local(ctx);
1669af69d88dSmrg        if (tests & (1 << 6))
1670af69d88dSmrg           test_sample(ctx);
1671af69d88dSmrg        if (tests & (1 << 7))
1672af69d88dSmrg           test_many_kern(ctx);
1673af69d88dSmrg        if (tests & (1 << 8))
1674af69d88dSmrg           test_constant(ctx);
1675af69d88dSmrg        if (tests & (1 << 9))
1676af69d88dSmrg           test_resource_indirect(ctx);
1677af69d88dSmrg        if (tests & (1 << 10))
1678af69d88dSmrg           test_surface_ld(ctx);
1679af69d88dSmrg        if (tests & (1 << 11))
1680af69d88dSmrg           test_surface_st(ctx);
1681af69d88dSmrg        if (tests & (1 << 12))
1682af69d88dSmrg           test_barrier(ctx);
1683af69d88dSmrg        if (tests & (1 << 13))
1684af69d88dSmrg           test_atom_ops(ctx, true);
1685af69d88dSmrg        if (tests & (1 << 14))
1686af69d88dSmrg           test_atom_race(ctx, true);
1687af69d88dSmrg        if (tests & (1 << 15))
1688af69d88dSmrg           test_atom_ops(ctx, false);
1689af69d88dSmrg        if (tests & (1 << 16))
1690af69d88dSmrg           test_atom_race(ctx, false);
1691af69d88dSmrg
1692af69d88dSmrg        destroy_ctx(ctx);
1693af69d88dSmrg
1694af69d88dSmrg        return 0;
1695af69d88dSmrg}
1696